@swarmclawai/swarmclaw 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +19 -76
  2. package/package.json +1 -1
  3. package/skills/swarmclaw.md +17 -0
  4. package/src/app/api/agents/[id]/dream/route.ts +45 -0
  5. package/src/app/api/knowledge/[id]/route.ts +48 -49
  6. package/src/app/api/knowledge/hygiene/route.ts +13 -0
  7. package/src/app/api/knowledge/route.ts +70 -42
  8. package/src/app/api/knowledge/sources/[id]/archive/route.ts +15 -0
  9. package/src/app/api/knowledge/sources/[id]/restore/route.ts +10 -0
  10. package/src/app/api/knowledge/sources/[id]/route.ts +1 -0
  11. package/src/app/api/knowledge/sources/[id]/supersede/route.ts +26 -0
  12. package/src/app/api/knowledge/sources/[id]/sync/route.ts +17 -0
  13. package/src/app/api/knowledge/sources/route.ts +1 -0
  14. package/src/app/api/knowledge/upload/route.ts +3 -51
  15. package/src/app/api/memory/dream/[id]/route.ts +19 -0
  16. package/src/app/api/memory/dream/route.ts +34 -0
  17. package/src/app/knowledge/layout.tsx +1 -1
  18. package/src/app/knowledge/page.tsx +2 -22
  19. package/src/app/protocols/page.tsx +21 -2
  20. package/src/cli/index.js +16 -0
  21. package/src/cli/spec.js +5 -0
  22. package/src/components/agents/agent-sheet.tsx +65 -0
  23. package/src/components/chat/message-bubble.tsx +10 -0
  24. package/src/components/knowledge/grounding-panel.tsx +99 -0
  25. package/src/components/knowledge/knowledge-detail.tsx +402 -0
  26. package/src/components/knowledge/knowledge-list.tsx +351 -126
  27. package/src/components/knowledge/knowledge-sheet.tsx +208 -119
  28. package/src/components/memory/dream-history.tsx +155 -0
  29. package/src/components/memory/memory-card.tsx +7 -0
  30. package/src/components/memory/memory-detail.tsx +46 -0
  31. package/src/components/runs/run-list.tsx +23 -0
  32. package/src/lib/providers/cli-utils.ts +3 -4
  33. package/src/lib/providers/index.ts +12 -22
  34. package/src/lib/providers/openclaw.ts +1 -2
  35. package/src/lib/server/agents/subagent-swarm.ts +2 -7
  36. package/src/lib/server/api-routes.test.ts +43 -2
  37. package/src/lib/server/chat-execution/chat-execution-grounding.test.ts +127 -0
  38. package/src/lib/server/chat-execution/chat-execution-types.ts +8 -1
  39. package/src/lib/server/chat-execution/chat-execution.ts +1 -0
  40. package/src/lib/server/chat-execution/chat-turn-finalization.ts +23 -6
  41. package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +6 -1
  42. package/src/lib/server/chat-execution/post-stream-finalization.ts +15 -3
  43. package/src/lib/server/chat-execution/prompt-builder.ts +4 -6
  44. package/src/lib/server/chat-execution/prompt-sections.ts +29 -3
  45. package/src/lib/server/chat-execution/stream-agent-chat.ts +6 -1
  46. package/src/lib/server/connectors/openclaw.ts +1 -2
  47. package/src/lib/server/execution-engine/task-attempt.ts +8 -2
  48. package/src/lib/server/knowledge-import.ts +159 -0
  49. package/src/lib/server/knowledge-sources.test.ts +215 -0
  50. package/src/lib/server/knowledge-sources.ts +1266 -0
  51. package/src/lib/server/memory/dream-cycles.ts +49 -0
  52. package/src/lib/server/memory/dream-idle-callback.ts +38 -0
  53. package/src/lib/server/memory/dream-service.ts +315 -0
  54. package/src/lib/server/memory/memory-db.ts +37 -2
  55. package/src/lib/server/protocols/protocol-agent-turn.ts +7 -0
  56. package/src/lib/server/protocols/protocol-run-lifecycle.ts +19 -6
  57. package/src/lib/server/protocols/protocol-service.test.ts +99 -0
  58. package/src/lib/server/protocols/protocol-step-helpers.ts +7 -1
  59. package/src/lib/server/protocols/protocol-step-processors.ts +16 -3
  60. package/src/lib/server/protocols/protocol-types.ts +4 -0
  61. package/src/lib/server/provider-health.ts +2 -7
  62. package/src/lib/server/runtime/daemon-state/core.ts +6 -1
  63. package/src/lib/server/runtime/run-ledger.test.ts +120 -0
  64. package/src/lib/server/runtime/run-ledger.ts +27 -1
  65. package/src/lib/server/runtime/session-run-manager/drain.ts +5 -0
  66. package/src/lib/server/runtime/session-run-manager/state.ts +19 -2
  67. package/src/lib/server/storage-normalization.ts +5 -0
  68. package/src/lib/server/storage.ts +16 -1
  69. package/src/stores/slices/ui-slice.ts +4 -0
  70. package/src/types/agent.ts +7 -0
  71. package/src/types/dream.ts +45 -0
  72. package/src/types/index.ts +1 -0
  73. package/src/types/message.ts +3 -0
  74. package/src/types/misc.ts +131 -0
  75. package/src/types/protocol.ts +4 -0
  76. package/src/types/run.ts +4 -1
@@ -0,0 +1,1266 @@
1
+ import { createHash } from 'crypto'
2
+ import path from 'path'
3
+
4
+ import { genId } from '@/lib/id'
5
+ import type {
6
+ KnowledgeCitation,
7
+ KnowledgeHygieneAction,
8
+ KnowledgeHygieneFinding,
9
+ KnowledgeHygieneSummary,
10
+ KnowledgeSource,
11
+ KnowledgeSourceDetail,
12
+ KnowledgeSourceKind,
13
+ KnowledgeSourceSummary,
14
+ KnowledgeRetrievalTrace,
15
+ KnowledgeSearchHit,
16
+ MemoryEntry,
17
+ } from '@/types'
18
+ import {
19
+ deleteKnowledgeSource as deleteKnowledgeSourceRecord,
20
+ loadKnowledgeSource,
21
+ loadKnowledgeSources,
22
+ patchKnowledgeSource,
23
+ upsertKnowledgeSource,
24
+ } from '@/lib/server/storage'
25
+ import { getMemoryDb } from '@/lib/server/memory/memory-db'
26
+ import {
27
+ deriveKnowledgeTitle,
28
+ extractKnowledgeTextFromFile,
29
+ extractKnowledgeTextFromUrl,
30
+ } from '@/lib/server/knowledge-import'
31
+ import { onNextIdleWindow } from '@/lib/server/runtime/idle-window'
32
+
33
// Tunables for knowledge indexing and retrieval.
// 14 days in milliseconds; isStaleSource compares the age of lastIndexedAt against this.
+ const KNOWLEDGE_STALE_AFTER_MS = 1000 * 60 * 60 * 24 * 14
34
// Upper bound (in characters) for a merged chunk; longer paragraphs are split.
+ const CHUNK_TARGET_CHARS = 2200
35
// Characters of trailing context repeated at the start of the following chunk.
+ const CHUNK_OVERLAP_CHARS = 320
36
// Row limit passed to listByCategory when backfilling legacy knowledge entries.
+ const MAX_KNOWLEDGE_SCAN = 10_000
37
// NOTE(review): not referenced in the visible part of this file — presumably caps hygiene findings; confirm at the use site.
+ const MAX_HYGIENE_FINDINGS = 120
38
// Maximum number of hits buildKnowledgeRetrievalTrace requests from search.
+ const MAX_GROUNDING_HITS = 4
39
+
40
// Caller-supplied fields describing a knowledge source; every field is optional.
// NOTE(review): not used in the visible part of this file — presumably the create/update payload; confirm against callers.
+ interface KnowledgeSourceInput {
41
+ kind?: KnowledgeSourceKind
42
+ title?: string
43
+ content?: string | null
44
+ tags?: string[]
45
+ scope?: 'global' | 'agent'
46
+ agentIds?: string[]
47
+ sourceLabel?: string | null
48
+ sourceUrl?: string | null
49
+ sourcePath?: string | null
50
+ metadata?: Record<string, unknown>
51
+ }
52
+
53
// One chunk of a source's text as produced by the chunking helpers in this file.
// charStart/charEnd are offsets into the CRLF-normalized, trimmed source content.
// chunkIndex/chunkCount are written as 0 while chunks are collected and filled in
// by chunkKnowledgeContent once the full list is known.
+ interface IndexedChunk {
54
+ title: string
55
+ content: string
56
+ chunkIndex: number
57
+ chunkCount: number
58
+ charStart: number
59
+ charEnd: number
60
+ sectionLabel?: string | null
61
+ }
62
+
63
// Module-level state.
// In-flight legacy backfill, shared so concurrent callers await the same run.
+ let backfillPromise: Promise<void> | null = null
64
// Set once the legacy backfill has finished successfully; later calls return immediately.
+ let backfillComplete = false
65
// NOTE(review): not read or written in the visible part of this file — presumably guards one-time idle-callback registration; confirm.
+ let maintenanceRegistered = false
66
// NOTE(review): not read or written in the visible part of this file — presumably a log of maintenance actions; confirm.
+ let maintenanceHistory: KnowledgeHygieneAction[] = []
67
+
68
/** Returns the trimmed input when it is a string, and '' for any other value. */
function normalizeText(value: unknown): string {
  if (typeof value !== 'string') return ''
  return value.trim()
}
71
+
72
/** Like normalizeText, but reports empty or non-string input as null. */
function normalizeOptionalText(value: unknown): string | null {
  const text = normalizeText(value)
  return text === '' ? null : text
}
76
+
77
/**
 * Cleans a tag list: keeps only strings, trims them, drops empties, and
 * de-duplicates case-insensitively while keeping the first spelling seen.
 * Non-array input yields an empty list.
 */
function normalizeTags(tags: unknown): string[] {
  if (!Array.isArray(tags)) return []
  // Map keeps insertion order, so output order follows first occurrence.
  const firstSpelling = new Map<string, string>()
  for (const candidate of tags) {
    if (typeof candidate !== 'string') continue
    const label = candidate.trim()
    if (label === '') continue
    const key = label.toLowerCase()
    if (!firstSpelling.has(key)) firstSpelling.set(key, label)
  }
  return Array.from(firstSpelling.values())
}
91
+
92
/**
 * True when the filter is empty, or when at least one filter tag equals one
 * of the source tags ignoring case.
 */
function matchesTagFilter(sourceTags: string[], filterTags: string[]): boolean {
  if (filterTags.length === 0) return true
  const available = new Set<string>()
  for (const tag of sourceTags) available.add(tag.toLowerCase())
  for (const wanted of filterTags) {
    if (available.has(wanted.toLowerCase())) return true
  }
  return false
}
97
+
98
/**
 * Cleans an agent id list: keeps only strings, trims them, drops empties and
 * exact duplicates (case-sensitive), preserving first-seen order.
 * Non-array input yields an empty list.
 */
function normalizeAgentIds(agentIds: unknown): string[] {
  if (!Array.isArray(agentIds)) return []
  const unique = new Set<string>()
  for (const candidate of agentIds) {
    if (typeof candidate !== 'string') continue
    const id = candidate.trim()
    if (id) unique.add(id)
  }
  return Array.from(unique)
}
111
+
112
/** Maps anything other than the literal 'agent' to 'global'. */
function normalizeScope(scope: unknown): 'global' | 'agent' {
  if (scope === 'agent') return 'agent'
  return 'global'
}
115
+
116
/** Accepts 'file' and 'url' as-is; every other value becomes 'manual'. */
function normalizeKind(kind: unknown): KnowledgeSourceKind {
  if (kind === 'file') return 'file'
  if (kind === 'url') return 'url'
  return 'manual'
}
119
+
120
/** Hex-encoded SHA-256 digest of the given text. */
function contentHash(content: string): string {
  const hasher = createHash('sha256')
  hasher.update(content)
  return hasher.digest('hex')
}
123
+
124
/**
 * Decides whether a source should be flagged as stale.
 *
 * Archived or superseded sources are never stale. A source in the 'error'
 * sync state is always stale. Manual sources are otherwise never stale.
 * File and URL sources are stale when they have no usable lastIndexedAt or
 * when it is older than KNOWLEDGE_STALE_AFTER_MS.
 */
function isStaleSource(source: KnowledgeSource): boolean {
  const retired = Boolean(source.archivedAt || source.supersededBySourceId)
  if (retired) return false
  if (source.syncStatus === 'error') return true
  if (source.kind === 'manual') return false

  const { lastIndexedAt } = source
  // A missing, zero, or NaN timestamp counts as "never indexed".
  if (typeof lastIndexedAt !== 'number' || !lastIndexedAt) return true
  const age = Date.now() - lastIndexedAt
  return age > KNOWLEDGE_STALE_AFTER_MS
}
132
+
133
/**
 * Rebuilds a stored source record field by field so every property has the
 * expected type: text fields are trimmed (empty becomes null), numeric fields
 * fall back to null, 0, or the current time, list fields are de-duplicated,
 * and an unrecognised kind, scope, or sync status falls back to 'manual',
 * 'global', or 'ready'.
 */
function coerceSource(source: KnowledgeSource): KnowledgeSource {
  const now = Date.now()
  const numberOrNull = (value: unknown): number | null => (typeof value === 'number' ? value : null)
  const numberOr = (value: unknown, fallback: number): number => (typeof value === 'number' ? value : fallback)
  const { syncStatus, metadata } = source

  return {
    id: source.id,
    kind: normalizeKind(source.kind),
    title: normalizeText(source.title) || 'Knowledge Source',
    content: typeof source.content === 'string' ? source.content : null,
    sourceLabel: normalizeOptionalText(source.sourceLabel),
    sourceUrl: normalizeOptionalText(source.sourceUrl),
    sourcePath: normalizeOptionalText(source.sourcePath),
    sourceHash: normalizeOptionalText(source.sourceHash),
    scope: normalizeScope(source.scope),
    agentIds: normalizeAgentIds(source.agentIds),
    tags: normalizeTags(source.tags),
    syncStatus: syncStatus === 'syncing' || syncStatus === 'error' ? syncStatus : 'ready',
    lastIndexedAt: numberOrNull(source.lastIndexedAt),
    lastSyncedAt: numberOrNull(source.lastSyncedAt),
    lastError: normalizeOptionalText(source.lastError),
    archivedAt: numberOrNull(source.archivedAt),
    archivedReason: normalizeOptionalText(source.archivedReason),
    duplicateOfSourceId: normalizeOptionalText(source.duplicateOfSourceId),
    supersededBySourceId: normalizeOptionalText(source.supersededBySourceId),
    maintenanceUpdatedAt: numberOrNull(source.maintenanceUpdatedAt),
    maintenanceNotes: normalizeOptionalText(source.maintenanceNotes),
    nextSyncAt: numberOrNull(source.nextSyncAt),
    lastAutoSyncAt: numberOrNull(source.lastAutoSyncAt),
    chunkCount: numberOr(source.chunkCount, 0),
    contentLength: numberOr(source.contentLength, 0),
    createdAt: numberOr(source.createdAt, now),
    updatedAt: numberOr(source.updatedAt, now),
    metadata: metadata && typeof metadata === 'object' ? metadata : undefined,
  }
}
166
+
167
/** True when archivedAt is a positive number. */
function sourceIsArchived(source: KnowledgeSource): boolean {
  const { archivedAt } = source
  if (typeof archivedAt !== 'number') return false
  return archivedAt > 0
}
170
+
171
/** True when supersededBySourceId is a non-blank string. */
function sourceIsSuperseded(source: KnowledgeSource): boolean {
  const successorId = source.supersededBySourceId
  if (typeof successorId !== 'string') return false
  return successorId.trim() !== ''
}
174
+
175
/** Archived and superseded sources are excluded unless a caller opts in. */
function sourceIsExcludedByDefault(source: KnowledgeSource): boolean {
  if (sourceIsArchived(source)) return true
  return sourceIsSuperseded(source)
}
178
+
179
/**
 * Global sources are visible to everyone. Agent-scoped sources are visible
 * only to a viewer whose id is listed in agentIds; with no viewer id they
 * are not visible.
 */
function sourceVisibleToAgent(source: KnowledgeSource, viewerAgentId?: string | null): boolean {
  if (source.scope === 'global') return true
  return viewerAgentId ? source.agentIds.includes(viewerAgentId) : false
}
184
+
185
/**
 * Tokenises text for overlap scoring: lower-cases it, treats every run of
 * characters outside a-z / 0-9 as a separator, keeps tokens of at least three
 * characters, and returns each distinct token once in first-seen order.
 */
function cleanKnowledgeTokens(value: string): string[] {
  const words = String(value || '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, ' ')
    .split(/\s+/)
  const unique = new Set<string>()
  for (const word of words) {
    const token = word.trim()
    if (token.length >= 3) unique.add(token)
  }
  return Array.from(unique)
}
195
+
196
/**
 * Fraction of the distinct tokens of `left` that also occur in `right`
 * (0 when either side has no tokens). The score is asymmetric: it is
 * normalised by the token count of `left` only.
 */
function tokenOverlapScore(left: string, right: string): number {
  const leftTokens = cleanKnowledgeTokens(left)
  const rightTokens = new Set(cleanKnowledgeTokens(right))
  if (leftTokens.length === 0 || rightTokens.size === 0) return 0
  const shared = leftTokens.filter((token) => rightTokens.has(token)).length
  return shared / Math.max(leftTokens.length, 1)
}
206
+
207
/**
 * Jaccard index over the distinct tokens of both texts: shared tokens divided
 * by the size of the union. Returns 0 when either side has no tokens.
 */
function jaccardSimilarity(left: string, right: string): number {
  const leftTokens = new Set(cleanKnowledgeTokens(left))
  const rightTokens = new Set(cleanKnowledgeTokens(right))
  if (leftTokens.size === 0 || rightTokens.size === 0) return 0
  const shared = Array.from(leftTokens).filter((token) => rightTokens.has(token)).length
  const unionSize = leftTokens.size + rightTokens.size - shared
  if (unionSize <= 0) return 0
  return shared / unionSize
}
218
+
219
/**
 * Builds a short human-readable explanation for a hit.
 *
 * Lists up to four query tokens found in the title, section label, or
 * content. If none match, it names the section when one is known, and
 * otherwise returns a generic message.
 */
function whyMatched(query: string, title: string, content: string, sectionLabel?: string | null): string {
  const haystack = new Set(cleanKnowledgeTokens([title, sectionLabel || '', content].join('\n')))
  const matched = cleanKnowledgeTokens(query).filter((token) => haystack.has(token))

  if (matched.length > 0) {
    const shown = matched.slice(0, 4).join(', ')
    const more = matched.length > 4 ? ', ...' : ''
    return `Matched query terms: ${shown}${more}`
  }

  const section = sectionLabel?.trim()
  return section ? `Matched the ${section} section` : 'Retrieved as a high-relevance knowledge chunk'
}
231
+
232
/**
 * Projects a search hit onto the citation shape. The hit id becomes chunkId,
 * and empty optional fields are normalised to null.
 */
function toCitation(hit: KnowledgeSearchHit): KnowledgeCitation {
  const { sourceId, sourceTitle, sourceKind, chunkIndex, chunkCount, charStart, charEnd, snippet, score } = hit
  return {
    sourceId,
    sourceTitle,
    sourceKind,
    sourceUrl: hit.sourceUrl || null,
    sourceLabel: hit.sourceLabel || null,
    chunkId: hit.id,
    chunkIndex,
    chunkCount,
    charStart,
    charEnd,
    sectionLabel: hit.sectionLabel || null,
    snippet,
    whyMatched: hit.whyMatched || null,
    score,
  }
}
250
+
251
/** Loads every stored source, normalises it, and orders by updatedAt, newest first. */
function listStoredSources(): KnowledgeSource[] {
  const sources: KnowledgeSource[] = []
  for (const stored of Object.values(loadKnowledgeSources())) {
    sources.push(coerceSource(stored))
  }
  sources.sort((a, b) => b.updatedAt - a.updatedAt)
  return sources
}
256
+
257
/**
 * Derives a display title from a URL: the last path segment passed through
 * deriveKnowledgeTitle, or the hostname when the path has no segment.
 * Returns the input unchanged if anything in that process throws.
 */
function sourceTitleFromUrl(sourceUrl: string): string {
  try {
    const { pathname, hostname } = new URL(sourceUrl)
    const fileName = path.basename(pathname || '')
    if (!fileName) return hostname
    return deriveKnowledgeTitle(fileName)
  } catch {
    return sourceUrl
  }
}
266
+
267
/** Hostname of the URL, or null when it is empty or the URL cannot be parsed. */
function sourceLabelFromUrl(sourceUrl: string): string | null {
  try {
    const { hostname } = new URL(sourceUrl)
    return hostname ? hostname : null
  } catch {
    return null
  }
}
275
+
276
/**
 * Finds the first Markdown ATX heading (one to six '#' followed by
 * whitespace) on any line of the text and returns its trimmed label,
 * or null when there is none.
 */
function headingLabel(text: string): string | null {
  const heading = /^#{1,6}\s+(.+)$/m.exec(text)
  if (!heading) return null
  const label = heading[1]?.trim()
  return label || null
}
280
+
281
/**
 * Produces a short single-line preview of `content`.
 *
 * Whitespace runs are collapsed to single spaces first. Without a query (or
 * when no query term is found) the first 180 characters are returned.
 * Otherwise the preview is a window from 80 characters before to 220
 * characters after the earliest query-term match, with '…' marking each
 * truncated side.
 *
 * Query terms are whitespace-separated, lower-cased words of at least three
 * characters. Each word is tried both verbatim and with leading/trailing
 * punctuation removed, so "deployment?" still locates "deployment" in the
 * content, while terms such as "c++" keep matching literally.
 *
 * @param content Text to preview; falsy values yield ''.
 * @param query Optional search text used to centre the preview.
 * @returns The preview string, possibly empty.
 */
function previewSnippet(content: string, query?: string): string {
  const normalized = String(content || '').replace(/\s+/g, ' ').trim()
  if (!normalized) return ''
  if (!query) return normalized.slice(0, 180)

  // Strips non letter/digit characters from both ends of a word (Unicode-aware).
  const edgePunctuation = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu
  const queryTokens = new Set<string>()
  for (const raw of query.toLowerCase().split(/\s+/)) {
    for (const token of [raw, raw.replace(edgePunctuation, '')]) {
      if (token.length >= 3) queryTokens.add(token)
    }
  }

  const lower = normalized.toLowerCase()
  let matchIndex = -1
  for (const token of queryTokens) {
    const idx = lower.indexOf(token)
    if (idx !== -1 && (matchIndex === -1 || idx < matchIndex)) {
      matchIndex = idx
    }
  }

  if (matchIndex === -1) return normalized.slice(0, 180)
  const start = Math.max(0, matchIndex - 80)
  const end = Math.min(normalized.length, matchIndex + 220)
  const prefix = start > 0 ? '…' : ''
  const suffix = end < normalized.length ? '…' : ''
  return `${prefix}${normalized.slice(start, end)}${suffix}`
}
310
+
311
/**
 * Splits text into paragraphs separated by one or more blank lines.
 *
 * The input is CRLF-normalised and trimmed first. Each paragraph carries its
 * trimmed text, its start/end offsets into that normalised string, and the
 * label of the most recent Markdown heading seen so far (including one inside
 * the paragraph itself), or null before any heading.
 */
function splitParagraphs(content: string): Array<{
  text: string
  start: number
  end: number
  sectionLabel: string | null
}> {
  const normalized = content.replace(/\r\n/g, '\n').trim()
  if (!normalized) return []

  // First pass: raw [start, end) spans between blank-line separators.
  const spans: Array<[number, number]> = []
  const separator = /\n{2,}/g
  let from = 0
  for (let gap = separator.exec(normalized); gap !== null; gap = separator.exec(normalized)) {
    spans.push([from, gap.index])
    from = gap.index + gap[0].length
  }
  spans.push([from, normalized.length])

  // Second pass: trim each span and track the running section heading.
  const paragraphs: Array<{ text: string; start: number; end: number; sectionLabel: string | null }> = []
  let currentSection: string | null = null
  for (const [rawStart, rawEnd] of spans) {
    const raw = normalized.slice(rawStart, rawEnd)
    const text = raw.trim()
    if (!text) continue
    const leading = raw.length - raw.trimStart().length
    const trailing = raw.length - raw.trimEnd().length
    currentSection = headingLabel(text) || currentSection
    paragraphs.push({
      text,
      start: rawStart + leading,
      end: rawEnd - trailing,
      sectionLabel: currentSection,
    })
  }
  return paragraphs
}
349
+
350
/**
 * Cuts one over-long paragraph into overlapping chunks.
 *
 * Each slice spans at most CHUNK_TARGET_CHARS characters and is pulled back
 * to the last space when that space lies more than 400 characters into the
 * slice. The next slice starts CHUNK_OVERLAP_CHARS before the previous end
 * (always advancing by at least one character). Offsets are absolute,
 * based on paragraph.start. chunkIndex/chunkCount are left at 0 for the
 * caller to fill in.
 */
function splitOversizedParagraph(
  paragraph: { text: string; start: number; end: number; sectionLabel: string | null },
  sourceTitle: string,
): IndexedChunk[] {
  const { text, start: paragraphStart, sectionLabel } = paragraph
  const title = sectionLabel ? `${sourceTitle} · ${sectionLabel}` : sourceTitle
  const pieces: IndexedChunk[] = []
  let offset = 0

  while (offset < text.length) {
    let sliceEnd = Math.min(text.length, offset + CHUNK_TARGET_CHARS)
    if (sliceEnd < text.length) {
      const lastSpace = text.lastIndexOf(' ', sliceEnd)
      if (lastSpace > offset + 400) sliceEnd = lastSpace
    }

    const raw = text.slice(offset, sliceEnd)
    const trimmed = raw.trim()
    if (trimmed) {
      const leading = raw.length - raw.trimStart().length
      const trailing = raw.length - raw.trimEnd().length
      pieces.push({
        title,
        content: trimmed,
        chunkIndex: 0,
        chunkCount: 0,
        charStart: paragraphStart + offset + leading,
        charEnd: paragraphStart + sliceEnd - trailing,
        sectionLabel,
      })
    }

    if (sliceEnd >= text.length) break
    offset = Math.max(offset + 1, sliceEnd - CHUNK_OVERLAP_CHARS)
  }

  return pieces
}
388
+
389
/**
 * Splits source text into index chunks.
 *
 * Paragraphs are merged greedily (joined by a blank line) while the result
 * stays within CHUNK_TARGET_CHARS; a paragraph longer than that is handed to
 * splitOversizedParagraph. Consecutive merged chunks share trailing
 * paragraphs as overlap, aiming for about CHUNK_OVERLAP_CHARS characters.
 *
 * Overlap is only taken when the overlapping paragraphs still fit together
 * with the next unconsumed paragraph. Otherwise the following chunk would
 * contain nothing but text already present in the previous chunk and would
 * be indexed as a redundant duplicate.
 *
 * @param sourceTitle Title used for chunk titles (suffixed with the section label when known).
 * @param content Raw source text; CRLF is normalised and the text trimmed.
 * @returns Chunks with chunkIndex/chunkCount assigned; empty for blank content.
 */
function chunkKnowledgeContent(sourceTitle: string, content: string): IndexedChunk[] {
  const normalized = content.replace(/\r\n/g, '\n').trim()
  if (!normalized) return []

  const paragraphs = splitParagraphs(normalized)
  if (paragraphs.length === 0) return []

  const chunks: IndexedChunk[] = []
  let index = 0

  while (index < paragraphs.length) {
    const firstIndex = index
    const first = paragraphs[index]

    if (first.text.length > CHUNK_TARGET_CHARS) {
      chunks.push(...splitOversizedParagraph(first, sourceTitle))
      index += 1
      continue
    }

    let combined = first.text
    const charStart = first.start
    let charEnd = first.end
    let sectionLabel = first.sectionLabel
    let nextIndex = index + 1

    // Greedily absorb following paragraphs while the merged text still fits.
    while (nextIndex < paragraphs.length) {
      const nextParagraph = paragraphs[nextIndex]
      if (nextParagraph.text.length > CHUNK_TARGET_CHARS) break
      const candidate = `${combined}\n\n${nextParagraph.text}`
      if (candidate.length > CHUNK_TARGET_CHARS) break
      combined = candidate
      charEnd = nextParagraph.end
      sectionLabel = sectionLabel || nextParagraph.sectionLabel
      nextIndex += 1
    }

    chunks.push({
      title: sectionLabel ? `${sourceTitle} · ${sectionLabel}` : sourceTitle,
      content: combined,
      chunkIndex: 0,
      chunkCount: 0,
      charStart,
      charEnd,
      sectionLabel,
    })

    if (nextIndex >= paragraphs.length) break

    // Walk back over the tail of this chunk to pick overlap paragraphs, but
    // only as long as they still fit together with the upcoming paragraph.
    const upcoming = paragraphs[nextIndex]
    let overlapStart = nextIndex
    if (upcoming.text.length <= CHUNK_TARGET_CHARS) {
      let overlapChars = 0
      let projectedLength = upcoming.text.length
      for (let back = nextIndex - 1; back > firstIndex; back--) {
        // +2 accounts for the "\n\n" joiner used when paragraphs are merged.
        const withOverlap = projectedLength + 2 + paragraphs[back].text.length
        if (withOverlap > CHUNK_TARGET_CHARS) break
        projectedLength = withOverlap
        overlapChars += paragraphs[back].text.length
        overlapStart = back
        if (overlapChars >= CHUNK_OVERLAP_CHARS) break
      }
    }
    // overlapStart is always in (firstIndex, nextIndex], so the loop advances.
    index = overlapStart
  }

  const chunkCount = chunks.length
  return chunks.map((chunk, chunkIndex) => ({
    ...chunk,
    chunkIndex,
    chunkCount,
  }))
}
455
+
456
/** Returns the entry's metadata as a plain record, or {} when it is missing or not an object. */
function memorySourceMeta(entry: MemoryEntry): Record<string, unknown> {
  const { metadata } = entry
  if (metadata && typeof metadata === 'object') {
    return metadata as Record<string, unknown>
  }
  return {}
}
461
+
462
/**
 * Extends a source with its `stale` flag and a `topSnippet` preview. The
 * preview comes from the source's own content when it is non-blank,
 * otherwise from the first chunk; it is null when neither has text.
 */
function buildSourceSummary(source: KnowledgeSource, chunks?: MemoryEntry[]): KnowledgeSourceSummary {
  let preview: string
  if (typeof source.content === 'string' && source.content.trim()) {
    preview = source.content
  } else {
    preview = chunks?.[0]?.content || ''
  }

  const topSnippet = preview ? previewSnippet(preview) : null
  return { ...source, stale: isStaleSource(source), topSnippet }
}
474
+
475
/**
 * Combines a source record and one of its indexed chunks into a search hit.
 * Chunk position fields are read from the chunk metadata, with fallbacks
 * (0, the source's chunkCount, or the chunk length) when a field is absent
 * or not a number.
 */
function buildSearchHit(source: KnowledgeSource, entry: MemoryEntry, score: number, query: string): KnowledgeSearchHit {
  const metadata = memorySourceMeta(entry)
  const numberField = (key: string, fallback: number): number => {
    const value = metadata[key]
    return typeof value === 'number' ? value : fallback
  }
  const sectionLabel = typeof metadata.sectionLabel === 'string' ? metadata.sectionLabel : null
  const title = entry.title || source.title

  return {
    id: entry.id,
    sourceId: source.id,
    sourceTitle: source.title,
    sourceKind: source.kind,
    sourceUrl: source.sourceUrl || null,
    sourceLabel: source.sourceLabel || null,
    scope: source.scope,
    agentIds: source.agentIds,
    tags: source.tags,
    syncStatus: source.syncStatus,
    stale: isStaleSource(source),
    title,
    snippet: previewSnippet(entry.content, query),
    content: entry.content,
    chunkIndex: numberField('chunkIndex', 0),
    chunkCount: numberField('chunkCount', source.chunkCount),
    charStart: numberField('charStart', 0),
    // Kept as a lazy fallback so the content length is only read when needed.
    charEnd: typeof metadata.charEnd === 'number' ? metadata.charEnd : entry.content.length,
    sectionLabel,
    score,
    whyMatched: whyMatched(query, title, entry.content, sectionLabel),
    createdAt: entry.createdAt,
    updatedAt: entry.updatedAt,
  }
}
503
+
504
/**
 * Resolves the text to index for a source.
 *
 * Resolution order:
 *   1. a non-blank `overrideContent`, returned as given;
 *   2. manual sources: the stored content (required);
 *   3. file sources: text extracted from `sourcePath`, else stored content;
 *   4. URL sources: text fetched from `sourceUrl`, or stored content when no
 *      URL is set.
 *
 * @throws Error when the source has nothing to resolve content from.
 */
async function resolveSourceContent(
  source: KnowledgeSource,
  overrideContent?: string | null,
): Promise<{ content: string; title: string; sourceLabel?: string | null }> {
  const withStoredLabel = (content: string) => ({
    content,
    title: source.title,
    sourceLabel: source.sourceLabel || null,
  })

  if (typeof overrideContent === 'string' && overrideContent.trim()) {
    return withStoredLabel(overrideContent)
  }

  // Stored content counts only when it is non-blank.
  const stored = typeof source.content === 'string' && source.content.trim() ? source.content : null

  if (source.kind === 'manual') {
    if (stored === null) throw new Error('Content is required for manual knowledge.')
    return withStoredLabel(stored)
  }

  if (source.kind === 'file') {
    if (source.sourcePath) {
      const extractedText = await extractKnowledgeTextFromFile(source.sourcePath, source.sourceLabel || source.title)
      return {
        content: extractedText,
        title: source.title,
        sourceLabel: source.sourceLabel || path.basename(source.sourcePath),
      }
    }
    if (stored !== null) return withStoredLabel(stored)
    throw new Error('A file path or extracted content is required for file knowledge.')
  }

  if (!source.sourceUrl) {
    if (stored !== null) return withStoredLabel(stored)
    throw new Error('A URL is required for URL knowledge.')
  }

  const extracted = await extractKnowledgeTextFromUrl(source.sourceUrl)
  return {
    content: extracted.content,
    title: source.title || extracted.title || sourceTitleFromUrl(source.sourceUrl),
    sourceLabel: source.sourceLabel || extracted.title || sourceLabelFromUrl(source.sourceUrl),
  }
}
562
+
563
/** Agent ids to share chunks with: the source's agentIds for a non-empty agent scope, else undefined. */
function sharedWithForSource(source: KnowledgeSource): string[] | undefined {
  if (source.scope !== 'agent') return undefined
  if (source.agentIds.length === 0) return undefined
  return source.agentIds
}
566
+
567
/**
 * Builds the metadata record stored with an indexed chunk: identifying fields
 * copied from the source, the chunk's position fields, and the current time
 * as `indexedAt`.
 */
function toChunkMetadata(source: KnowledgeSource, chunk: IndexedChunk): Record<string, unknown> {
  const { id, title, kind, tags, scope, agentIds } = source
  const { chunkIndex, chunkCount, charStart, charEnd } = chunk
  return {
    sourceId: id,
    sourceTitle: title,
    sourceKind: kind,
    sourceUrl: source.sourceUrl || null,
    sourceLabel: source.sourceLabel || null,
    tags,
    scope,
    agentIds,
    chunkIndex,
    chunkCount,
    charStart,
    charEnd,
    sectionLabel: chunk.sectionLabel || null,
    indexedAt: Date.now(),
  }
}
585
+
586
/**
 * Replaces the indexed chunks of a source: deletes every chunk currently
 * listed for the source id, then adds the new chunks as 'knowledge' entries
 * and returns the entries that were created.
 */
function replaceSourceChunks(source: KnowledgeSource, chunks: IndexedChunk[]): MemoryEntry[] {
  const db = getMemoryDb()
  const previous = db.listKnowledgeSourceChunks(source.id)
  for (const { id } of previous) {
    db.delete(id)
  }

  const stored: MemoryEntry[] = []
  for (const chunk of chunks) {
    stored.push(db.add({
      agentId: null,
      sessionId: null,
      category: 'knowledge',
      title: chunk.title,
      content: chunk.content,
      metadata: toChunkMetadata(source, chunk),
      sharedWith: sharedWithForSource(source),
    }))
  }
  return stored
}
602
+
603
/**
 * One-time migration of legacy 'knowledge' memory entries into source records.
 *
 * Every knowledge entry without a `sourceId` in its metadata gets a manual
 * source record (reusing the entry id as the source id), and the entry's
 * metadata is updated so it reads as chunk 0 of that source.
 *
 * Concurrent callers share one in-flight run through `backfillPromise`;
 * `backfillComplete` turns later calls into no-ops. If the run fails, the
 * promise rejects and the next call retries.
 *
 * A legacy entry whose content is not a string is treated as empty text for
 * hashing and length purposes. The previous code guarded the hash but read
 * `.length` unguarded, so one such row made every run throw.
 */
async function ensureLegacyKnowledgeBackfill(): Promise<void> {
  if (backfillComplete) return
  if (backfillPromise) return backfillPromise
  backfillPromise = (async () => {
    const db = getMemoryDb()
    const entries = db.listByCategory('knowledge', undefined, MAX_KNOWLEDGE_SCAN)

    for (const entry of entries) {
      const metadata = memorySourceMeta(entry)
      const existingSourceId = typeof metadata.sourceId === 'string' ? metadata.sourceId.trim() : ''
      // Already linked to a source: nothing to migrate.
      if (existingSourceId) continue

      // Single guarded view of the text, used for both the hash and lengths.
      const text = typeof entry.content === 'string' ? entry.content : ''
      const scope = normalizeScope(metadata.scope)
      const agentIds = normalizeAgentIds(metadata.agentIds)
      const sourceId = entry.id
      const source = coerceSource({
        id: sourceId,
        kind: 'manual',
        title: entry.title || 'Knowledge Source',
        content: entry.content,
        sourceLabel: typeof metadata.source === 'string' ? metadata.source : null,
        sourceUrl: typeof metadata.sourceUrl === 'string' ? metadata.sourceUrl : null,
        sourcePath: typeof metadata.sourcePath === 'string' ? metadata.sourcePath : null,
        sourceHash: contentHash(text),
        scope,
        agentIds,
        tags: normalizeTags(metadata.tags),
        syncStatus: 'ready',
        lastIndexedAt: entry.updatedAt,
        lastSyncedAt: entry.updatedAt,
        chunkCount: 1,
        contentLength: text.length,
        createdAt: entry.createdAt,
        updatedAt: entry.updatedAt,
        metadata: {
          legacyMemoryId: entry.id,
          migratedAt: Date.now(),
        },
      })

      upsertKnowledgeSource(sourceId, source)
      db.update(entry.id, {
        sharedWith: sharedWithForSource(source),
        metadata: {
          ...metadata,
          sourceId,
          sourceTitle: source.title,
          sourceKind: source.kind,
          sourceLabel: source.sourceLabel,
          sourceUrl: source.sourceUrl,
          tags: source.tags,
          scope: source.scope,
          agentIds: source.agentIds,
          chunkIndex: typeof metadata.chunkIndex === 'number' ? metadata.chunkIndex : 0,
          chunkCount: typeof metadata.chunkCount === 'number' ? metadata.chunkCount : 1,
          charStart: typeof metadata.charStart === 'number' ? metadata.charStart : 0,
          charEnd: typeof metadata.charEnd === 'number' ? metadata.charEnd : text.length,
          sectionLabel: typeof metadata.sectionLabel === 'string' ? metadata.sectionLabel : null,
          indexedAt: typeof metadata.indexedAt === 'number' ? metadata.indexedAt : entry.updatedAt,
        },
      })
    }
    backfillComplete = true
  })().finally(() => {
    // Clear the in-flight marker whether the run succeeded or failed.
    backfillPromise = null
  })

  return backfillPromise
}
672
+
673
/**
 * Lists source summaries, newest first.
 *
 * @param options.tags Keep only sources carrying at least one of these tags (case-insensitive).
 * @param options.limit Maximum number of results; clamped to 1–500, default 200.
 * @param options.includeArchived When true, archived and superseded sources are included.
 */
export async function listKnowledgeSourceSummaries(options?: {
  tags?: string[]
  limit?: number
  includeArchived?: boolean
}): Promise<KnowledgeSourceSummary[]> {
  await ensureLegacyKnowledgeBackfill()
  registerKnowledgeMaintenanceIdleCallback()

  const tagFilter = normalizeTags(options?.tags)
  const requested = Math.trunc(options?.limit || 200)
  const limit = Math.min(500, Math.max(1, requested))
  const includeArchived = options?.includeArchived === true

  const summaries: KnowledgeSourceSummary[] = []
  for (const source of listStoredSources()) {
    if (summaries.length >= limit) break
    if (!includeArchived && sourceIsExcludedByDefault(source)) continue
    if (!matchesTagFilter(source.tags, tagFilter)) continue
    summaries.push(buildSourceSummary(source))
  }
  return summaries
}
691
+
692
/**
 * Searches indexed knowledge chunks and returns them as hits, in the order
 * the memory search produced them.
 *
 * Chunks are dropped when their source is unknown, excluded by default
 * (unless `includeArchived`), not visible to a supplied `viewerAgentId`, or
 * fails the tag filter. With no viewer id, no visibility filtering is applied.
 * The score is rank-based: it starts at 1 and falls by 1/total for each hit
 * already accepted.
 *
 * @param options.limit Maximum number of hits; clamped to 1–500, default 50.
 * @returns Matching hits; empty when the query is blank.
 */
export async function searchKnowledgeHits(options: {
  query: string
  tags?: string[]
  limit?: number
  includeArchived?: boolean
  viewerAgentId?: string | null
}): Promise<KnowledgeSearchHit[]> {
  await ensureLegacyKnowledgeBackfill()
  registerKnowledgeMaintenanceIdleCallback()
  const query = normalizeText(options.query)
  if (query === '') return []

  const tagFilter = normalizeTags(options.tags)
  const limit = Math.min(500, Math.max(1, Math.trunc(options.limit || 50)))
  const includeArchived = options.includeArchived === true
  const viewerAgentId = typeof options.viewerAgentId === 'string' ? options.viewerAgentId.trim() : ''

  const sourcesById = new Map<string, KnowledgeSource>()
  for (const source of listStoredSources()) sourcesById.set(source.id, source)

  const knowledgeEntries = getMemoryDb().search(query)
    .filter((entry) => entry.category === 'knowledge')
  const total = Math.max(knowledgeEntries.length, 1)

  const hits: KnowledgeSearchHit[] = []
  for (const entry of knowledgeEntries) {
    const rawSourceId = memorySourceMeta(entry).sourceId
    const source = sourcesById.get(typeof rawSourceId === 'string' ? rawSourceId : '')
    if (!source) continue

    const skip =
      (!includeArchived && sourceIsExcludedByDefault(source)) ||
      (viewerAgentId !== '' && !sourceVisibleToAgent(source, viewerAgentId)) ||
      !matchesTagFilter(source.tags, tagFilter)
    if (skip) continue

    const score = Math.max(0, 1 - hits.length / total)
    hits.push(buildSearchHit(source, entry, score, query))
    if (hits.length >= limit) break
  }

  return hits
}
727
+
728
/**
 * Loads one source with its indexed chunks.
 *
 * @returns The normalised source summary plus its chunks, or null when no source has this id.
 */
export async function getKnowledgeSourceDetail(id: string): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()
  const stored = loadKnowledgeSource(id)
  if (!stored) return null

  const normalized = coerceSource(stored)
  const chunks = getMemoryDb().listKnowledgeSourceChunks(id)
  const source = buildSourceSummary(normalized, chunks)
  return { source, chunks }
}
739
+
740
/**
 * Runs a knowledge search and packages the hits as a retrieval trace.
 *
 * @param options.query Search text forwarded to `searchKnowledgeHits`.
 * @param options.viewerAgentId Optional agent id; falsy values are passed as `null`.
 * @param options.limit Hit limit, clamped to 1..`MAX_GROUNDING_HITS`
 *   (defaults to `MAX_GROUNDING_HITS`).
 * @returns A trace with `selectorStatus: 'not_run'`, or `null` when the search
 *   produced no hits.
 */
export async function buildKnowledgeRetrievalTrace(options: {
  query: string
  viewerAgentId?: string | null
  limit?: number
}): Promise<KnowledgeRetrievalTrace | null> {
  const requestedLimit = Math.trunc(options.limit || MAX_GROUNDING_HITS)
  const boundedLimit = Math.max(1, Math.min(MAX_GROUNDING_HITS, requestedLimit))

  const hits = await searchKnowledgeHits({
    query: options.query,
    limit: boundedLimit,
    viewerAgentId: options.viewerAgentId || null,
  })
  if (hits.length === 0) {
    return null
  }

  const query = normalizeText(options.query)
  const citations = hits.map(toCitation)
  const retrievedAt = Date.now()
  return {
    query,
    scope: 'source_knowledge',
    hits: citations,
    retrievedAt,
    selectorStatus: 'not_run',
  }
}
759
+
760
/**
 * Chooses which retrieved hits to cite for a response.
 *
 * Each hit is scored with `tokenOverlapScore` between the response text and
 * the hit's title, section label and snippet. Hits are ordered by overlap
 * (descending), with the hit's own score as tie-breaker. A hit is kept when
 * its overlap is at least 0.08, or when it is ranked first and its score is at
 * least 0.7.
 *
 * @param params.responseText Response text to compare against the hits.
 * @param params.retrievalTrace Trace produced at retrieval time, if any.
 * @param params.limit Maximum citations; clamped to 1..4, default 3.
 * @returns The chosen citations and a copy of the trace whose
 *   `selectorStatus` is `'selected'` or `'no_match'`. When the trace is
 *   missing or has no hits it is returned as-is (or `null`).
 */
export function selectKnowledgeCitations(params: {
  responseText: string
  retrievalTrace?: KnowledgeRetrievalTrace | null
  limit?: number
}): { citations: KnowledgeCitation[]; retrievalTrace: KnowledgeRetrievalTrace | null } {
  const trace = params.retrievalTrace
  if (!trace || !Array.isArray(trace.hits) || trace.hits.length === 0) {
    return { citations: [], retrievalTrace: trace || null }
  }

  const responseText = normalizeText(params.responseText)
  if (!responseText) {
    return { citations: [], retrievalTrace: { ...trace, selectorStatus: 'no_match' } }
  }

  const ranked = trace.hits.map((hit) => {
    const haystack = `${hit.sourceTitle}\n${hit.sectionLabel || ''}\n${hit.snippet}`
    return { hit, overlap: tokenOverlapScore(responseText, haystack) }
  })
  ranked.sort((a, b) => {
    if (a.overlap !== b.overlap) return b.overlap - a.overlap
    return b.hit.score - a.hit.score
  })

  const limit = Math.max(1, Math.min(4, Math.trunc(params.limit || 3)))
  const selected: KnowledgeCitation[] = []
  ranked.forEach((candidate, position) => {
    if (selected.length >= limit) return
    const enoughOverlap = candidate.overlap >= 0.08
    // The top-ranked hit may pass on retrieval score alone.
    const strongLeader = position === 0 && candidate.hit.score >= 0.7
    if (enoughOverlap || strongLeader) {
      selected.push(candidate.hit)
    }
  })

  const selectorStatus = selected.length > 0 ? 'selected' : 'no_match'
  return {
    citations: selected,
    retrievalTrace: { ...trace, selectorStatus },
  }
}
803
+
804
/**
 * Resolves a source's content, re-indexes its chunks when needed, and persists
 * the resulting sync state.
 *
 * The source is first stored with `syncStatus: 'syncing'`. Chunks are replaced
 * when no chunks are indexed yet, when `forceRewrite` is set, when the resolved
 * title or source label differs, or when the content hash changed. Otherwise
 * only the sync bookkeeping fields are refreshed and the existing chunks are
 * returned.
 *
 * @param source Source record to synchronize.
 * @param options.overrideContent Passed through to `resolveSourceContent`.
 * @param options.forceRewrite When `true`, always replaces the indexed chunks.
 * @returns The updated source summary and its indexed chunks.
 * @throws Rethrows any failure after storing the source with
 *   `syncStatus: 'error'` and the error message in `lastError`.
 */
async function syncSourceRecord(
  source: KnowledgeSource,
  options?: { overrideContent?: string | null; forceRewrite?: boolean },
): Promise<KnowledgeSourceDetail> {
  const loading = coerceSource({
    ...source,
    syncStatus: 'syncing',
    lastError: null,
    updatedAt: Date.now(),
  })
  upsertKnowledgeSource(loading.id, loading)

  try {
    const resolved = await resolveSourceContent(loading, options?.overrideContent)
    const chunks = chunkKnowledgeContent(resolved.title, resolved.content)
    if (chunks.length === 0) {
      throw new Error('No readable content was extracted for this source.')
    }

    const nextHash = contentHash(resolved.content)
    const titleChanged = loading.title !== resolved.title
    const labelChanged = (loading.sourceLabel || null) !== (resolved.sourceLabel || null)
    const metadataChanged = options?.forceRewrite === true || titleChanged || labelChanged

    const existingChunks = getMemoryDb().listKnowledgeSourceChunks(loading.id)
    const needsReindex = existingChunks.length === 0
      || metadataChanged
      || loading.sourceHash !== nextHash

    if (!needsReindex) {
      // Content and metadata are unchanged: keep the indexed chunks and only
      // refresh the sync bookkeeping.
      const refreshedSource = coerceSource({
        ...loading,
        content: resolved.content,
        sourceHash: nextHash,
        syncStatus: 'ready',
        lastError: null,
        lastSyncedAt: Date.now(),
        nextSyncAt: Date.now() + KNOWLEDGE_STALE_AFTER_MS,
        updatedAt: Date.now(),
      })
      upsertKnowledgeSource(refreshedSource.id, refreshedSource)
      return {
        source: buildSourceSummary(refreshedSource, existingChunks),
        chunks: existingChunks,
      }
    }

    const rewrittenSource = coerceSource({
      ...loading,
      title: resolved.title,
      content: resolved.content,
      sourceLabel: resolved.sourceLabel ?? loading.sourceLabel ?? null,
      sourceHash: nextHash,
      chunkCount: chunks.length,
      contentLength: resolved.content.length,
      syncStatus: 'ready',
      lastError: null,
      lastIndexedAt: Date.now(),
      lastSyncedAt: Date.now(),
      nextSyncAt: Date.now() + KNOWLEDGE_STALE_AFTER_MS,
      updatedAt: Date.now(),
    })
    upsertKnowledgeSource(rewrittenSource.id, rewrittenSource)
    const reindexedChunks = replaceSourceChunks(rewrittenSource, chunks)
    return {
      source: buildSourceSummary(rewrittenSource, reindexedChunks),
      chunks: reindexedChunks,
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Knowledge sync failed'
    const failed = coerceSource({
      ...loading,
      syncStatus: 'error',
      lastError: message,
      updatedAt: Date.now(),
    })
    upsertKnowledgeSource(failed.id, failed)
    throw error
  }
}
880
+
881
/**
 * Creates a new knowledge source record and immediately synchronizes it.
 *
 * The title falls back, in order, to: the normalized input title; a title
 * derived from the file name (for `file` sources with a path); a title derived
 * from the URL (for `url` sources with a URL); and finally `'Knowledge Source'`.
 *
 * @param input Fields for the new source.
 * @returns The synchronized source summary and its indexed chunks.
 * @throws Propagates any failure from the initial sync.
 */
export async function createKnowledgeSource(input: KnowledgeSourceInput): Promise<KnowledgeSourceDetail> {
  await ensureLegacyKnowledgeBackfill()

  const now = Date.now()
  const kind = normalizeKind(input.kind)

  let title = normalizeText(input.title)
  if (!title && kind === 'file' && input.sourcePath) {
    title = deriveKnowledgeTitle(path.basename(input.sourcePath))
  }
  if (!title && kind === 'url' && input.sourceUrl) {
    title = sourceTitleFromUrl(input.sourceUrl)
  }
  if (!title) {
    title = 'Knowledge Source'
  }

  const initialContent = typeof input.content === 'string' ? input.content : null
  const source: KnowledgeSource = coerceSource({
    id: genId(8),
    kind,
    title,
    content: initialContent,
    sourceLabel: normalizeOptionalText(input.sourceLabel),
    sourceUrl: normalizeOptionalText(input.sourceUrl),
    sourcePath: normalizeOptionalText(input.sourcePath),
    sourceHash: null,
    scope: normalizeScope(input.scope),
    agentIds: normalizeAgentIds(input.agentIds),
    tags: normalizeTags(input.tags),
    syncStatus: 'syncing',
    lastIndexedAt: null,
    lastSyncedAt: null,
    lastError: null,
    chunkCount: 0,
    contentLength: 0,
    createdAt: now,
    updatedAt: now,
    metadata: input.metadata,
  })

  // Persist first so the record exists even if the sync below fails.
  upsertKnowledgeSource(source.id, source)
  return syncSourceRecord(source, { overrideContent: input.content, forceRewrite: true })
}
917
+
918
/**
 * Applies a partial update to an existing knowledge source and re-syncs it.
 *
 * Fields absent from `input` keep their stored value. `sourceLabel`,
 * `sourceUrl` and `sourcePath` are replaced whenever the input value is not
 * `undefined`. Input metadata is shallow-merged over the stored metadata.
 *
 * @param id Identifier of the source to update.
 * @param input Fields to change.
 * @returns The re-synchronized source detail, or `null` when the id is unknown.
 * @throws Propagates any failure from the sync.
 */
export async function updateKnowledgeSource(
  id: string,
  input: KnowledgeSourceInput,
): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()

  const stored = loadKnowledgeSource(id)
  if (!stored) {
    return null
  }
  const current = coerceSource(stored)

  const kind = normalizeKind(input.kind ?? current.kind)
  const title = normalizeText(input.title) || current.title
  const content = typeof input.content === 'string' ? input.content : current.content
  const sourceLabel = input.sourceLabel !== undefined
    ? normalizeOptionalText(input.sourceLabel)
    : current.sourceLabel
  const sourceUrl = input.sourceUrl !== undefined
    ? normalizeOptionalText(input.sourceUrl)
    : current.sourceUrl
  const sourcePath = input.sourcePath !== undefined
    ? normalizeOptionalText(input.sourcePath)
    : current.sourcePath
  const scope = normalizeScope(input.scope ?? current.scope)
  const agentIds = normalizeAgentIds(input.agentIds ?? current.agentIds)
  const tags = normalizeTags(input.tags ?? current.tags)
  const metadata = input.metadata
    ? { ...(current.metadata || {}), ...input.metadata }
    : current.metadata

  const next: KnowledgeSource = coerceSource({
    ...current,
    kind,
    title,
    content,
    sourceLabel,
    sourceUrl,
    sourcePath,
    scope,
    agentIds,
    tags,
    metadata,
    updatedAt: Date.now(),
  })

  upsertKnowledgeSource(next.id, next)
  return syncSourceRecord(next, { overrideContent: input.content, forceRewrite: true })
}
945
+
946
/**
 * Re-synchronizes a stored knowledge source by id.
 *
 * @param id Identifier of the source to sync.
 * @returns The synchronized source detail, or `null` when the id is unknown.
 * @throws Propagates any failure from the sync.
 */
export async function syncKnowledgeSource(id: string): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()
  const stored = loadKnowledgeSource(id)
  return stored ? syncSourceRecord(coerceSource(stored)) : null
}
952
+
953
/**
 * Deletes a knowledge source and every chunk indexed for it.
 *
 * @param id Identifier of the source to delete.
 * @returns `true` when the source existed and was removed, otherwise `false`.
 */
export async function deleteKnowledgeSource(id: string): Promise<boolean> {
  await ensureLegacyKnowledgeBackfill()

  if (!loadKnowledgeSource(id)) {
    return false
  }

  // Remove the indexed chunks before dropping the source record itself.
  getMemoryDb().listKnowledgeSourceChunks(id).forEach((chunk) => {
    getMemoryDb().delete(chunk.id)
  })
  deleteKnowledgeSourceRecord(id)
  return true
}
964
+
965
/**
 * Prepends an action to the in-memory maintenance history, keeping at most the
 * 48 newest entries.
 *
 * @param action Maintenance action to record.
 */
function recordMaintenanceAction(action: KnowledgeHygieneAction): void {
  const keep = 48
  // Newest first: the new action plus up to `keep - 1` previous entries.
  maintenanceHistory = [action, ...maintenanceHistory.slice(0, keep - 1)]
}
968
+
969
/**
 * Applies `updater` to a stored source through `patchKnowledgeSource`.
 *
 * The current record is coerced before it is handed to `updater`, and the
 * updater's result is coerced again before being returned to the patch call.
 *
 * @param id Identifier of the source to patch.
 * @param updater Produces the next version of the source.
 * @returns The coerced patched source, or `null` when the patch yields nothing
 *   (the patch callback returns `null` when there is no current record).
 */
function upsertSourceLifecycle(id: string, updater: (source: KnowledgeSource) => KnowledgeSource): KnowledgeSource | null {
  const patched = patchKnowledgeSource(id, (current) => (
    current ? coerceSource(updater(coerceSource(current))) : null
  ))
  if (!patched) {
    return null
  }
  return coerceSource(patched)
}
976
+
977
/**
 * Marks a knowledge source as archived and records the action in the
 * maintenance history.
 *
 * Existing `archivedAt`, `archivedReason`, `duplicateOfSourceId` and
 * `supersededBySourceId` values are kept unless the input supplies a non-empty
 * replacement; the archive timestamp is only set when none exists yet.
 *
 * @param id Identifier of the source to archive.
 * @param input.reason Optional reason; also stored as the maintenance note.
 * @param input.duplicateOfSourceId Optional id of the source this one duplicates.
 * @param input.supersededBySourceId Optional id of the source that replaces this one.
 * @returns The refreshed source detail, or `null` when the id is unknown.
 */
export async function archiveKnowledgeSource(
  id: string,
  input?: { reason?: string | null; duplicateOfSourceId?: string | null; supersededBySourceId?: string | null },
): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()

  const reason = normalizeOptionalText(input?.reason)
  const duplicateOf = normalizeOptionalText(input?.duplicateOfSourceId)
  const supersededBy = normalizeOptionalText(input?.supersededBySourceId)

  const updated = upsertSourceLifecycle(id, (source) => {
    return {
      ...source,
      archivedAt: source.archivedAt || Date.now(),
      archivedReason: reason || source.archivedReason || 'archived',
      duplicateOfSourceId: duplicateOf || source.duplicateOfSourceId || null,
      supersededBySourceId: supersededBy || source.supersededBySourceId || null,
      maintenanceUpdatedAt: Date.now(),
      maintenanceNotes: reason || source.maintenanceNotes || null,
      updatedAt: Date.now(),
    }
  })
  if (!updated) {
    return null
  }

  const relatedSourceId = updated.duplicateOfSourceId || updated.supersededBySourceId || null
  recordMaintenanceAction({
    kind: 'archive',
    sourceId: updated.id,
    relatedSourceId,
    summary: `Archived ${updated.title}`,
    createdAt: Date.now(),
  })
  return getKnowledgeSourceDetail(updated.id)
}
1002
+
1003
/**
 * Clears a source's archive, duplicate and supersede markers and records the
 * action in the maintenance history.
 *
 * @param id Identifier of the source to restore.
 * @returns The refreshed source detail, or `null` when the id is unknown.
 */
export async function restoreKnowledgeSource(id: string): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()

  const restored = upsertSourceLifecycle(id, (source) => {
    return {
      ...source,
      archivedAt: null,
      archivedReason: null,
      duplicateOfSourceId: null,
      supersededBySourceId: null,
      maintenanceUpdatedAt: Date.now(),
      maintenanceNotes: 'restored',
      updatedAt: Date.now(),
    }
  })
  if (!restored) {
    return null
  }

  recordMaintenanceAction({
    kind: 'restore',
    sourceId: restored.id,
    summary: `Restored ${restored.title}`,
    createdAt: Date.now(),
  })
  return getKnowledgeSourceDetail(restored.id)
}
1024
+
1025
/**
 * Marks a source as superseded by another source, archiving it if it is not
 * archived already, and records the action in the maintenance history.
 *
 * @param id Identifier of the source being replaced.
 * @param supersededBySourceId Identifier of the source that replaces it.
 * @returns The refreshed source detail, or `null` when `id` is unknown.
 * @throws Error when `supersededBySourceId` equals `id`, or when the
 *   superseding source does not exist.
 */
export async function supersedeKnowledgeSource(
  id: string,
  supersededBySourceId: string,
): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()

  // A self-reference would archive the source while pointing at itself as the
  // replacement, leaving no live record behind.
  if (id === supersededBySourceId) {
    throw new Error('A source cannot supersede itself.')
  }

  const target = loadKnowledgeSource(supersededBySourceId)
  if (!target) throw new Error('Superseding source not found.')

  const updated = upsertSourceLifecycle(id, (source) => ({
    ...source,
    supersededBySourceId,
    // Keep an earlier archive timestamp/reason if the source was already archived.
    archivedAt: source.archivedAt || Date.now(),
    archivedReason: source.archivedReason || 'superseded',
    maintenanceUpdatedAt: Date.now(),
    maintenanceNotes: `Superseded by ${supersededBySourceId}`,
    updatedAt: Date.now(),
  }))
  if (!updated) return null

  recordMaintenanceAction({
    kind: 'supersede',
    sourceId: updated.id,
    relatedSourceId: supersededBySourceId,
    summary: `Marked ${updated.title} as superseded`,
    createdAt: Date.now(),
  })
  return getKnowledgeSourceDetail(updated.id)
}
1051
+
1052
/**
 * Reports whether two distinct sources share an origin.
 *
 * When both sources have a URL, only the URLs are compared. Otherwise, when
 * both have a path, the paths are compared. A source never shares an origin
 * with itself (same `id`).
 *
 * @returns `true` only for different sources with an equal URL or path.
 */
function sameSourceOrigin(left: KnowledgeSource, right: KnowledgeSource): boolean {
  if (left.id === right.id) {
    return false
  }

  const bothHaveUrl = Boolean(left.sourceUrl && right.sourceUrl)
  if (bothHaveUrl) {
    return left.sourceUrl === right.sourceUrl
  }

  const bothHavePath = Boolean(left.sourcePath && right.sourcePath)
  return bothHavePath && left.sourcePath === right.sourcePath
}
1058
+
1059
/**
 * Picks the source that should represent a group of related sources.
 *
 * Preference order: sources not excluded by default come first, then the most
 * recently indexed, then the earliest created. The input array is not mutated.
 *
 * @param group Candidate sources; callers in this file pass at least two.
 * @returns The preferred source of the group.
 */
function canonicalSourceForGroup(group: KnowledgeSource[]): KnowledgeSource {
  const ranked = group.slice()
  ranked.sort((a, b) => {
    const excludedA = Number(sourceIsExcludedByDefault(a))
    const excludedB = Number(sourceIsExcludedByDefault(b))
    if (excludedA !== excludedB) return excludedA - excludedB

    const indexedA = a.lastIndexedAt || 0
    const indexedB = b.lastIndexedAt || 0
    if (indexedA !== indexedB) return indexedB - indexedA

    return a.createdAt - b.createdAt
  })
  return ranked[0]
}
1068
+
1069
/**
 * Scans the given sources and reports hygiene findings.
 *
 * Findings are emitted in this order: per-source state (archived, superseded,
 * broken or stale), exact duplicates by content hash, then pairwise content
 * overlap between sources that are not excluded by default. At most
 * `MAX_HYGIENE_FINDINGS` findings are kept, and `counts` is computed from the
 * kept findings only.
 *
 * @param sources Sources to inspect.
 * @returns Scan timestamp, per-kind counts, the findings, and a copy of the
 *   recent maintenance actions.
 */
function buildHygieneSummary(sources: KnowledgeSource[]): KnowledgeHygieneSummary {
  const scannedAt = Date.now()
  const findings: KnowledgeHygieneFinding[] = []
  const findingsFull = (): boolean => findings.length >= MAX_HYGIENE_FINDINGS
  const pushFinding = (finding: KnowledgeHygieneFinding) => {
    if (!findingsFull()) findings.push(finding)
  }

  // Group sources by content hash; sources without a hash are never duplicates.
  const duplicateGroups = new Map<string, KnowledgeSource[]>()
  for (const source of sources) {
    if (!source.sourceHash) continue
    const group = duplicateGroups.get(source.sourceHash) || []
    group.push(source)
    duplicateGroups.set(source.sourceHash, group)
  }

  for (const source of sources) {
    if (sourceIsArchived(source)) {
      pushFinding({
        kind: 'archived',
        sourceId: source.id,
        sourceTitle: source.title,
        detail: source.archivedReason || 'Archived source',
        createdAt: source.archivedAt || source.updatedAt,
      })
    }
    if (sourceIsSuperseded(source)) {
      pushFinding({
        kind: 'superseded',
        sourceId: source.id,
        sourceTitle: source.title,
        relatedSourceId: source.supersededBySourceId || null,
        detail: `Superseded by ${source.supersededBySourceId}`,
        createdAt: source.updatedAt,
      })
    }
    // A failed sync takes precedence over staleness for the same source.
    if (source.syncStatus === 'error') {
      pushFinding({
        kind: 'broken',
        sourceId: source.id,
        sourceTitle: source.title,
        detail: source.lastError || 'Last sync failed',
        createdAt: source.updatedAt,
      })
    } else if (isStaleSource(source)) {
      pushFinding({
        kind: 'stale',
        sourceId: source.id,
        sourceTitle: source.title,
        detail: 'Source is due for re-sync',
        createdAt: source.updatedAt,
      })
    }
  }

  for (const group of duplicateGroups.values()) {
    if (group.length < 2) continue
    const canonical = canonicalSourceForGroup(group)
    for (const source of group) {
      if (source.id === canonical.id) continue
      pushFinding({
        kind: 'duplicate',
        sourceId: source.id,
        sourceTitle: source.title,
        relatedSourceId: canonical.id,
        relatedSourceTitle: canonical.title,
        detail: 'Exact duplicate content hash',
        createdAt: source.updatedAt,
      })
    }
  }

  // Pairwise overlap is quadratic, so skip it entirely once the findings list
  // is full: nothing found afterwards could be recorded anyway.
  if (!findingsFull()) {
    // Build each comparison body once instead of once per pair, and drop
    // sources whose body is blank.
    const candidates = sources
      .filter((source) => !sourceIsExcludedByDefault(source))
      .map((source) => ({ source, body: `${source.title}\n${source.content || ''}` }))
      .filter((candidate) => candidate.body.trim())

    for (let index = 0; index < candidates.length && !findingsFull(); index += 1) {
      const left = candidates[index]
      for (
        let compareIndex = index + 1;
        compareIndex < candidates.length && !findingsFull();
        compareIndex += 1
      ) {
        const right = candidates[compareIndex]
        // Copies of the same URL/path are handled by the supersede flow, not as overlap.
        if (sameSourceOrigin(left.source, right.source)) continue
        const overlap = jaccardSimilarity(left.body, right.body)
        if (overlap < 0.6) continue
        pushFinding({
          kind: 'overlap',
          sourceId: left.source.id,
          sourceTitle: left.source.title,
          relatedSourceId: right.source.id,
          relatedSourceTitle: right.source.title,
          detail: `High content overlap (${Math.round(overlap * 100)}%)`,
          createdAt: Math.max(left.source.updatedAt, right.source.updatedAt),
        })
      }
    }
  }

  const countOf = (kind: KnowledgeHygieneFinding['kind']): number =>
    findings.filter((finding) => finding.kind === kind).length

  return {
    scannedAt,
    counts: {
      stale: countOf('stale'),
      duplicate: countOf('duplicate'),
      overlap: countOf('overlap'),
      broken: countOf('broken'),
      archived: countOf('archived'),
      superseded: countOf('superseded'),
    },
    findings,
    recentActions: [...maintenanceHistory],
  }
}
1178
+
1179
/**
 * Builds a hygiene summary for all stored sources and makes sure the idle
 * maintenance callback is registered.
 *
 * @returns The hygiene summary for the currently stored sources.
 */
export async function getKnowledgeHygieneSummary(): Promise<KnowledgeHygieneSummary> {
  await ensureLegacyKnowledgeBackfill()
  registerKnowledgeMaintenanceIdleCallback()
  const stored = listStoredSources()
  return buildHygieneSummary(stored)
}
1184
+
1185
/**
 * Runs one automatic maintenance pass over the stored knowledge sources.
 *
 * The pass has three stages:
 * 1. Re-sync every non-manual source that is stale or in an error state and is
 *    not excluded by default.
 * 2. Archive exact duplicates (same content hash), keeping the canonical
 *    source of each group.
 * 3. Among active sources sharing a URL or path, mark older copies as
 *    superseded by the canonical one.
 *
 * @returns A hygiene summary computed after all stages have run.
 */
export async function runKnowledgeHygieneMaintenance(): Promise<KnowledgeHygieneSummary> {
  await ensureLegacyKnowledgeBackfill()

  // Stage 1: refresh stale or broken sources.
  for (const source of listStoredSources()) {
    if (sourceIsExcludedByDefault(source)) continue
    if (source.kind !== 'manual' && (isStaleSource(source) || source.syncStatus === 'error')) {
      try {
        const synced = await syncKnowledgeSource(source.id)
        if (synced?.source) {
          upsertSourceLifecycle(source.id, (current) => ({
            ...current,
            lastAutoSyncAt: Date.now(),
            maintenanceUpdatedAt: Date.now(),
            maintenanceNotes: 'auto-sync completed',
            updatedAt: Date.now(),
          }))
          recordMaintenanceAction({
            // An unchanged hash is reported as a plain sync, a changed one as a reindex.
            kind: source.sourceHash === synced.source.sourceHash ? 'sync' : 'reindex',
            sourceId: source.id,
            summary: `Auto-synced ${synced.source.title}`,
            createdAt: Date.now(),
          })
        }
      } catch {
        // Keep the existing error state for manual review.
      }
    }
  }

  // Stage 2: group by content hash using a fresh listing, so hashes changed by
  // the sync stage above are honored and a source whose content just changed
  // is not archived as a duplicate of its former twin.
  const duplicateGroups = new Map<string, KnowledgeSource[]>()
  for (const source of listStoredSources()) {
    if (!source.sourceHash) continue
    const group = duplicateGroups.get(source.sourceHash) || []
    group.push(source)
    duplicateGroups.set(source.sourceHash, group)
  }

  for (const group of duplicateGroups.values()) {
    if (group.length < 2) continue
    const canonical = canonicalSourceForGroup(group)
    for (const source of group) {
      if (source.id === canonical.id || sourceIsExcludedByDefault(source)) continue
      await archiveKnowledgeSource(source.id, {
        reason: 'duplicate',
        duplicateOfSourceId: canonical.id,
      })
    }
  }

  // Stage 3: supersede older copies of the same origin, again from a fresh
  // listing so sources archived in stage 2 are left out.
  const originGroups = new Map<string, KnowledgeSource[]>()
  for (const source of listStoredSources()) {
    if (sourceIsExcludedByDefault(source)) continue
    const origin = source.sourceUrl || source.sourcePath || ''
    if (!origin) continue
    const group = originGroups.get(origin) || []
    group.push(source)
    originGroups.set(origin, group)
  }
  for (const group of originGroups.values()) {
    if (group.length < 2) continue
    const canonical = canonicalSourceForGroup(group)
    for (const source of group) {
      if (source.id === canonical.id || sourceIsSuperseded(source)) continue
      // Only copies indexed strictly earlier than the canonical one are superseded.
      if ((source.lastIndexedAt || 0) >= (canonical.lastIndexedAt || 0)) continue
      await supersedeKnowledgeSource(source.id, canonical.id)
    }
  }

  return buildHygieneSummary(listStoredSources())
}
1257
+
1258
/**
 * Schedules a maintenance pass for the next idle window, at most once at a time.
 *
 * The `maintenanceRegistered` flag stays set while the pass is running, so
 * calls made during the run do not schedule a second, overlapping pass. After a
 * successful pass the callback re-registers itself. After a failed pass the
 * flag is cleared and the error propagates to `onNextIdleWindow`; the next
 * caller of this function re-arms the schedule.
 */
export function registerKnowledgeMaintenanceIdleCallback(): void {
  if (maintenanceRegistered) return
  maintenanceRegistered = true
  onNextIdleWindow(async () => {
    try {
      await runKnowledgeHygieneMaintenance()
    } finally {
      // Release the guard only once the pass has settled.
      maintenanceRegistered = false
    }
    registerKnowledgeMaintenanceIdleCallback()
  })
}