@swarmclawai/swarmclaw 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -76
- package/package.json +1 -1
- package/skills/swarmclaw.md +17 -0
- package/src/app/api/agents/[id]/dream/route.ts +45 -0
- package/src/app/api/knowledge/[id]/route.ts +48 -49
- package/src/app/api/knowledge/hygiene/route.ts +13 -0
- package/src/app/api/knowledge/route.ts +70 -42
- package/src/app/api/knowledge/sources/[id]/archive/route.ts +15 -0
- package/src/app/api/knowledge/sources/[id]/restore/route.ts +10 -0
- package/src/app/api/knowledge/sources/[id]/route.ts +1 -0
- package/src/app/api/knowledge/sources/[id]/supersede/route.ts +26 -0
- package/src/app/api/knowledge/sources/[id]/sync/route.ts +17 -0
- package/src/app/api/knowledge/sources/route.ts +1 -0
- package/src/app/api/knowledge/upload/route.ts +3 -51
- package/src/app/api/memory/dream/[id]/route.ts +19 -0
- package/src/app/api/memory/dream/route.ts +34 -0
- package/src/app/knowledge/layout.tsx +1 -1
- package/src/app/knowledge/page.tsx +2 -22
- package/src/app/protocols/page.tsx +21 -2
- package/src/cli/index.js +16 -0
- package/src/cli/spec.js +5 -0
- package/src/components/agents/agent-sheet.tsx +65 -0
- package/src/components/chat/message-bubble.tsx +10 -0
- package/src/components/knowledge/grounding-panel.tsx +99 -0
- package/src/components/knowledge/knowledge-detail.tsx +402 -0
- package/src/components/knowledge/knowledge-list.tsx +351 -126
- package/src/components/knowledge/knowledge-sheet.tsx +208 -119
- package/src/components/memory/dream-history.tsx +155 -0
- package/src/components/memory/memory-card.tsx +7 -0
- package/src/components/memory/memory-detail.tsx +46 -0
- package/src/components/runs/run-list.tsx +23 -0
- package/src/lib/providers/cli-utils.ts +3 -4
- package/src/lib/providers/index.ts +12 -22
- package/src/lib/providers/openclaw.ts +1 -2
- package/src/lib/server/agents/subagent-swarm.ts +2 -7
- package/src/lib/server/api-routes.test.ts +43 -2
- package/src/lib/server/chat-execution/chat-execution-grounding.test.ts +127 -0
- package/src/lib/server/chat-execution/chat-execution-types.ts +8 -1
- package/src/lib/server/chat-execution/chat-execution.ts +1 -0
- package/src/lib/server/chat-execution/chat-turn-finalization.ts +23 -6
- package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +6 -1
- package/src/lib/server/chat-execution/post-stream-finalization.ts +15 -3
- package/src/lib/server/chat-execution/prompt-builder.ts +4 -6
- package/src/lib/server/chat-execution/prompt-sections.ts +29 -3
- package/src/lib/server/chat-execution/stream-agent-chat.ts +6 -1
- package/src/lib/server/connectors/openclaw.ts +1 -2
- package/src/lib/server/execution-engine/task-attempt.ts +8 -2
- package/src/lib/server/knowledge-import.ts +159 -0
- package/src/lib/server/knowledge-sources.test.ts +215 -0
- package/src/lib/server/knowledge-sources.ts +1266 -0
- package/src/lib/server/memory/dream-cycles.ts +49 -0
- package/src/lib/server/memory/dream-idle-callback.ts +38 -0
- package/src/lib/server/memory/dream-service.ts +315 -0
- package/src/lib/server/memory/memory-db.ts +37 -2
- package/src/lib/server/protocols/protocol-agent-turn.ts +7 -0
- package/src/lib/server/protocols/protocol-run-lifecycle.ts +19 -6
- package/src/lib/server/protocols/protocol-service.test.ts +99 -0
- package/src/lib/server/protocols/protocol-step-helpers.ts +7 -1
- package/src/lib/server/protocols/protocol-step-processors.ts +16 -3
- package/src/lib/server/protocols/protocol-types.ts +4 -0
- package/src/lib/server/provider-health.ts +2 -7
- package/src/lib/server/runtime/daemon-state/core.ts +6 -1
- package/src/lib/server/runtime/run-ledger.test.ts +120 -0
- package/src/lib/server/runtime/run-ledger.ts +27 -1
- package/src/lib/server/runtime/session-run-manager/drain.ts +5 -0
- package/src/lib/server/runtime/session-run-manager/state.ts +19 -2
- package/src/lib/server/storage-normalization.ts +5 -0
- package/src/lib/server/storage.ts +16 -1
- package/src/stores/slices/ui-slice.ts +4 -0
- package/src/types/agent.ts +7 -0
- package/src/types/dream.ts +45 -0
- package/src/types/index.ts +1 -0
- package/src/types/message.ts +3 -0
- package/src/types/misc.ts +131 -0
- package/src/types/protocol.ts +4 -0
- package/src/types/run.ts +4 -1
|
@@ -0,0 +1,1266 @@
|
|
|
1
|
+
import { createHash } from 'crypto'
|
|
2
|
+
import path from 'path'
|
|
3
|
+
|
|
4
|
+
import { genId } from '@/lib/id'
|
|
5
|
+
import type {
|
|
6
|
+
KnowledgeCitation,
|
|
7
|
+
KnowledgeHygieneAction,
|
|
8
|
+
KnowledgeHygieneFinding,
|
|
9
|
+
KnowledgeHygieneSummary,
|
|
10
|
+
KnowledgeSource,
|
|
11
|
+
KnowledgeSourceDetail,
|
|
12
|
+
KnowledgeSourceKind,
|
|
13
|
+
KnowledgeSourceSummary,
|
|
14
|
+
KnowledgeRetrievalTrace,
|
|
15
|
+
KnowledgeSearchHit,
|
|
16
|
+
MemoryEntry,
|
|
17
|
+
} from '@/types'
|
|
18
|
+
import {
|
|
19
|
+
deleteKnowledgeSource as deleteKnowledgeSourceRecord,
|
|
20
|
+
loadKnowledgeSource,
|
|
21
|
+
loadKnowledgeSources,
|
|
22
|
+
patchKnowledgeSource,
|
|
23
|
+
upsertKnowledgeSource,
|
|
24
|
+
} from '@/lib/server/storage'
|
|
25
|
+
import { getMemoryDb } from '@/lib/server/memory/memory-db'
|
|
26
|
+
import {
|
|
27
|
+
deriveKnowledgeTitle,
|
|
28
|
+
extractKnowledgeTextFromFile,
|
|
29
|
+
extractKnowledgeTextFromUrl,
|
|
30
|
+
} from '@/lib/server/knowledge-import'
|
|
31
|
+
import { onNextIdleWindow } from '@/lib/server/runtime/idle-window'
|
|
32
|
+
|
|
33
|
+
// Age threshold (14 days, in milliseconds) compared against a source's lastIndexedAt in isStaleSource.
const KNOWLEDGE_STALE_AFTER_MS = 1000 * 60 * 60 * 24 * 14
|
|
34
|
+
// Upper bound, in characters, for one indexed chunk; longer paragraphs are split further.
const CHUNK_TARGET_CHARS = 2200
|
|
35
|
+
// Amount of trailing text (characters) repeated at the start of the following chunk.
const CHUNK_OVERLAP_CHARS = 320
|
|
36
|
+
// Third argument passed to listByCategory during the legacy backfill (presumably a row limit; confirm against memory-db).
const MAX_KNOWLEDGE_SCAN = 10_000
|
|
37
|
+
// NOTE(review): not referenced in the visible part of the file; presumably caps hygiene findings.
const MAX_HYGIENE_FINDINGS = 120
|
|
38
|
+
// NOTE(review): not referenced in the visible part of the file; presumably caps grounding hits per turn.
const MAX_GROUNDING_HITS = 4
|
|
39
|
+
|
|
40
|
+
/**
 * Caller-supplied fields for a knowledge source. Every field is optional.
 * NOTE(review): the consumers of this shape are outside the visible part of
 * the file; presumably the values are passed through the normalize* helpers
 * before being stored. Confirm against the create/update functions.
 */
interface KnowledgeSourceInput {
|
|
41
|
+
kind?: KnowledgeSourceKind
|
|
42
|
+
title?: string
|
|
43
|
+
content?: string | null
|
|
44
|
+
tags?: string[]
|
|
45
|
+
scope?: 'global' | 'agent'
|
|
46
|
+
agentIds?: string[]
|
|
47
|
+
sourceLabel?: string | null
|
|
48
|
+
sourceUrl?: string | null
|
|
49
|
+
sourcePath?: string | null
|
|
50
|
+
metadata?: Record<string, unknown>
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
 * One chunk produced by the chunker before it is written to the memory DB.
 * - title: the source title, suffixed with " · <section>" when a section label is known.
 * - chunkIndex / chunkCount: filled in by chunkKnowledgeContent once all chunks exist
 *   (the chunk builders emit 0 for both).
 * - charStart / charEnd: offsets into the CRLF-normalized, trimmed source text.
 * - sectionLabel: the most recent markdown heading seen, when any.
 */
interface IndexedChunk {
|
|
54
|
+
title: string
|
|
55
|
+
content: string
|
|
56
|
+
chunkIndex: number
|
|
57
|
+
chunkCount: number
|
|
58
|
+
charStart: number
|
|
59
|
+
charEnd: number
|
|
60
|
+
sectionLabel?: string | null
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// In-flight legacy backfill, shared by concurrent callers; reset to null when it settles.
let backfillPromise: Promise<void> | null = null
|
|
64
|
+
// Set to true once the legacy backfill loop has finished without throwing.
let backfillComplete = false
|
|
65
|
+
// NOTE(review): written outside the visible part of the file; presumably guards one-time idle-callback registration.
let maintenanceRegistered = false
|
|
66
|
+
// NOTE(review): written outside the visible part of the file; presumably a log of maintenance actions taken.
let maintenanceHistory: KnowledgeHygieneAction[] = []
|
|
67
|
+
|
|
68
|
+
/** Returns the trimmed input when it is a string, and '' for every other value. */
function normalizeText(value: unknown): string {
  if (typeof value !== 'string') return ''
  return value.trim()
}
|
|
71
|
+
|
|
72
|
+
/** Like normalizeText, but reports blank or non-string input as null. */
function normalizeOptionalText(value: unknown): string | null {
  const text = normalizeText(value)
  return text.length > 0 ? text : null
}
|
|
76
|
+
|
|
77
|
+
/**
 * Cleans a caller-supplied tag list: non-arrays become [], non-string and
 * blank items are dropped, items are trimmed, and duplicates are removed
 * case-insensitively while keeping the first spelling and original order.
 */
function normalizeTags(tags: unknown): string[] {
  if (!Array.isArray(tags)) return []
  // Map keeps insertion order, so the first spelling of each tag wins.
  const firstSpelling = new Map<string, string>()
  for (const candidate of tags) {
    if (typeof candidate !== 'string') continue
    const label = candidate.trim()
    if (label.length === 0) continue
    const key = label.toLowerCase()
    if (!firstSpelling.has(key)) firstSpelling.set(key, label)
  }
  return Array.from(firstSpelling.values())
}
|
|
91
|
+
|
|
92
|
+
/**
 * True when the filter is empty or when at least one filter tag equals one of
 * the source's tags, compared case-insensitively.
 */
function matchesTagFilter(sourceTags: string[], filterTags: string[]): boolean {
  if (filterTags.length === 0) return true
  const available = new Set<string>()
  for (const tag of sourceTags) available.add(tag.toLowerCase())
  for (const wanted of filterTags) {
    if (available.has(wanted.toLowerCase())) return true
  }
  return false
}
|
|
97
|
+
|
|
98
|
+
/**
 * Cleans a caller-supplied agent id list: non-arrays become [], non-string and
 * blank items are dropped, ids are trimmed, and exact duplicates are removed
 * while preserving first-seen order.
 */
function normalizeAgentIds(agentIds: unknown): string[] {
  if (!Array.isArray(agentIds)) return []
  const unique = new Set<string>()
  for (const candidate of agentIds) {
    if (typeof candidate !== 'string') continue
    const id = candidate.trim()
    if (id) unique.add(id)
  }
  return [...unique]
}
|
|
111
|
+
|
|
112
|
+
/** Maps any input to a valid scope; everything except the literal 'agent' becomes 'global'. */
function normalizeScope(scope: unknown): 'global' | 'agent' {
  if (scope === 'agent') return 'agent'
  return 'global'
}
|
|
115
|
+
|
|
116
|
+
/** Maps any input to a valid source kind; anything other than 'file' or 'url' becomes 'manual'. */
function normalizeKind(kind: unknown): KnowledgeSourceKind {
  if (kind === 'file') return 'file'
  if (kind === 'url') return 'url'
  return 'manual'
}
|
|
119
|
+
|
|
120
|
+
/** Returns the SHA-256 digest of the given text as a lowercase hex string. */
function contentHash(content: string): string {
  const hasher = createHash('sha256')
  hasher.update(content)
  return hasher.digest('hex')
}
|
|
123
|
+
|
|
124
|
+
/**
 * Decides whether a source should be flagged as stale.
 * Archived or superseded sources are never stale; a source in the 'error'
 * sync state always is; manual sources never are. Any other source is stale
 * when it has no usable lastIndexedAt or was indexed longer ago than
 * KNOWLEDGE_STALE_AFTER_MS.
 */
function isStaleSource(source: KnowledgeSource): boolean {
  const retired = Boolean(source.archivedAt || source.supersededBySourceId)
  if (retired) return false
  if (source.syncStatus === 'error') return true
  if (source.kind === 'manual') return false

  const lastIndexed = source.lastIndexedAt
  // Missing, zero, or NaN timestamps all count as "never indexed".
  if (typeof lastIndexed !== 'number' || !lastIndexed) return true
  const age = Date.now() - lastIndexed
  return age > KNOWLEDGE_STALE_AFTER_MS
}
|
|
132
|
+
|
|
133
|
+
/**
 * Rebuilds a stored source record with every field normalized: text fields are
 * trimmed (blank becomes null), list fields are de-duplicated, numeric fields
 * fall back to null / 0 / "now" when they are not numbers, and syncStatus
 * falls back to 'ready' unless it is 'syncing' or 'error'.
 */
function coerceSource(source: KnowledgeSource): KnowledgeSource {
  const now = Date.now()
  // Keeps numeric values and substitutes the given fallback for anything else.
  const numberOr = <T>(value: unknown, fallback: T): number | T =>
    typeof value === 'number' ? value : fallback
  const status = source.syncStatus
  const hasMetadata = Boolean(source.metadata) && typeof source.metadata === 'object'

  return {
    id: source.id,
    kind: normalizeKind(source.kind),
    title: normalizeText(source.title) || 'Knowledge Source',
    content: typeof source.content === 'string' ? source.content : null,
    sourceLabel: normalizeOptionalText(source.sourceLabel),
    sourceUrl: normalizeOptionalText(source.sourceUrl),
    sourcePath: normalizeOptionalText(source.sourcePath),
    sourceHash: normalizeOptionalText(source.sourceHash),
    scope: normalizeScope(source.scope),
    agentIds: normalizeAgentIds(source.agentIds),
    tags: normalizeTags(source.tags),
    syncStatus: status === 'syncing' || status === 'error' ? status : 'ready',
    lastIndexedAt: numberOr(source.lastIndexedAt, null),
    lastSyncedAt: numberOr(source.lastSyncedAt, null),
    lastError: normalizeOptionalText(source.lastError),
    archivedAt: numberOr(source.archivedAt, null),
    archivedReason: normalizeOptionalText(source.archivedReason),
    duplicateOfSourceId: normalizeOptionalText(source.duplicateOfSourceId),
    supersededBySourceId: normalizeOptionalText(source.supersededBySourceId),
    maintenanceUpdatedAt: numberOr(source.maintenanceUpdatedAt, null),
    maintenanceNotes: normalizeOptionalText(source.maintenanceNotes),
    nextSyncAt: numberOr(source.nextSyncAt, null),
    lastAutoSyncAt: numberOr(source.lastAutoSyncAt, null),
    chunkCount: numberOr(source.chunkCount, 0),
    contentLength: numberOr(source.contentLength, 0),
    createdAt: numberOr(source.createdAt, now),
    updatedAt: numberOr(source.updatedAt, now),
    metadata: hasMetadata ? source.metadata : undefined,
  }
}
|
|
166
|
+
|
|
167
|
+
/** True when the source carries a positive numeric archivedAt timestamp. */
function sourceIsArchived(source: KnowledgeSource): boolean {
  const archivedAt = source.archivedAt
  if (typeof archivedAt !== 'number') return false
  return archivedAt > 0
}
|
|
170
|
+
|
|
171
|
+
/** True when the source points at a non-blank supersededBySourceId. */
function sourceIsSuperseded(source: KnowledgeSource): boolean {
  const successorId = source.supersededBySourceId
  if (typeof successorId !== 'string') return false
  return successorId.trim().length > 0
}
|
|
174
|
+
|
|
175
|
+
/** True for sources that are hidden unless explicitly requested: archived or superseded ones. */
function sourceIsExcludedByDefault(source: KnowledgeSource): boolean {
  if (sourceIsArchived(source)) return true
  return sourceIsSuperseded(source)
}
|
|
178
|
+
|
|
179
|
+
/**
 * Visibility check: global sources are visible to everyone; any other source
 * is visible only to a viewer whose id is listed in the source's agentIds.
 */
function sourceVisibleToAgent(source: KnowledgeSource, viewerAgentId?: string | null): boolean {
  const isGlobal = source.scope === 'global'
  if (isGlobal) return true
  return viewerAgentId ? source.agentIds.includes(viewerAgentId) : false
}
|
|
184
|
+
|
|
185
|
+
/**
 * Splits text into a de-duplicated list of lowercase word tokens.
 *
 * Tokens are maximal runs of Unicode letters, digits and combining marks; any
 * other character is a separator. Tokens shorter than three characters are
 * discarded. Order follows first appearance in the input.
 *
 * The character classes are Unicode-aware so that accented and non-Latin
 * words (for example "café" or "über") survive as whole tokens instead of
 * being cut at the first non-ASCII character. ASCII input tokenizes exactly
 * as with an `[a-z0-9]` filter.
 *
 * @param value Text to tokenize; falsy input yields an empty list.
 * @returns Unique tokens of length >= 3.
 */
function cleanKnowledgeTokens(value: string): string[] {
  const tokens = String(value || '')
    .toLowerCase()
    .split(/[^\p{L}\p{N}\p{M}]+/u)
    .filter((token) => token.length >= 3)
  return Array.from(new Set(tokens))
}
|
|
195
|
+
|
|
196
|
+
/**
 * Fraction of the left text's unique tokens that also occur in the right text.
 * Returns 0 when either side has no tokens. The score is asymmetric: it is
 * normalized by the left token count only.
 */
function tokenOverlapScore(left: string, right: string): number {
  const leftTokens = cleanKnowledgeTokens(left)
  const rightTokens = new Set(cleanKnowledgeTokens(right))
  if (leftTokens.length === 0 || rightTokens.size === 0) return 0
  const shared = leftTokens.filter((token) => rightTokens.has(token)).length
  return shared / leftTokens.length
}
|
|
206
|
+
|
|
207
|
+
/**
 * Jaccard index of the two texts' token sets: |intersection| / |union|.
 * Returns 0 when either text yields no tokens.
 */
function jaccardSimilarity(left: string, right: string): number {
  const leftTokens = new Set(cleanKnowledgeTokens(left))
  const rightTokens = new Set(cleanKnowledgeTokens(right))
  if (!leftTokens.size || !rightTokens.size) return 0

  const shared = [...leftTokens].reduce(
    (count, token) => (rightTokens.has(token) ? count + 1 : count),
    0,
  )
  const union = leftTokens.size + rightTokens.size - shared
  return union > 0 ? shared / union : 0
}
|
|
218
|
+
|
|
219
|
+
/**
 * Produces a short human-readable explanation for why a chunk was returned.
 * Preference order: the query terms found in title/section/content (at most
 * four are listed), then the section label, then a generic fallback sentence.
 */
function whyMatched(query: string, title: string, content: string, sectionLabel?: string | null): string {
  const haystack = new Set(cleanKnowledgeTokens(`${title}\n${sectionLabel || ''}\n${content}`))
  const overlapping = cleanKnowledgeTokens(query).filter((token) => haystack.has(token))

  if (overlapping.length > 0) {
    const shown = overlapping.slice(0, 4).join(', ')
    const ellipsis = overlapping.length > 4 ? ', ...' : ''
    return `Matched query terms: ${shown}${ellipsis}`
  }

  const section = sectionLabel?.trim()
  if (section) return `Matched the ${section} section`
  return 'Retrieved as a high-relevance knowledge chunk'
}
|
|
231
|
+
|
|
232
|
+
/**
 * Projects a search hit onto the citation shape. The hit's id becomes chunkId
 * and empty optional strings are normalized to null.
 */
function toCitation(hit: KnowledgeSearchHit): KnowledgeCitation {
  const { sourceId, sourceTitle, sourceKind, chunkIndex, chunkCount, charStart, charEnd, snippet, score } = hit
  return {
    sourceId,
    sourceTitle,
    sourceKind,
    sourceUrl: hit.sourceUrl || null,
    sourceLabel: hit.sourceLabel || null,
    chunkId: hit.id,
    chunkIndex,
    chunkCount,
    charStart,
    charEnd,
    sectionLabel: hit.sectionLabel || null,
    snippet,
    whyMatched: hit.whyMatched || null,
    score,
  }
}
|
|
250
|
+
|
|
251
|
+
/** Loads every stored source, normalizes each record, and orders them newest-updated first. */
function listStoredSources(): KnowledgeSource[] {
  const stored = Object.values(loadKnowledgeSources())
  const sources = stored.map((source) => coerceSource(source))
  sources.sort((a, b) => b.updatedAt - a.updatedAt)
  return sources
}
|
|
256
|
+
|
|
257
|
+
/**
 * Derives a display title from a URL: the last path segment run through
 * deriveKnowledgeTitle, or the hostname when the path has no segment.
 * If anything throws (for example an unparseable URL), the raw input is returned.
 */
function sourceTitleFromUrl(sourceUrl: string): string {
  try {
    const { pathname, hostname } = new URL(sourceUrl)
    const lastSegment = path.basename(pathname || '')
    if (!lastSegment) return hostname
    return deriveKnowledgeTitle(lastSegment)
  } catch {
    return sourceUrl
  }
}
|
|
266
|
+
|
|
267
|
+
/** Returns the URL's hostname, or null when the URL cannot be parsed or has no hostname. */
function sourceLabelFromUrl(sourceUrl: string): string | null {
  try {
    const { hostname } = new URL(sourceUrl)
    return hostname ? hostname : null
  } catch {
    return null
  }
}
|
|
275
|
+
|
|
276
|
+
/**
 * Extracts the text of the first markdown ATX heading (`#` to `######`) found
 * on any line of the given text.
 *
 * The gap between the hashes and the heading text must be spaces or tabs on
 * the same line, so an empty heading such as "#\nbody" does not report the
 * following line as its label. An optional closing hash sequence
 * ("## Title ##") is stripped; hashes that are part of the text ("# C#") are kept.
 *
 * @param text Paragraph text to inspect.
 * @returns The trimmed heading text, or null when no heading line is present.
 */
function headingLabel(text: string): string | null {
  const match = text.match(/^#{1,6}[ \t]+(.+?)(?:[ \t]+#+)?[ \t]*$/m)
  return match?.[1]?.trim() || null
}
|
|
280
|
+
|
|
281
|
+
/**
 * Builds a short single-line preview of the content.
 *
 * Whitespace runs are collapsed to single spaces first. Without a query (or
 * when no query term occurs in the content) the first 180 characters are
 * returned. Otherwise the preview is a window from 80 characters before to
 * 220 characters after the earliest occurrence of any query term, with "…"
 * marking each side that was cut.
 *
 * Query terms are runs of letters, digits and combining marks of length >= 3.
 * Punctuation is treated as a separator, so a query such as "OAuth?" still
 * locates "oauth" in the content.
 *
 * @param content Text to preview; falsy input yields ''.
 * @param query Optional search text used to position the window.
 * @returns The preview string.
 */
function previewSnippet(content: string, query?: string): string {
  const normalized = String(content || '').replace(/\s+/g, ' ').trim()
  if (!normalized) return ''
  if (!query) return normalized.slice(0, 180)

  const queryTokens = Array.from(new Set(
    query
      .toLowerCase()
      .split(/[^\p{L}\p{N}\p{M}]+/u)
      .filter((token) => token.length >= 3),
  ))

  // Search a lowercased copy; the index found is applied to `normalized` below.
  const lower = normalized.toLowerCase()
  let matchIndex = -1
  for (const token of queryTokens) {
    const idx = lower.indexOf(token)
    if (idx !== -1 && (matchIndex === -1 || idx < matchIndex)) {
      matchIndex = idx
    }
  }

  if (matchIndex === -1) return normalized.slice(0, 180)
  const start = Math.max(0, matchIndex - 80)
  const end = Math.min(normalized.length, matchIndex + 220)
  const prefix = start > 0 ? '…' : ''
  const suffix = end < normalized.length ? '…' : ''
  return `${prefix}${normalized.slice(start, end)}${suffix}`
}
|
|
310
|
+
|
|
311
|
+
/**
 * Splits text into paragraphs separated by one or more blank lines.
 *
 * CRLF is converted to LF and the text is trimmed before splitting. Each
 * paragraph carries its trimmed text, its start/end offsets into that
 * normalized text, and the most recent heading label seen so far (a heading
 * inside the paragraph itself counts).
 */
function splitParagraphs(content: string): Array<{
  text: string
  start: number
  end: number
  sectionLabel: string | null
}> {
  const source = content.replace(/\r\n/g, '\n').trim()
  if (!source) return []

  type Paragraph = { text: string; start: number; end: number; sectionLabel: string | null }
  const result: Paragraph[] = []
  let currentSection: string | null = null

  // Records the slice [from, to) as a paragraph, shrinking the offsets past surrounding whitespace.
  const collect = (from: number, to: number): void => {
    const slice = source.slice(from, to)
    const text = slice.trim()
    if (!text) return
    const leading = slice.length - slice.trimStart().length
    const trailing = slice.length - slice.trimEnd().length
    const heading = headingLabel(text)
    if (heading) currentSection = heading
    result.push({
      text,
      start: from + leading,
      end: to - trailing,
      sectionLabel: currentSection,
    })
  }

  const separator = /\n{2,}/g
  let offset = 0
  let found: RegExpExecArray | null
  while ((found = separator.exec(source)) !== null) {
    collect(offset, found.index)
    offset = found.index + found[0].length
  }
  // Whatever follows the last separator is the final paragraph.
  collect(offset, source.length)
  return result
}
|
|
349
|
+
|
|
350
|
+
/**
 * Cuts a single paragraph into overlapping windows of at most
 * CHUNK_TARGET_CHARS characters.
 *
 * A window ends at the last space before the limit when that space lies more
 * than 400 characters into the window; otherwise it is cut at the limit. The
 * next window starts CHUNK_OVERLAP_CHARS before the previous end (always
 * advancing by at least one character). chunkIndex and chunkCount are left
 * at 0 for the caller to fill in.
 */
function splitOversizedParagraph(
  paragraph: { text: string; start: number; end: number; sectionLabel: string | null },
  sourceTitle: string,
): IndexedChunk[] {
  const { text, start: paragraphStart, sectionLabel } = paragraph
  const title = sectionLabel ? `${sourceTitle} · ${sectionLabel}` : sourceTitle
  const pieces: IndexedChunk[] = []
  let offset = 0

  while (offset < text.length) {
    let sliceEnd = Math.min(text.length, offset + CHUNK_TARGET_CHARS)
    if (sliceEnd < text.length) {
      const spaceAt = text.lastIndexOf(' ', sliceEnd)
      if (spaceAt > offset + 400) sliceEnd = spaceAt
    }

    const slice = text.slice(offset, sliceEnd)
    const trimmed = slice.trim()
    if (trimmed) {
      const leading = slice.length - slice.trimStart().length
      const trailing = slice.length - slice.trimEnd().length
      pieces.push({
        title,
        content: trimmed,
        chunkIndex: 0,
        chunkCount: 0,
        charStart: paragraphStart + offset + leading,
        charEnd: paragraphStart + sliceEnd - trailing,
        sectionLabel,
      })
    }

    if (sliceEnd >= text.length) break
    offset = Math.max(offset + 1, sliceEnd - CHUNK_OVERLAP_CHARS)
  }

  return pieces
}
|
|
388
|
+
|
|
389
|
+
/**
 * Turns source text into indexed chunks.
 *
 * Paragraphs are packed greedily into chunks of at most CHUNK_TARGET_CHARS
 * characters, joined by blank lines. A paragraph that alone exceeds the
 * target is delegated to splitOversizedParagraph. After each packed chunk the
 * cursor steps back over trailing paragraphs (never the chunk's first one)
 * until about CHUNK_OVERLAP_CHARS of text will be repeated in the next chunk.
 * chunkIndex and chunkCount are assigned once all chunks are known.
 */
function chunkKnowledgeContent(sourceTitle: string, content: string): IndexedChunk[] {
  const text = content.replace(/\r\n/g, '\n').trim()
  if (!text) return []

  const paragraphs = splitParagraphs(text)
  if (paragraphs.length === 0) return []

  const titleFor = (label: string | null): string => (label ? `${sourceTitle} · ${label}` : sourceTitle)
  const pending: IndexedChunk[] = []
  let position = 0

  while (position < paragraphs.length) {
    const head = paragraphs[position]

    if (head.text.length > CHUNK_TARGET_CHARS) {
      pending.push(...splitOversizedParagraph(head, sourceTitle))
      position += 1
      continue
    }

    let body = head.text
    let bodyEnd = head.end
    let label = head.sectionLabel
    let lookahead = position + 1

    // Absorb following paragraphs while the combined text still fits.
    for (; lookahead < paragraphs.length; lookahead += 1) {
      const following = paragraphs[lookahead]
      if (following.text.length > CHUNK_TARGET_CHARS) break
      const merged = `${body}\n\n${following.text}`
      if (merged.length > CHUNK_TARGET_CHARS) break
      body = merged
      bodyEnd = following.end
      label = label || following.sectionLabel
    }

    pending.push({
      title: titleFor(label),
      content: body,
      chunkIndex: 0,
      chunkCount: 0,
      charStart: head.start,
      charEnd: bodyEnd,
      sectionLabel: label,
    })

    if (lookahead >= paragraphs.length) break

    // Walk backwards from the chunk's tail to pick where the next chunk resumes.
    let overlapChars = 0
    let resumeAt = lookahead
    for (let back = lookahead - 1; back > position; back -= 1) {
      overlapChars += paragraphs[back].text.length
      resumeAt = back
      if (overlapChars >= CHUNK_OVERLAP_CHARS) break
    }
    // Always move forward by at least one paragraph so the loop terminates.
    position = Math.max(position + 1, resumeAt)
  }

  const total = pending.length
  return pending.map((chunk, chunkIndex) => ({
    ...chunk,
    chunkIndex,
    chunkCount: total,
  }))
}
|
|
455
|
+
|
|
456
|
+
/** Returns the entry's metadata as a plain record, or an empty object when it has none. */
function memorySourceMeta(entry: MemoryEntry): Record<string, unknown> {
  const { metadata } = entry
  if (!metadata || typeof metadata !== 'object') return {}
  return metadata as Record<string, unknown>
}
|
|
461
|
+
|
|
462
|
+
/**
 * Extends a source with its derived `stale` flag and a `topSnippet` preview.
 * The preview comes from the source's own content when that is non-blank,
 * otherwise from the first supplied chunk; it is null when neither has text.
 */
function buildSourceSummary(source: KnowledgeSource, chunks?: MemoryEntry[]): KnowledgeSourceSummary {
  const ownContent = typeof source.content === 'string' && source.content.trim() ? source.content : ''
  const previewText = ownContent || chunks?.[0]?.content || ''

  return {
    ...source,
    stale: isStaleSource(source),
    topSnippet: previewText ? previewSnippet(previewText) : null,
  }
}
|
|
474
|
+
|
|
475
|
+
/**
 * Combines a source record and one of its chunk entries into a search hit.
 * Chunk position fields come from the entry's metadata when they are numbers,
 * and otherwise fall back to the first chunk, the source's chunk count, and
 * the full extent of the entry's content.
 */
function buildSearchHit(source: KnowledgeSource, entry: MemoryEntry, score: number, query: string): KnowledgeSearchHit {
  const meta = memorySourceMeta(entry)
  const section = typeof meta.sectionLabel === 'string' ? meta.sectionLabel : null
  const hitTitle = entry.title || source.title

  return {
    id: entry.id,
    sourceId: source.id,
    sourceTitle: source.title,
    sourceKind: source.kind,
    sourceUrl: source.sourceUrl || null,
    sourceLabel: source.sourceLabel || null,
    scope: source.scope,
    agentIds: source.agentIds,
    tags: source.tags,
    syncStatus: source.syncStatus,
    stale: isStaleSource(source),
    title: hitTitle,
    snippet: previewSnippet(entry.content, query),
    content: entry.content,
    chunkIndex: typeof meta.chunkIndex === 'number' ? meta.chunkIndex : 0,
    chunkCount: typeof meta.chunkCount === 'number' ? meta.chunkCount : source.chunkCount,
    charStart: typeof meta.charStart === 'number' ? meta.charStart : 0,
    charEnd: typeof meta.charEnd === 'number' ? meta.charEnd : entry.content.length,
    sectionLabel: section,
    score,
    whyMatched: whyMatched(query, hitTitle, entry.content, section),
    createdAt: entry.createdAt,
    updatedAt: entry.updatedAt,
  }
}
|
|
503
|
+
|
|
504
|
+
/**
 * Determines the text to index for a source, plus the title and label to store.
 *
 * Resolution order:
 * 1. A non-blank overrideContent is used as-is.
 * 2. Manual sources use their stored content (required).
 * 3. File sources are re-extracted from sourcePath when present, otherwise
 *    their stored content is used.
 * 4. URL sources are fetched from sourceUrl when present, otherwise their
 *    stored content is used.
 *
 * @throws Error when the source has neither a location nor stored content
 *   for its kind. Errors from the extraction helpers propagate unchanged.
 */
async function resolveSourceContent(
  source: KnowledgeSource,
  overrideContent?: string | null,
): Promise<{ content: string; title: string; sourceLabel?: string | null }> {
  // Result shape shared by every branch that does not re-extract anything.
  const fromText = (content: string) => ({
    content,
    title: source.title,
    sourceLabel: source.sourceLabel || null,
  })

  if (typeof overrideContent === 'string' && overrideContent.trim()) {
    return fromText(overrideContent)
  }

  if (source.kind === 'manual') {
    if (!source.content?.trim()) throw new Error('Content is required for manual knowledge.')
    return fromText(source.content)
  }

  if (source.kind === 'file') {
    const filePath = source.sourcePath
    if (filePath) {
      const content = await extractKnowledgeTextFromFile(filePath, source.sourceLabel || source.title)
      return {
        content,
        title: source.title,
        sourceLabel: source.sourceLabel || path.basename(filePath),
      }
    }
    if (source.content?.trim()) return fromText(source.content)
    throw new Error('A file path or extracted content is required for file knowledge.')
  }

  const url = source.sourceUrl
  if (!url) {
    if (source.content?.trim()) return fromText(source.content)
    throw new Error('A URL is required for URL knowledge.')
  }

  const extracted = await extractKnowledgeTextFromUrl(url)
  return {
    content: extracted.content,
    title: source.title || extracted.title || sourceTitleFromUrl(url),
    sourceLabel: source.sourceLabel || extracted.title || sourceLabelFromUrl(url),
  }
}
|
|
562
|
+
|
|
563
|
+
/** Returns the agent id list for agent-scoped sources that have at least one agent; otherwise undefined. */
function sharedWithForSource(source: KnowledgeSource): string[] | undefined {
  if (source.scope !== 'agent') return undefined
  return source.agentIds.length > 0 ? source.agentIds : undefined
}
|
|
566
|
+
|
|
567
|
+
/**
 * Builds the metadata record stored with a chunk: a copy of the source's
 * identifying fields, the chunk's position fields, and the current time as indexedAt.
 */
function toChunkMetadata(source: KnowledgeSource, chunk: IndexedChunk): Record<string, unknown> {
  const sourceFields = {
    sourceId: source.id,
    sourceTitle: source.title,
    sourceKind: source.kind,
    sourceUrl: source.sourceUrl || null,
    sourceLabel: source.sourceLabel || null,
    tags: source.tags,
    scope: source.scope,
    agentIds: source.agentIds,
  }
  const chunkFields = {
    chunkIndex: chunk.chunkIndex,
    chunkCount: chunk.chunkCount,
    charStart: chunk.charStart,
    charEnd: chunk.charEnd,
    sectionLabel: chunk.sectionLabel || null,
  }
  return { ...sourceFields, ...chunkFields, indexedAt: Date.now() }
}
|
|
585
|
+
|
|
586
|
+
/**
 * Replaces the stored chunks of a source: every chunk currently listed for
 * the source id is deleted, then each new chunk is added under the
 * 'knowledge' category. Returns the entries created by the memory DB, in
 * chunk order.
 */
function replaceSourceChunks(source: KnowledgeSource, chunks: IndexedChunk[]): MemoryEntry[] {
  const db = getMemoryDb()
  for (const { id } of db.listKnowledgeSourceChunks(source.id)) {
    db.delete(id)
  }

  const stored: MemoryEntry[] = []
  for (const chunk of chunks) {
    stored.push(db.add({
      agentId: null,
      sessionId: null,
      category: 'knowledge',
      title: chunk.title,
      content: chunk.content,
      metadata: toChunkMetadata(source, chunk),
      sharedWith: sharedWithForSource(source),
    }))
  }
  return stored
}
|
|
602
|
+
|
|
603
|
+
/**
 * One-time migration of 'knowledge' memory entries that are not yet linked to
 * a knowledge source.
 *
 * For every such entry a manual source record is created that reuses the
 * entry's id, and the entry's metadata is updated to point at it. Concurrent
 * callers share the in-flight promise. `backfillComplete` is set only after
 * the loop finishes, so a failed run is retried on the next call.
 */
async function ensureLegacyKnowledgeBackfill(): Promise<void> {
  if (backfillComplete) return
  if (backfillPromise) return backfillPromise

  const stringOrNull = (value: unknown): string | null => (typeof value === 'string' ? value : null)
  const numberOr = (value: unknown, fallback: number): number => (typeof value === 'number' ? value : fallback)

  const migrateLegacyEntries = async (): Promise<void> => {
    const db = getMemoryDb()
    const candidates = db.listByCategory('knowledge', undefined, MAX_KNOWLEDGE_SCAN)

    for (const entry of candidates) {
      const metadata = memorySourceMeta(entry)
      // Entries that already reference a source were created by the new pipeline.
      const linkedSourceId = typeof metadata.sourceId === 'string' ? metadata.sourceId.trim() : ''
      if (linkedSourceId) continue

      const sourceId = entry.id
      const source = coerceSource({
        id: sourceId,
        kind: 'manual',
        title: entry.title || 'Knowledge Source',
        content: entry.content,
        sourceLabel: stringOrNull(metadata.source),
        sourceUrl: stringOrNull(metadata.sourceUrl),
        sourcePath: stringOrNull(metadata.sourcePath),
        sourceHash: contentHash(entry.content || ''),
        scope: normalizeScope(metadata.scope),
        agentIds: normalizeAgentIds(metadata.agentIds),
        tags: normalizeTags(metadata.tags),
        syncStatus: 'ready',
        lastIndexedAt: entry.updatedAt,
        lastSyncedAt: entry.updatedAt,
        chunkCount: 1,
        contentLength: entry.content.length,
        createdAt: entry.createdAt,
        updatedAt: entry.updatedAt,
        metadata: {
          legacyMemoryId: entry.id,
          migratedAt: Date.now(),
        },
      })

      upsertKnowledgeSource(sourceId, source)
      db.update(entry.id, {
        sharedWith: sharedWithForSource(source),
        metadata: {
          ...metadata,
          sourceId,
          sourceTitle: source.title,
          sourceKind: source.kind,
          sourceLabel: source.sourceLabel,
          sourceUrl: source.sourceUrl,
          tags: source.tags,
          scope: source.scope,
          agentIds: source.agentIds,
          chunkIndex: numberOr(metadata.chunkIndex, 0),
          chunkCount: numberOr(metadata.chunkCount, 1),
          charStart: numberOr(metadata.charStart, 0),
          charEnd: numberOr(metadata.charEnd, entry.content.length),
          sectionLabel: stringOrNull(metadata.sectionLabel),
          indexedAt: numberOr(metadata.indexedAt, entry.updatedAt),
        },
      })
    }
    backfillComplete = true
  }

  backfillPromise = migrateLegacyEntries().finally(() => {
    backfillPromise = null
  })

  return backfillPromise
}
|
|
672
|
+
|
|
673
|
+
/**
 * Lists stored knowledge sources as summaries.
 *
 * Runs the legacy backfill first and makes sure the idle maintenance callback
 * is registered before reading the store.
 *
 * @param options.tags - Optional tag filter, normalized via `normalizeTags`.
 * @param options.limit - Maximum number of summaries; clamped to 1..500, default 200.
 * @param options.includeArchived - Only a literal `true` includes sources that
 *   `sourceIsExcludedByDefault` would otherwise hide.
 * @returns Summaries for the first `limit` sources that pass both filters.
 */
export async function listKnowledgeSourceSummaries(options?: {
  tags?: string[]
  limit?: number
  includeArchived?: boolean
}): Promise<KnowledgeSourceSummary[]> {
  await ensureLegacyKnowledgeBackfill()
  registerKnowledgeMaintenanceIdleCallback()

  const wantedTags = normalizeTags(options?.tags)
  const requested = Math.trunc(options?.limit || 200)
  const cap = Math.max(1, Math.min(500, requested))
  const showArchived = options?.includeArchived === true

  // Single predicate: the tag check only runs for sources that pass the archive check.
  const visible = listStoredSources().filter((candidate) => {
    if (!showArchived && sourceIsExcludedByDefault(candidate)) return false
    return matchesTagFilter(candidate.tags, wantedTags)
  })

  // Call buildSourceSummary with exactly one argument (it also accepts chunks).
  return visible.slice(0, cap).map((candidate) => buildSourceSummary(candidate))
}
|
|
691
|
+
|
|
692
|
+
/**
 * Searches memory entries in the `knowledge` category and maps each match back
 * to its stored knowledge source.
 *
 * @param options.query - Search text; a query that normalizes to empty yields `[]`.
 * @param options.tags - Optional tag filter applied to the owning source.
 * @param options.limit - Maximum hits; clamped to 1..500, default 50.
 * @param options.includeArchived - Only a literal `true` keeps sources that are
 *   excluded by default.
 * @param options.viewerAgentId - When a non-blank string, hits are restricted to
 *   sources that `sourceVisibleToAgent` accepts for that agent.
 * @returns Hits in match order; each score decreases with the number of hits
 *   already accepted, relative to the total number of knowledge matches.
 */
export async function searchKnowledgeHits(options: {
  query: string
  tags?: string[]
  limit?: number
  includeArchived?: boolean
  viewerAgentId?: string | null
}): Promise<KnowledgeSearchHit[]> {
  await ensureLegacyKnowledgeBackfill()
  registerKnowledgeMaintenanceIdleCallback()

  const query = normalizeText(options.query)
  if (!query) return []

  const wantedTags = normalizeTags(options.tags)
  const cap = Math.max(1, Math.min(500, Math.trunc(options.limit || 50)))
  const showArchived = options.includeArchived === true
  const viewer = typeof options.viewerAgentId === 'string' ? options.viewerAgentId.trim() : ''

  const sourceIndex = new Map(listStoredSources().map((source) => [source.id, source] as const))
  const knowledgeMatches = getMemoryDb()
    .search(query)
    .filter((entry) => entry.category === 'knowledge')
  const totalMatches = Math.max(knowledgeMatches.length, 1)

  const hits: KnowledgeSearchHit[] = []
  for (const candidate of knowledgeMatches) {
    const meta = memorySourceMeta(candidate)
    const owner = sourceIndex.get(typeof meta.sourceId === 'string' ? meta.sourceId : '')
    if (!owner) continue

    // Checks run in the same order as separate guards would: archive, visibility, tags.
    if (
      (!showArchived && sourceIsExcludedByDefault(owner))
      || (viewer && !sourceVisibleToAgent(owner, viewer))
      || !matchesTagFilter(owner.tags, wantedTags)
    ) {
      continue
    }

    const rankScore = Math.max(0, 1 - hits.length / totalMatches)
    hits.push(buildSearchHit(owner, candidate, rankScore, query))
    if (hits.length >= cap) break
  }

  return hits
}
|
|
727
|
+
|
|
728
|
+
/**
 * Loads one knowledge source together with its indexed chunks.
 *
 * @param id - Knowledge source id.
 * @returns The source summary and its chunks, or `null` when no source with
 *   that id is stored.
 */
export async function getKnowledgeSourceDetail(id: string): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()

  const stored = loadKnowledgeSource(id)
  if (!stored) {
    return null
  }

  const coerced = coerceSource(stored)
  const chunks = getMemoryDb().listKnowledgeSourceChunks(id)
  const summary = buildSourceSummary(coerced, chunks)
  return { source: summary, chunks }
}
|
|
739
|
+
|
|
740
|
+
/**
 * Runs a knowledge search and packages the hits as a retrieval trace.
 *
 * @param options.query - Search text.
 * @param options.viewerAgentId - Optional agent id used to restrict visibility.
 * @param options.limit - Requested hit count; clamped to 1..MAX_GROUNDING_HITS,
 *   defaulting to MAX_GROUNDING_HITS.
 * @returns A trace with `selectorStatus: 'not_run'`, or `null` when the search
 *   produced no hits.
 */
export async function buildKnowledgeRetrievalTrace(options: {
  query: string
  viewerAgentId?: string | null
  limit?: number
}): Promise<KnowledgeRetrievalTrace | null> {
  const requested = Math.trunc(options.limit || MAX_GROUNDING_HITS)
  const cappedLimit = Math.max(1, Math.min(MAX_GROUNDING_HITS, requested))

  const hits = await searchKnowledgeHits({
    query: options.query,
    limit: cappedLimit,
    viewerAgentId: options.viewerAgentId || null,
  })
  if (hits.length === 0) {
    return null
  }

  const trace: KnowledgeRetrievalTrace = {
    query: normalizeText(options.query),
    scope: 'source_knowledge',
    hits: hits.map(toCitation),
    retrievedAt: Date.now(),
    selectorStatus: 'not_run',
  }
  return trace
}
|
|
759
|
+
|
|
760
|
+
/**
 * Picks which retrieved hits to cite for a response.
 *
 * Each hit is scored by `tokenOverlapScore` between the response text and the
 * hit's title, section label and snippet. Hits are ordered by overlap, then by
 * retrieval score. A hit is kept when its overlap is at least 0.08, or when it
 * is ranked first and its retrieval score is at least 0.7.
 *
 * @param params.responseText - Response text to compare against the hits.
 * @param params.retrievalTrace - Trace produced at retrieval time, if any.
 * @param params.limit - Maximum citations; clamped to 1..4, default 3.
 * @returns The chosen citations plus the trace. The trace is returned unchanged
 *   when it has no hits, otherwise with `selectorStatus` set to `'selected'` or
 *   `'no_match'`.
 */
export function selectKnowledgeCitations(params: {
  responseText: string
  retrievalTrace?: KnowledgeRetrievalTrace | null
  limit?: number
}): { citations: KnowledgeCitation[]; retrievalTrace: KnowledgeRetrievalTrace | null } {
  const trace = params.retrievalTrace
  if (!trace || !Array.isArray(trace.hits) || trace.hits.length === 0) {
    return { citations: [], retrievalTrace: trace || null }
  }

  const responseText = normalizeText(params.responseText)
  if (!responseText) {
    return { citations: [], retrievalTrace: { ...trace, selectorStatus: 'no_match' } }
  }

  // Score every hit against the response before ordering.
  const scored = trace.hits.map((hit) => {
    const haystack = `${hit.sourceTitle}\n${hit.sectionLabel || ''}\n${hit.snippet}`
    return { hit, overlap: tokenOverlapScore(responseText, haystack) }
  })
  scored.sort((a, b) => {
    const byOverlap = b.overlap - a.overlap
    if (byOverlap === 0) return b.hit.score - a.hit.score
    return byOverlap
  })

  const maxCitations = Math.max(1, Math.min(4, Math.trunc(params.limit || 3)))
  const citations = scored
    .filter((candidate, rank) => {
      if (candidate.overlap >= 0.08) return true
      // The top-ranked hit is kept on retrieval score alone.
      return rank === 0 && candidate.hit.score >= 0.7
    })
    .slice(0, maxCitations)
    .map((candidate) => candidate.hit)

  return {
    citations,
    retrievalTrace: {
      ...trace,
      selectorStatus: citations.length > 0 ? 'selected' : 'no_match',
    },
  }
}
|
|
803
|
+
|
|
804
|
+
/**
 * Resolves a source's content, re-chunks it, and persists the resulting state.
 *
 * The source is first stored with `syncStatus: 'syncing'`. Chunks are rewritten
 * when none are indexed yet, when the title or source label changed (or
 * `forceRewrite` is set), or when the content hash differs. Otherwise only the
 * sync bookkeeping fields are refreshed and the existing chunks are returned.
 *
 * @param source - Source record to sync.
 * @param options.overrideContent - Passed through to `resolveSourceContent`.
 * @param options.forceRewrite - When `true`, always rewrite the indexed chunks.
 * @returns The updated source summary and its chunks.
 * @throws Rethrows any failure after storing the source with
 *   `syncStatus: 'error'` and the error message in `lastError`. Also throws
 *   when chunking yields no chunks.
 */
async function syncSourceRecord(
  source: KnowledgeSource,
  options?: { overrideContent?: string | null; forceRewrite?: boolean },
): Promise<KnowledgeSourceDetail> {
  const pending = coerceSource({
    ...source,
    syncStatus: 'syncing',
    lastError: null,
    updatedAt: Date.now(),
  })
  upsertKnowledgeSource(pending.id, pending)

  try {
    const resolved = await resolveSourceContent(pending, options?.overrideContent)
    const freshChunks = chunkKnowledgeContent(resolved.title, resolved.content)
    if (freshChunks.length === 0) {
      throw new Error('No readable content was extracted for this source.')
    }

    const nextHash = contentHash(resolved.content)
    const labelChanged = (pending.sourceLabel || null) !== (resolved.sourceLabel || null)
    const metadataChanged = options?.forceRewrite === true
      || pending.title !== resolved.title
      || labelChanged

    const existingChunks = getMemoryDb().listKnowledgeSourceChunks(pending.id)
    const indexIsCurrent = existingChunks.length > 0
      && !metadataChanged
      && pending.sourceHash === nextHash

    if (indexIsCurrent) {
      // Nothing to re-index: refresh the sync bookkeeping and keep the stored chunks.
      const refreshed = coerceSource({
        ...pending,
        content: resolved.content,
        sourceHash: nextHash,
        syncStatus: 'ready',
        lastError: null,
        lastSyncedAt: Date.now(),
        nextSyncAt: Date.now() + KNOWLEDGE_STALE_AFTER_MS,
        updatedAt: Date.now(),
      })
      upsertKnowledgeSource(refreshed.id, refreshed)
      return {
        source: buildSourceSummary(refreshed, existingChunks),
        chunks: existingChunks,
      }
    }

    const rewritten = coerceSource({
      ...pending,
      title: resolved.title,
      content: resolved.content,
      sourceLabel: resolved.sourceLabel ?? pending.sourceLabel ?? null,
      sourceHash: nextHash,
      chunkCount: freshChunks.length,
      contentLength: resolved.content.length,
      syncStatus: 'ready',
      lastError: null,
      lastIndexedAt: Date.now(),
      lastSyncedAt: Date.now(),
      nextSyncAt: Date.now() + KNOWLEDGE_STALE_AFTER_MS,
      updatedAt: Date.now(),
    })
    upsertKnowledgeSource(rewritten.id, rewritten)
    const storedChunks = replaceSourceChunks(rewritten, freshChunks)
    return {
      source: buildSourceSummary(rewritten, storedChunks),
      chunks: storedChunks,
    }
  } catch (error) {
    // Record the failure on the source, then let the caller see the original error.
    const failed = coerceSource({
      ...pending,
      syncStatus: 'error',
      lastError: error instanceof Error ? error.message : 'Knowledge sync failed',
      updatedAt: Date.now(),
    })
    upsertKnowledgeSource(failed.id, failed)
    throw error
  }
}
|
|
880
|
+
|
|
881
|
+
/**
 * Creates a knowledge source record and immediately syncs it.
 *
 * The title falls back, in order, to: the normalized input title, a title
 * derived from the file's base name (file sources with a path), a title derived
 * from the URL (url sources with a URL), and finally `'Knowledge Source'`.
 *
 * @param input - Fields for the new source.
 * @returns The synced source detail.
 * @throws Propagates any error raised by the initial sync.
 */
export async function createKnowledgeSource(input: KnowledgeSourceInput): Promise<KnowledgeSourceDetail> {
  await ensureLegacyKnowledgeBackfill()

  const now = Date.now()
  const kind = normalizeKind(input.kind)

  let title = normalizeText(input.title)
  if (!title && kind === 'file' && input.sourcePath) {
    title = deriveKnowledgeTitle(path.basename(input.sourcePath))
  }
  if (!title && kind === 'url' && input.sourceUrl) {
    title = sourceTitleFromUrl(input.sourceUrl)
  }
  if (!title) {
    title = 'Knowledge Source'
  }

  const initialContent = typeof input.content === 'string' ? input.content : null
  const source: KnowledgeSource = coerceSource({
    id: genId(8),
    kind,
    title,
    content: initialContent,
    sourceLabel: normalizeOptionalText(input.sourceLabel),
    sourceUrl: normalizeOptionalText(input.sourceUrl),
    sourcePath: normalizeOptionalText(input.sourcePath),
    sourceHash: null,
    scope: normalizeScope(input.scope),
    agentIds: normalizeAgentIds(input.agentIds),
    tags: normalizeTags(input.tags),
    syncStatus: 'syncing',
    lastIndexedAt: null,
    lastSyncedAt: null,
    lastError: null,
    chunkCount: 0,
    contentLength: 0,
    createdAt: now,
    updatedAt: now,
    metadata: input.metadata,
  })

  // Store the placeholder first so the record exists while the sync runs.
  upsertKnowledgeSource(source.id, source)
  const syncOptions = { overrideContent: input.content, forceRewrite: true }
  return syncSourceRecord(source, syncOptions)
}
|
|
917
|
+
|
|
918
|
+
/**
 * Applies field updates to an existing knowledge source and re-syncs it with a
 * forced chunk rewrite.
 *
 * `sourceLabel`, `sourceUrl` and `sourcePath` are replaced whenever the input
 * property is not `undefined` (so `null` clears them). Other fields fall back
 * to the stored value when the input omits them. `metadata` is shallow-merged
 * over the stored metadata.
 *
 * @param id - Id of the source to update.
 * @param input - Fields to change.
 * @returns The synced source detail, or `null` when the source does not exist.
 * @throws Propagates any error raised by the sync.
 */
export async function updateKnowledgeSource(
  id: string,
  input: KnowledgeSourceInput,
): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()

  const stored = loadKnowledgeSource(id)
  if (!stored) {
    return null
  }

  const current = coerceSource(stored)
  const changes = {
    kind: normalizeKind(input.kind ?? current.kind),
    title: normalizeText(input.title) || current.title,
    content: typeof input.content === 'string' ? input.content : current.content,
    sourceLabel: input.sourceLabel !== undefined ? normalizeOptionalText(input.sourceLabel) : current.sourceLabel,
    sourceUrl: input.sourceUrl !== undefined ? normalizeOptionalText(input.sourceUrl) : current.sourceUrl,
    sourcePath: input.sourcePath !== undefined ? normalizeOptionalText(input.sourcePath) : current.sourcePath,
    scope: normalizeScope(input.scope ?? current.scope),
    agentIds: normalizeAgentIds(input.agentIds ?? current.agentIds),
    tags: normalizeTags(input.tags ?? current.tags),
    metadata: input.metadata ? { ...(current.metadata || {}), ...input.metadata } : current.metadata,
    updatedAt: Date.now(),
  }
  const next: KnowledgeSource = coerceSource({ ...current, ...changes })

  upsertKnowledgeSource(next.id, next)
  return syncSourceRecord(next, { overrideContent: input.content, forceRewrite: true })
}
|
|
945
|
+
|
|
946
|
+
/**
 * Re-syncs a stored knowledge source by id.
 *
 * @param id - Knowledge source id.
 * @returns The synced source detail, or `null` when the source does not exist.
 * @throws Propagates any error raised by the sync.
 */
export async function syncKnowledgeSource(id: string): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()
  const stored = loadKnowledgeSource(id)
  return stored ? syncSourceRecord(coerceSource(stored)) : null
}
|
|
952
|
+
|
|
953
|
+
/**
 * Deletes a knowledge source and every memory chunk indexed for it.
 *
 * @param id - Knowledge source id.
 * @returns `true` when the source existed and was removed, `false` otherwise.
 */
export async function deleteKnowledgeSource(id: string): Promise<boolean> {
  await ensureLegacyKnowledgeBackfill()

  if (!loadKnowledgeSource(id)) {
    return false
  }

  // Remove the indexed chunks before dropping the source record itself.
  getMemoryDb().listKnowledgeSourceChunks(id).forEach((chunk) => {
    getMemoryDb().delete(chunk.id)
  })
  deleteKnowledgeSourceRecord(id)
  return true
}
|
|
964
|
+
|
|
965
|
+
/**
 * Prepends an action to the module-level maintenance history, keeping at most
 * the 48 most recent entries.
 *
 * @param action - Action to record.
 */
function recordMaintenanceAction(action: KnowledgeHygieneAction): void {
  // Keep 47 older entries so the new one makes 48.
  const retained = maintenanceHistory.slice(0, 47)
  maintenanceHistory = [action, ...retained]
}
|
|
968
|
+
|
|
969
|
+
/**
 * Applies an updater to a stored source through `patchKnowledgeSource`,
 * coercing the record before and after the update.
 *
 * @param id - Knowledge source id.
 * @param updater - Receives the coerced current source and returns the next one.
 * @returns The coerced updated source, or `null` when the patch produced nothing
 *   (for example because the source does not exist).
 */
function upsertSourceLifecycle(id: string, updater: (source: KnowledgeSource) => KnowledgeSource): KnowledgeSource | null {
  const patched = patchKnowledgeSource(id, (current) => (
    current ? coerceSource(updater(coerceSource(current))) : null
  ))
  if (!patched) {
    return null
  }
  return coerceSource(patched)
}
|
|
976
|
+
|
|
977
|
+
/**
 * Marks a knowledge source as archived and records the action in the
 * maintenance history.
 *
 * Values already on the source are kept when the input does not provide a
 * replacement: an existing `archivedAt` is preserved, and the reason and related
 * ids fall back to the stored ones (the reason finally defaults to `'archived'`).
 *
 * @param id - Knowledge source id.
 * @param input.reason - Optional archive reason; also stored as maintenance notes.
 * @param input.duplicateOfSourceId - Optional id of the source this one duplicates.
 * @param input.supersededBySourceId - Optional id of the source that replaces this one.
 * @returns The updated source detail, or `null` when the source does not exist.
 */
export async function archiveKnowledgeSource(
  id: string,
  input?: { reason?: string | null; duplicateOfSourceId?: string | null; supersededBySourceId?: string | null },
): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()

  const archived = upsertSourceLifecycle(id, (source) => {
    const reason = normalizeOptionalText(input?.reason)
    const duplicateOf = normalizeOptionalText(input?.duplicateOfSourceId)
    const supersededBy = normalizeOptionalText(input?.supersededBySourceId)
    return {
      ...source,
      archivedAt: source.archivedAt || Date.now(),
      archivedReason: reason || source.archivedReason || 'archived',
      duplicateOfSourceId: duplicateOf || source.duplicateOfSourceId || null,
      supersededBySourceId: supersededBy || source.supersededBySourceId || null,
      maintenanceUpdatedAt: Date.now(),
      maintenanceNotes: reason || source.maintenanceNotes || null,
      updatedAt: Date.now(),
    }
  })
  if (!archived) {
    return null
  }

  const relatedSourceId = archived.duplicateOfSourceId || archived.supersededBySourceId || null
  recordMaintenanceAction({
    kind: 'archive',
    sourceId: archived.id,
    relatedSourceId,
    summary: `Archived ${archived.title}`,
    createdAt: Date.now(),
  })
  return getKnowledgeSourceDetail(archived.id)
}
|
|
1002
|
+
|
|
1003
|
+
/**
 * Clears the archive, duplicate and supersede markers on a knowledge source and
 * records the action in the maintenance history.
 *
 * @param id - Knowledge source id.
 * @returns The updated source detail, or `null` when the source does not exist.
 */
export async function restoreKnowledgeSource(id: string): Promise<KnowledgeSourceDetail | null> {
  await ensureLegacyKnowledgeBackfill()

  const restored = upsertSourceLifecycle(id, (source) => {
    const cleared: KnowledgeSource = {
      ...source,
      archivedAt: null,
      archivedReason: null,
      duplicateOfSourceId: null,
      supersededBySourceId: null,
      maintenanceUpdatedAt: Date.now(),
      maintenanceNotes: 'restored',
      updatedAt: Date.now(),
    }
    return cleared
  })
  if (!restored) {
    return null
  }

  recordMaintenanceAction({
    kind: 'restore',
    sourceId: restored.id,
    summary: `Restored ${restored.title}`,
    createdAt: Date.now(),
  })
  return getKnowledgeSourceDetail(restored.id)
}
|
|
1024
|
+
|
|
1025
|
+
/**
 * Marks a knowledge source as superseded by another source and archives it.
 *
 * An existing `archivedAt` / `archivedReason` on the source is preserved;
 * otherwise the source is archived now with reason `'superseded'`. The action
 * is recorded in the maintenance history.
 *
 * @param id - Id of the source being superseded.
 * @param supersededBySourceId - Id of the source that replaces it.
 * @returns The updated source detail, or `null` when `id` does not exist.
 * @throws Error when `supersededBySourceId` equals `id`, or when the
 *   superseding source does not exist.
 */
export async function supersedeKnowledgeSource(
  id: string,
  supersededBySourceId: string,
): Promise<KnowledgeSourceDetail | null> {
  // A self-reference would archive the source while pointing at itself as its
  // own replacement, leaving no live source behind.
  if (id === supersededBySourceId) {
    throw new Error('A knowledge source cannot supersede itself.')
  }

  await ensureLegacyKnowledgeBackfill()
  const target = loadKnowledgeSource(supersededBySourceId)
  if (!target) throw new Error('Superseding source not found.')

  const updated = upsertSourceLifecycle(id, (source) => ({
    ...source,
    supersededBySourceId,
    archivedAt: source.archivedAt || Date.now(),
    archivedReason: source.archivedReason || 'superseded',
    maintenanceUpdatedAt: Date.now(),
    maintenanceNotes: `Superseded by ${supersededBySourceId}`,
    updatedAt: Date.now(),
  }))
  if (!updated) return null

  recordMaintenanceAction({
    kind: 'supersede',
    sourceId: updated.id,
    relatedSourceId: supersededBySourceId,
    summary: `Marked ${updated.title} as superseded`,
    createdAt: Date.now(),
  })
  return getKnowledgeSourceDetail(updated.id)
}
|
|
1051
|
+
|
|
1052
|
+
/**
 * Tells whether two distinct sources point at the same origin.
 *
 * URLs are compared when both sources have one; otherwise paths are compared
 * when both sources have one. A source is never considered to share an origin
 * with itself (same `id`).
 *
 * @param left - First source.
 * @param right - Second source.
 * @returns `true` only for different sources with an identical URL or path.
 */
function sameSourceOrigin(left: KnowledgeSource, right: KnowledgeSource): boolean {
  const sameRecord = left.id === right.id
  if (sameRecord) {
    return false
  }

  const bothHaveUrl = Boolean(left.sourceUrl && right.sourceUrl)
  if (bothHaveUrl) {
    // When both have URLs the URL comparison is final; paths are not consulted.
    return left.sourceUrl === right.sourceUrl
  }

  const bothHavePath = Boolean(left.sourcePath && right.sourcePath)
  return bothHavePath && left.sourcePath === right.sourcePath
}
|
|
1058
|
+
|
|
1059
|
+
/**
 * Chooses the source that should represent a group of related sources.
 *
 * Preference order: sources not excluded by default come first, then the most
 * recently indexed, then the earliest created. The input array is not mutated.
 *
 * @param group - Candidate sources; callers in this file pass at least two.
 * @returns The preferred source of the group.
 */
function canonicalSourceForGroup(group: KnowledgeSource[]): KnowledgeSource {
  const preferFirst = (a: KnowledgeSource, b: KnowledgeSource): number => {
    const aExcluded = Number(sourceIsExcludedByDefault(a))
    const bExcluded = Number(sourceIsExcludedByDefault(b))
    if (aExcluded !== bExcluded) return aExcluded - bExcluded

    const aIndexed = a.lastIndexedAt || 0
    const bIndexed = b.lastIndexedAt || 0
    const recency = bIndexed - aIndexed
    if (recency !== 0) return recency

    return a.createdAt - b.createdAt
  }

  // Sort a copy so the caller's array keeps its order.
  const ranked = group.slice().sort(preferFirst)
  return ranked[0]
}
|
|
1068
|
+
|
|
1069
|
+
/**
 * Scans the given sources and reports hygiene findings.
 *
 * Findings are produced in this order: per-source state (archived, superseded,
 * then broken or stale), exact duplicates by content hash, and finally pairwise
 * content overlap among sources that are not excluded by default. At most
 * MAX_HYGIENE_FINDINGS findings are kept, and `counts` is derived from the kept
 * findings only.
 *
 * The overlap scan compares every pair of active sources, so it stops as soon
 * as the findings list is full; pairs after that point could not add anything.
 * NOTE(review): this assumes `jaccardSimilarity` has no side effects — confirm.
 *
 * @param sources - Sources to inspect.
 * @returns Scan time, per-kind counts, findings, and a copy of the recent
 *   maintenance history.
 */
function buildHygieneSummary(sources: KnowledgeSource[]): KnowledgeHygieneSummary {
  const scannedAt = Date.now()
  const findings: KnowledgeHygieneFinding[] = []
  const hasRoom = (): boolean => findings.length < MAX_HYGIENE_FINDINGS
  const pushFinding = (finding: KnowledgeHygieneFinding): void => {
    if (hasRoom()) findings.push(finding)
  }

  // Group sources by content hash; sources without a hash cannot be duplicates.
  const duplicateGroups = new Map<string, KnowledgeSource[]>()
  for (const source of sources) {
    if (!source.sourceHash) continue
    const group = duplicateGroups.get(source.sourceHash) || []
    group.push(source)
    duplicateGroups.set(source.sourceHash, group)
  }

  for (const source of sources) {
    if (sourceIsArchived(source)) {
      pushFinding({
        kind: 'archived',
        sourceId: source.id,
        sourceTitle: source.title,
        detail: source.archivedReason || 'Archived source',
        createdAt: source.archivedAt || source.updatedAt,
      })
    }
    if (sourceIsSuperseded(source)) {
      pushFinding({
        kind: 'superseded',
        sourceId: source.id,
        sourceTitle: source.title,
        relatedSourceId: source.supersededBySourceId || null,
        detail: `Superseded by ${source.supersededBySourceId}`,
        createdAt: source.updatedAt,
      })
    }
    // A failed sync takes precedence over staleness for the same source.
    if (source.syncStatus === 'error') {
      pushFinding({
        kind: 'broken',
        sourceId: source.id,
        sourceTitle: source.title,
        detail: source.lastError || 'Last sync failed',
        createdAt: source.updatedAt,
      })
    } else if (isStaleSource(source)) {
      pushFinding({
        kind: 'stale',
        sourceId: source.id,
        sourceTitle: source.title,
        detail: 'Source is due for re-sync',
        createdAt: source.updatedAt,
      })
    }
  }

  for (const group of duplicateGroups.values()) {
    if (group.length < 2) continue
    const canonical = canonicalSourceForGroup(group)
    for (const source of group) {
      if (source.id === canonical.id) continue
      pushFinding({
        kind: 'duplicate',
        sourceId: source.id,
        sourceTitle: source.title,
        relatedSourceId: canonical.id,
        relatedSourceTitle: canonical.title,
        detail: 'Exact duplicate content hash',
        createdAt: source.updatedAt,
      })
    }
  }

  if (hasRoom()) {
    // Build each comparison body once instead of once per pair, and drop
    // blank bodies up front; pair order among the remaining sources is unchanged.
    const comparable = sources
      .filter((source) => !sourceIsExcludedByDefault(source))
      .map((source) => ({ source, body: `${source.title}\n${source.content || ''}` }))
      .filter((entry) => entry.body.trim() !== '')

    overlapScan:
    for (let index = 0; index < comparable.length; index += 1) {
      const left = comparable[index]
      for (let compareIndex = index + 1; compareIndex < comparable.length; compareIndex += 1) {
        // Once the list is full every further push is a no-op, so stop comparing.
        if (!hasRoom()) break overlapScan
        const right = comparable[compareIndex]
        if (sameSourceOrigin(left.source, right.source)) continue
        const overlap = jaccardSimilarity(left.body, right.body)
        if (overlap < 0.6) continue
        pushFinding({
          kind: 'overlap',
          sourceId: left.source.id,
          sourceTitle: left.source.title,
          relatedSourceId: right.source.id,
          relatedSourceTitle: right.source.title,
          detail: `High content overlap (${Math.round(overlap * 100)}%)`,
          createdAt: Math.max(left.source.updatedAt, right.source.updatedAt),
        })
      }
    }
  }

  const countOf = (kind: KnowledgeHygieneFinding['kind']): number =>
    findings.filter((finding) => finding.kind === kind).length

  return {
    scannedAt,
    counts: {
      stale: countOf('stale'),
      duplicate: countOf('duplicate'),
      overlap: countOf('overlap'),
      broken: countOf('broken'),
      archived: countOf('archived'),
      superseded: countOf('superseded'),
    },
    findings,
    recentActions: [...maintenanceHistory],
  }
}
|
|
1178
|
+
|
|
1179
|
+
/**
 * Builds a hygiene summary over every stored knowledge source.
 *
 * Also makes sure the idle maintenance callback is registered.
 *
 * @returns The hygiene summary for the current store contents.
 */
export async function getKnowledgeHygieneSummary(): Promise<KnowledgeHygieneSummary> {
  await ensureLegacyKnowledgeBackfill()
  registerKnowledgeMaintenanceIdleCallback()

  const storedSources = listStoredSources()
  return buildHygieneSummary(storedSources)
}
|
|
1184
|
+
|
|
1185
|
+
/**
 * Runs the automatic knowledge maintenance passes and returns a fresh hygiene
 * summary.
 *
 * Passes, in order:
 * 1. Re-sync non-manual sources that are stale or in an error state.
 * 2. Archive exact duplicates (same content hash), keeping the canonical source.
 * 3. Supersede older sources that share a URL or path with a more recently
 *    indexed canonical source.
 *
 * Duplicate groups are built from a listing taken after pass 1, so a source
 * whose content changed during the re-sync is judged on its current hash
 * rather than the one it had before the sync.
 *
 * @returns The hygiene summary computed after all passes.
 */
export async function runKnowledgeHygieneMaintenance(): Promise<KnowledgeHygieneSummary> {
  await ensureLegacyKnowledgeBackfill()

  // Pass 1: auto-sync stale or broken sources that are still active.
  for (const source of listStoredSources()) {
    if (sourceIsExcludedByDefault(source)) continue
    if (source.kind !== 'manual' && (isStaleSource(source) || source.syncStatus === 'error')) {
      try {
        const synced = await syncKnowledgeSource(source.id)
        if (synced?.source) {
          upsertSourceLifecycle(source.id, (current) => ({
            ...current,
            lastAutoSyncAt: Date.now(),
            maintenanceUpdatedAt: Date.now(),
            maintenanceNotes: 'auto-sync completed',
            updatedAt: Date.now(),
          }))
          recordMaintenanceAction({
            // An unchanged hash means only the sync bookkeeping was refreshed.
            kind: source.sourceHash === synced.source.sourceHash ? 'sync' : 'reindex',
            sourceId: source.id,
            summary: `Auto-synced ${synced.source.title}`,
            createdAt: Date.now(),
          })
        }
      } catch {
        // Keep the existing error state for manual review.
      }
    }
  }

  // Pass 2: archive exact duplicates, grouped on post-sync content hashes.
  const duplicateGroups = new Map<string, KnowledgeSource[]>()
  for (const source of listStoredSources()) {
    if (!source.sourceHash) continue
    const group = duplicateGroups.get(source.sourceHash) || []
    group.push(source)
    duplicateGroups.set(source.sourceHash, group)
  }
  for (const group of duplicateGroups.values()) {
    if (group.length < 2) continue
    const canonical = canonicalSourceForGroup(group)
    for (const source of group) {
      if (source.id === canonical.id || sourceIsExcludedByDefault(source)) continue
      await archiveKnowledgeSource(source.id, {
        reason: 'duplicate',
        duplicateOfSourceId: canonical.id,
      })
    }
  }

  // Pass 3: supersede older active sources that share an origin with a newer one.
  const originGroups = new Map<string, KnowledgeSource[]>()
  for (const source of listStoredSources()) {
    if (sourceIsExcludedByDefault(source)) continue
    const origin = source.sourceUrl || source.sourcePath || ''
    if (!origin) continue
    const group = originGroups.get(origin) || []
    group.push(source)
    originGroups.set(origin, group)
  }
  for (const group of originGroups.values()) {
    if (group.length < 2) continue
    const canonical = canonicalSourceForGroup(group)
    for (const source of group) {
      if (source.id === canonical.id || sourceIsSuperseded(source)) continue
      // Only supersede sources indexed strictly earlier than the canonical one.
      if ((source.lastIndexedAt || 0) >= (canonical.lastIndexedAt || 0)) continue
      await supersedeKnowledgeSource(source.id, canonical.id)
    }
  }

  return buildHygieneSummary(listStoredSources())
}
|
|
1257
|
+
|
|
1258
|
+
/**
 * Schedules a maintenance run through `onNextIdleWindow`, unless one is already
 * registered.
 *
 * The scheduled callback clears the registration flag, runs the maintenance,
 * and then registers itself again. If the maintenance run rejects, the
 * re-registration line is not reached.
 */
export function registerKnowledgeMaintenanceIdleCallback(): void {
  if (!maintenanceRegistered) {
    maintenanceRegistered = true

    const runThenReschedule = async () => {
      // Clear the flag before running so a later call can register again.
      maintenanceRegistered = false
      await runKnowledgeHygieneMaintenance()
      registerKnowledgeMaintenanceIdleCallback()
    }
    onNextIdleWindow(runThenReschedule)
  }
}
|