recallx 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +205 -0
  2. package/app/cli/bin/recallx-mcp.js +2 -0
  3. package/app/cli/bin/recallx.js +8 -0
  4. package/app/cli/src/cli.js +808 -0
  5. package/app/cli/src/format.js +242 -0
  6. package/app/cli/src/http.js +35 -0
  7. package/app/mcp/api-client.js +101 -0
  8. package/app/mcp/index.js +128 -0
  9. package/app/mcp/server.js +786 -0
  10. package/app/server/app.js +2263 -0
  11. package/app/server/config.js +27 -0
  12. package/app/server/db.js +399 -0
  13. package/app/server/errors.js +17 -0
  14. package/app/server/governance.js +466 -0
  15. package/app/server/index.js +26 -0
  16. package/app/server/inferred-relations.js +247 -0
  17. package/app/server/observability.js +495 -0
  18. package/app/server/project-graph.js +199 -0
  19. package/app/server/relation-scoring.js +59 -0
  20. package/app/server/repositories.js +2992 -0
  21. package/app/server/retrieval.js +486 -0
  22. package/app/server/semantic/chunker.js +85 -0
  23. package/app/server/semantic/provider.js +124 -0
  24. package/app/server/semantic/types.js +1 -0
  25. package/app/server/semantic/vector-store.js +169 -0
  26. package/app/server/utils.js +43 -0
  27. package/app/server/workspace-session.js +128 -0
  28. package/app/server/workspace.js +79 -0
  29. package/app/shared/contracts.js +268 -0
  30. package/app/shared/request-runtime.js +30 -0
  31. package/app/shared/types.js +1 -0
  32. package/app/shared/version.js +1 -0
  33. package/dist/renderer/assets/ProjectGraphCanvas-BMvz9DmE.js +312 -0
  34. package/dist/renderer/assets/index-C2-KXqBO.css +1 -0
  35. package/dist/renderer/assets/index-CrDu22h7.js +76 -0
  36. package/dist/renderer/index.html +13 -0
  37. package/package.json +49 -0
@@ -0,0 +1,486 @@
1
+ import { appendCurrentTelemetryDetails } from "./observability.js";
2
+ import { computeUsageBonus, relationTypeSpecificityBonus } from "./relation-scoring.js";
3
/**
 * Default weights for `computeRelationRetrievalRank`; also the weights
 * `buildNeighborhoodResult` passes when ranking a node's direct neighborhood.
 */
const neighborhoodRetrievalRankWeights = {
    canonicalBase: 2,
    canonicalSpecificityMultiplier: 1,
    canonicalUsageMultiplier: 1,
    inferredBaseMultiplier: 1,
    inferredSpecificityMultiplier: 1,
    inferredUsageMultiplier: 1,
};

/**
 * Weights `buildCandidateRelationBonusMap` passes to
 * `computeRelationRetrievalRank` when scoring candidates related to a target.
 */
const boostedRelationRankWeights = {
    canonicalBase: 70,
    canonicalSpecificityMultiplier: 100,
    canonicalUsageMultiplier: 60,
    inferredBaseMultiplier: 35,
    inferredSpecificityMultiplier: 35,
    inferredUsageMultiplier: 35,
};

// Fallbacks used by resolveSemanticAugmentationSettings when the caller's
// settings do not carry a finite number for the corresponding field.
const semanticCandidateMinSimilarity = 0.2;
const semanticCandidateMaxBonus = 18;
21
/**
 * Produce usable semantic augmentation settings from optional caller input.
 *
 * @param {{minSimilarity?: unknown, maxBonus?: unknown} | null | undefined} settings
 * @returns {{minSimilarity: number, maxBonus: number}} `minSimilarity` clamped
 *   to [0, 1] and `maxBonus` clamped to >= 0; a field that is not a finite
 *   number falls back to the module default.
 */
function resolveSemanticAugmentationSettings(settings) {
    const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
    const requestedMinSimilarity = settings?.minSimilarity;
    const requestedMaxBonus = settings?.maxBonus;

    let minSimilarity = semanticCandidateMinSimilarity;
    if (isFiniteNumber(requestedMinSimilarity)) {
        minSimilarity = Math.min(Math.max(requestedMinSimilarity, 0), 1);
    }

    let maxBonus = semanticCandidateMaxBonus;
    if (isFiniteNumber(requestedMaxBonus)) {
        maxBonus = Math.max(requestedMaxBonus, 0);
    }

    return { minSimilarity, maxBonus };
}
31
/**
 * Order items by preset score plus optional per-id bonus and keep the top ones.
 *
 * @param {Array<object>} items Items carrying `id` and `updatedAt`.
 * @param {string} preset Preset name forwarded to `scoreItem`.
 * @param {number} [maxItems] Upper bound on returned items (all when omitted).
 * @param {Map<string, number>} [bonuses] Extra score keyed by item id.
 * @returns {Array<object>} Highest-scoring items first; equal scores are
 *   ordered by `updatedAt` descending via `localeCompare`.
 */
function prioritizeItems(items, preset, maxItems, bonuses) {
    const scored = items.map((item) => {
        const baseScore = scoreItem(item, preset);
        const bonus = bonuses?.get(item.id) ?? 0;
        return { item, score: baseScore + bonus };
    });
    scored.sort((a, b) => {
        const scoreDelta = b.score - a.score;
        if (scoreDelta) {
            return scoreDelta;
        }
        return b.item.updatedAt.localeCompare(a.item.updatedAt);
    });
    return scored.slice(0, maxItems).map((entry) => entry.item);
}
41
/**
 * Collect the one-hop neighborhood of a node: canonical relations first, then
 * (optionally) inferred relations to nodes not already reached canonically.
 *
 * @param {object} repository Must provide `listRelatedNodes`,
 *   `listInferredRelationsForNode`, `getNodesByIds` (returning something with
 *   `.get`) and `getRelationUsageSummaries`.
 * @param {string} nodeId Node whose neighborhood is built.
 * @param {{relationTypes?: string[], includeInferred?: boolean, maxInferred?: number}} [options]
 * @returns {{items: Array<{node: object, edge: object}>, usageSummaries: object}}
 *   Ranked canonical items followed by ranked inferred items, plus the usage
 *   summaries looked up for every collected relation.
 */
function buildNeighborhoodResult(repository, nodeId, options) {
    const directionFor = (relation) => (relation.fromNodeId === nodeId ? "outgoing" : "incoming");
    const otherEndpoint = (relation) => (relation.fromNodeId === nodeId ? relation.toNodeId : relation.fromNodeId);
    // Inferred relations are only considered when both the flag and a non-zero cap are given.
    const inferredRequested = Boolean(options?.includeInferred && options.maxInferred);

    const canonicalItems = repository
        .listRelatedNodes(nodeId, 1, options?.relationTypes)
        .map(({ node, relation }) => {
            const edge = {
                relationId: relation.id,
                relationType: relation.relationType,
                relationSource: "canonical",
                relationStatus: relation.status,
                relationScore: null,
                retrievalRank: null,
                generator: null,
                reason: `Related via ${relation.relationType}`,
                direction: directionFor(relation),
                hop: 1
            };
            return { node, edge };
        });

    const seenNodeIds = new Set(canonicalItems.map(({ node }) => node.id));
    const inferredItems = [];
    if (inferredRequested) {
        const allowedTypes = options.relationTypes;
        // Over-fetch (3x the cap) so filtering and de-duplication still leave enough candidates.
        const fetchLimit = Math.max(options.maxInferred * 3, options.maxInferred);
        const relations = repository
            .listInferredRelationsForNode(nodeId, fetchLimit)
            .filter((relation) => !allowedTypes?.length || allowedTypes.includes(relation.relationType));
        const relatedNodes = repository.getNodesByIds(relations.map((relation) => otherEndpoint(relation)));
        for (const relation of relations) {
            const node = relatedNodes.get(otherEndpoint(relation));
            if (!node) {
                continue;
            }
            const edge = {
                relationId: relation.id,
                relationType: relation.relationType,
                relationSource: "inferred",
                relationStatus: relation.status,
                relationScore: relation.finalScore,
                retrievalRank: relation.finalScore,
                generator: relation.generator,
                reason: `Inferred via ${relation.relationType} (score ${relation.finalScore.toFixed(2)})`,
                direction: directionFor(relation),
                hop: 1
            };
            // Keep only the first inferred relation per node, and none for canonical neighbors.
            if (seenNodeIds.has(node.id)) {
                continue;
            }
            seenNodeIds.add(node.id);
            inferredItems.push({ node, edge });
        }
    }

    const relationIds = [...canonicalItems, ...inferredItems].map((item) => item.edge.relationId);
    const usageSummaries = repository.getRelationUsageSummaries(relationIds);
    const rankedCanonical = rankNeighborhoodItems(canonicalItems, usageSummaries, neighborhoodRetrievalRankWeights);
    const rankedInferred = inferredRequested
        ? rankNeighborhoodItems(inferredItems, usageSummaries, neighborhoodRetrievalRankWeights, options.maxInferred)
        : [];
    return {
        items: [...rankedCanonical, ...rankedInferred],
        usageSummaries
    };
}
107
/**
 * Check an item against optional type and status allow-lists.
 *
 * @param {{type: string, status: string}} item
 * @param {{types?: string[], status?: string[]}} filters A missing or empty
 *   list places no restriction on that field.
 * @returns {boolean} True when the item passes both lists.
 */
function matchesSearchResultFilters(item, filters) {
    const { types: allowedTypes, status: allowedStatuses } = filters;
    if (allowedTypes?.length && !allowedTypes.includes(item.type)) {
        return false;
    }
    if (allowedStatuses?.length && !allowedStatuses.includes(item.status)) {
        return false;
    }
    return true;
}
112
/**
 * Rank neighborhood items by relation retrieval rank, highest first.
 *
 * Each returned item is a copy whose edge carries the computed
 * `retrievalRank` and a `reason` extended by `formatRelationReason`; the
 * input items are not modified.
 *
 * @param {Array<{node: object, edge: object}>} items
 * @param {{get(id: string): object | undefined}} usageSummaries Keyed by relation id.
 * @param {object} weights Forwarded to `computeRelationRetrievalRank`.
 * @param {number} [maxItems] When a number, only that many top items are kept.
 * @returns {Array<{node: object, edge: object}>}
 */
function rankNeighborhoodItems(items, usageSummaries, weights, maxItems) {
    const scored = items.map((item) => {
        const { edge } = item;
        const usage = usageSummaries.get(edge.relationId);
        const rank = computeRelationRetrievalRank(edge, usage, weights);
        const reason = formatRelationReason(edge.reason, usage);
        return { item, rank, reason };
    });
    scored.sort((a, b) => b.rank - a.rank);
    const kept = typeof maxItems === "number" ? scored.slice(0, maxItems) : scored;
    return kept.map(({ item, rank, reason }) => ({
        ...item,
        edge: { ...item.edge, reason, retrievalRank: rank }
    }));
}
137
// Per-preset score added for specific item types; presets or types that are
// not listed contribute nothing.
const presetTypeScores = new Map([
    ["for-coding", new Map([["project", 40], ["decision", 25], ["reference", 20]])],
    ["for-research", new Map([["reference", 35], ["idea", 20], ["question", 20]])],
    ["for-assistant", new Map([["project", 25], ["note", 20], ["question", 10]])],
]);

/**
 * Base priority score for a bundle item.
 *
 * @param {{canonicality?: string, status?: string, type?: string}} item
 * @param {string} preset One of "for-coding", "for-research", "for-assistant";
 *   any other value adds no type score.
 * @returns {number} 30 for canonical items, plus 10 for active items, plus the
 *   preset-specific score for the item's type.
 */
function scoreItem(item, preset) {
    const canonicalScore = item.canonicality === "canonical" ? 30 : 0;
    const activeScore = item.status === "active" ? 10 : 0;
    const typeScore = presetTypeScores.get(preset)?.get(item.type) ?? 0;
    return canonicalScore + activeScore + typeScore;
}
169
/**
 * Compute the retrieval rank of a relation edge.
 *
 * Canonical edges start from a fixed base; every other edge starts from its
 * own `relationScore` (treated as 0 when null/undefined). Both then add the
 * weighted relation-type specificity bonus and the weighted usage bonus.
 *
 * @param {{relationType: string, relationSource: string, relationScore?: number | null}} edge
 * @param {object | undefined} summary Usage summary passed to `computeUsageBonus`.
 * @param {object} [weights] Multipliers; defaults to the neighborhood weights.
 * @returns {number}
 */
export function computeRelationRetrievalRank(edge, summary, weights = neighborhoodRetrievalRankWeights) {
    const usage = computeUsageBonus(summary);
    const specificity = relationTypeSpecificityBonus(edge.relationType);
    const isCanonical = edge.relationSource === "canonical";
    if (!isCanonical) {
        const inferredBase = (edge.relationScore ?? 0) * weights.inferredBaseMultiplier;
        return inferredBase + specificity * weights.inferredSpecificityMultiplier + usage * weights.inferredUsageMultiplier;
    }
    return weights.canonicalBase + specificity * weights.canonicalSpecificityMultiplier + usage * weights.canonicalUsageMultiplier;
}
179
/**
 * Score a candidate node for a text query.
 *
 * The query is trimmed and lower-cased before matching, consistent with
 * `shouldUseSemanticCandidateAugmentation`. A blank or missing query earns no
 * text-match points: every string contains the empty string, so without this
 * guard each titled/summarised node would receive the title and summary
 * bonuses even though nothing was searched for.
 *
 * @param {{title?: string | null, summary?: string | null, type?: string, canonicality?: string}} node
 * @param {string} query Free-text query.
 * @param {string} preset Preset name; "for-coding" favors decision nodes.
 * @param {number} [relationRetrievalRank=0] Relation-derived rank added as-is.
 * @returns {number} 50 for a title match + 20 for a summary match + 15 for a
 *   decision under "for-coding" + 10 for canonical nodes + the relation rank.
 */
export function computeRankCandidateScore(node, query, preset, relationRetrievalRank = 0) {
    const normalizedQuery = (query ?? "").trim().toLowerCase();
    const containsQuery = (text) => normalizedQuery.length > 0 && Boolean(text?.toLowerCase().includes(normalizedQuery));
    const titleScore = containsQuery(node.title) ? 50 : 0;
    const summaryScore = containsQuery(node.summary) ? 20 : 0;
    const presetScore = preset === "for-coding" && node.type === "decision" ? 15 : 0;
    const canonicalScore = node.canonicality === "canonical" ? 10 : 0;
    return titleScore + summaryScore + presetScore + canonicalScore + relationRetrievalRank;
}
187
/**
 * Decide whether semantic ranking should supplement lexical matching.
 *
 * @param {string} query Raw query text.
 * @param {Array<{title?: string | null, summary?: string | null}>} candidates
 * @returns {boolean} False when the trimmed query is shorter than 6
 *   characters or any candidate's title or summary already contains it
 *   (case-insensitively); true otherwise.
 */
export function shouldUseSemanticCandidateAugmentation(query, candidates) {
    const needle = query.trim().toLowerCase();
    if (needle.length < 6) {
        return false;
    }
    return candidates.every((candidate) => {
        const loweredTitle = candidate.title?.toLowerCase() ?? "";
        const loweredSummary = candidate.summary?.toLowerCase() ?? "";
        return !loweredTitle.includes(needle) && !loweredSummary.includes(needle);
    });
}
198
/**
 * Convert semantic similarity matches into per-node ranking bonuses.
 *
 * Matches whose similarity is not finite or lies below the resolved minimum
 * are dropped. The rest are rescaled from [minSimilarity, 1] to [0, 1],
 * multiplied by the resolved maximum bonus and rounded to 4 decimals.
 *
 * @param {Map<string, {similarity: number, matchedChunks: number}>} semanticMatches
 * @param {object} [settings] Passed to `resolveSemanticAugmentationSettings`.
 * @returns {Map<string, {retrievalRank: number, semanticSimilarity: number, reason: string}>}
 */
export function buildSemanticCandidateBonusMap(semanticMatches, settings) {
    const { minSimilarity, maxBonus } = resolveSemanticAugmentationSettings(settings);
    const bonuses = new Map();
    for (const [nodeId, match] of semanticMatches.entries()) {
        const { similarity, matchedChunks } = match;
        if (!Number.isFinite(similarity) || !(similarity >= minSimilarity)) {
            continue;
        }
        let scaled = 0;
        // A minimum of 1 leaves no range to scale over, so the bonus is zero.
        if (minSimilarity < 1) {
            scaled = Math.min(1, Math.max(0, similarity - minSimilarity) / (1 - minSimilarity));
        }
        const plural = matchedChunks === 1 ? "" : "s";
        bonuses.set(nodeId, {
            retrievalRank: Number((scaled * maxBonus).toFixed(4)),
            semanticSimilarity: Number(similarity.toFixed(4)),
            reason: `Semantic similarity ${similarity.toFixed(2)} via local-ngram across ${matchedChunks} chunk${plural}`
        });
    }
    return bonuses;
}
217
/**
 * Relation-based score boost for an item inside a context bundle.
 *
 * @param {{edge: object}} item Neighborhood item whose edge is ranked.
 * @param {object | undefined} summary Usage summary for the item's relation.
 * @returns {number} `computeRelationRetrievalRank` evaluated with the
 *   bundle-specific weights below.
 */
function computeBundleRelationBoost(item, summary) {
    const bundleWeights = {
        canonicalBase: 120,
        canonicalSpecificityMultiplier: 100,
        canonicalUsageMultiplier: 80,
        inferredBaseMultiplier: 40,
        inferredSpecificityMultiplier: 40,
        inferredUsageMultiplier: 40
    };
    const { edge } = item;
    return computeRelationRetrievalRank(edge, summary, bundleWeights);
}
227
/**
 * Append the usage bonus to a relation's reason text when there is one.
 *
 * @param {string} baseReason Reason text without usage information.
 * @param {object | undefined} summary Usage summary passed to `computeUsageBonus`.
 * @returns {string} `baseReason` unchanged when the bonus is falsy, otherwise
 *   `baseReason` followed by ", usage " and the signed bonus with 2 decimals.
 */
function formatRelationReason(baseReason, summary) {
    const bonus = computeUsageBonus(summary);
    if (bonus) {
        // Negative numbers already print their own sign.
        const sign = bonus > 0 ? "+" : "";
        return `${baseReason}, usage ${sign}${bonus.toFixed(2)}`;
    }
    return baseReason;
}
235
/**
 * Project a node onto the search-result shape used by retrieval code.
 *
 * @param {object} node Source node.
 * @returns {{id: *, type: *, title: *, summary: *, status: *, canonicality: *, sourceLabel: *, updatedAt: *, tags: *}}
 *   A new object holding only the listed fields, copied as-is.
 */
function searchResultFromNode(node) {
    const { id, type, title, summary, status, canonicality, sourceLabel, updatedAt, tags } = node;
    return { id, type, title, summary, status, canonicality, sourceLabel, updatedAt, tags };
}
248
/**
 * Build the de-duplicated candidate list for a target and its neighborhood.
 *
 * @param {object} target Target node; always the first candidate.
 * @param {Iterable<{node: object}>} neighborhood Neighborhood items.
 * @returns {Array<object>} One search result per distinct node id, in
 *   first-seen order; a later node with the same id replaces the earlier
 *   entry's value.
 */
function buildRetrievalCandidates(target, neighborhood) {
    const byNodeId = new Map();
    byNodeId.set(target.id, searchResultFromNode(target));
    for (const { node } of neighborhood) {
        byNodeId.set(node.id, searchResultFromNode(node));
    }
    return [...byNodeId.values()];
}
255
/**
 * Return only the ranked items of a node's neighborhood.
 *
 * @param {object} repository Forwarded to `buildNeighborhoodResult`.
 * @param {string} nodeId Node whose neighborhood is requested.
 * @param {object} [options] Forwarded to `buildNeighborhoodResult`.
 * @returns {Array<object>} The `items` of the neighborhood result.
 */
export function buildNeighborhoodItems(repository, nodeId, options) {
    const { items } = buildNeighborhoodResult(repository, nodeId, options);
    return items;
}
258
/**
 * Compute relation-based ranking bonuses for candidates that are direct
 * neighbors of the target node.
 *
 * The neighborhood includes inferred relations, capped at the candidate count
 * clamped to the range 4..10.
 *
 * @param {object} repository Forwarded to `buildNeighborhoodResult`.
 * @param {string} targetNodeId Node the candidates are compared against.
 * @param {string[]} candidateNodeIds Ids eligible for a bonus.
 * @returns {Map<string, {retrievalRank: number, relationSource: string, relationType: string, relationScore: number | null, reason: string}>}
 *   Entry per candidate found in the neighborhood, ranked with the boosted weights.
 */
export function buildCandidateRelationBonusMap(repository, targetNodeId, candidateNodeIds) {
    const wantedIds = new Set(candidateNodeIds);
    const inferredCap = Math.max(4, Math.min(candidateNodeIds.length, 10));
    const { items: neighborhood, usageSummaries } = buildNeighborhoodResult(repository, targetNodeId, {
        includeInferred: true,
        maxInferred: inferredCap
    });
    const bonuses = new Map();
    for (const { node, edge } of neighborhood) {
        if (!wantedIds.has(node.id)) {
            continue;
        }
        const usage = usageSummaries.get(edge.relationId);
        bonuses.set(node.id, {
            retrievalRank: computeRelationRetrievalRank(edge, usage, boostedRelationRankWeights),
            relationSource: edge.relationSource,
            relationType: edge.relationType,
            relationScore: edge.relationScore,
            reason: edge.reason
        });
    }
    return bonuses;
}
277
/**
 * List retrieval candidates around a target that pass the given filters.
 *
 * @param {object} repository Used only when `cachedItems` is not supplied.
 * @param {string} targetId Target node id.
 * @param {{types?: string[], status?: string[]}} filters Type/status allow-lists.
 * @param {Array<object>} [cachedItems] Precomputed candidates; when null or
 *   undefined, candidates are built from the target and its neighborhood
 *   (inferred relations included, at most 4).
 * @returns {Array<object>} Candidates matching the filters.
 */
export function buildTargetRelatedRetrievalItems(repository, targetId, filters, cachedItems) {
    let candidates = cachedItems;
    if (candidates === null || candidates === undefined) {
        const target = repository.getNode(targetId);
        const neighborhood = buildNeighborhoodItems(repository, target.id, { includeInferred: true, maxInferred: 4 });
        candidates = buildRetrievalCandidates(target, neighborhood);
    }
    return candidates.filter((candidate) => matchesSearchResultFilters(candidate, filters));
}
284
/**
 * Build a context bundle for the whole workspace (no specific target node).
 *
 * Reads `max(maxItems * 3, 18)` nodes from `repository.listNodes`, drops
 * archived ones, and derives the bundle items, decisions, open questions and
 * an optional activity digest from them.
 *
 * @param {object} repository Must provide `listNodes` and, when recent
 *   activities are requested, `searchActivities`.
 * @param {{mode: string, preset: string, options: object}} input Bundle
 *   request; "micro" mode caps items at 5 and the digest at 3 entries
 *   (otherwise 6).
 * @returns {Promise<object>} Bundle with a synthetic "workspace" target.
 */
async function buildWorkspaceContextBundle(repository, input) {
    const { mode, preset, options } = input;
    const isMicro = mode === "micro";

    const fetchLimit = Math.max(options.maxItems * 3, 18);
    const recentNodes = repository.listNodes(fetchLimit).filter((node) => node.status !== "archived");

    let decisions = [];
    if (options.includeDecisions) {
        decisions = recentNodes.filter((node) => node.type === "decision" && (node.status === "active" || node.status === "contested"));
    }

    let openQuestions = [];
    if (options.includeOpenQuestions) {
        const openStatuses = ["active", "draft", "contested"];
        openQuestions = recentNodes.filter((node) => node.type === "question" && openStatuses.includes(node.status));
    }

    const itemLimit = isMicro ? Math.min(options.maxItems, 5) : options.maxItems;
    const baseItems = prioritizeItems(recentNodes, preset, itemLimit);

    let activityDigest = [];
    if (options.includeRecentActivities) {
        const { items: activities } = repository.searchActivities({
            query: "",
            filters: {},
            limit: isMicro ? 3 : 6,
            offset: 0,
            sort: "updated_at"
        });
        activityDigest = activities.map((activity) => {
            const subject = activity.targetNodeTitle ?? activity.targetNodeId;
            return `${subject} · ${activity.activityType}: ${activity.body ?? "No details"}`;
        });
    }

    const items = baseItems.map((node) => ({
        nodeId: node.id,
        type: node.type,
        title: node.title,
        summary: node.summary,
        reason: node.type === "project" ? "Recent workspace project context" : `Recent workspace context for ${preset}`
    }));
    const sources = baseItems.map((node) => ({
        nodeId: node.id,
        sourceLabel: node.sourceLabel
    }));

    return {
        target: {
            type: "workspace",
            id: "workspace",
            title: "Workspace context"
        },
        mode,
        preset,
        // Falls back to a generic description when the top item has no summary.
        summary: baseItems[0]?.summary ??
            "Recent workspace context across active nodes, open questions, decisions, and recent activity trails.",
        items,
        activityDigest,
        decisions,
        openQuestions,
        sources
    };
}
332
/**
 * Build a context bundle for a target node, or for the whole workspace when
 * `input.target.id` is missing.
 *
 * Steps: load the target's one-hop neighborhood (only if related items,
 * decisions or open questions were requested), derive related items,
 * decisions and open questions from it, optionally add semantic-similarity
 * bonuses, prioritise the de-duplicated items and assemble the bundle.
 * Telemetry details are appended twice through `appendCurrentTelemetryDetails`.
 *
 * @param {object} repository Must provide `getNode`, `rankSemanticCandidates`,
 *   `getSemanticAugmentationSettings`, `listNodeActivities` and the methods
 *   `buildNeighborhoodResult` relies on.
 * @param {{target?: {id?: string}, mode: string, preset: string, options: object}} input
 *   Bundle request; "micro" mode caps items at 5 and activities at 3
 *   (otherwise 6).
 * @returns {Promise<object>} The assembled bundle.
 */
export async function buildContextBundle(repository, input) {
    if (!input.target?.id) {
        return buildWorkspaceContextBundle(repository, input);
    }
    const target = repository.getNode(input.target.id);
    const sharedNeighborhood = input.options.includeRelated || input.options.includeDecisions || input.options.includeOpenQuestions
        ? buildNeighborhoodResult(repository, target.id, {
            includeInferred: input.options.includeInferred,
            maxInferred: input.options.maxInferred
        })
        : { items: [], usageSummaries: new Map() };
    // Related items are only surfaced on request; decisions and open questions
    // still draw from the full neighborhood through the retrieval candidates.
    const neighborhood = input.options.includeRelated ? sharedNeighborhood.items : [];
    const related = neighborhood.map(({ node, edge }) => ({
        nodeId: node.id,
        type: node.type,
        title: node.title,
        summary: node.summary,
        reason: edge.reason,
        relationId: edge.relationId,
        relationType: edge.relationType,
        relationSource: edge.relationSource,
        relationStatus: edge.relationStatus,
        relationScore: edge.relationScore ?? undefined,
        retrievalRank: edge.retrievalRank ?? undefined,
        generator: edge.generator
    }));
    const retrievalCandidates = buildRetrievalCandidates(target, sharedNeighborhood.items);
    const decisions = input.options.includeDecisions
        ? buildTargetRelatedRetrievalItems(repository, target.id, {
            types: ["decision"],
            status: ["active", "contested"]
        }, retrievalCandidates)
        : [];
    const openQuestions = input.options.includeOpenQuestions
        ? buildTargetRelatedRetrievalItems(repository, target.id, {
            types: ["question"],
            status: ["active", "draft", "contested"]
        }, retrievalCandidates)
        : [];
    // Reuse the shared projection instead of repeating the field list inline.
    const targetItem = searchResultFromNode(target);
    const relatedItems = neighborhood.map((item) => searchResultFromNode(item.node));
    const relationBonuses = new Map(neighborhood.map((item) => [
        item.node.id,
        computeBundleRelationBoost(item, sharedNeighborhood.usageSummaries.get(item.edge.relationId))
    ]));
    const candidateItems = [targetItem, ...relatedItems, ...decisions, ...openQuestions];
    const dedupedItems = Array.from(new Map(candidateItems.map((item) => [item.id, item])).values());
    const semanticQuery = [target.title, target.summary ?? target.body].filter(Boolean).join("\n");
    // The target itself is never a semantic candidate; compute the list once.
    const nonTargetItems = dedupedItems.filter((item) => item.id !== target.id);
    const semanticBonuses = shouldUseSemanticCandidateAugmentation(semanticQuery, nonTargetItems)
        ? buildSemanticCandidateBonusMap(await repository.rankSemanticCandidates(semanticQuery, nonTargetItems.map((item) => item.id)), repository.getSemanticAugmentationSettings())
        : new Map();
    appendCurrentTelemetryDetails({
        neighborhoodCount: neighborhood.length,
        relatedCandidateCount: relatedItems.length,
        decisionCount: decisions.length,
        openQuestionCount: openQuestions.length,
        semanticUsed: semanticBonuses.size > 0
    });
    const combinedBonuses = new Map();
    for (const item of dedupedItems) {
        combinedBonuses.set(item.id, (relationBonuses.get(item.id) ?? 0) + (semanticBonuses.get(item.id)?.retrievalRank ?? 0));
    }
    const baseItems = prioritizeItems(dedupedItems, input.preset, input.mode === "micro" ? Math.min(input.options.maxItems, 5) : input.options.maxItems, combinedBonuses);
    const relatedByNodeId = new Map(related.map((item) => [item.nodeId, item]));
    const bundle = {
        target: {
            type: target.type,
            id: target.id,
            title: target.title
        },
        mode: input.mode,
        preset: input.preset,
        summary: target.summary ?? "No target summary yet.",
        items: baseItems.map((item) => {
            const relatedEntry = relatedByNodeId.get(item.id);
            const semanticEntry = semanticBonuses.get(item.id);
            return {
                nodeId: item.id,
                type: item.type,
                title: item.title,
                summary: item.summary,
                reason: [
                    relatedEntry?.reason ?? (item.id === target.id ? "Primary target" : `Included for ${input.preset}`),
                    semanticEntry?.reason ?? null
                ]
                    .filter(Boolean)
                    .join("; "),
                relationId: relatedEntry?.relationId,
                relationType: relatedEntry?.relationType,
                relationSource: relatedEntry?.relationSource,
                relationStatus: relatedEntry?.relationStatus,
                relationScore: relatedEntry?.relationScore,
                // A combined rank of 0 is reported as undefined rather than 0.
                retrievalRank: (relatedEntry?.retrievalRank ?? 0) + (semanticEntry?.retrievalRank ?? 0) || undefined,
                semanticSimilarity: semanticEntry?.semanticSimilarity,
                generator: relatedEntry?.generator ?? null
            };
        }),
        activityDigest: input.options.includeRecentActivities
            ? repository
                .listNodeActivities(target.id, input.mode === "micro" ? 3 : 6)
                .map((activity) => `${activity.activityType}: ${activity.body ?? "No details"}`)
            : [],
        decisions,
        openQuestions,
        sources: baseItems.map((item) => ({
            nodeId: item.id,
            sourceLabel: item.sourceLabel
        }))
    };
    appendCurrentTelemetryDetails({
        bundleItemCount: bundle.items.length,
        bundleSourceCount: bundle.sources.length
    });
    return bundle;
}
463
/**
 * Render a context bundle as a Markdown document.
 *
 * Always emits the heading, mode/preset lines, "Summary" and "Items"
 * sections; "Decisions", "Open Questions" and "Recent Activities" are added
 * only when their lists are non-empty.
 *
 * @param {object} bundle Bundle as produced by `buildContextBundle`.
 * @returns {string} Markdown text with lines joined by "\n".
 */
export function bundleAsMarkdown(bundle) {
    const { target, mode, preset, summary, items, decisions, openQuestions, activityDigest } = bundle;
    const lines = [];
    lines.push(`# ${target.title ?? target.id}`);
    lines.push("");
    lines.push(`Mode: ${mode}`);
    lines.push(`Preset: ${preset}`);
    lines.push("");
    lines.push("## Summary");
    lines.push(summary);
    lines.push("");
    lines.push("## Items");
    for (const item of items) {
        lines.push(`- ${item.title ?? item.nodeId}: ${item.summary ?? "No summary"} (${item.reason})`);
    }
    if (decisions.length) {
        lines.push("", "## Decisions");
        for (const decision of decisions) {
            lines.push(`- ${decision.title ?? decision.id}: ${decision.summary ?? "No summary"}`);
        }
    }
    if (openQuestions.length) {
        lines.push("", "## Open Questions");
        for (const question of openQuestions) {
            lines.push(`- ${question.title ?? question.id}`);
        }
    }
    if (activityDigest.length) {
        lines.push("", "## Recent Activities");
        for (const entry of activityDigest) {
            lines.push(`- ${entry}`);
        }
    }
    return lines.join("\n");
}
@@ -0,0 +1,85 @@
1
+ import { checksumText } from "../utils.js";
2
/**
 * Canonicalize a single tag: trimmed, lower-cased, inner whitespace runs
 * collapsed to one space.
 *
 * @param {string} tag
 * @returns {string}
 */
function normalizeTagValue(tag) {
    const lowered = tag.trim().toLowerCase();
    // Splitting on whitespace runs and re-joining collapses each run to one space.
    return lowered.split(/\s+/).join(" ");
}
5
/**
 * Normalize a list of tags, dropping empty results and duplicates.
 *
 * @param {string[]} tags Raw tag strings.
 * @returns {string[]} Distinct normalized tags in first-seen order.
 */
export function normalizeTagList(tags) {
    const normalized = tags.map((tag) => normalizeTagValue(tag));
    const distinct = new Set(normalized.filter((value) => value !== ""));
    return [...distinct];
}
8
/**
 * Assemble the text that represents a node for semantic indexing.
 *
 * Parts appear in the order title, summary, tags line, body, separated by a
 * blank line; empty or missing parts are skipped.
 *
 * Tags are treated as optional like the other fields: a missing or non-array
 * `tags` no longer throws. The "tags: ..." line is emitted only when at least
 * one tag survives normalization, so input such as `["  "]` does not leave a
 * dangling "tags: " line in the document.
 *
 * @param {{title?: string | null, summary?: string | null, tags?: string[] | null, body?: string | null}} input
 * @returns {string} The combined, trimmed document text ("" when nothing is present).
 */
export function buildSemanticDocumentText(input) {
    const normalizedTags = Array.isArray(input.tags) ? normalizeTagList(input.tags) : [];
    const parts = [
        input.title?.trim(),
        input.summary?.trim(),
        normalizedTags.length ? `tags: ${normalizedTags.join(", ")}` : null,
        input.body?.trim(),
    ];
    return parts
        .filter((part) => Boolean(part))
        .join("\n\n")
        .trim();
}
19
/**
 * Rough token estimate for a piece of text: one token per 4 characters of the
 * untrimmed length, rounded up.
 *
 * @param {string} text
 * @returns {number} 0 for blank text, otherwise at least 1.
 */
function estimateTokenCount(text) {
    const isBlank = text.trim() === "";
    return isBlank ? 0 : Math.max(1, Math.ceil(text.length / 4));
}
25
/**
 * Find where to end a chunk so that it does not stop mid-word.
 *
 * Scans backwards from `endOffset`, never going to or below
 * `startOffset + 300`, and stops at the first index holding a space, a "."
 * followed by a space, or the second newline of a blank line.
 *
 * @param {string} text Full text being chunked.
 * @param {number} startOffset Start of the current chunk.
 * @param {number} endOffset Preferred (maximum) end of the current chunk.
 * @returns {number} The index just after the boundary character, or
 *   `endOffset` when no boundary was found in the scanned range.
 */
function findChunkBoundary(text, startOffset, endOffset) {
    const scanFloor = startOffset + 300;
    let index = endOffset;
    while (index > scanFloor) {
        const current = text[index];
        const isSpace = current === " ";
        const endsSentence = current === "." && text[index + 1] === " ";
        const endsParagraph = current === "\n" && text[index - 1] === "\n";
        if (isSpace || endsSentence || endsParagraph) {
            return index + 1;
        }
        index -= 1;
    }
    return endOffset;
}
39
/**
 * Split text into chunk records for semantic indexing.
 *
 * With chunking disabled the whole trimmed text becomes one chunk. With
 * chunking enabled, chunks span at most 1200 characters, end at a boundary
 * chosen by `findChunkBoundary` unless they reach the end of the text, and
 * each new chunk starts 180 characters before the previous chunk's end (but
 * always at least one character further than the previous start).
 *
 * @param {string} text Source text; surrounding whitespace is ignored.
 * @param {boolean} chunkEnabled Whether to split into multiple chunks.
 * @returns {Array<{ordinal: number, chunkHash: *, chunkText: string, tokenCount: number, startOffset: number, endOffset: number}>}
 *   Chunks in order; offsets index into the trimmed text. Empty for blank input.
 */
export function buildSemanticChunks(text, chunkEnabled) {
    const source = text.trim();
    if (source === "") {
        return [];
    }
    const toChunk = (ordinal, chunkText, startOffset, endOffset) => ({
        ordinal,
        chunkHash: checksumText(chunkText),
        chunkText,
        tokenCount: estimateTokenCount(chunkText),
        startOffset,
        endOffset,
    });
    if (!chunkEnabled) {
        return [toChunk(0, source, 0, source.length)];
    }
    const maxChars = 1200;
    const overlapChars = 180;
    const chunks = [];
    let cursor = 0;
    for (let ordinal = 0; cursor < source.length; ordinal += 1) {
        const hardEnd = Math.min(cursor + maxChars, source.length);
        // Only look for a softer boundary when the chunk does not reach the end.
        const end = hardEnd < source.length ? findChunkBoundary(source, cursor, hardEnd) : hardEnd;
        const chunkText = source.slice(cursor, end).trim();
        if (!chunkText) {
            break;
        }
        chunks.push(toChunk(ordinal, chunkText, cursor, end));
        if (end >= source.length) {
            break;
        }
        // Step back for overlap while guaranteeing forward progress.
        cursor = Math.max(end - overlapChars, cursor + 1);
    }
    return chunks;
}
@@ -0,0 +1,124 @@
1
// Length of every vector produced by the local n-gram provider.
const LOCAL_NGRAM_DIMENSION = 384;

// Provider name that normalizeSemanticProviderConfig rewrites to the local
// n-gram provider/model pair.
const LEGACY_DETERMINISTIC_PROVIDER = "deterministic";

// Identity of the built-in provider and its model.
const LOCAL_NGRAM_PROVIDER = "local-ngram";
const LOCAL_NGRAM_MODEL = "chargram-v1";
5
/**
 * Embedding provider that computes vectors locally with `localNgramVector`.
 */
class LocalNgramEmbeddingProvider {
    provider;
    model;
    version = "2";

    /**
     * @param {string} [provider] Provider name to report; defaults to the local n-gram provider.
     * @param {string} [model] Model name to report; defaults to the local n-gram model.
     */
    constructor(provider = LOCAL_NGRAM_PROVIDER, model = LOCAL_NGRAM_MODEL) {
        this.provider = provider;
        this.model = model;
    }

    /**
     * Embed a batch of text items.
     *
     * @param {Array<{nodeId: string, chunkOrdinal: number, contentHash: string, text: string}>} input
     * @returns {Promise<Array<{nodeId: string, chunkOrdinal: number, contentHash: string, vector: number[], dimension: number}>>}
     *   One result per input item, in input order.
     */
    async embedBatch(input) {
        return input.map(({ nodeId, chunkOrdinal, contentHash, text }) => {
            const vector = localNgramVector(text, LOCAL_NGRAM_DIMENSION);
            return {
                nodeId,
                chunkOrdinal,
                contentHash,
                vector,
                dimension: LOCAL_NGRAM_DIMENSION,
            };
        });
    }
}
23
/**
 * Scale a vector to unit Euclidean length.
 *
 * @param {number[]} vector
 * @returns {number[]} A new scaled array, or the input array itself when its
 *   magnitude is zero or not finite.
 */
function normalizeVector(vector) {
    let sumOfSquares = 0;
    vector.forEach((component) => {
        sumOfSquares += component * component;
    });
    const magnitude = Math.sqrt(sumOfSquares);
    const canScale = Number.isFinite(magnitude) && magnitude !== 0;
    return canScale ? vector.map((component) => component / magnitude) : vector;
}
30
/**
 * Lower-case text, collapse every whitespace run to a single space and strip
 * leading/trailing whitespace.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeText(text) {
    const lowered = text.toLowerCase();
    const collapsed = lowered.split(/\s+/).join(" ");
    return collapsed.trim();
}
33
/**
 * Invoke `callback` for every character n-gram (sizes 2, 3 and 4) of the
 * normalized text, padded with one space on each side.
 *
 * All size-2 grams are reported first, then size 3, then size 4, each from
 * left to right. Nothing is reported for text that normalizes to "".
 *
 * @param {string} text
 * @param {(gram: string) => void} callback
 * @returns {void}
 */
function forEachCharacterNgram(text, callback) {
    const cleaned = normalizeText(text);
    if (cleaned.length === 0) {
        return;
    }
    const padded = ` ${cleaned} `;
    for (const size of [2, 3, 4]) {
        const lastStart = padded.length - size;
        for (let start = 0; start <= lastStart; start += 1) {
            const gram = padded.slice(start, start + size);
            // Grams touching a word boundary keep their space; whitespace-only grams are skipped.
            if (gram.trim() !== "") {
                callback(gram);
            }
        }
    }
}
49
/**
 * 32-bit FNV-1a style hash of a string.
 *
 * Each UTF-16 code unit contributes two rounds: first its low byte, then the
 * remaining high bits (`unit >>> 8`).
 *
 * @param {string} value
 * @returns {number} Unsigned 32-bit hash.
 */
function fnv1a32(value) {
    const prime = 0x01000193;
    let hash = 0x811c9dc5;
    for (let position = 0; position < value.length; position += 1) {
        const unit = value.charCodeAt(position);
        hash = Math.imul(hash ^ (unit & 0xff), prime);
        hash = Math.imul(hash ^ (unit >>> 8), prime);
    }
    return hash >>> 0;
}
60
/**
 * Mix the bits of a 32-bit integer with alternating xor-shift and multiply
 * steps.
 *
 * @param {number} value Input; coerced to an unsigned 32-bit integer.
 * @returns {number} Unsigned 32-bit mixed value.
 */
function avalanche32(value) {
    const afterFirstShift = (value >>> 0) ^ ((value >>> 0) >>> 16);
    const afterFirstMultiply = Math.imul(afterFirstShift, 0x7feb352d);
    const afterSecondShift = afterFirstMultiply ^ (afterFirstMultiply >>> 15);
    const afterSecondMultiply = Math.imul(afterSecondShift, 0x846ca68b);
    const mixed = afterSecondMultiply ^ (afterSecondMultiply >>> 16);
    return mixed >>> 0;
}
69
/**
 * Embed text as a fixed-length vector using signed feature hashing of its
 * character n-grams.
 *
 * Each gram adds +1 or -1 to the bucket `fnv1a32(gram) % dimension`; the sign
 * comes from the lowest bit of `avalanche32(hash ^ 0x9e3779b9)`. Bucket
 * counts are then damped with a signed `log1p` and the vector is passed
 * through `normalizeVector`.
 *
 * @param {string} text
 * @param {number} dimension Vector length.
 * @returns {number[]} The embedding; all zeros when the text yields no grams.
 */
function localNgramVector(text, dimension) {
    const counts = new Array(dimension).fill(0);
    let gramCount = 0;
    forEachCharacterNgram(text, (gram) => {
        gramCount += 1;
        const hash = fnv1a32(gram);
        const isPositive = (avalanche32(hash ^ 0x9e3779b9) & 1) === 0;
        counts[hash % dimension] += isPositive ? 1 : -1;
    });
    if (gramCount === 0) {
        return counts;
    }
    const damped = counts.map((count) => Math.sign(count) * Math.log1p(Math.abs(count)));
    return normalizeVector(damped);
}
88
/**
 * Resolve the embedding provider for a provider/model configuration.
 *
 * @param {{provider?: string | null, model?: string | null}} input Raw
 *   configuration; normalized with `normalizeSemanticProviderConfig` first.
 * @returns {LocalNgramEmbeddingProvider | null} A local n-gram provider when
 *   the normalized provider is the local n-gram one and a model is set; null
 *   when the provider is missing or "disabled", the model is missing or
 *   "none", or the provider is not recognized.
 */
export function resolveSemanticEmbeddingProvider(input) {
    const { provider, model } = normalizeSemanticProviderConfig(input);
    const providerDisabled = !provider || provider === "disabled";
    const modelDisabled = !model || model === "none";
    if (providerDisabled || modelDisabled) {
        return null;
    }
    return provider === LOCAL_NGRAM_PROVIDER ? new LocalNgramEmbeddingProvider() : null;
}
98
/**
 * Embed a free-text query with the configured provider.
 *
 * @param {{provider?: string, model?: string, text: string}} input
 * @returns {Promise<object | null>} The provider's embedding result for the
 *   query (tagged with node id and content hash "__query__"), or null when no
 *   provider resolves, the text is blank, or the provider returns nothing.
 */
export async function embedSemanticQueryText(input) {
    const embeddingProvider = resolveSemanticEmbeddingProvider({
        provider: input.provider,
        model: input.model
    });
    // The text is inspected only once a provider is known to exist.
    if (!embeddingProvider) {
        return null;
    }
    if (!input.text.trim()) {
        return null;
    }
    const queryItem = {
        nodeId: "__query__",
        chunkOrdinal: 0,
        contentHash: "__query__",
        text: input.text,
    };
    const [embedding] = await embeddingProvider.embedBatch([queryItem]);
    return embedding ?? null;
}
116
/**
 * Map the legacy "deterministic" provider onto the local n-gram provider.
 *
 * @param {{provider?: string, model?: string}} input
 * @returns {{provider?: string, model?: string}} A new object naming the
 *   local n-gram provider and model when `input.provider` is the legacy name;
 *   otherwise the same `input` object, untouched.
 */
export function normalizeSemanticProviderConfig(input) {
    const isLegacyProvider = input.provider === LEGACY_DETERMINISTIC_PROVIDER;
    return isLegacyProvider
        ? { provider: LOCAL_NGRAM_PROVIDER, model: LOCAL_NGRAM_MODEL }
        : input;
}
@@ -0,0 +1 @@
1
+ export {};