@agentmemory/agentmemory 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +14 -0
- package/.github/workflows/ci.yml +22 -0
- package/.github/workflows/publish.yml +28 -0
- package/AGENTS.md +113 -0
- package/LICENSE +190 -0
- package/README.md +828 -0
- package/assets/banner.png +0 -0
- package/assets/demo.gif +0 -0
- package/assets/demo.mp4 +0 -0
- package/benchmark/QUALITY.md +73 -0
- package/benchmark/REAL-EMBEDDINGS.md +67 -0
- package/benchmark/SCALE.md +110 -0
- package/benchmark/dataset.ts +293 -0
- package/benchmark/quality-eval.ts +643 -0
- package/benchmark/real-embeddings-eval.ts +405 -0
- package/benchmark/scale-eval.ts +398 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.mjs +137 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/docker-compose.yml +14 -0
- package/dist/hooks/notification.d.mts +1 -0
- package/dist/hooks/notification.mjs +45 -0
- package/dist/hooks/notification.mjs.map +1 -0
- package/dist/hooks/post-tool-failure.d.mts +1 -0
- package/dist/hooks/post-tool-failure.mjs +45 -0
- package/dist/hooks/post-tool-failure.mjs.map +1 -0
- package/dist/hooks/post-tool-use.d.mts +1 -0
- package/dist/hooks/post-tool-use.mjs +53 -0
- package/dist/hooks/post-tool-use.mjs.map +1 -0
- package/dist/hooks/pre-compact.d.mts +1 -0
- package/dist/hooks/pre-compact.mjs +50 -0
- package/dist/hooks/pre-compact.mjs.map +1 -0
- package/dist/hooks/pre-tool-use.d.mts +1 -0
- package/dist/hooks/pre-tool-use.mjs +69 -0
- package/dist/hooks/pre-tool-use.mjs.map +1 -0
- package/dist/hooks/prompt-submit.d.mts +1 -0
- package/dist/hooks/prompt-submit.mjs +40 -0
- package/dist/hooks/prompt-submit.mjs.map +1 -0
- package/dist/hooks/session-end.d.mts +1 -0
- package/dist/hooks/session-end.mjs +61 -0
- package/dist/hooks/session-end.mjs.map +1 -0
- package/dist/hooks/session-start.d.mts +1 -0
- package/dist/hooks/session-start.mjs +42 -0
- package/dist/hooks/session-start.mjs.map +1 -0
- package/dist/hooks/stop.d.mts +1 -0
- package/dist/hooks/stop.mjs +33 -0
- package/dist/hooks/stop.mjs.map +1 -0
- package/dist/hooks/subagent-start.d.mts +1 -0
- package/dist/hooks/subagent-start.mjs +43 -0
- package/dist/hooks/subagent-start.mjs.map +1 -0
- package/dist/hooks/subagent-stop.d.mts +1 -0
- package/dist/hooks/subagent-stop.mjs +45 -0
- package/dist/hooks/subagent-stop.mjs.map +1 -0
- package/dist/hooks/task-completed.d.mts +1 -0
- package/dist/hooks/task-completed.mjs +46 -0
- package/dist/hooks/task-completed.mjs.map +1 -0
- package/dist/iii-config.yaml +51 -0
- package/dist/index.d.mts +2 -0
- package/dist/index.mjs +13776 -0
- package/dist/index.mjs.map +1 -0
- package/dist/src-QxitMPfJ.mjs +13775 -0
- package/dist/src-QxitMPfJ.mjs.map +1 -0
- package/dist/standalone.d.mts +1 -0
- package/dist/standalone.mjs +1155 -0
- package/dist/standalone.mjs.map +1 -0
- package/dist/transformers-BX_tgxdO.mjs +38684 -0
- package/dist/transformers-BX_tgxdO.mjs.map +1 -0
- package/dist/transformers-KMm1i9no.mjs +38683 -0
- package/dist/transformers-KMm1i9no.mjs.map +1 -0
- package/docker-compose.yml +14 -0
- package/iii-config.yaml +51 -0
- package/package.json +59 -0
- package/plugin/.claude-plugin/plugin.json +10 -0
- package/plugin/hooks/hooks.json +77 -0
- package/plugin/scripts/diagnostics.mjs +551 -0
- package/plugin/scripts/notification.mjs +45 -0
- package/plugin/scripts/post-tool-failure.mjs +45 -0
- package/plugin/scripts/post-tool-use.mjs +53 -0
- package/plugin/scripts/pre-compact.mjs +50 -0
- package/plugin/scripts/pre-tool-use.mjs +69 -0
- package/plugin/scripts/prompt-submit.mjs +40 -0
- package/plugin/scripts/session-end.mjs +61 -0
- package/plugin/scripts/session-start.mjs +42 -0
- package/plugin/scripts/stop.mjs +33 -0
- package/plugin/scripts/subagent-start.mjs +43 -0
- package/plugin/scripts/subagent-stop.mjs +45 -0
- package/plugin/scripts/task-completed.mjs +46 -0
- package/plugin/skills/forget/SKILL.md +32 -0
- package/plugin/skills/recall/SKILL.md +18 -0
- package/plugin/skills/remember/SKILL.md +25 -0
- package/plugin/skills/session-history/SKILL.md +17 -0
- package/src/auth.ts +12 -0
- package/src/cli.ts +159 -0
- package/src/config.ts +221 -0
- package/src/eval/metrics-store.ts +65 -0
- package/src/eval/quality.ts +51 -0
- package/src/eval/schemas.ts +124 -0
- package/src/eval/self-correct.ts +28 -0
- package/src/eval/validator.ts +31 -0
- package/src/functions/actions.ts +288 -0
- package/src/functions/audit.ts +61 -0
- package/src/functions/auto-forget.ts +169 -0
- package/src/functions/branch-aware.ts +169 -0
- package/src/functions/cascade.ts +80 -0
- package/src/functions/checkpoints.ts +209 -0
- package/src/functions/claude-bridge.ts +161 -0
- package/src/functions/compress.ts +194 -0
- package/src/functions/consolidate.ts +212 -0
- package/src/functions/consolidation-pipeline.ts +258 -0
- package/src/functions/context.ts +169 -0
- package/src/functions/crystallize.ts +293 -0
- package/src/functions/dedup.ts +57 -0
- package/src/functions/diagnostics.ts +785 -0
- package/src/functions/enrich.ts +132 -0
- package/src/functions/evict.ts +163 -0
- package/src/functions/export-import.ts +508 -0
- package/src/functions/facets.ts +248 -0
- package/src/functions/file-index.ts +106 -0
- package/src/functions/flow-compress.ts +214 -0
- package/src/functions/frontier.ts +196 -0
- package/src/functions/governance.ts +131 -0
- package/src/functions/graph-retrieval.ts +277 -0
- package/src/functions/graph.ts +275 -0
- package/src/functions/leases.ts +216 -0
- package/src/functions/lessons.ts +253 -0
- package/src/functions/mesh.ts +434 -0
- package/src/functions/migrate.ts +165 -0
- package/src/functions/observe.ts +144 -0
- package/src/functions/obsidian-export.ts +310 -0
- package/src/functions/patterns.ts +138 -0
- package/src/functions/privacy.ts +39 -0
- package/src/functions/profile.ts +155 -0
- package/src/functions/query-expansion.ts +186 -0
- package/src/functions/relations.ts +237 -0
- package/src/functions/remember.ts +162 -0
- package/src/functions/retention.ts +235 -0
- package/src/functions/routines.ts +289 -0
- package/src/functions/search.ts +80 -0
- package/src/functions/sentinels.ts +417 -0
- package/src/functions/signals.ts +186 -0
- package/src/functions/sketches.ts +274 -0
- package/src/functions/sliding-window.ts +257 -0
- package/src/functions/smart-search.ts +115 -0
- package/src/functions/snapshot.ts +219 -0
- package/src/functions/summarize.ts +155 -0
- package/src/functions/team.ts +147 -0
- package/src/functions/temporal-graph.ts +476 -0
- package/src/functions/timeline.ts +138 -0
- package/src/functions/verify.ts +117 -0
- package/src/health/monitor.ts +110 -0
- package/src/health/thresholds.ts +73 -0
- package/src/hooks/notification.ts +52 -0
- package/src/hooks/post-tool-failure.ts +58 -0
- package/src/hooks/post-tool-use.ts +62 -0
- package/src/hooks/pre-compact.ts +60 -0
- package/src/hooks/pre-tool-use.ts +72 -0
- package/src/hooks/prompt-submit.ts +46 -0
- package/src/hooks/session-end.ts +71 -0
- package/src/hooks/session-start.ts +48 -0
- package/src/hooks/stop.ts +39 -0
- package/src/hooks/subagent-start.ts +49 -0
- package/src/hooks/subagent-stop.ts +54 -0
- package/src/hooks/task-completed.ts +54 -0
- package/src/index.ts +342 -0
- package/src/mcp/in-memory-kv.ts +61 -0
- package/src/mcp/server.ts +1455 -0
- package/src/mcp/standalone.ts +177 -0
- package/src/mcp/tools-registry.ts +769 -0
- package/src/mcp/transport.ts +91 -0
- package/src/prompts/compression.ts +67 -0
- package/src/prompts/consolidation.ts +48 -0
- package/src/prompts/graph-extraction.ts +35 -0
- package/src/prompts/summary.ts +38 -0
- package/src/prompts/xml.ts +26 -0
- package/src/providers/agent-sdk.ts +34 -0
- package/src/providers/anthropic.ts +35 -0
- package/src/providers/circuit-breaker.ts +82 -0
- package/src/providers/embedding/cohere.ts +46 -0
- package/src/providers/embedding/gemini.ts +54 -0
- package/src/providers/embedding/index.ts +39 -0
- package/src/providers/embedding/local.ts +52 -0
- package/src/providers/embedding/openai.ts +45 -0
- package/src/providers/embedding/openrouter.ts +51 -0
- package/src/providers/embedding/voyage.ts +46 -0
- package/src/providers/fallback-chain.ts +31 -0
- package/src/providers/index.ts +84 -0
- package/src/providers/openrouter.ts +71 -0
- package/src/providers/resilient.ts +37 -0
- package/src/state/hybrid-search.ts +295 -0
- package/src/state/index-persistence.ts +63 -0
- package/src/state/keyed-mutex.ts +18 -0
- package/src/state/kv.ts +33 -0
- package/src/state/schema.ts +71 -0
- package/src/state/search-index.ts +245 -0
- package/src/state/stemmer.ts +104 -0
- package/src/state/synonyms.ts +63 -0
- package/src/state/vector-index.ts +130 -0
- package/src/telemetry/setup.ts +116 -0
- package/src/triggers/api.ts +1904 -0
- package/src/triggers/events.ts +71 -0
- package/src/types.ts +769 -0
- package/src/version.ts +1 -0
- package/src/viewer/index.html +2497 -0
- package/src/viewer/server.ts +207 -0
- package/src/xenova.d.ts +3 -0
- package/test/actions.test.ts +490 -0
- package/test/audit.test.ts +108 -0
- package/test/auto-forget.test.ts +188 -0
- package/test/cascade.test.ts +277 -0
- package/test/checkpoints.test.ts +493 -0
- package/test/circuit-breaker.test.ts +107 -0
- package/test/claude-bridge.test.ts +178 -0
- package/test/confidence.test.ts +247 -0
- package/test/consistency.test.ts +61 -0
- package/test/consolidation-pipeline.test.ts +251 -0
- package/test/crystallize.test.ts +521 -0
- package/test/diagnostics.test.ts +638 -0
- package/test/embedding-provider.test.ts +49 -0
- package/test/enrich.test.ts +209 -0
- package/test/eval.test.ts +300 -0
- package/test/export-import.test.ts +251 -0
- package/test/facets.test.ts +448 -0
- package/test/fallback-chain.test.ts +93 -0
- package/test/frontier.test.ts +485 -0
- package/test/governance.test.ts +147 -0
- package/test/graph-retrieval.test.ts +186 -0
- package/test/graph.test.ts +160 -0
- package/test/helpers/mocks.ts +40 -0
- package/test/hybrid-search.test.ts +145 -0
- package/test/index-persistence.test.ts +124 -0
- package/test/integration.test.ts +265 -0
- package/test/leases.test.ts +399 -0
- package/test/mcp-prompts.test.ts +218 -0
- package/test/mcp-resources.test.ts +286 -0
- package/test/mcp-standalone.test.ts +113 -0
- package/test/mesh.test.ts +700 -0
- package/test/privacy.test.ts +87 -0
- package/test/profile.test.ts +161 -0
- package/test/query-expansion.test.ts +154 -0
- package/test/relations.test.ts +198 -0
- package/test/retention.test.ts +245 -0
- package/test/routines.test.ts +497 -0
- package/test/schema-fingerprint.test.ts +81 -0
- package/test/schema.test.ts +42 -0
- package/test/search-index.test.ts +128 -0
- package/test/sentinels.test.ts +626 -0
- package/test/signals.test.ts +410 -0
- package/test/sketches.test.ts +549 -0
- package/test/sliding-window.test.ts +199 -0
- package/test/smart-search.test.ts +169 -0
- package/test/snapshot.test.ts +165 -0
- package/test/team.test.ts +156 -0
- package/test/temporal-graph.test.ts +378 -0
- package/test/timeline.test.ts +148 -0
- package/test/vector-index.test.ts +79 -0
- package/test/verify.test.ts +209 -0
- package/test/xml.test.ts +65 -0
- package/tsconfig.json +22 -0
- package/tsdown.config.ts +62 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { SearchIndex } from "./search-index.js";
|
|
2
|
+
import { VectorIndex } from "./vector-index.js";
|
|
3
|
+
import type { StateKV } from "./kv.js";
|
|
4
|
+
import { KV } from "./schema.js";
|
|
5
|
+
|
|
6
|
+
/** Quiet period, in milliseconds, that must elapse after the last `scheduleSave()` before the indexes are written. */
const DEBOUNCE_MS = 5000;

/**
 * Persists the BM25 index and (optionally) the vector index through the
 * state KV store, under the `KV.bm25Index` scope:
 *   - key "data"    holds the serialized BM25 index
 *   - key "vectors" holds the serialized vector index
 */
export class IndexPersistence {
  /** Handle of the pending debounced save, or null when nothing is scheduled. */
  private timer: ReturnType<typeof setTimeout> | null = null;

  constructor(
    private kv: StateKV,
    private bm25: SearchIndex,
    private vector: VectorIndex | null,
  ) {}

  /**
   * Requests a save after `DEBOUNCE_MS` of inactivity. Every call restarts
   * the countdown, so a burst of calls results in a single write.
   */
  scheduleSave(): void {
    if (this.timer) clearTimeout(this.timer);
    this.timer = setTimeout(() => {
      // The timer callback has no caller to propagate to, so a rejected save
      // must be handled here; otherwise it surfaces as an unhandled rejection.
      this.save().catch((err: unknown) => {
        console.warn(
          "IndexPersistence: debounced save failed:",
          err instanceof Error ? err.message : err,
        );
      });
    }, DEBOUNCE_MS);
  }

  /**
   * Writes the indexes immediately and cancels any pending debounced save.
   * The vector index is only written when it exists and is non-empty; an
   * existing "vectors" entry is otherwise left untouched.
   *
   * @throws whatever the underlying `kv.set` call rejects with.
   */
  async save(): Promise<void> {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
    await this.kv.set(KV.bm25Index, "data", this.bm25.serialize());
    if (this.vector && this.vector.size > 0) {
      await this.kv.set(KV.bm25Index, "vectors", this.vector.serialize());
    }
  }

  /**
   * Reads both indexes back from the KV store.
   *
   * @returns freshly deserialized indexes; a member is null when its entry
   *          is missing, empty, not a string, or could not be read.
   */
  async load(): Promise<{
    bm25: SearchIndex | null;
    vector: VectorIndex | null;
  }> {
    let bm25: SearchIndex | null = null;
    let vector: VectorIndex | null = null;

    const bm25Data = await this.readSerialized("data");
    if (bm25Data !== null) {
      bm25 = SearchIndex.deserialize(bm25Data);
    }

    const vecData = await this.readSerialized("vectors");
    if (vecData !== null) {
      vector = VectorIndex.deserialize(vecData);
    }

    return { bm25, vector };
  }

  /** Cancels a pending debounced save without writing anything. */
  stop(): void {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }

  /** Best-effort read of one serialized index; read failures are treated as "nothing stored". */
  private async readSerialized(key: string): Promise<string | null> {
    try {
      const raw = await this.kv.get<string>(KV.bm25Index, key);
      return typeof raw === "string" && raw.length > 0 ? raw : null;
    } catch {
      return null;
    }
  }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Tail of the pending work chain for each key; a key is removed once its chain drains. */
const locks = new Map<string, Promise<void>>();

/**
 * Runs `fn` after every previously queued call for the same `key` has
 * settled, so calls sharing a key execute one at a time in submission order.
 * Calls with different keys do not wait on each other.
 *
 * @param key identifies the queue to join
 * @param fn  the asynchronous work to run once the queue reaches it
 * @returns a promise that settles exactly as the promise returned by `fn`
 */
export function withKeyedLock<T>(
  key: string,
  fn: () => Promise<T>,
): Promise<T> {
  const swallow = (): void => {};

  const tail = locks.get(key) ?? Promise.resolve();
  // Run fn whether the predecessor fulfilled or rejected.
  const outcome = tail.then(fn, fn);

  // The stored tail never rejects, so one failed call cannot poison the queue.
  const settled = outcome.then(swallow, swallow);
  locks.set(key, settled);

  settled.then(() => {
    // Only drop the entry if nobody queued behind this call in the meantime.
    if (locks.get(key) === settled) {
      locks.delete(key);
    }
  });

  return outcome;
}
|
package/src/state/kv.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { ISdk } from 'iii-sdk'
|
|
2
|
+
|
|
3
|
+
/**
 * Typed wrapper around the SDK's `state::*` functions. Each method forwards
 * its arguments as the trigger payload and returns the SDK's promise result
 * unchanged.
 */
export class StateKV {
  constructor(private sdk: ISdk) {}

  /** Triggers `state::get` for `key` within `scope`; the result is typed as `T | null`. */
  async get<T = unknown>(scope: string, key: string): Promise<T | null> {
    const payload = { scope, key }
    return this.sdk.trigger<{ scope: string; key: string }, T | null>(
      'state::get',
      payload,
    )
  }

  /** Triggers `state::set` with `data` for `key` within `scope`; resolves with the SDK's response. */
  async set<T = unknown>(scope: string, key: string, data: T): Promise<T> {
    const payload = { scope, key, data }
    return this.sdk.trigger<{ scope: string; key: string; data: T }, T>(
      'state::set',
      payload,
    )
  }

  /** Triggers `state::delete` for `key` within `scope`. */
  async delete(scope: string, key: string): Promise<void> {
    const payload = { scope, key }
    return this.sdk.trigger<{ scope: string; key: string }, void>(
      'state::delete',
      payload,
    )
  }

  /** Triggers `state::list` for `scope`; the result is typed as `T[]`. */
  async list<T = unknown>(scope: string): Promise<T[]> {
    const payload = { scope }
    return this.sdk.trigger<{ scope: string }, T[]>('state::list', payload)
  }
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { createHash, randomUUID } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
/**
 * Scope names used with the state KV store. Plain strings are fixed scopes;
 * functions build a scope name from the identifier(s) they are given.
 * Every scope name starts with the `mem:` prefix.
 */
export const KV = {
  // Core records
  sessions: "mem:sessions",
  observations: (sessionId: string) => `mem:obs:${sessionId}`,
  memories: "mem:memories",
  summaries: "mem:summaries",
  config: "mem:config",
  metrics: "mem:metrics",
  health: "mem:health",

  // Search data
  embeddings: (obsId: string) => `mem:emb:${obsId}`,
  bm25Index: "mem:index:bm25",

  relations: "mem:relations",
  profiles: "mem:profiles",
  claudeBridge: "mem:claude-bridge",

  // Graph
  graphNodes: "mem:graph:nodes",
  graphEdges: "mem:graph:edges",

  semantic: "mem:semantic",
  procedural: "mem:procedural",

  // Team scopes
  teamShared: (teamId: string) => `mem:team:${teamId}:shared`,
  teamUsers: (teamId: string, userId: string) =>
    `mem:team:${teamId}:users:${userId}`,
  teamProfile: (teamId: string) => `mem:team:${teamId}:profile`,

  audit: "mem:audit",
  actions: "mem:actions",
  actionEdges: "mem:action-edges",
  leases: "mem:leases",
  routines: "mem:routines",
  routineRuns: "mem:routine-runs",
  signals: "mem:signals",
  checkpoints: "mem:checkpoints",
  mesh: "mem:mesh",
  sketches: "mem:sketches",
  facets: "mem:facets",
  sentinels: "mem:sentinels",
  crystals: "mem:crystals",
  lessons: "mem:lessons",
  graphEdgeHistory: "mem:graph:edge-history",
  enrichedChunks: (sessionId: string) => `mem:enriched:${sessionId}`,
  latentEmbeddings: (obsId: string) => `mem:latent:${obsId}`,
  retentionScores: "mem:retention",
} as const;
|
|
43
|
+
|
|
44
|
+
/**
 * Names used for the live stream: the stream name, a per-session group
 * (which is the session id itself), and the fixed group name "viewer".
 */
export const STREAM = {
  name: "mem-live",
  group: (sessionId: string) => {
    return sessionId;
  },
  viewerGroup: "viewer",
} as const;
|
|
49
|
+
|
|
50
|
+
/**
 * Builds an identifier of the form `<prefix>_<timestamp>_<random>`.
 *
 * - `<timestamp>` is the current `Date.now()` value in base 36.
 * - `<random>` is the first 12 hex digits of a random UUID (dashes removed).
 *
 * `randomUUID` is taken from `node:crypto` rather than the global `crypto`
 * object, which is not available unflagged on every supported Node release.
 *
 * @param prefix label placed in front of the generated parts
 * @returns the new identifier
 */
export function generateId(prefix: string): string {
  const ts = Date.now().toString(36);
  const rand = randomUUID().replace(/-/g, "").slice(0, 12);
  return `${prefix}_${ts}_${rand}`;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Derives a deterministic identifier from `content`: the prefix, an
 * underscore, and the first 16 hex digits of the content's SHA-256 digest.
 *
 * @param prefix  label placed in front of the digest fragment
 * @param content text to fingerprint
 * @returns `<prefix>_<16 hex chars>`; equal content always yields the same id
 */
export function fingerprintId(prefix: string, content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  const digestHex = hasher.digest("hex");
  return [prefix, digestHex.slice(0, 16)].join("_");
}
|
|
60
|
+
|
|
61
|
+
/**
 * Jaccard similarity of two texts over their sets of whitespace-separated
 * tokens. Tokens of one or two characters are ignored; comparison is
 * case-sensitive.
 *
 * @returns |A ∩ B| / |A ∪ B|; 1 when both token sets are empty and 0 when
 *          exactly one of them is empty
 */
export function jaccardSimilarity(a: string, b: string): number {
  const toTokenSet = (text: string): Set<string> => {
    const tokens = new Set<string>();
    for (const piece of text.split(/\s+/)) {
      if (piece.length > 2) tokens.add(piece);
    }
    return tokens;
  };

  const left = toTokenSet(a);
  const right = toTokenSet(b);

  if (left.size === 0 || right.size === 0) {
    // Two empty sets are considered identical; one empty set shares nothing.
    return left.size === right.size ? 1 : 0;
  }

  const shared = [...left].filter((token) => right.has(token)).length;
  const unionSize = left.size + right.size - shared;
  return shared / unionSize;
}
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
import type { CompressedObservation } from "../types.js";
|
|
2
|
+
import { stem } from "./stemmer.js";
|
|
3
|
+
import { getSynonyms } from "./synonyms.js";
|
|
4
|
+
|
|
5
|
+
/** Per-document bookkeeping stored next to the postings. */
interface IndexEntry {
  obsId: string;
  sessionId: string;
  /** Number of tokens extracted from the observation (repeats counted). */
  termCount: number;
}

/**
 * In-memory BM25 keyword index over compressed observations.
 *
 * Text is lower-cased, tokenized and stemmed before indexing. Queries are
 * expanded with synonyms (weight 0.7) and also match index terms that merely
 * start with a query term (at half the usual IDF).
 */
export class SearchIndex {
  /** obsId -> document metadata. */
  private entries: Map<string, IndexEntry> = new Map();
  /** term -> ids of the documents containing it. */
  private invertedIndex: Map<string, Set<string>> = new Map();
  /** obsId -> (term -> occurrences in that document). */
  private docTermCounts: Map<string, Map<string, number>> = new Map();
  /** Sum of `termCount` over all indexed documents. */
  private totalDocLength = 0;
  /** Lazily built sorted list of index terms used for prefix lookups; null when stale. */
  private sortedTerms: string[] | null = null;

  // BM25 parameters: term-frequency saturation and length normalization.
  private readonly k1 = 1.2;
  private readonly b = 0.75;

  /**
   * Indexes an observation. Adding an id that is already indexed replaces
   * the earlier version instead of accumulating on top of it, so document
   * length totals and postings stay consistent.
   */
  add(obs: CompressedObservation): void {
    if (this.entries.has(obs.id) || this.docTermCounts.has(obs.id)) {
      this.discard(obs.id);
    }

    const termFreq = new Map<string, number>();
    let termCount = 0;
    for (const term of this.extractTerms(obs)) {
      termFreq.set(term, (termFreq.get(term) || 0) + 1);
      termCount++;
    }

    this.entries.set(obs.id, {
      obsId: obs.id,
      sessionId: obs.sessionId,
      termCount,
    });
    this.docTermCounts.set(obs.id, termFreq);
    this.totalDocLength += termCount;

    for (const term of termFreq.keys()) {
      let postings = this.invertedIndex.get(term);
      if (!postings) {
        postings = new Set();
        this.invertedIndex.set(term, postings);
      }
      postings.add(obs.id);
    }

    this.sortedTerms = null;
  }

  /**
   * Ranks indexed observations against `query`.
   *
   * @param query free text; tokenized and stemmed like indexed content
   * @param limit maximum number of results (default 20)
   * @returns matches ordered by descending score; empty when the query has
   *          no usable terms or the index is empty
   */
  search(
    query: string,
    limit = 20,
  ): Array<{ obsId: string; sessionId: string; score: number }> {
    const rawTerms = this.tokenize(query.toLowerCase());
    if (rawTerms.length === 0) return [];

    const N = this.entries.size;
    if (N === 0) return [];
    const avgDocLen = this.totalDocLength / N;

    // Each distinct query term counts once at full weight; its synonyms are
    // added at reduced weight unless already present.
    const queryTerms: Array<{ term: string; weight: number }> = [];
    const seen = new Set<string>();
    for (const term of rawTerms) {
      if (!seen.has(term)) {
        seen.add(term);
        queryTerms.push({ term, weight: 1.0 });
      }
      for (const syn of getSynonyms(term)) {
        if (!seen.has(syn)) {
          seen.add(syn);
          queryTerms.push({ term: syn, weight: 0.7 });
        }
      }
    }

    const scores = new Map<string, number>();
    const sorted = this.getSortedTerms();

    for (const { term, weight } of queryTerms) {
      // Exact term match at full IDF.
      this.accumulate(scores, term, 1, weight, N, avgDocLen);

      // Prefix matches: every longer index term starting with `term`, at half IDF.
      for (let si = this.lowerBound(sorted, term); si < sorted.length; si++) {
        const indexTerm = sorted[si];
        if (!indexTerm.startsWith(term)) break;
        if (indexTerm === term) continue;
        this.accumulate(scores, indexTerm, 0.5, weight, N, avgDocLen);
      }
    }

    const ranked: Array<{ obsId: string; sessionId: string; score: number }> = [];
    for (const [obsId, score] of scores) {
      const entry = this.entries.get(obsId);
      if (entry) ranked.push({ obsId, sessionId: entry.sessionId, score });
    }
    return ranked.sort((a, b) => b.score - a.score).slice(0, limit);
  }

  /** Number of indexed documents. */
  get size(): number {
    return this.entries.size;
  }

  /** Removes every document and resets all statistics. */
  clear(): void {
    this.entries.clear();
    this.invertedIndex.clear();
    this.docTermCounts.clear();
    this.totalDocLength = 0;
    this.sortedTerms = null;
  }

  /** Replaces this index's contents with an independent copy of `other`'s. */
  restoreFrom(other: SearchIndex): void {
    this.entries = new Map(
      Array.from(other.entries.entries()).map(([k, v]) => [k, { ...v }]),
    );
    this.invertedIndex = new Map(
      Array.from(other.invertedIndex.entries()).map(([k, v]) => [
        k,
        new Set(v),
      ]),
    );
    this.docTermCounts = new Map(
      Array.from(other.docTermCounts.entries()).map(([k, v]) => [
        k,
        new Map(v),
      ]),
    );
    this.totalDocLength = other.totalDocLength;
    this.sortedTerms = null;
  }

  /** Serializes the index to a JSON string (format version 2). */
  serialize(): string {
    const entries = Array.from(this.entries.entries());
    const inverted = Array.from(this.invertedIndex.entries()).map(
      ([term, ids]) => [term, Array.from(ids)] as [string, string[]],
    );
    const docTerms = Array.from(this.docTermCounts.entries()).map(
      ([id, counts]) =>
        [id, Array.from(counts.entries())] as [string, [string, number][]],
    );
    return JSON.stringify({
      v: 2,
      entries,
      inverted,
      docTerms,
      totalDocLength: this.totalDocLength,
    });
  }

  /**
   * Rebuilds an index from `serialize()` output. Never throws: input that
   * cannot be parsed or lacks the expected sections yields an empty index.
   */
  static deserialize(json: string): SearchIndex {
    try {
      const idx = new SearchIndex();
      const data = JSON.parse(json);
      if (!data?.entries || !data?.inverted || !data?.docTerms) return idx;
      for (const [key, val] of data.entries) {
        idx.entries.set(key, val);
      }
      for (const [term, ids] of data.inverted) {
        idx.invertedIndex.set(term, new Set(ids));
      }
      for (const [id, counts] of data.docTerms) {
        idx.docTermCounts.set(id, new Map(counts));
      }
      // Tolerate a missing or malformed total rather than poisoning scores with NaN.
      const rawLen = Number(data.totalDocLength);
      idx.totalDocLength =
        Number.isFinite(rawLen) && rawLen >= 0 ? Math.floor(rawLen) : 0;
      return idx;
    } catch {
      return new SearchIndex();
    }
  }

  /**
   * Adds the BM25 contribution of one index term to `scores` for every
   * document containing it. Postings that reference a document without
   * metadata (possible in inconsistent restored data) are skipped.
   *
   * @param idfScale multiplier applied to the IDF (1 for exact, 0.5 for prefix matches)
   * @param weight   query-term weight (1.0 for original terms, 0.7 for synonyms)
   */
  private accumulate(
    scores: Map<string, number>,
    indexTerm: string,
    idfScale: number,
    weight: number,
    docCount: number,
    avgDocLen: number,
  ): void {
    const postings = this.invertedIndex.get(indexTerm);
    if (!postings) return;

    const df = postings.size;
    const idf = Math.log((docCount - df + 0.5) / (df + 0.5) + 1) * idfScale;

    for (const obsId of postings) {
      const entry = this.entries.get(obsId);
      if (!entry) continue;
      const tf = this.docTermCounts.get(obsId)?.get(indexTerm) || 0;
      const numerator = tf * (this.k1 + 1);
      const denominator =
        tf + this.k1 * (1 - this.b + this.b * (entry.termCount / avgDocLen));
      scores.set(
        obsId,
        (scores.get(obsId) || 0) + idf * (numerator / denominator) * weight,
      );
    }
  }

  /** Retracts everything a previously indexed document contributed. */
  private discard(obsId: string): void {
    const previous = this.entries.get(obsId);
    const oldTerms = this.docTermCounts.get(obsId);
    // Without a per-document term list, fall back to checking every posting list.
    const termsToClean = oldTerms
      ? Array.from(oldTerms.keys())
      : Array.from(this.invertedIndex.keys());

    for (const term of termsToClean) {
      const postings = this.invertedIndex.get(term);
      if (!postings) continue;
      postings.delete(obsId);
      if (postings.size === 0) this.invertedIndex.delete(term);
    }

    if (previous) {
      this.totalDocLength = Math.max(
        0,
        this.totalDocLength - previous.termCount,
      );
    }
    this.entries.delete(obsId);
    this.docTermCounts.delete(obsId);
    this.sortedTerms = null;
  }

  /** Collects the searchable text fields of an observation and tokenizes them. */
  private extractTerms(obs: CompressedObservation): string[] {
    const parts = [
      obs.title,
      obs.subtitle || "",
      obs.narrative,
      ...obs.facts,
      ...obs.concepts,
      ...obs.files,
      obs.type,
    ];
    return this.tokenize(parts.join(" ").toLowerCase());
  }

  /**
   * Splits text into stemmed tokens. Word characters plus `/`, `.`, `-` and
   * `_` are kept so paths and hyphenated names stay intact; single-character
   * tokens are dropped.
   */
  private tokenize(text: string): string[] {
    return text
      .replace(/[^\w\s/.\-_]/g, " ")
      .split(/\s+/)
      .filter((t) => t.length > 1)
      .map((t) => stem(t));
  }

  /** Returns the index terms in sorted order, rebuilding the cache if it is stale. */
  private getSortedTerms(): string[] {
    if (!this.sortedTerms) {
      this.sortedTerms = Array.from(this.invertedIndex.keys()).sort();
    }
    return this.sortedTerms;
  }

  /** Binary search: index of the first element of `arr` that is not less than `target`. */
  private lowerBound(arr: string[], target: string): number {
    let lo = 0;
    let hi = arr.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      if (arr[mid] < target) lo = mid + 1;
      else hi = mid;
    }
    return lo;
  }
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
// Suffix rewrite tables for the stemmer. Entry order is significant: the
// stemmer walks each table in insertion order and stops at the first suffix
// that matches, so longer suffixes are listed before the shorter ones they
// contain (e.g. "ization" before "ation").
const step2map: Record<string, string> = {
  ational: "ate",
  tional: "tion",
  enci: "ence",
  anci: "ance",
  izer: "ize",
  iser: "ise",
  abli: "able",
  alli: "al",
  entli: "ent",
  eli: "e",
  ousli: "ous",
  ization: "ize",
  isation: "ise",
  ation: "ate",
  ator: "ate",
  alism: "al",
  iveness: "ive",
  fulness: "ful",
  ousness: "ous",
  aliti: "al",
  iviti: "ive",
  biliti: "ble",
};

// Second rewrite table; an empty replacement removes the suffix outright.
const step3map: Record<string, string> = {
  icate: "ic",
  ative: "",
  alize: "al",
  alise: "al",
  iciti: "ic",
  ical: "ic",
  ful: "",
  ness: "",
};
|
|
14
|
+
|
|
15
|
+
/** True when `s` contains at least one of the letters a, e, i, o, u (lower case only). */
function hasVowel(s: string): boolean {
  for (let i = 0; i < s.length; i++) {
    if ("aeiou".includes(s.charAt(i))) return true;
  }
  return false;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Counts the vowel-run-then-consonant-run transitions in `s`, i.e. how many
 * times a run of vowels is directly followed by a non-vowel. For this count
 * `y` is treated as a vowel, and every character outside `aeiouy` as a
 * consonant.
 */
function measure(s: string): number {
  let transitions = 0;
  let previousWasVowel = false;
  for (let i = 0; i < s.length; i++) {
    const isVowel = "aeiouy".includes(s.charAt(i));
    if (previousWasVowel && !isVowel) transitions++;
    previousWasVowel = isVowel;
  }
  return transitions;
}
|
|
24
|
+
|
|
25
|
+
/** True when the last two characters of `s` are identical and are not one of a, e, i, o, u. */
function endsDoubleConsonant(s: string): boolean {
  if (s.length < 2) return false;
  const last = s.charAt(s.length - 1);
  if (last !== s.charAt(s.length - 2)) return false;
  return !"aeiou".includes(last);
}
|
|
28
|
+
|
|
29
|
+
/**
 * True when `s` ends in consonant-vowel-consonant, where vowels are a, e, i,
 * o, u and the final consonant is additionally not w, x or y.
 */
function endsCVC(s: string): boolean {
  const len = s.length;
  if (len < 3) return false;

  const vowels = "aeiou";
  const first = s.charAt(len - 3);
  const middle = s.charAt(len - 2);
  const final = s.charAt(len - 1);

  if (vowels.includes(first)) return false;
  if (!vowels.includes(middle)) return false;
  return !"aeiouwxy".includes(final);
}
|
|
34
|
+
|
|
35
|
+
/** Suffixes stripped in the fourth stage; anchored so the longest suffix ending the word is the one matched. */
const STEP4_SUFFIX =
  /(ement|ment|tion|sion|ance|ence|able|ible|ism|ate|iti|ous|ive|ize|ise|ant|ent|al|er|ic|ou)$/;

/** Tidies a stem after "ed"/"ing" has been stripped: restores a final "e" or undoubles a consonant. */
function repairAfterStrip(stripped: string): string {
  if (
    stripped.endsWith("at") ||
    stripped.endsWith("bl") ||
    stripped.endsWith("iz")
  ) {
    return stripped + "e";
  }
  if (endsDoubleConsonant(stripped) && !/[lsz]$/.test(stripped)) {
    return stripped.slice(0, -1);
  }
  if (measure(stripped) === 1 && endsCVC(stripped)) {
    return stripped + "e";
  }
  return stripped;
}

/**
 * Applies the first rule in `rules` (insertion order) whose suffix ends `w`.
 * The rewrite only happens when the remaining base has a positive measure;
 * either way no further rules are tried once a suffix has matched.
 */
function rewriteSuffix(w: string, rules: Record<string, string>): string {
  for (const [suffix, replacement] of Object.entries(rules)) {
    if (!w.endsWith(suffix)) continue;
    const base = w.slice(0, -suffix.length);
    return measure(base) > 0 ? base + replacement : w;
  }
  return w;
}

/**
 * Reduces a lower-case word to its stem through a fixed sequence of
 * suffix-stripping stages. Words of one or two characters are returned
 * unchanged.
 *
 * @param word the word to stem; expected to be lower case already
 * @returns the stemmed form
 */
export function stem(word: string): string {
  if (word.length <= 2) return word;

  let w = word;

  // Plurals: "sses" -> "ss", "ies" -> "i", trailing "s" dropped unless the word ends in "ss".
  if (w.endsWith("sses") || w.endsWith("ies")) {
    w = w.slice(0, -2);
  } else if (w.endsWith("s") && !w.endsWith("ss")) {
    w = w.slice(0, -1);
  }

  // "eed" / "ed" / "ing". A word ending in "eed" never falls through to the "ed" rule.
  if (w.endsWith("eed")) {
    if (measure(w.slice(0, -3)) > 0) w = w.slice(0, -1);
  } else {
    let stripLen = 0;
    if (w.endsWith("ed") && hasVowel(w.slice(0, -2))) stripLen = 2;
    else if (w.endsWith("ing") && hasVowel(w.slice(0, -3))) stripLen = 3;
    if (stripLen > 0) w = repairAfterStrip(w.slice(0, -stripLen));
  }

  // Terminal "y" becomes "i" when the rest of the word contains a vowel.
  if (w.endsWith("y") && hasVowel(w.slice(0, -1))) {
    w = w.slice(0, -1) + "i";
  }

  // Table-driven suffix rewrites.
  w = rewriteSuffix(w, step2map);
  w = rewriteSuffix(w, step3map);

  // Strip one of the remaining suffixes when the base is long enough.
  const tail = STEP4_SUFFIX.exec(w);
  if (tail) {
    const base = w.slice(0, -tail[0].length);
    if (measure(base) > 1) w = base;
  }

  // Drop a final "e" unless that would leave a short consonant-vowel-consonant stem.
  if (w.endsWith("e")) {
    const base = w.slice(0, -1);
    const m = measure(base);
    if (m > 1 || (m === 1 && !endsCVC(base))) {
      w = base;
    }
  }

  // Collapse a final "ll" to "l" when the rest of the word is long enough.
  if (
    w.endsWith("l") &&
    endsDoubleConsonant(w) &&
    measure(w.slice(0, -1)) > 1
  ) {
    w = w.slice(0, -1);
  }

  return w;
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { stem } from "./stemmer.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Groups of terms treated as interchangeable during query expansion.
 * Every term in a group is a synonym of every other term in that group.
 */
const SYNONYM_GROUPS: string[][] = [
  // Identity and access
  ["auth", "authentication", "authn", "authenticating"],
  ["authz", "authorization", "authorizing"],

  // Data and performance
  ["db", "database", "datastore"],
  ["perf", "performance", "latency", "throughput", "slow", "bottleneck"],
  ["optim", "optimization", "optimizing", "optimise", "query-optimization"],

  // Platform and project vocabulary
  ["k8s", "kubernetes", "kube"],
  ["config", "configuration", "configuring", "setup"],
  ["deps", "dependencies", "dependency"],
  ["env", "environment"],
  ["fn", "function"],
  ["impl", "implementation", "implementing"],
  ["msg", "message", "messaging"],
  ["repo", "repository"],
  ["req", "request"],
  ["res", "response"],
  ["ts", "typescript"],
  ["js", "javascript"],
  ["pg", "postgres", "postgresql"],
  ["err", "error", "errors"],
  ["api", "endpoint", "endpoints"],

  // Delivery and operations
  ["ci", "continuous-integration"],
  ["cd", "continuous-deployment"],
  ["test", "testing", "tests"],
  ["doc", "documentation", "docs"],
  ["infra", "infrastructure"],
  ["deploy", "deployment", "deploying"],
  ["cache", "caching", "cached"],
  ["log", "logging", "logs"],
  ["monitor", "monitoring"],
  ["observe", "observability"],
  ["sec", "security", "secure"],
  ["validate", "validation", "validating"],
  ["migrate", "migration", "migrations"],
  ["debug", "debugging"],
  ["container", "containerization", "docker"],
  ["crash", "crashloop", "crashloopbackoff"],
  ["webhook", "webhooks", "callback"],
  ["middleware", "mw"],
  ["paginate", "pagination"],
  ["serialize", "serialization"],
  ["encrypt", "encryption"],
  ["hash", "hashing"],
];
|
|
47
|
+
|
|
48
|
+
/** Stemmed term -> the other stemmed terms of every group it appears in. */
const synonymMap = new Map<string, Set<string>>();

// Built once at module load. Terms are lower-cased and stemmed first, so
// lookups must use stemmed terms; members whose stems coincide are not
// recorded as synonyms of themselves.
SYNONYM_GROUPS.forEach((group) => {
  const stems = group.map((term) => stem(term.toLowerCase()));
  stems.forEach((self) => {
    let bucket = synonymMap.get(self);
    if (bucket === undefined) {
      bucket = new Set<string>();
      synonymMap.set(self, bucket);
    }
    for (const candidate of stems) {
      if (candidate !== self) bucket.add(candidate);
    }
  });
});
|
|
59
|
+
|
|
60
|
+
/**
 * Looks up the synonyms of an already-stemmed term.
 *
 * @param stemmedTerm a term in stemmed form
 * @returns a new array of stemmed synonyms; empty when the term has none
 */
export function getSynonyms(stemmedTerm: string): string[] {
  return Array.from(synonymMap.get(stemmedTerm) ?? []);
}
|