@agentmemory/agentmemory 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/README.md +41 -68
- package/dist/cli.mjs +3 -3
- package/dist/index.mjs +3 -2
- package/dist/index.mjs.map +1 -1
- package/dist/{src-1fTKFEtN.mjs → src-sYZDDbiA.mjs} +4 -3
- package/dist/src-sYZDDbiA.mjs.map +1 -0
- package/dist/standalone.mjs +1 -1
- package/dist/standalone.mjs.map +1 -1
- package/package.json +10 -1
- package/plugin/.claude-plugin/plugin.json +1 -1
- package/plugin/scripts/notification.d.mts +1 -0
- package/plugin/scripts/notification.mjs.map +1 -0
- package/plugin/scripts/post-tool-failure.d.mts +1 -0
- package/plugin/scripts/post-tool-failure.mjs.map +1 -0
- package/plugin/scripts/post-tool-use.d.mts +1 -0
- package/plugin/scripts/post-tool-use.mjs.map +1 -0
- package/plugin/scripts/pre-compact.d.mts +1 -0
- package/plugin/scripts/pre-compact.mjs.map +1 -0
- package/plugin/scripts/pre-tool-use.d.mts +1 -0
- package/plugin/scripts/pre-tool-use.mjs.map +1 -0
- package/plugin/scripts/prompt-submit.d.mts +1 -0
- package/plugin/scripts/prompt-submit.mjs.map +1 -0
- package/plugin/scripts/session-end.d.mts +1 -0
- package/plugin/scripts/session-end.mjs.map +1 -0
- package/plugin/scripts/session-start.d.mts +1 -0
- package/plugin/scripts/session-start.mjs.map +1 -0
- package/plugin/scripts/stop.d.mts +1 -0
- package/plugin/scripts/stop.mjs.map +1 -0
- package/plugin/scripts/subagent-start.d.mts +1 -0
- package/plugin/scripts/subagent-start.mjs.map +1 -0
- package/plugin/scripts/subagent-stop.d.mts +1 -0
- package/plugin/scripts/subagent-stop.mjs.map +1 -0
- package/plugin/scripts/task-completed.d.mts +1 -0
- package/plugin/scripts/task-completed.mjs.map +1 -0
- package/.claude-plugin/marketplace.json +0 -14
- package/.github/workflows/ci.yml +0 -22
- package/.github/workflows/publish.yml +0 -28
- package/assets/banner.png +0 -0
- package/assets/demo.gif +0 -0
- package/assets/demo.mp4 +0 -0
- package/benchmark/QUALITY.md +0 -73
- package/benchmark/REAL-EMBEDDINGS.md +0 -67
- package/benchmark/SCALE.md +0 -110
- package/benchmark/dataset.ts +0 -293
- package/benchmark/quality-eval.ts +0 -643
- package/benchmark/real-embeddings-eval.ts +0 -405
- package/benchmark/scale-eval.ts +0 -398
- package/dist/src-1fTKFEtN.mjs.map +0 -1
- package/src/auth.ts +0 -12
- package/src/cli.ts +0 -251
- package/src/config.ts +0 -221
- package/src/eval/metrics-store.ts +0 -65
- package/src/eval/quality.ts +0 -51
- package/src/eval/schemas.ts +0 -124
- package/src/eval/self-correct.ts +0 -28
- package/src/eval/validator.ts +0 -31
- package/src/functions/actions.ts +0 -288
- package/src/functions/audit.ts +0 -61
- package/src/functions/auto-forget.ts +0 -169
- package/src/functions/branch-aware.ts +0 -169
- package/src/functions/cascade.ts +0 -80
- package/src/functions/checkpoints.ts +0 -209
- package/src/functions/claude-bridge.ts +0 -161
- package/src/functions/compress.ts +0 -194
- package/src/functions/consolidate.ts +0 -212
- package/src/functions/consolidation-pipeline.ts +0 -258
- package/src/functions/context.ts +0 -169
- package/src/functions/crystallize.ts +0 -293
- package/src/functions/dedup.ts +0 -57
- package/src/functions/diagnostics.ts +0 -785
- package/src/functions/enrich.ts +0 -132
- package/src/functions/evict.ts +0 -163
- package/src/functions/export-import.ts +0 -508
- package/src/functions/facets.ts +0 -248
- package/src/functions/file-index.ts +0 -106
- package/src/functions/flow-compress.ts +0 -214
- package/src/functions/frontier.ts +0 -196
- package/src/functions/governance.ts +0 -131
- package/src/functions/graph-retrieval.ts +0 -277
- package/src/functions/graph.ts +0 -275
- package/src/functions/leases.ts +0 -216
- package/src/functions/lessons.ts +0 -253
- package/src/functions/mesh.ts +0 -434
- package/src/functions/migrate.ts +0 -165
- package/src/functions/observe.ts +0 -144
- package/src/functions/obsidian-export.ts +0 -310
- package/src/functions/patterns.ts +0 -138
- package/src/functions/privacy.ts +0 -39
- package/src/functions/profile.ts +0 -155
- package/src/functions/query-expansion.ts +0 -186
- package/src/functions/relations.ts +0 -237
- package/src/functions/remember.ts +0 -162
- package/src/functions/retention.ts +0 -235
- package/src/functions/routines.ts +0 -289
- package/src/functions/search.ts +0 -80
- package/src/functions/sentinels.ts +0 -417
- package/src/functions/signals.ts +0 -186
- package/src/functions/sketches.ts +0 -274
- package/src/functions/sliding-window.ts +0 -257
- package/src/functions/smart-search.ts +0 -115
- package/src/functions/snapshot.ts +0 -219
- package/src/functions/summarize.ts +0 -155
- package/src/functions/team.ts +0 -147
- package/src/functions/temporal-graph.ts +0 -476
- package/src/functions/timeline.ts +0 -138
- package/src/functions/verify.ts +0 -117
- package/src/health/monitor.ts +0 -110
- package/src/health/thresholds.ts +0 -73
- package/src/hooks/notification.ts +0 -52
- package/src/hooks/post-tool-failure.ts +0 -58
- package/src/hooks/post-tool-use.ts +0 -62
- package/src/hooks/pre-compact.ts +0 -60
- package/src/hooks/pre-tool-use.ts +0 -72
- package/src/hooks/prompt-submit.ts +0 -46
- package/src/hooks/session-end.ts +0 -71
- package/src/hooks/session-start.ts +0 -48
- package/src/hooks/stop.ts +0 -39
- package/src/hooks/subagent-start.ts +0 -49
- package/src/hooks/subagent-stop.ts +0 -54
- package/src/hooks/task-completed.ts +0 -54
- package/src/index.ts +0 -342
- package/src/mcp/in-memory-kv.ts +0 -61
- package/src/mcp/server.ts +0 -1455
- package/src/mcp/standalone.ts +0 -177
- package/src/mcp/tools-registry.ts +0 -769
- package/src/mcp/transport.ts +0 -91
- package/src/prompts/compression.ts +0 -67
- package/src/prompts/consolidation.ts +0 -48
- package/src/prompts/graph-extraction.ts +0 -35
- package/src/prompts/summary.ts +0 -38
- package/src/prompts/xml.ts +0 -26
- package/src/providers/agent-sdk.ts +0 -34
- package/src/providers/anthropic.ts +0 -35
- package/src/providers/circuit-breaker.ts +0 -82
- package/src/providers/embedding/cohere.ts +0 -46
- package/src/providers/embedding/gemini.ts +0 -54
- package/src/providers/embedding/index.ts +0 -39
- package/src/providers/embedding/local.ts +0 -52
- package/src/providers/embedding/openai.ts +0 -45
- package/src/providers/embedding/openrouter.ts +0 -51
- package/src/providers/embedding/voyage.ts +0 -46
- package/src/providers/fallback-chain.ts +0 -31
- package/src/providers/index.ts +0 -84
- package/src/providers/openrouter.ts +0 -71
- package/src/providers/resilient.ts +0 -37
- package/src/state/hybrid-search.ts +0 -295
- package/src/state/index-persistence.ts +0 -63
- package/src/state/keyed-mutex.ts +0 -18
- package/src/state/kv.ts +0 -33
- package/src/state/schema.ts +0 -71
- package/src/state/search-index.ts +0 -245
- package/src/state/stemmer.ts +0 -104
- package/src/state/synonyms.ts +0 -63
- package/src/state/vector-index.ts +0 -130
- package/src/telemetry/setup.ts +0 -116
- package/src/triggers/api.ts +0 -1904
- package/src/triggers/events.ts +0 -71
- package/src/types.ts +0 -769
- package/src/version.ts +0 -1
- package/src/viewer/index.html +0 -2556
- package/src/viewer/server.ts +0 -207
- package/src/xenova.d.ts +0 -3
- package/test/actions.test.ts +0 -490
- package/test/audit.test.ts +0 -108
- package/test/auto-forget.test.ts +0 -188
- package/test/cascade.test.ts +0 -277
- package/test/checkpoints.test.ts +0 -493
- package/test/circuit-breaker.test.ts +0 -107
- package/test/claude-bridge.test.ts +0 -178
- package/test/confidence.test.ts +0 -247
- package/test/consistency.test.ts +0 -61
- package/test/consolidation-pipeline.test.ts +0 -251
- package/test/crystallize.test.ts +0 -521
- package/test/diagnostics.test.ts +0 -638
- package/test/embedding-provider.test.ts +0 -49
- package/test/enrich.test.ts +0 -209
- package/test/eval.test.ts +0 -300
- package/test/export-import.test.ts +0 -251
- package/test/facets.test.ts +0 -448
- package/test/fallback-chain.test.ts +0 -93
- package/test/frontier.test.ts +0 -485
- package/test/governance.test.ts +0 -147
- package/test/graph-retrieval.test.ts +0 -186
- package/test/graph.test.ts +0 -160
- package/test/helpers/mocks.ts +0 -40
- package/test/hybrid-search.test.ts +0 -145
- package/test/index-persistence.test.ts +0 -124
- package/test/integration.test.ts +0 -265
- package/test/leases.test.ts +0 -399
- package/test/mcp-prompts.test.ts +0 -218
- package/test/mcp-resources.test.ts +0 -286
- package/test/mcp-standalone.test.ts +0 -113
- package/test/mesh.test.ts +0 -700
- package/test/privacy.test.ts +0 -87
- package/test/profile.test.ts +0 -161
- package/test/query-expansion.test.ts +0 -154
- package/test/relations.test.ts +0 -198
- package/test/retention.test.ts +0 -245
- package/test/routines.test.ts +0 -497
- package/test/schema-fingerprint.test.ts +0 -81
- package/test/schema.test.ts +0 -42
- package/test/search-index.test.ts +0 -128
- package/test/sentinels.test.ts +0 -626
- package/test/signals.test.ts +0 -410
- package/test/sketches.test.ts +0 -549
- package/test/sliding-window.test.ts +0 -199
- package/test/smart-search.test.ts +0 -169
- package/test/snapshot.test.ts +0 -165
- package/test/team.test.ts +0 -156
- package/test/temporal-graph.test.ts +0 -378
- package/test/timeline.test.ts +0 -148
- package/test/vector-index.test.ts +0 -79
- package/test/verify.test.ts +0 -209
- package/test/xml.test.ts +0 -65
- package/tsconfig.json +0 -22
- package/tsdown.config.ts +0 -62
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import { SearchIndex } from "./search-index.js";
|
|
2
|
-
import { VectorIndex } from "./vector-index.js";
|
|
3
|
-
import type { StateKV } from "./kv.js";
|
|
4
|
-
import { KV } from "./schema.js";
|
|
5
|
-
|
|
6
|
-
/** Quiet period, in milliseconds, between the last `scheduleSave()` call and the write. */
const DEBOUNCE_MS = 5000;

/**
 * Saves the in-memory search indexes to the state KV store and loads them back.
 *
 * Both payloads are stored in the `KV.bm25Index` scope: the BM25 index under
 * the key `"data"` and the vector index under the key `"vectors"`.
 */
export class IndexPersistence {
  /** Handle of the pending debounced save, or `null` when none is scheduled. */
  private timer: ReturnType<typeof setTimeout> | null = null;

  constructor(
    private kv: StateKV,
    private bm25: SearchIndex,
    private vector: VectorIndex | null,
  ) {}

  /**
   * Requests a save `DEBOUNCE_MS` from now. Calling it again before the timer
   * fires cancels the pending timer and starts a new one, so a burst of calls
   * results in a single write.
   */
  scheduleSave(): void {
    if (this.timer) clearTimeout(this.timer);
    this.timer = setTimeout(() => {
      // Nothing awaits a timer callback, so a rejected save would otherwise
      // surface as an unhandled promise rejection. Report it here instead.
      this.save().catch((err: unknown) => {
        console.error("[IndexPersistence] scheduled save failed:", err);
      });
    }, DEBOUNCE_MS);
  }

  /**
   * Writes the indexes immediately and cancels any pending debounced save.
   *
   * The vector index is written only when it exists and is non-empty; in every
   * other case the `"vectors"` key is not touched.
   *
   * @returns A promise that rejects if a KV write fails.
   */
  async save(): Promise<void> {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
    await this.kv.set(KV.bm25Index, "data", this.bm25.serialize());
    if (this.vector && this.vector.size > 0) {
      await this.kv.set(KV.bm25Index, "vectors", this.vector.serialize());
    }
  }

  /**
   * Reads both indexes back from the KV store.
   *
   * Loading is best-effort: a failed read is treated like a missing value, and
   * a value that is missing, empty, or not a string yields `null` for that index.
   *
   * @returns The deserialized indexes, each `null` when nothing usable was stored.
   */
  async load(): Promise<{
    bm25: SearchIndex | null;
    vector: VectorIndex | null;
  }> {
    let bm25: SearchIndex | null = null;
    let vector: VectorIndex | null = null;

    const bm25Data = await this.kv
      .get<string>(KV.bm25Index, "data")
      .catch(() => null);
    if (bm25Data && typeof bm25Data === "string") {
      bm25 = SearchIndex.deserialize(bm25Data);
    }

    const vecData = await this.kv
      .get<string>(KV.bm25Index, "vectors")
      .catch(() => null);
    if (vecData && typeof vecData === "string") {
      vector = VectorIndex.deserialize(vecData);
    }

    return { bm25, vector };
  }

  /** Cancels a pending debounced save without writing anything. */
  stop(): void {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }
}
|
package/src/state/keyed-mutex.ts
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
/** Per key, a promise that settles when the most recently queued task for that key is done. */
const locks = new Map<string, Promise<void>>();

/**
 * Serializes asynchronous work per key: `fn` starts only after every task
 * queued earlier under the same `key` has settled (fulfilled or rejected).
 * Tasks queued under different keys do not wait for each other.
 *
 * @param key Identifier of the queue to join.
 * @param fn  Task to run once the queue for `key` is free.
 * @returns A promise that settles with the outcome of `fn`.
 */
export function withKeyedLock<T>(
  key: string,
  fn: () => Promise<T>,
): Promise<T> {
  const ignore = (): void => {};
  const tail = locks.get(key) ?? Promise.resolve();

  // `fn` is registered for both outcomes so it runs however the tail settled.
  const outcome = tail.then(fn, fn);

  // A never-rejecting view of `outcome`; this is what later callers wait on.
  const settled = outcome.then(ignore, ignore);
  locks.set(key, settled);

  // Drop the map entry once the queue drains, unless a newer task replaced it.
  settled.then(() => {
    if (locks.get(key) !== settled) return;
    locks.delete(key);
  });

  return outcome;
}
|
package/src/state/kv.ts
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import type { ISdk } from 'iii-sdk'
|
|
2
|
-
|
|
3
|
-
/**
 * Thin typed wrapper around the SDK's `state::*` triggers. Each method
 * forwards its arguments as a payload object and returns the promise
 * produced by `sdk.trigger`.
 */
export class StateKV {
  constructor(private sdk: ISdk) {}

  /** Invokes `state::get` with `{ scope, key }`. */
  async get<T = unknown>(scope: string, key: string): Promise<T | null> {
    type Payload = { scope: string; key: string }
    const payload: Payload = { scope, key }
    return this.sdk.trigger<Payload, T | null>('state::get', payload)
  }

  /** Invokes `state::set` with `{ scope, key, data }`. */
  async set<T = unknown>(scope: string, key: string, data: T): Promise<T> {
    type Payload = { scope: string; key: string; data: T }
    const payload: Payload = { scope, key, data }
    return this.sdk.trigger<Payload, T>('state::set', payload)
  }

  /** Invokes `state::delete` with `{ scope, key }`. */
  async delete(scope: string, key: string): Promise<void> {
    type Payload = { scope: string; key: string }
    const payload: Payload = { scope, key }
    return this.sdk.trigger<Payload, void>('state::delete', payload)
  }

  /** Invokes `state::list` with `{ scope }`. */
  async list<T = unknown>(scope: string): Promise<T[]> {
    type Payload = { scope: string }
    const payload: Payload = { scope }
    return this.sdk.trigger<Payload, T[]>('state::list', payload)
  }
}
|
package/src/state/schema.ts
DELETED
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
import { createHash, randomUUID } from "node:crypto";
|
|
2
|
-
|
|
3
|
-
/**
 * Scope names used with the state KV store. Plain entries are fixed scope
 * strings; function entries build a scope string from the ids they are given.
 */
export const KV = {
  // Sessions and what was captured in them
  sessions: "mem:sessions",
  observations: (sessionId: string) => `mem:obs:${sessionId}`,
  memories: "mem:memories",
  summaries: "mem:summaries",

  // Runtime bookkeeping
  config: "mem:config",
  metrics: "mem:metrics",
  health: "mem:health",

  // Retrieval
  embeddings: (obsId: string) => `mem:emb:${obsId}`,
  bm25Index: "mem:index:bm25",

  relations: "mem:relations",
  profiles: "mem:profiles",
  claudeBridge: "mem:claude-bridge",

  // Graph
  graphNodes: "mem:graph:nodes",
  graphEdges: "mem:graph:edges",

  semantic: "mem:semantic",
  procedural: "mem:procedural",

  // Team-scoped data
  teamShared: (teamId: string) => `mem:team:${teamId}:shared`,
  teamUsers: (teamId: string, userId: string) =>
    `mem:team:${teamId}:users:${userId}`,
  teamProfile: (teamId: string) => `mem:team:${teamId}:profile`,

  audit: "mem:audit",
  actions: "mem:actions",
  actionEdges: "mem:action-edges",
  leases: "mem:leases",
  routines: "mem:routines",
  routineRuns: "mem:routine-runs",
  signals: "mem:signals",
  checkpoints: "mem:checkpoints",
  mesh: "mem:mesh",
  sketches: "mem:sketches",
  facets: "mem:facets",
  sentinels: "mem:sentinels",
  crystals: "mem:crystals",
  lessons: "mem:lessons",
  graphEdgeHistory: "mem:graph:edge-history",
  enrichedChunks: (sessionId: string) => `mem:enriched:${sessionId}`,
  latentEmbeddings: (obsId: string) => `mem:latent:${obsId}`,
  retentionScores: "mem:retention",
} as const;
|
|
43
|
-
|
|
44
|
-
/** Stream name and group identifiers for the live stream. */
export const STREAM = {
  /** Name of the stream. */
  name: "mem-live",
  /** Group for a session: the session id itself, unchanged. */
  group: (sessionId: string) => sessionId,
  /** Fixed group name for the viewer. */
  viewerGroup: "viewer",
} as const;
|
|
49
|
-
|
|
50
|
-
/**
 * Builds a fresh identifier of the form `<prefix>_<timestamp>_<random>`.
 *
 * The timestamp is `Date.now()` in base 36; the random part is the first
 * 12 hex characters of a random UUID with its dashes removed.
 *
 * `randomUUID` comes from the `node:crypto` import rather than the global
 * `crypto` object, so the function does not depend on the Web Crypto global
 * being available in the running Node.js version.
 *
 * @param prefix Leading segment of the identifier.
 * @returns The generated identifier.
 */
export function generateId(prefix: string): string {
  const ts = Date.now().toString(36);
  const rand = randomUUID().replace(/-/g, "").slice(0, 12);
  return `${prefix}_${ts}_${rand}`;
}
|
|
55
|
-
|
|
56
|
-
/**
 * Derives a deterministic identifier from `content`: the prefix, an
 * underscore, and the first 16 hex characters of the content's SHA-256 digest.
 *
 * @param prefix  Leading segment of the identifier.
 * @param content Text to fingerprint.
 * @returns `<prefix>_<16 hex chars>`; identical inputs give identical ids.
 */
export function fingerprintId(prefix: string, content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  const digest = hasher.digest("hex");
  return [prefix, digest.substring(0, 16)].join("_");
}
|
|
60
|
-
|
|
61
|
-
/**
 * Jaccard similarity of two texts over their sets of whitespace-separated
 * tokens. Tokens of two characters or fewer are ignored; comparison is
 * case-sensitive.
 *
 * @returns `|A ∩ B| / |A ∪ B|`, with two special cases: 1 when both token
 *          sets are empty and 0 when exactly one of them is empty.
 */
export function jaccardSimilarity(a: string, b: string): number {
  const toTokenSet = (text: string): Set<string> => {
    const kept = new Set<string>();
    for (const token of text.split(/\s+/)) {
      if (token.length > 2) kept.add(token);
    }
    return kept;
  };

  const left = toTokenSet(a);
  const right = toTokenSet(b);

  if (left.size === 0 || right.size === 0) {
    // Both empty counts as identical; only one empty counts as disjoint.
    return left.size === right.size ? 1 : 0;
  }

  let shared = 0;
  left.forEach((token) => {
    if (right.has(token)) shared += 1;
  });

  const unionSize = left.size + right.size - shared;
  return shared / unionSize;
}
|
|
@@ -1,245 +0,0 @@
|
|
|
1
|
-
import type { CompressedObservation } from "../types.js";
|
|
2
|
-
import { stem } from "./stemmer.js";
|
|
3
|
-
import { getSynonyms } from "./synonyms.js";
|
|
4
|
-
|
|
5
|
-
/** Per-document bookkeeping kept by the index. */
interface IndexEntry {
  obsId: string;
  sessionId: string;
  /** Number of indexed terms in the document, counting repeats. */
  termCount: number;
}

/**
 * In-memory BM25 full-text index over compressed observations.
 *
 * Terms are lower-cased, tokenized and stemmed before indexing. Queries are
 * tokenized the same way, expanded with synonyms (weight 0.7), and matched
 * both exactly and by prefix (prefix matches use half the IDF).
 */
export class SearchIndex {
  private entries: Map<string, IndexEntry> = new Map();
  /** term -> ids of the documents containing it */
  private invertedIndex: Map<string, Set<string>> = new Map();
  /** document id -> (term -> occurrences in that document) */
  private docTermCounts: Map<string, Map<string, number>> = new Map();
  /** Sum of `termCount` over all indexed documents. */
  private totalDocLength = 0;
  /** Sorted index vocabulary, built lazily for prefix lookups. */
  private sortedTerms: string[] | null = null;

  // BM25 parameters.
  private readonly k1 = 1.2;
  private readonly b = 0.75;

  /**
   * Indexes an observation. If an observation with the same id is already
   * indexed it is replaced, so re-adding a document neither inflates the
   * length statistics nor leaves postings for terms it no longer contains.
   */
  add(obs: CompressedObservation): void {
    // Tokenize before mutating, so a failure here leaves the index untouched.
    const terms = this.extractTerms(obs);

    this.discard(obs.id);

    const termFreq = new Map<string, number>();
    let termCount = 0;
    for (const term of terms) {
      termFreq.set(term, (termFreq.get(term) ?? 0) + 1);
      termCount++;
    }

    this.entries.set(obs.id, {
      obsId: obs.id,
      sessionId: obs.sessionId,
      termCount,
    });
    this.docTermCounts.set(obs.id, termFreq);
    this.totalDocLength += termCount;

    for (const term of termFreq.keys()) {
      let postings = this.invertedIndex.get(term);
      if (!postings) {
        postings = new Set();
        this.invertedIndex.set(term, postings);
      }
      postings.add(obs.id);
    }

    this.sortedTerms = null;
  }

  /**
   * Ranks indexed observations against `query`.
   *
   * @param query Free-text query.
   * @param limit Maximum number of results (default 20).
   * @returns Matches sorted by descending score; empty when the query yields
   *          no terms or the index is empty.
   */
  search(
    query: string,
    limit = 20,
  ): Array<{ obsId: string; sessionId: string; score: number }> {
    const rawTerms = this.tokenize(query.toLowerCase());
    if (rawTerms.length === 0) return [];

    const N = this.entries.size;
    if (N === 0) return [];
    const avgDocLen = this.totalDocLength / N;

    // Each distinct query term counts once at full weight; synonyms that are
    // not already present are added once at reduced weight.
    const queryTerms: Array<{ term: string; weight: number }> = [];
    const seen = new Set<string>();
    for (const term of rawTerms) {
      if (!seen.has(term)) {
        seen.add(term);
        queryTerms.push({ term, weight: 1.0 });
      }
      for (const syn of getSynonyms(term)) {
        if (!seen.has(syn)) {
          seen.add(syn);
          queryTerms.push({ term: syn, weight: 0.7 });
        }
      }
    }

    const scores = new Map<string, number>();
    const sorted = this.getSortedTerms();

    for (const { term, weight } of queryTerms) {
      // Exact match on the term itself.
      this.scorePostings(scores, term, 1, weight, N, avgDocLen);

      // Prefix matches: every longer index term starting with `term`. They
      // are contiguous in the sorted vocabulary, beginning at the lower bound.
      for (let si = this.lowerBound(sorted, term); si < sorted.length; si++) {
        const indexTerm = sorted[si];
        if (!indexTerm.startsWith(term)) break;
        if (indexTerm === term) continue;
        this.scorePostings(scores, indexTerm, 0.5, weight, N, avgDocLen);
      }
    }

    const ranked: Array<{ obsId: string; sessionId: string; score: number }> = [];
    for (const [obsId, score] of scores) {
      const entry = this.entries.get(obsId);
      if (entry) ranked.push({ obsId, sessionId: entry.sessionId, score });
    }
    ranked.sort((x, y) => y.score - x.score);
    return ranked.slice(0, limit);
  }

  /** Number of indexed observations. */
  get size(): number {
    return this.entries.size;
  }

  /** Removes everything from the index. */
  clear(): void {
    this.entries.clear();
    this.invertedIndex.clear();
    this.docTermCounts.clear();
    this.totalDocLength = 0;
    this.sortedTerms = null;
  }

  /**
   * Replaces this index's contents with a copy of `other`'s. The maps, sets
   * and entry objects are copied, so later changes to either index do not
   * affect the other.
   */
  restoreFrom(other: SearchIndex): void {
    this.entries = new Map(
      Array.from(other.entries.entries()).map(([k, v]) => [k, { ...v }]),
    );
    this.invertedIndex = new Map(
      Array.from(other.invertedIndex.entries()).map(([k, v]) => [
        k,
        new Set(v),
      ]),
    );
    this.docTermCounts = new Map(
      Array.from(other.docTermCounts.entries()).map(([k, v]) => [
        k,
        new Map(v),
      ]),
    );
    this.totalDocLength = other.totalDocLength;
    this.sortedTerms = null;
  }

  /** Serializes the index to a JSON string (format marker `v: 2`). */
  serialize(): string {
    const entries = Array.from(this.entries.entries());
    const inverted = Array.from(this.invertedIndex.entries()).map(
      ([term, ids]) => [term, Array.from(ids)] as [string, string[]],
    );
    const docTerms = Array.from(this.docTermCounts.entries()).map(
      ([id, counts]) =>
        [id, Array.from(counts.entries())] as [string, [string, number][]],
    );
    return JSON.stringify({
      v: 2,
      entries,
      inverted,
      docTerms,
      totalDocLength: this.totalDocLength,
    });
  }

  /**
   * Rebuilds an index from the output of `serialize()`.
   *
   * Never throws: input that cannot be parsed, or that lacks any of the
   * `entries` / `inverted` / `docTerms` sections, yields an empty index. A
   * `totalDocLength` that is not a finite non-negative number is read as 0.
   */
  static deserialize(json: string): SearchIndex {
    try {
      const idx = new SearchIndex();
      const data = JSON.parse(json);
      if (!data?.entries || !data?.inverted || !data?.docTerms) return idx;
      for (const [key, val] of data.entries) {
        idx.entries.set(key, val);
      }
      for (const [term, ids] of data.inverted) {
        idx.invertedIndex.set(term, new Set(ids));
      }
      for (const [id, counts] of data.docTerms) {
        idx.docTermCounts.set(id, new Map(counts));
      }
      const rawLen = Number(data.totalDocLength);
      idx.totalDocLength =
        Number.isFinite(rawLen) && rawLen >= 0 ? Math.floor(rawLen) : 0;
      return idx;
    } catch {
      return new SearchIndex();
    }
  }

  /** Removes one document's contribution from every structure, if it is indexed. */
  private discard(obsId: string): void {
    const entry = this.entries.get(obsId);
    if (!entry) return;

    // Clamp at zero: a restored index may carry a total smaller than the sum
    // of its entries.
    this.totalDocLength = Math.max(0, this.totalDocLength - entry.termCount);

    const counts = this.docTermCounts.get(obsId);
    if (counts) {
      for (const term of counts.keys()) {
        const postings = this.invertedIndex.get(term);
        if (!postings) continue;
        postings.delete(obsId);
        if (postings.size === 0) this.invertedIndex.delete(term);
      }
    }

    this.docTermCounts.delete(obsId);
    this.entries.delete(obsId);
    this.sortedTerms = null;
  }

  /**
   * Adds the BM25 contribution of one index term to `scores` for every
   * document containing it. `idfScale` multiplies the IDF (1 for exact
   * matches, 0.5 for prefix matches); `weight` is the query-term weight.
   */
  private scorePostings(
    scores: Map<string, number>,
    indexTerm: string,
    idfScale: number,
    weight: number,
    N: number,
    avgDocLen: number,
  ): void {
    const postings = this.invertedIndex.get(indexTerm);
    if (!postings) return;

    const df = postings.size;
    const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1) * idfScale;

    for (const obsId of postings) {
      // Postings and entries can disagree after loading inconsistent data;
      // skip such ids rather than fail the whole search.
      const entry = this.entries.get(obsId);
      if (!entry) continue;

      const tf = this.docTermCounts.get(obsId)?.get(indexTerm) || 0;
      const docLen = entry.termCount;
      const numerator = tf * (this.k1 + 1);
      const denominator =
        tf + this.k1 * (1 - this.b + this.b * (docLen / avgDocLen));

      scores.set(
        obsId,
        (scores.get(obsId) || 0) + idf * (numerator / denominator) * weight,
      );
    }
  }

  /** Joins the observation's text fields and tokenizes the lower-cased result. */
  private extractTerms(obs: CompressedObservation): string[] {
    const parts = [
      obs.title,
      obs.subtitle || "",
      obs.narrative,
      ...obs.facts,
      ...obs.concepts,
      ...obs.files,
      obs.type,
    ];
    return this.tokenize(parts.join(" ").toLowerCase());
  }

  /**
   * Splits text into stemmed tokens. Characters other than word characters,
   * whitespace, `/`, `.`, `-` and `_` become spaces; tokens of length 1 or
   * less are dropped.
   */
  private tokenize(text: string): string[] {
    return text
      .replace(/[^\w\s/.\-_]/g, " ")
      .split(/\s+/)
      .filter((t) => t.length > 1)
      .map((t) => stem(t));
  }

  /** Returns the sorted vocabulary, rebuilding it if it was invalidated. */
  private getSortedTerms(): string[] {
    if (!this.sortedTerms) {
      this.sortedTerms = Array.from(this.invertedIndex.keys()).sort();
    }
    return this.sortedTerms;
  }

  /** Binary search: index of the first element of `arr` that is not less than `target`. */
  private lowerBound(arr: string[], target: string): number {
    let lo = 0;
    let hi = arr.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      if (arr[mid] < target) lo = mid + 1;
      else hi = mid;
    }
    return lo;
  }
}
|
package/src/state/stemmer.ts
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
/**
 * Suffix rewrites for the second stemming pass. Entry order matters: the
 * stemmer stops at the first suffix that matches.
 */
const step2map: Record<string, string> = {
  ational: "ate",
  tional: "tion",
  enci: "ence",
  anci: "ance",
  izer: "ize",
  iser: "ise",
  abli: "able",
  alli: "al",
  entli: "ent",
  eli: "e",
  ousli: "ous",
  ization: "ize",
  isation: "ise",
  ation: "ate",
  ator: "ate",
  alism: "al",
  iveness: "ive",
  fulness: "ful",
  ousness: "ous",
  aliti: "al",
  iviti: "ive",
  biliti: "ble",
};

/**
 * Suffix rewrites for the third stemming pass; an empty replacement removes
 * the suffix. Entry order matters here as well.
 */
const step3map: Record<string, string> = {
  icate: "ic",
  ative: "",
  alize: "al",
  alise: "al",
  iciti: "ic",
  ical: "ic",
  ful: "",
  ness: "",
};
|
|
14
|
-
|
|
15
|
-
/** True when `s` contains at least one of the lowercase letters a, e, i, o, u. */
function hasVowel(s: string): boolean {
  return s.search(/[aeiou]/) !== -1;
}
|
|
18
|
-
|
|
19
|
-
/**
 * Counts vowel-run-then-consonant-run sequences in `s`, i.e. the number of
 * places where a vowel is immediately followed by a non-vowel. For this
 * count the lowercase letters a, e, i, o, u and y are vowels; every other
 * character is treated as a consonant.
 */
function measure(s: string): number {
  const vowels = "aeiouy";
  let transitions = 0;
  let previousWasVowel = false;

  for (let i = 0; i < s.length; i++) {
    const isVowel = vowels.includes(s.charAt(i));
    if (previousWasVowel && !isVowel) transitions++;
    previousWasVowel = isVowel;
  }

  return transitions;
}
|
|
24
|
-
|
|
25
|
-
/**
 * True when the last two characters of `s` are identical and that character
 * is not one of the lowercase letters a, e, i, o, u.
 */
function endsDoubleConsonant(s: string): boolean {
  if (s.length < 2) return false;

  const last = s.charAt(s.length - 1);
  if (last !== s.charAt(s.length - 2)) return false;

  return !"aeiou".includes(last);
}
|
|
28
|
-
|
|
29
|
-
/**
 * True when the last three characters of `s` are consonant, vowel, consonant,
 * where vowels are the lowercase letters a, e, i, o, u and the final
 * consonant is additionally not w, x or y.
 */
function endsCVC(s: string): boolean {
  if (s.length < 3) return false;

  const tail = s.slice(-3);
  const isVowel = (ch: string): boolean => /[aeiou]/.test(ch);

  if (isVowel(tail.charAt(0))) return false;
  if (!isVowel(tail.charAt(1))) return false;
  return !/[aeiouwxy]/.test(tail.charAt(2));
}
|
|
34
|
-
|
|
35
|
-
/**
 * Reduces a word to a stem with a sequence of Porter-style suffix rules.
 * Words of two characters or fewer are returned unchanged. The input is
 * expected to be lower-case already: every rule matches lowercase suffixes.
 *
 * @param word Word to stem.
 * @returns The stemmed form.
 */
export function stem(word: string): string {
  if (word.length <= 2) return word;

  // Clean-up applied after an "ed" / "ing" ending has been stripped.
  const repair = (s: string): string => {
    if (["at", "bl", "iz"].some((ending) => s.endsWith(ending))) return s + "e";
    if (endsDoubleConsonant(s) && !/[lsz]$/.test(s)) return s.slice(0, -1);
    if (measure(s) === 1 && endsCVC(s)) return s + "e";
    return s;
  };

  // Applies the first matching suffix of `table`. The rewrite happens only
  // when the remaining base has a positive measure, but the scan stops at
  // the first match either way.
  const rewriteSuffix = (s: string, table: Record<string, string>): string => {
    for (const [suffix, replacement] of Object.entries(table)) {
      if (!s.endsWith(suffix)) continue;
      const base = s.slice(0, -suffix.length);
      return measure(base) > 0 ? base + replacement : s;
    }
    return s;
  };

  let w = word;

  // Plurals: "sses" -> "ss", "ies" -> "i", trailing "s" (but not "ss") dropped.
  if (w.endsWith("sses") || w.endsWith("ies")) {
    w = w.slice(0, -2);
  } else if (w.endsWith("s") && !w.endsWith("ss")) {
    w = w.slice(0, -1);
  }

  // "eed" -> "ee"; otherwise strip "ed" / "ing" when a vowel remains.
  if (w.endsWith("eed")) {
    if (measure(w.slice(0, -3)) > 0) w = w.slice(0, -1);
  } else {
    let stripped: string | null = null;
    if (w.endsWith("ed")) stripped = w.slice(0, -2);
    else if (w.endsWith("ing")) stripped = w.slice(0, -3);

    if (stripped !== null && hasVowel(stripped)) w = repair(stripped);
  }

  // Trailing "y" -> "i" when the rest of the word contains a vowel.
  if (w.endsWith("y") && hasVowel(w.slice(0, -1))) {
    w = w.slice(0, -1) + "i";
  }

  w = rewriteSuffix(w, step2map);
  w = rewriteSuffix(w, step3map);

  // Remove one residual suffix when the remaining base has measure > 1.
  const residual =
    /(ement|ment|tion|sion|ance|ence|able|ible|ism|ate|iti|ous|ive|ize|ise|ant|ent|al|er|ic|ou)$/.exec(w);
  if (residual) {
    const base = w.slice(0, -residual[0].length);
    if (measure(base) > 1) w = base;
  }

  // Drop a final "e".
  if (w.endsWith("e")) {
    const base = w.slice(0, -1);
    const m = measure(base);
    if (m > 1 || (m === 1 && !endsCVC(base))) w = base;
  }

  // Collapse a final "ll" to "l".
  if (endsDoubleConsonant(w) && w.endsWith("l") && measure(w.slice(0, -1)) > 1) {
    w = w.slice(0, -1);
  }

  return w;
}
|
package/src/state/synonyms.ts
DELETED
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import { stem } from "./stemmer.js";
|
|
2
|
-
|
|
3
|
-
/**
 * Groups of terms treated as interchangeable in search. Every member of a
 * group is a synonym of every other member of the same group.
 */
const SYNONYM_GROUPS: string[][] = [
  // Security and identity
  ["auth", "authentication", "authn", "authenticating"],
  ["authz", "authorization", "authorizing"],

  ["db", "database", "datastore"],
  ["perf", "performance", "latency", "throughput", "slow", "bottleneck"],
  ["optim", "optimization", "optimizing", "optimise", "query-optimization"],
  ["k8s", "kubernetes", "kube"],
  ["config", "configuration", "configuring", "setup"],
  ["deps", "dependencies", "dependency"],

  // Common abbreviations
  ["env", "environment"],
  ["fn", "function"],
  ["impl", "implementation", "implementing"],
  ["msg", "message", "messaging"],
  ["repo", "repository"],
  ["req", "request"],
  ["res", "response"],
  ["ts", "typescript"],
  ["js", "javascript"],
  ["pg", "postgres", "postgresql"],
  ["err", "error", "errors"],
  ["api", "endpoint", "endpoints"],
  ["ci", "continuous-integration"],
  ["cd", "continuous-deployment"],

  ["test", "testing", "tests"],
  ["doc", "documentation", "docs"],
  ["infra", "infrastructure"],
  ["deploy", "deployment", "deploying"],
  ["cache", "caching", "cached"],
  ["log", "logging", "logs"],
  ["monitor", "monitoring"],
  ["observe", "observability"],
  ["sec", "security", "secure"],
  ["validate", "validation", "validating"],
  ["migrate", "migration", "migrations"],
  ["debug", "debugging"],
  ["container", "containerization", "docker"],
  ["crash", "crashloop", "crashloopbackoff"],
  ["webhook", "webhooks", "callback"],
  ["middleware", "mw"],
  ["paginate", "pagination"],
  ["serialize", "serialization"],
  ["encrypt", "encryption"],
  ["hash", "hashing"],
];
|
|
47
|
-
|
|
48
|
-
/** Stemmed term -> stemmed forms of the other members of its synonym groups. */
const synonymMap = new Map<string, Set<string>>();

// Built once at module load: each group is lower-cased and stemmed, then
// every stem is linked to all the different stems of the same group.
SYNONYM_GROUPS.forEach((members) => {
  const stems = members.map((member) => stem(member.toLowerCase()));

  stems.forEach((current) => {
    let related = synonymMap.get(current);
    if (related === undefined) {
      related = new Set<string>();
      synonymMap.set(current, related);
    }
    for (const candidate of stems) {
      // Members that stem to the same form are not synonyms of themselves.
      if (candidate !== current) related.add(candidate);
    }
  });
});
|
|
59
|
-
|
|
60
|
-
/**
 * Looks up the synonyms recorded for an already-stemmed term.
 *
 * @param stemmedTerm Term in stemmed form.
 * @returns A new array of stemmed synonyms; empty when the term has none.
 */
export function getSynonyms(stemmedTerm: string): string[] {
  const related = synonymMap.get(stemmedTerm);
  if (related === undefined) return [];
  return Array.from(related);
}
|