@prometheus-ai/memory 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/dist/types/cli.d.ts +35 -0
- package/dist/types/config.d.ts +77 -0
- package/dist/types/core/aaak.d.ts +55 -0
- package/dist/types/core/annotations.d.ts +75 -0
- package/dist/types/core/banks.d.ts +33 -0
- package/dist/types/core/beam/consolidate.d.ts +32 -0
- package/dist/types/core/beam/helpers.d.ts +76 -0
- package/dist/types/core/beam/index.d.ts +59 -0
- package/dist/types/core/beam/recall.d.ts +32 -0
- package/dist/types/core/beam/schema.d.ts +2 -0
- package/dist/types/core/beam/store.d.ts +35 -0
- package/dist/types/core/beam/types.d.ts +233 -0
- package/dist/types/core/binary-vectors.d.ts +54 -0
- package/dist/types/core/chat-normalize.d.ts +13 -0
- package/dist/types/core/content-sanitizer.d.ts +18 -0
- package/dist/types/core/cost-log.d.ts +13 -0
- package/dist/types/core/embeddings.d.ts +44 -0
- package/dist/types/core/entities.d.ts +7 -0
- package/dist/types/core/episodic-graph.d.ts +89 -0
- package/dist/types/core/extraction/client.d.ts +31 -0
- package/dist/types/core/extraction/diagnostics.d.ts +51 -0
- package/dist/types/core/extraction/prompts.d.ts +2 -0
- package/dist/types/core/extraction.d.ts +6 -0
- package/dist/types/core/index.d.ts +4 -0
- package/dist/types/core/llm-backends.d.ts +21 -0
- package/dist/types/core/local-llm.d.ts +15 -0
- package/dist/types/core/memory.d.ts +160 -0
- package/dist/types/core/migrations/e6-triplestore-split.d.ts +17 -0
- package/dist/types/core/migrations/index.d.ts +1 -0
- package/dist/types/core/mmr.d.ts +8 -0
- package/dist/types/core/orchestrator.d.ts +20 -0
- package/dist/types/core/patterns.d.ts +61 -0
- package/dist/types/core/plugins.d.ts +109 -0
- package/dist/types/core/polyphonic-recall.d.ts +66 -0
- package/dist/types/core/query-cache.d.ts +46 -0
- package/dist/types/core/query-intent.d.ts +20 -0
- package/dist/types/core/recall-diagnostics.d.ts +48 -0
- package/dist/types/core/runtime-options.d.ts +68 -0
- package/dist/types/core/shmr.d.ts +56 -0
- package/dist/types/core/streaming.d.ts +136 -0
- package/dist/types/core/synonyms.d.ts +46 -0
- package/dist/types/core/temporal-parser.d.ts +16 -0
- package/dist/types/core/token-counter.d.ts +8 -0
- package/dist/types/core/triples.d.ts +63 -0
- package/dist/types/core/typed-memory.d.ts +39 -0
- package/dist/types/core/vector-math.d.ts +1 -0
- package/dist/types/core/veracity-consolidation.d.ts +60 -0
- package/dist/types/core/weibull.d.ts +96 -0
- package/dist/types/db.d.ts +16 -0
- package/dist/types/diagnose.d.ts +24 -0
- package/dist/types/dr/index.d.ts +1 -0
- package/dist/types/dr/recovery.d.ts +68 -0
- package/dist/types/index.d.ts +5 -0
- package/dist/types/mcp-server.d.ts +40 -0
- package/dist/types/mcp-tools.d.ts +484 -0
- package/dist/types/migrations/e6-triplestore-split.d.ts +1 -0
- package/dist/types/migrations/index.d.ts +1 -0
- package/dist/types/types.d.ts +145 -0
- package/dist/types/util/datetime.d.ts +8 -0
- package/dist/types/util/env.d.ts +10 -0
- package/dist/types/util/ids.d.ts +3 -0
- package/dist/types/util/lru.d.ts +12 -0
- package/dist/types/util/regex.d.ts +10 -0
- package/package.json +85 -0
- package/src/cli.ts +398 -0
- package/src/config.ts +326 -0
- package/src/core/aaak.ts +142 -0
- package/src/core/annotations.ts +457 -0
- package/src/core/banks.ts +133 -0
- package/src/core/beam/consolidate.ts +965 -0
- package/src/core/beam/helpers.ts +977 -0
- package/src/core/beam/index.ts +353 -0
- package/src/core/beam/recall.ts +1100 -0
- package/src/core/beam/schema.ts +423 -0
- package/src/core/beam/store.ts +829 -0
- package/src/core/beam/types.ts +268 -0
- package/src/core/binary-vectors.ts +317 -0
- package/src/core/chat-normalize.ts +160 -0
- package/src/core/content-sanitizer.ts +136 -0
- package/src/core/cost-log.ts +103 -0
- package/src/core/embeddings.ts +423 -0
- package/src/core/entities.ts +259 -0
- package/src/core/episodic-graph.ts +708 -0
- package/src/core/extraction/client.ts +162 -0
- package/src/core/extraction/diagnostics.ts +193 -0
- package/src/core/extraction/prompts.ts +31 -0
- package/src/core/extraction.ts +335 -0
- package/src/core/index.ts +30 -0
- package/src/core/llm-backends.ts +51 -0
- package/src/core/local-llm.ts +436 -0
- package/src/core/memory.ts +630 -0
- package/src/core/migrations/e6-triplestore-split.ts +211 -0
- package/src/core/migrations/index.ts +1 -0
- package/src/core/mmr.ts +71 -0
- package/src/core/orchestrator.ts +62 -0
- package/src/core/patterns.ts +484 -0
- package/src/core/plugins.ts +375 -0
- package/src/core/polyphonic-recall.ts +563 -0
- package/src/core/query-cache.ts +354 -0
- package/src/core/query-intent.ts +139 -0
- package/src/core/recall-diagnostics.ts +157 -0
- package/src/core/runtime-options.ts +119 -0
- package/src/core/shmr.ts +460 -0
- package/src/core/streaming.ts +419 -0
- package/src/core/synonyms.ts +197 -0
- package/src/core/temporal-parser.ts +363 -0
- package/src/core/token-counter.ts +30 -0
- package/src/core/triples.ts +454 -0
- package/src/core/typed-memory.ts +407 -0
- package/src/core/vector-math.ts +23 -0
- package/src/core/veracity-consolidation.ts +477 -0
- package/src/core/weibull.ts +124 -0
- package/src/db.ts +128 -0
- package/src/diagnose.ts +174 -0
- package/src/dr/index.ts +1 -0
- package/src/dr/recovery.ts +405 -0
- package/src/index.ts +33 -0
- package/src/mcp-server.ts +155 -0
- package/src/mcp-tools.ts +970 -0
- package/src/migrations/e6-triplestore-split.ts +1 -0
- package/src/migrations/index.ts +1 -0
- package/src/types.ts +157 -0
- package/src/util/datetime.ts +69 -0
- package/src/util/env.ts +65 -0
- package/src/util/ids.ts +19 -0
- package/src/util/lru.ts +48 -0
- package/src/util/regex.ts +165 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "../core/migrations/e6-triplestore-split";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./e6-triplestore-split";
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/** A JSON primitive: string, number, boolean, or null. */
export type JsonScalar = string | number | boolean | null;
|
|
2
|
+
/** Any JSON value: a scalar, an array of JSON values, or a string-keyed object of JSON values (recursive). */
export type JsonValue = JsonScalar | JsonValue[] | { [key: string]: JsonValue };
|
|
3
|
+
/** String-keyed bag of JSON values. */
export type Metadata = Record<string, JsonValue>;
|
|
4
|
+
/**
 * Veracity label: one of the listed literals, or any other string.
 * The `(string & {})` member keeps the union open to arbitrary strings without
 * collapsing the named literals into plain `string`.
 */
export type Veracity = "stated" | "inferred" | "tool" | "imported" | "unknown" | (string & {});
|
|
5
|
+
/** A numeric vector, either a `Float32Array` or a read-only array of numbers. */
export type Vector = Float32Array | readonly number[];
|
|
6
|
+
/** Vector type tag. NOTE(review): presumably the element encoding of stored vectors — confirm against the vector store. */
export type VecType = "float32" | "int8" | "bit";
|
|
7
|
+
|
|
8
|
+
/**
 * Snake_case record describing one memory entry.
 *
 * NOTE(review): the column-style field names suggest this mirrors a database
 * row — confirm against the schema module.
 */
export interface MemoryRow {
  // Always-present fields.
  id: string;
  content: string;
  source: string | null;
  timestamp: string | null;
  session_id: string;
  importance: number;
  /** Metadata kept as a string (presumably serialized JSON), or null. */
  metadata_json: string | null;
  veracity: Veracity;
  created_at: string;

  // Optional fields; each may be absent or explicitly null.
  recall_count?: number | null;
  last_recalled?: string | null;
  valid_until?: string | null;
  superseded_by?: string | null;
  scope?: string | null;
  memory_type?: string | null;
  trust_tier?: string | null;
  author_id?: string | null;
  author_type?: string | null;
  channel_id?: string | null;
  topic?: string | null;
}
|
|
30
|
+
|
|
31
|
+
/** Alias of {@link MemoryRow}; working-memory rows add no extra fields. */
export type WorkingMemoryRow = MemoryRow;
|
|
32
|
+
|
|
33
|
+
/**
 * A {@link MemoryRow} extended with the fields specific to episodic entries.
 * `rowid` and `summary_of` are required; the remaining fields may be absent or null.
 */
export interface EpisodicMemoryRow extends MemoryRow {
  rowid: number;
  summary_of: string;

  tier?: number | null;
  degraded_at?: string | null;
  event_date?: string | null;
  episode_type?: string | null;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Caller-supplied description of a memory. Only `content` is required.
 * `timestamp` and `valid_until` accept either a string or a `Date`.
 */
export interface MemoryInput {
  content: string;

  source?: string | null;
  timestamp?: string | Date | null;
  session_id?: string;
  importance?: number;
  metadata?: Metadata | null;
  veracity?: Veracity;
  scope?: string | null;
  valid_until?: string | Date | null;
}
|
|
53
|
+
|
|
54
|
+
/**
 * CamelCase view of a memory entry, carrying `metadata` as a structured
 * value rather than a string.
 */
export interface WorkingMemory {
  id: string;
  content: string;
  source: string | null;
  timestamp: string | null;
  sessionId: string;
  importance: number;
  metadata: Metadata | null;
  veracity: Veracity;
  createdAt: string;
}
|
|
65
|
+
|
|
66
|
+
/** A {@link WorkingMemory} extended with the episodic-only fields; all four are required here. */
export interface EpisodicMemory extends WorkingMemory {
  rowid: number;
  summaryOf: string;
  tier: number;
  degradedAt: string | null;
}
|
|
72
|
+
|
|
73
|
+
/**
 * One recalled memory together with its scoring details.
 * `id`, `content`, `source`, `timestamp` and `score` are always present;
 * everything else is optional.
 */
export interface RecallResult {
  // Identity and content.
  id: string;
  content: string;
  source: string | null;
  timestamp: string | null;
  session_id?: string;
  importance?: number;
  metadata?: Metadata | null;
  metadata_json?: string | null;
  veracity?: Veracity;

  // Overall score plus optional per-signal components.
  score: number;
  vec_score?: number;
  fts_score?: number;
  importance_score?: number;
  recency_score?: number;
  temporal_score?: number;
  rank?: number;
  distance?: number;

  // Optional classification fields.
  memory_type?: string | null;
  trust_tier?: string | null;
}
|
|
94
|
+
|
|
95
|
+
/** Snake_case annotation record: a `kind`/`value` pair attached to a memory via `memory_id`. */
export interface AnnotationRow {
  id: number;
  memory_id: string;
  kind: string;
  value: string;
  source: string | null;
  confidence: number;
  created_at: string;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Snake_case subject/predicate/object record with a validity window.
 * `valid_from` is required; `valid_until` may be null.
 */
export interface TripleRow {
  id: number;
  subject: string;
  predicate: string;
  object: string;
  valid_from: string;
  valid_until: string | null;
  source: string | null;
  confidence: number;
  created_at: string;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Snake_case subject/predicate/object fact record, keyed by a string
 * `fact_id` and tied to a `session_id`.
 */
export interface FactRow {
  fact_id: string;
  session_id: string;
  subject: string;
  predicate: string;
  object: string;
  timestamp: string | null;
  source_msg_id: string | null;
  confidence: number;
  created_at: string;
}
|
|
128
|
+
|
|
129
|
+
/** Snake_case embedding record; the vector is held as a string in `embedding_json`. */
export interface EmbeddingRow {
  memory_id: string;
  embedding_json: string;
  model: string | null;
  created_at: string;
}
|
|
135
|
+
|
|
136
|
+
/** An embedding in structured form: the vector itself plus its `dim` and the (nullable) model name. */
export interface EmbeddingResult {
  memory_id: string;
  embedding: Vector;
  model: string | null;
  dim: number;
}
|
|
142
|
+
|
|
143
|
+
/**
 * One hit from a vector search. Only `distance` is guaranteed; the
 * identifying fields (`rowid`, `id`, `memory_id`) and `score` are optional.
 */
export interface VectorSearchResult {
  rowid?: number;
  id?: string;
  memory_id?: string;
  distance: number;
  score?: number;
}
|
|
150
|
+
|
|
151
|
+
/** Entry counts. The working and episodic counts are required; the rest are optional. */
export interface MemoryStats {
  working_count: number;
  episodic_count: number;

  embedding_count?: number;
  annotation_count?: number;
  triple_count?: number;
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { recencyHalflifeHours } from "../config";
|
|
2
|
+
import { LruCache } from "./lru";
|
|
3
|
+
|
|
4
|
+
// Matches a trailing timezone designator: `Z`, or a signed offset of the form ±HH:MM / ±HHMM.
const TZ_RE = /(?:Z|[+-]\d\d:?\d\d)$/;
|
|
5
|
+
// Matches a string that is exactly a `YYYY-MM-DD` digit pattern (no time part).
const DATE_ONLY_RE = /^\d{4}-\d{2}-\d{2}$/;
|
|
6
|
+
// Cache of parsed timestamps keyed by the raw input string, capped at 2000 entries; used by parseTsFast.
const TS_CACHE = new LruCache<string, Date>(2000);
|
|
7
|
+
|
|
8
|
+
/** A query reference time: a string, a `Date`, or null/undefined (which `parseQueryTime` treats as "now"). */
export type QueryTime = string | Date | null | undefined;
|
|
9
|
+
|
|
10
|
+
/**
 * Parses a date or date-time string, treating input without a timezone as UTC.
 *
 * - A bare `YYYY-MM-DD` becomes midnight UTC of that day.
 * - Any other input lacking a trailing `Z` / ±HH:MM / ±HHMM gets `Z` appended.
 * - Input that already carries a timezone is passed to `Date` unchanged.
 *
 * @param value Text to parse; surrounding whitespace is ignored.
 * @returns The parsed instant.
 * @throws RangeError If the input is blank or `Date` cannot parse it.
 */
export function parseIsoDateTimeUtc(value: string): Date {
  const trimmed = value.trim();
  if (!trimmed) {
    throw new RangeError("Invalid ISO datetime: empty string");
  }

  let candidate = trimmed;
  if (DATE_ONLY_RE.test(trimmed)) {
    candidate = trimmed + "T00:00:00Z";
  } else if (!TZ_RE.test(trimmed)) {
    candidate = trimmed + "Z";
  }

  const parsed = new Date(candidate);
  if (Number.isNaN(parsed.getTime())) {
    // Report the caller's original text, not the normalized candidate.
    throw new RangeError(`Invalid ISO datetime: ${value}`);
  }
  return parsed;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Returns a fresh `Date` for the same instant as `value`.
 *
 * @param value The date to copy.
 * @returns A new `Date` object with the same epoch time.
 * @throws RangeError If `value` is an invalid date.
 */
export function normalizeDateTimeUtc(value: Date): Date {
  const epochMs = value.getTime();
  if (!Number.isNaN(epochMs)) {
    return new Date(epochMs);
  }
  throw new RangeError("Invalid Date");
}
|
|
25
|
+
|
|
26
|
+
/**
 * Resolves a query reference time to a `Date`.
 *
 * @param value A string (parsed via `parseIsoDateTimeUtc`), a `Date`
 *   (copied via `normalizeDateTimeUtc`), or null/undefined for the current time.
 * @returns The resolved instant.
 * @throws RangeError Propagated from the string parser or the date copier.
 */
export function parseQueryTime(value: QueryTime): Date {
  if (value == null) {
    return new Date();
  }
  if (typeof value === "string") {
    return parseIsoDateTimeUtc(value);
  }
  return normalizeDateTimeUtc(value);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Non-throwing, cached variant of `parseIsoDateTimeUtc`.
 *
 * Successful parses are stored in `TS_CACHE` under the raw input string, so
 * repeated calls with the same text return the same `Date` object. Failed
 * parses are not cached.
 *
 * @param value Timestamp text.
 * @returns The parsed date, or `undefined` for empty or unparseable input.
 */
export function parseTsFast(value: string): Date | undefined {
  if (!value) return undefined;

  const hit = TS_CACHE.get(value);
  if (hit !== undefined) return hit;

  try {
    const fresh = parseIsoDateTimeUtc(value);
    TS_CACHE.set(value, fresh);
    return fresh;
  } catch {
    // Parse failures are reported as "no timestamp" rather than thrown.
    return undefined;
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Formats a date with `Date.prototype.toISOString`.
 *
 * @param value The date to format; defaults to the current time.
 * @returns The ISO string for `value`.
 * @throws RangeError If `value` is an invalid date.
 */
export function toUtcIso(value: Date = new Date()): string {
  const instant = normalizeDateTimeUtc(value);
  return instant.toISOString();
}
|
|
47
|
+
|
|
48
|
+
/**
 * Exponential recency score: `exp(-ageHours / halflifeHours)`.
 *
 * A missing or unparseable timestamp yields the neutral value 0.5. The same
 * neutral value is returned when the computation does not produce a finite
 * number (for example a zero or NaN `halflifeHours`, an invalid `now`, or an
 * exponent overflow), so the result is always a finite number.
 *
 * @param timestamp When the memory was recorded (string or `Date`).
 * @param halflifeHours Decay scale in hours; defaults to the configured value.
 * @param now Reference time; defaults to the current time.
 * @returns A finite score; 0.5 when it cannot be computed.
 */
export function recencyDecay(
  timestamp: string | Date | null | undefined,
  halflifeHours = recencyHalflifeHours(),
  now: Date = new Date(),
): number {
  if (!timestamp) return 0.5;
  try {
    const ts = typeof timestamp === "string" ? parseIsoDateTimeUtc(timestamp) : normalizeDateTimeUtc(timestamp);
    const ageHours = (now.getTime() - ts.getTime()) / 3_600_000;
    const decay = Math.exp(-ageHours / halflifeHours);
    // JS arithmetic never throws here; NaN/Infinity must be mapped to the
    // fallback explicitly or they would leak into downstream scoring.
    return Number.isFinite(decay) ? decay : 0.5;
  } catch {
    return 0.5;
  }
}
|
|
62
|
+
|
|
63
|
+
/**
 * Exponential closeness score between a memory timestamp and a query time:
 * `exp(-ageHours / halflifeHours)`.
 *
 * A memory timestamp later than the query time is treated as equal to it
 * (age 0). An empty or unparseable memory timestamp scores 0.
 *
 * @param memoryTimestamp Timestamp text of the memory.
 * @param queryTime Reference time; null/undefined means the current time.
 * @param halflifeHours Decay scale in hours (default 24).
 * @returns The score, or 0 when the memory timestamp cannot be parsed.
 * @throws RangeError If `queryTime` is an invalid string or `Date`.
 */
export function temporalBoost(memoryTimestamp: string, queryTime: QueryTime = undefined, halflifeHours = 24): number {
  const memoryDate = parseTsFast(memoryTimestamp);
  if (memoryDate === undefined) return 0;

  const queryMs = parseQueryTime(queryTime).getTime();
  // Clamp memories dated after the query time to the query time itself.
  const effectiveMs = Math.min(memoryDate.getTime(), queryMs);
  const ageHours = (queryMs - effectiveMs) / 3_600_000;
  return Math.exp(-ageHours / halflifeHours);
}
|
package/src/util/env.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/** Environment map: variable name to its string value, or `undefined` when unset. */
export type Env = Record<string, string | undefined>;
|
|
2
|
+
|
|
3
|
+
// Lowercase spellings treated as "true" by the boolean env helpers.
const TRUE_VALUES: Record<string, true> = { "1": true, true: true, yes: true, on: true };
|
|
4
|
+
// Lowercase spellings treated as "false" by the boolean env helpers.
const FALSE_VALUES: Record<string, true> = { "0": true, false: true, no: true, off: true };
|
|
5
|
+
|
|
6
|
+
/**
 * Reads a variable exactly as stored, with no trimming or defaulting.
 *
 * @param name Variable name.
 * @param env Environment map; defaults to `process.env`.
 * @returns The stored value, or `undefined` when unset.
 */
export function envValue(name: string, env: Env = process.env): string | undefined {
  return env[name];
}
|
|
10
|
+
|
|
11
|
+
/**
 * Reads a variable as stored, substituting a default only when it is unset.
 * An empty string counts as set and is returned unchanged.
 *
 * @param name Variable name.
 * @param defaultValue Value used when the variable is unset (default `""`).
 * @param env Environment map; defaults to `process.env`.
 * @returns The stored value or the default.
 */
export function envString(name: string, defaultValue = "", env: Env = process.env): string {
  const stored = env[name];
  if (stored === undefined) {
    return defaultValue;
  }
  return stored;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Reads a variable and trims it, treating blank as unset.
 *
 * @param name Variable name.
 * @param env Environment map; defaults to `process.env`.
 * @returns The trimmed value, or `undefined` when unset or blank.
 */
export function envOptionalString(name: string, env: Env = process.env): string | undefined {
  const trimmed = env[name]?.trim();
  return trimmed || undefined;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Reports whether a variable is set to one of the recognised "true" spellings.
 * The value is trimmed and lowercased before the lookup.
 *
 * @param name Variable name.
 * @param env Environment map; defaults to `process.env`.
 * @returns `true` only for a recognised true value; `false` when unset or anything else.
 */
export function envTruthy(name: string, env: Env = process.env): boolean {
  const normalized = env[name]?.trim().toLowerCase();
  if (normalized === undefined) {
    return false;
  }
  return TRUE_VALUES[normalized] === true;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Reports whether a variable is explicitly set to one of the recognised
 * "false" spellings. The value is trimmed and lowercased before the lookup.
 *
 * @param name Variable name.
 * @param env Environment map; defaults to `process.env`.
 * @returns `true` only for a recognised false value; `false` when unset or anything else.
 */
export function envDisabled(name: string, env: Env = process.env): boolean {
  const normalized = env[name]?.trim().toLowerCase();
  if (normalized === undefined) {
    return false;
  }
  return FALSE_VALUES[normalized] === true;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Reads a boolean variable, falling back to a default.
 * The value is trimmed and lowercased, then matched against the recognised
 * true and false spellings.
 *
 * @param name Variable name.
 * @param defaultValue Result when the variable is unset, blank, or unrecognised.
 * @param env Environment map; defaults to `process.env`.
 * @returns The parsed boolean or the default.
 */
export function envBool(name: string, defaultValue: boolean, env: Env = process.env): boolean {
  const normalized = env[name]?.trim().toLowerCase();
  if (normalized) {
    if (TRUE_VALUES[normalized] === true) return true;
    if (FALSE_VALUES[normalized] === true) return false;
  }
  return defaultValue;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Reads an integer variable, falling back to a default.
 *
 * The trimmed value must consist solely of an optional sign followed by
 * decimal digits. Anything else — including values that merely start with
 * digits, such as `"12abc"`, `"1.9"` or `"1e3"` — is treated as malformed and
 * yields the default instead of a silently truncated number.
 *
 * @param name Variable name.
 * @param defaultValue Result when the variable is unset, blank, or malformed.
 * @param env Environment map; defaults to `process.env`.
 * @returns The parsed integer or the default.
 */
export function envInt(name: string, defaultValue: number, env: Env = process.env): number {
  const raw = env[name]?.trim();
  if (!raw) return defaultValue;
  // parseInt stops at the first non-digit, so validate the whole string first.
  if (!/^[+-]?\d+$/.test(raw)) return defaultValue;
  const value = Number.parseInt(raw, 10);
  return Number.isFinite(value) ? value : defaultValue;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Reads a floating-point variable, falling back to a default.
 *
 * The trimmed value must be a complete decimal number: optional sign, digits
 * with an optional fraction (or a leading-dot fraction), and an optional
 * exponent. Values that merely start with a number, such as `"1.5abc"`, are
 * treated as malformed and yield the default instead of a truncated parse.
 *
 * @param name Variable name.
 * @param defaultValue Result when the variable is unset, blank, malformed, or non-finite.
 * @param env Environment map; defaults to `process.env`.
 * @returns The parsed number or the default.
 */
export function envFloat(name: string, defaultValue: number, env: Env = process.env): number {
  const raw = env[name]?.trim();
  if (!raw) return defaultValue;
  // parseFloat stops at the first invalid character, so validate the whole string first.
  if (!/^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(raw)) return defaultValue;
  const value = Number.parseFloat(raw);
  return Number.isFinite(value) ? value : defaultValue;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Reads a variable that must be one of a fixed set of strings.
 *
 * Matching is case-insensitive on both sides: the trimmed value and each
 * allowed entry are compared in lowercase, and the matching entry is returned
 * exactly as it appears in `allowed`.
 *
 * @param name Variable name.
 * @param allowed Accepted values; the first match wins.
 * @param defaultValue Result when the variable is unset, blank, or not allowed.
 * @param env Environment map; defaults to `process.env`.
 * @returns The matching allowed value or the default.
 */
export function envOneOf<T extends string>(
  name: string,
  allowed: readonly T[],
  defaultValue: T,
  env: Env = process.env,
): T {
  const raw = env[name]?.trim().toLowerCase();
  if (!raw) return defaultValue;
  for (const value of allowed) {
    // Lowercase the candidate too; otherwise a mixed-case entry could never match.
    if (raw === value.toLowerCase()) return value;
  }
  return defaultValue;
}
|
package/src/util/ids.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Hashes the input with SHA-256 via `Bun.CryptoHasher` and keeps the first
 * 16 hexadecimal characters of the digest.
 *
 * @param value Text or bytes to hash.
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function sha256Hex16(value: string | Uint8Array): string {
  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(value);
  const hex = hasher.digest("hex");
  return hex.slice(0, 16);
}
|
|
4
|
+
|
|
5
|
+
// Two random 32-bit words drawn once when the module loads.
const idSeed = crypto.getRandomValues(new Uint32Array(2));
// Call counter, kept as an unsigned 32-bit value.
let idCounter = 0;

/**
 * Produces the next nonce: both seed words and the incremented counter,
 * each rendered in base 36 and joined with `:`.
 */
function nextIdNonce(): string {
  idCounter = (idCounter + 1) >>> 0;
  const segments = [idSeed[0], idSeed[1], idCounter].map((part) => part?.toString(36) ?? "0");
  return segments.join(":");
}
|
|
12
|
+
|
|
13
|
+
/**
 * Builds an id by hashing the content, the ISO form of `now`, and a fresh
 * nonce, separated by NUL characters. Each call consumes a new nonce, so
 * repeated calls with identical arguments hash different material.
 *
 * @param content Text the id is derived from.
 * @param now Timestamp mixed into the hash; defaults to the current time.
 * @returns The 16-character hex id from `sha256Hex16`.
 */
export function generateId(content: string, now: Date = new Date()): string {
  const isoNow = now.toISOString();
  const nonce = nextIdNonce();
  return sha256Hex16(`${content}\0${isoNow}\0${nonce}`);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Builds a deterministic id from the content and, when non-empty, the source.
 * With a source, the hashed material is `content`, a NUL character, then
 * `source`; without one, only `content` is hashed.
 *
 * @param content Text the id is derived from.
 * @param source Optional source label (default `""`).
 * @returns The 16-character hex id from `sha256Hex16`.
 */
export function stableMemoryId(content: string, source = ""): string {
  const material = source ? `${content}\0${source}` : content;
  return sha256Hex16(material);
}
|
package/src/util/lru.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Size-bounded least-recently-used cache backed by a `Map`.
 *
 * Recency is tracked through the map's insertion order: reading or writing a
 * key moves it to the end, and the entry at the front is evicted when the
 * size exceeds `maxSize`.
 */
export class LruCache<K, V> {
  /** Maximum number of entries; 0 disables storage entirely. */
  readonly maxSize: number;
  readonly #items = new Map<K, V>();

  /**
   * @param maxSize Capacity. Fractions are truncated, negative values and
   *   `NaN` become 0. `Infinity` is kept and means "never evict".
   */
  constructor(maxSize: number) {
    const truncated = Math.trunc(maxSize);
    // Math.max(0, NaN) is NaN, which would defeat both the zero check and the
    // eviction comparison in set(); map NaN to 0 explicitly.
    this.maxSize = Number.isNaN(truncated) ? 0 : Math.max(0, truncated);
  }

  /** Number of entries currently stored. */
  get size(): number {
    return this.#items.size;
  }

  /** Reports whether `key` is present, without changing its recency. */
  has(key: K): boolean {
    return this.#items.has(key);
  }

  /**
   * Returns the value stored for `key` and marks it most recently used.
   * Returns `undefined` when the key is absent.
   */
  get(key: K): V | undefined {
    const value = this.#items.get(key);
    // The has() check covers keys whose stored value is itself `undefined`.
    if (value !== undefined || this.#items.has(key)) {
      // Re-insert to move the key to the most-recent end of the map.
      this.#items.delete(key);
      this.#items.set(key, value as V);
    }
    return value;
  }

  /**
   * Stores `value` under `key` as the most recently used entry, evicting the
   * least recently used entry if the capacity is exceeded.
   */
  set(key: K, value: V): this {
    if (this.maxSize === 0) return this;
    if (this.#items.has(key)) this.#items.delete(key);
    this.#items.set(key, value);
    if (this.#items.size > this.maxSize) {
      const oldest = this.#items.keys().next();
      if (!oldest.done) this.#items.delete(oldest.value);
    }
    return this;
  }

  /** Removes `key`; returns whether it was present. */
  delete(key: K): boolean {
    return this.#items.delete(key);
  }

  /** Removes every entry. */
  clear(): void {
    this.#items.clear();
  }

  /** Iterates entries from least to most recently used. */
  entries(): IterableIterator<[K, V]> {
    return this.#items.entries();
  }
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
// Token pattern: a lowercase ASCII letter or digit followed by any run of lowercase
// letters, digits, or `_ . : / + -`. The `g` flag makes the regex stateful (lastIndex).
const RECALL_TOKEN_RE = /[a-z0-9][a-z0-9_.:/+-]*/g;
|
|
2
|
+
// Matches one code unit in U+3040–U+30FF, U+4E00–U+9FFF, or U+AC00–U+D7AF.
const CJK_RE = /[\u3040-\u30ff\u4e00-\u9fff\uac00-\ud7af]/;
|
|
3
|
+
|
|
4
|
+
/** Lowercase words that `recallTokens` drops from its output. */
export const FACT_MATCH_STOPWORDS = new Set([
  "a", "an", "and", "are", "as", "at",
  "be", "by",
  "can", "could",
  "did", "do", "does",
  "for", "from",
  "had", "has", "have", "how",
  "i", "in", "is", "it", "its",
  "me", "my",
  "of", "on", "or", "our",
  "related",
  "should",
  "that", "the", "their", "there", "this", "to", "totally",
  "unrelated", "use", "uses",
  "was", "we", "what", "when", "where", "which", "who", "why", "with",
  "you", "your",
]);
|
|
59
|
+
|
|
60
|
+
/**
 * Query-expansion table: each key token maps to the extra tokens that
 * `expandedQueryTokens` appends after it.
 */
export const RECALL_SYNONYMS: Readonly<Record<string, readonly string[]>> = {
  branding: [
    "brand",
    "positioning",
    "identity",
    "wording",
  ],
  preference: [
    "prefer",
    "prefers",
    "want",
    "wants",
    "reject",
    "rejects",
    "avoid",
    "grounded",
  ],
  professional: [
    "software",
    "builder",
  ],
  url: [
    "link",
    "profile",
  ],
  current: [
    "now",
    "live",
    "latest",
  ],
  feeling: [
    "feel",
    "feels",
  ],
  imposter: [
    "self-doubt",
    "doubt",
    "insecure",
  ],
};
|
|
69
|
+
|
|
70
|
+
/**
 * Reports whether the text contains at least one code unit matched by `CJK_RE`.
 *
 * @param text Text to inspect.
 * @returns `true` if any matching code unit is present.
 */
export function hasCjk(text: string): boolean {
  return text.search(CJK_RE) !== -1;
}

/** Alternate name for {@link hasCjk}; both refer to the same function. */
export const containsSpacelessCjk = hasCjk;
|
|
75
|
+
|
|
76
|
+
/**
 * Splits text into lowercase recall tokens.
 *
 * The text is lowercased and scanned with `RECALL_TOKEN_RE`. A match is kept
 * when it is at least 3 characters long, is not a stopword, and is not made
 * up solely of ASCII digits. Order and duplicates are preserved.
 *
 * @param text Text to tokenize.
 * @returns The retained tokens in order of appearance.
 */
export function recallTokens(text: string): string[] {
  // String.prototype.match with a global regex restarts from index 0 and
  // leaves lastIndex at 0, so no manual reset is needed.
  const candidates = text.toLowerCase().match(RECALL_TOKEN_RE) ?? [];
  return candidates.filter(
    (candidate) => candidate.length >= 3 && !FACT_MATCH_STOPWORDS.has(candidate) && !isAsciiDigits(candidate),
  );
}
|
|
88
|
+
|
|
89
|
+
/**
 * Returns the distinct recall tokens of the text.
 *
 * @param text Text to tokenize.
 * @returns A set of the tokens produced by `recallTokens`.
 */
export function factMatchTokens(text: string): Set<string> {
  const unique = new Set<string>();
  for (const token of recallTokens(text)) {
    unique.add(token);
  }
  return unique;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Expands query tokens with their synonyms from `RECALL_SYNONYMS`.
 *
 * Each token is emitted once, at its first occurrence, followed by any of
 * its synonyms that have not been emitted yet.
 *
 * @param tokens Query tokens in order.
 * @returns The de-duplicated tokens interleaved with their synonyms.
 */
export function expandedQueryTokens(tokens: readonly string[]): string[] {
  const expanded: string[] = [];
  const seen = new Set<string>();
  for (const token of tokens) {
    if (!seen.has(token)) {
      seen.add(token);
      expanded.push(token);
    }
    // Only consult the table's own keys. A plain property read also resolves
    // inherited members, so a token such as "constructor" would return a
    // function and the loop below would throw a TypeError.
    if (!Object.prototype.hasOwnProperty.call(RECALL_SYNONYMS, token)) continue;
    const synonyms = RECALL_SYNONYMS[token];
    if (synonyms === undefined) continue;
    for (const synonym of synonyms) {
      if (seen.has(synonym)) continue;
      seen.add(synonym);
      expanded.push(synonym);
    }
  }
  return expanded;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Picks a relevance threshold from the number of query tokens:
 * 0.5 for exactly three tokens, 0.3 for four or more, 0.15 otherwise.
 *
 * @param queryTokens Tokens of the query.
 * @returns The threshold for that token count.
 */
export function minimumRecallRelevance(queryTokens: readonly string[]): number {
  const count = queryTokens.length;
  if (count === 3) return 0.5;
  return count >= 4 ? 0.3 : 0.15;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Builds search terms from the CJK code units of a text.
 *
 * Code units accepted by `isCjkCodeUnit` are collected in order; everything
 * else is discarded. The result lists each distinct unit once (unquoted),
 * followed by each distinct pair of consecutive collected units wrapped in
 * double quotes. Pairs are formed over the collected sequence, so units
 * separated by discarded text still pair up.
 *
 * @param text Text to scan.
 * @returns Single-unit terms followed by quoted bigram terms; empty when no
 *   unit qualifies.
 */
export function cjkFtsTerms(text: string): string[] {
  // split("") yields UTF-16 code units, matching a charAt-by-index scan.
  const units = text.split("").filter((unit) => isCjkCodeUnit(unit.charCodeAt(0)));
  if (units.length === 0) return [];

  // A Set keeps first-occurrence order, which gives the distinct single units directly.
  const emitted = new Set<string>(units);
  const terms: string[] = [...emitted];

  units.forEach((unit, index) => {
    if (index === 0) return;
    const before = units[index - 1];
    if (before === undefined) return;
    const pair = before + unit;
    if (emitted.has(pair)) return;
    emitted.add(pair);
    terms.push(`"${pair}"`);
  });
  return terms;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Turns a query into double-quoted search terms.
 *
 * The query is tokenized with `recallTokens` and expanded with
 * `expandedQueryTokens`. Each term has embedded double quotes doubled and is
 * trimmed; terms that end up empty are dropped and the rest are wrapped in
 * double quotes.
 *
 * @param query Raw query text.
 * @returns The quoted terms in expansion order.
 */
export function ftsQueryTerms(query: string): string[] {
  return expandedQueryTokens(recallTokens(query))
    .map((term) => term.replaceAll('"', '""').trim())
    .filter((escaped) => escaped.length > 0)
    .map((escaped) => `"${escaped}"`);
}
|
|
152
|
+
|
|
153
|
+
/**
 * Reports whether the string is non-empty and consists only of the ASCII
 * digits `0`–`9`.
 */
function isAsciiDigits(value: string): boolean {
  if (value.length === 0) return false;
  for (const ch of value) {
    if (ch < "0" || ch > "9") return false;
  }
  return true;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Reports whether a UTF-16 code unit falls in one of three inclusive ranges:
 * U+3040–U+30FF, U+4E00–U+9FFF, or U+AC00–U+D7AF.
 */
function isCjkCodeUnit(code: number): boolean {
  const inKanaRange = code >= 0x3040 && code <= 0x30ff;
  const inIdeographRange = code >= 0x4e00 && code <= 0x9fff;
  const inHangulRange = code >= 0xac00 && code <= 0xd7af;
  return inIdeographRange || inKanaRange || inHangulRange;
}
|