ahok-skill 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/.prettierrc +8 -0
  2. package/Dockerfile +59 -0
  3. package/RAW_SKILL.md +219 -0
  4. package/README.md +277 -0
  5. package/SKILL.md +58 -0
  6. package/bin/opm.js +268 -0
  7. package/data/openmemory.sqlite +0 -0
  8. package/data/openmemory.sqlite-shm +0 -0
  9. package/data/openmemory.sqlite-wal +0 -0
  10. package/dist/ai/graph.js +293 -0
  11. package/dist/ai/mcp.js +397 -0
  12. package/dist/cli.js +78 -0
  13. package/dist/core/cfg.js +87 -0
  14. package/dist/core/db.js +636 -0
  15. package/dist/core/memory.js +116 -0
  16. package/dist/core/migrate.js +227 -0
  17. package/dist/core/models.js +105 -0
  18. package/dist/core/telemetry.js +57 -0
  19. package/dist/core/types.js +2 -0
  20. package/dist/core/vector/postgres.js +52 -0
  21. package/dist/core/vector/valkey.js +246 -0
  22. package/dist/core/vector_store.js +2 -0
  23. package/dist/index.js +44 -0
  24. package/dist/memory/decay.js +301 -0
  25. package/dist/memory/embed.js +675 -0
  26. package/dist/memory/hsg.js +959 -0
  27. package/dist/memory/reflect.js +131 -0
  28. package/dist/memory/user_summary.js +99 -0
  29. package/dist/migrate.js +9 -0
  30. package/dist/ops/compress.js +255 -0
  31. package/dist/ops/dynamics.js +189 -0
  32. package/dist/ops/extract.js +333 -0
  33. package/dist/ops/ingest.js +214 -0
  34. package/dist/server/index.js +109 -0
  35. package/dist/server/middleware/auth.js +137 -0
  36. package/dist/server/routes/auth.js +186 -0
  37. package/dist/server/routes/compression.js +108 -0
  38. package/dist/server/routes/dashboard.js +399 -0
  39. package/dist/server/routes/docs.js +241 -0
  40. package/dist/server/routes/dynamics.js +312 -0
  41. package/dist/server/routes/ide.js +280 -0
  42. package/dist/server/routes/index.js +33 -0
  43. package/dist/server/routes/keys.js +132 -0
  44. package/dist/server/routes/langgraph.js +61 -0
  45. package/dist/server/routes/memory.js +213 -0
  46. package/dist/server/routes/sources.js +140 -0
  47. package/dist/server/routes/system.js +63 -0
  48. package/dist/server/routes/temporal.js +293 -0
  49. package/dist/server/routes/users.js +101 -0
  50. package/dist/server/routes/vercel.js +57 -0
  51. package/dist/server/server.js +211 -0
  52. package/dist/server.js +3 -0
  53. package/dist/sources/base.js +223 -0
  54. package/dist/sources/github.js +171 -0
  55. package/dist/sources/google_drive.js +166 -0
  56. package/dist/sources/google_sheets.js +112 -0
  57. package/dist/sources/google_slides.js +139 -0
  58. package/dist/sources/index.js +34 -0
  59. package/dist/sources/notion.js +165 -0
  60. package/dist/sources/onedrive.js +143 -0
  61. package/dist/sources/web_crawler.js +166 -0
  62. package/dist/temporal_graph/index.js +20 -0
  63. package/dist/temporal_graph/query.js +240 -0
  64. package/dist/temporal_graph/store.js +116 -0
  65. package/dist/temporal_graph/timeline.js +241 -0
  66. package/dist/temporal_graph/types.js +2 -0
  67. package/dist/utils/chunking.js +60 -0
  68. package/dist/utils/index.js +31 -0
  69. package/dist/utils/keyword.js +94 -0
  70. package/dist/utils/text.js +120 -0
  71. package/nodemon.json +7 -0
  72. package/package.json +50 -0
  73. package/references/api_reference.md +66 -0
  74. package/references/examples.md +45 -0
  75. package/src/ai/graph.ts +363 -0
  76. package/src/ai/mcp.ts +494 -0
  77. package/src/cli.ts +94 -0
  78. package/src/core/cfg.ts +110 -0
  79. package/src/core/db.ts +1052 -0
  80. package/src/core/memory.ts +99 -0
  81. package/src/core/migrate.ts +302 -0
  82. package/src/core/models.ts +107 -0
  83. package/src/core/telemetry.ts +47 -0
  84. package/src/core/types.ts +130 -0
  85. package/src/core/vector/postgres.ts +61 -0
  86. package/src/core/vector/valkey.ts +261 -0
  87. package/src/core/vector_store.ts +9 -0
  88. package/src/index.ts +5 -0
  89. package/src/memory/decay.ts +427 -0
  90. package/src/memory/embed.ts +707 -0
  91. package/src/memory/hsg.ts +1245 -0
  92. package/src/memory/reflect.ts +158 -0
  93. package/src/memory/user_summary.ts +110 -0
  94. package/src/migrate.ts +8 -0
  95. package/src/ops/compress.ts +296 -0
  96. package/src/ops/dynamics.ts +272 -0
  97. package/src/ops/extract.ts +360 -0
  98. package/src/ops/ingest.ts +286 -0
  99. package/src/server/index.ts +159 -0
  100. package/src/server/middleware/auth.ts +156 -0
  101. package/src/server/routes/auth.ts +223 -0
  102. package/src/server/routes/compression.ts +106 -0
  103. package/src/server/routes/dashboard.ts +420 -0
  104. package/src/server/routes/docs.ts +380 -0
  105. package/src/server/routes/dynamics.ts +516 -0
  106. package/src/server/routes/ide.ts +283 -0
  107. package/src/server/routes/index.ts +32 -0
  108. package/src/server/routes/keys.ts +131 -0
  109. package/src/server/routes/langgraph.ts +71 -0
  110. package/src/server/routes/memory.ts +440 -0
  111. package/src/server/routes/sources.ts +111 -0
  112. package/src/server/routes/system.ts +68 -0
  113. package/src/server/routes/temporal.ts +335 -0
  114. package/src/server/routes/users.ts +111 -0
  115. package/src/server/routes/vercel.ts +55 -0
  116. package/src/server/server.js +215 -0
  117. package/src/server.ts +1 -0
  118. package/src/sources/base.ts +257 -0
  119. package/src/sources/github.ts +156 -0
  120. package/src/sources/google_drive.ts +144 -0
  121. package/src/sources/google_sheets.ts +85 -0
  122. package/src/sources/google_slides.ts +115 -0
  123. package/src/sources/index.ts +19 -0
  124. package/src/sources/notion.ts +148 -0
  125. package/src/sources/onedrive.ts +131 -0
  126. package/src/sources/web_crawler.ts +161 -0
  127. package/src/temporal_graph/index.ts +4 -0
  128. package/src/temporal_graph/query.ts +299 -0
  129. package/src/temporal_graph/store.ts +156 -0
  130. package/src/temporal_graph/timeline.ts +319 -0
  131. package/src/temporal_graph/types.ts +41 -0
  132. package/src/utils/chunking.ts +66 -0
  133. package/src/utils/index.ts +25 -0
  134. package/src/utils/keyword.ts +137 -0
  135. package/src/utils/text.ts +115 -0
  136. package/tests/test_api_workspace_management.ts +413 -0
  137. package/tests/test_bulk_delete.ts +267 -0
  138. package/tests/test_omnibus.ts +166 -0
  139. package/tests/test_workspace_management.ts +278 -0
  140. package/tests/verify.ts +104 -0
  141. package/tsconfig.json +15 -0
@@ -0,0 +1,319 @@
1
+
2
+
3
+ import { all_async } from '../core/db'
4
+ import { TemporalFact, TimelineEntry } from './types'
5
+
6
+
7
/**
 * Builds the chronological event timeline for one subject.
 *
 * Every matching `temporal_facts` row yields a `'created'` entry stamped with
 * its `valid_from`, plus an `'invalidated'` entry stamped with `valid_to` when
 * that column is truthy.
 *
 * @param subject - Value matched against the `subject` column.
 * @param predicate - Optional value matched against the `predicate` column.
 * @returns Entries sorted by ascending timestamp.
 */
export const get_subject_timeline = async (
    subject: string,
    predicate?: string
): Promise<TimelineEntry[]> => {
    const filters = ['subject = ?']
    const args: any[] = [subject]
    if (predicate) {
        filters.push('predicate = ?')
        args.push(predicate)
    }

    const query = `
        SELECT subject, predicate, object, confidence, valid_from, valid_to
        FROM temporal_facts
        WHERE ${filters.join(' AND ')}
        ORDER BY valid_from ASC
    `

    const events: TimelineEntry[] = []
    // Appends one event for `row`, stamped with the given column value.
    const record = (row: any, at: any, change_type: TimelineEntry['change_type']): void => {
        events.push({
            timestamp: new Date(at),
            subject: row.subject,
            predicate: row.predicate,
            object: row.object,
            confidence: row.confidence,
            change_type
        })
    }

    const rows = await all_async(query, args)
    for (const row of rows) {
        record(row, row.valid_from, 'created')
        // Rows with no end of validity produce no invalidation event.
        if (row.valid_to) {
            record(row, row.valid_to, 'invalidated')
        }
    }

    events.sort((left, right) => left.timestamp.getTime() - right.timestamp.getTime())
    return events
}
55
+
56
+
57
/**
 * Builds the chronological event timeline for one predicate across all subjects.
 *
 * The optional bounds filter on `valid_from` only; a selected row still
 * contributes its `'invalidated'` event even if `valid_to` lies outside them.
 *
 * @param predicate - Value matched against the `predicate` column.
 * @param from - Optional inclusive lower bound on `valid_from`.
 * @param to - Optional inclusive upper bound on `valid_from`.
 * @returns Entries sorted by ascending timestamp.
 */
export const get_predicate_timeline = async (
    predicate: string,
    from?: Date,
    to?: Date
): Promise<TimelineEntry[]> => {
    const filters = ['predicate = ?']
    const args: any[] = [predicate]

    // Each supplied bound adds one clause and one epoch-millisecond argument.
    const bounds: Array<[Date | undefined, string]> = [
        [from, 'valid_from >= ?'],
        [to, 'valid_from <= ?']
    ]
    for (const [bound, clause] of bounds) {
        if (bound) {
            filters.push(clause)
            args.push(bound.getTime())
        }
    }

    const query = `
        SELECT subject, predicate, object, confidence, valid_from, valid_to
        FROM temporal_facts
        WHERE ${filters.join(' AND ')}
        ORDER BY valid_from ASC
    `

    const events: TimelineEntry[] = []
    // Appends one event for `row`, stamped with the given column value.
    const record = (row: any, at: any, change_type: TimelineEntry['change_type']): void => {
        events.push({
            timestamp: new Date(at),
            subject: row.subject,
            predicate: row.predicate,
            object: row.object,
            confidence: row.confidence,
            change_type
        })
    }

    const rows = await all_async(query, args)
    for (const row of rows) {
        record(row, row.valid_from, 'created')
        if (row.valid_to) {
            record(row, row.valid_to, 'invalidated')
        }
    }

    events.sort((left, right) => left.timestamp.getTime() - right.timestamp.getTime())
    return events
}
109
+
110
+
111
/**
 * Lists the creation and invalidation events that fall inside a time window.
 *
 * Rows are selected when either `valid_from` or `valid_to` lies within
 * `[from, to]` (inclusive). Each row then emits only the events whose own
 * timestamp is inside the window.
 *
 * @param from - Inclusive start of the window.
 * @param to - Inclusive end of the window.
 * @param subject - Optional value matched against the `subject` column.
 * @returns Entries sorted by ascending timestamp.
 */
export const get_changes_in_window = async (
    from: Date,
    to: Date,
    subject?: string
): Promise<TimelineEntry[]> => {
    const window_start = from.getTime()
    const window_end = to.getTime()

    const subject_clause = subject ? 'AND subject = ?' : ''
    const subject_args: any[] = subject ? [subject] : []

    const query = `
        SELECT subject, predicate, object, confidence, valid_from, valid_to
        FROM temporal_facts
        WHERE ((valid_from >= ? AND valid_from <= ?) OR (valid_to >= ? AND valid_to <= ?))
        ${subject_clause}
        ORDER BY valid_from ASC
    `

    const inside = (ts: any): boolean => ts >= window_start && ts <= window_end

    const events: TimelineEntry[] = []
    // Appends one event for `row`, stamped with the given column value.
    const record = (row: any, at: any, change_type: TimelineEntry['change_type']): void => {
        events.push({
            timestamp: new Date(at),
            subject: row.subject,
            predicate: row.predicate,
            object: row.object,
            confidence: row.confidence,
            change_type
        })
    }

    const rows = await all_async(query, [
        window_start,
        window_end,
        window_start,
        window_end,
        ...subject_args
    ])

    for (const row of rows) {
        // A row may have matched on only one of its two timestamps.
        if (inside(row.valid_from)) {
            record(row, row.valid_from, 'created')
        }
        if (row.valid_to && inside(row.valid_to)) {
            record(row, row.valid_to, 'invalidated')
        }
    }

    events.sort((left, right) => left.timestamp.getTime() - right.timestamp.getTime())
    return events
}
165
+
166
+
167
/**
 * Diffs the facts that were valid for a subject at two instants.
 *
 * A fact counts as valid at instant `t` when `valid_from <= t` and `valid_to`
 * is NULL or `>= t`. Each snapshot is indexed by predicate; when several rows
 * share a predicate, the last row returned by the query wins.
 *
 * @param subject - Value matched against the `subject` column.
 * @param time1 - The "before" instant.
 * @param time2 - The "after" instant.
 * @returns
 *  - `added`: predicates present only at `time2`
 *  - `removed`: predicates present only at `time1`
 *  - `changed`: predicates present at both whose `object` or row `id` differs
 *  - `unchanged`: predicates present at both with the same `object` and `id`
 */
export const compare_time_points = async (
    subject: string,
    time1: Date,
    time2: Date
): Promise<{
    added: TemporalFact[]
    removed: TemporalFact[]
    changed: Array<{ before: TemporalFact; after: TemporalFact }>
    unchanged: TemporalFact[]
}> => {
    const first_ts = time1.getTime()
    const second_ts = time2.getTime()

    const snapshot_sql = `
        SELECT id, subject, predicate, object, valid_from, valid_to, confidence, last_updated, metadata
        FROM temporal_facts
        WHERE subject = ?
        AND valid_from <= ? AND (valid_to IS NULL OR valid_to >= ?)
    `

    // Loads the rows valid at `ts`, keyed by predicate (later rows overwrite earlier ones).
    const load_snapshot = async (ts: number): Promise<Map<string, any>> => {
        const by_predicate = new Map<string, any>()
        const rows = await all_async(snapshot_sql, [subject, ts, ts])
        for (const row of rows) {
            by_predicate.set(row.predicate, row)
        }
        return by_predicate
    }

    const earlier = await load_snapshot(first_ts)
    const later = await load_snapshot(second_ts)

    const added: TemporalFact[] = []
    const removed: TemporalFact[] = []
    const changed: Array<{ before: TemporalFact; after: TemporalFact }> = []
    const unchanged: TemporalFact[] = []

    // Classify everything visible at the later instant.
    for (const [pred, after_row] of later) {
        const before_row = earlier.get(pred)
        if (!before_row) {
            added.push(row_to_fact(after_row))
            continue
        }
        const same_row = before_row.object === after_row.object && before_row.id === after_row.id
        if (same_row) {
            unchanged.push(row_to_fact(after_row))
        } else {
            changed.push({
                before: row_to_fact(before_row),
                after: row_to_fact(after_row)
            })
        }
    }

    // Whatever existed earlier but has no counterpart later was removed.
    for (const [pred, before_row] of earlier) {
        if (later.has(pred)) continue
        removed.push(row_to_fact(before_row))
    }

    return { added, removed, changed, unchanged }
}
235
+
236
+
237
/**
 * Measures how often a (subject, predicate) pair changed in a trailing window.
 *
 * Counts the `temporal_facts` rows for the pair whose `valid_from` is at or
 * after `now - window_days` days, and averages `valid_to - valid_from` over the
 * rows that have a truthy `valid_to`.
 *
 * @param subject - Value matched against the `subject` column.
 * @param predicate - Value matched against the `predicate` column.
 * @param window_days - Length of the trailing window in days (default 30).
 * @returns
 *  - `predicate`: echoed back
 *  - `total_changes`: number of rows found in the window
 *  - `avg_duration_ms`: mean validity duration of closed rows, or 0 if none
 *  - `change_rate_per_day`: `total_changes / window_days`, or 0 when
 *    `window_days` is not a positive number
 */
export const get_change_frequency = async (
    subject: string,
    predicate: string,
    window_days: number = 30
): Promise<{
    predicate: string
    total_changes: number
    avg_duration_ms: number
    change_rate_per_day: number
}> => {
    const ms_per_day = 86400000
    const now = Date.now()
    const window_start = now - (window_days * ms_per_day)

    const rows = await all_async(`
        SELECT valid_from, valid_to
        FROM temporal_facts
        WHERE subject = ? AND predicate = ?
        AND valid_from >= ?
        ORDER BY valid_from ASC
    `, [subject, predicate, window_start])

    const total_changes = rows.length
    let total_duration = 0
    let valid_durations = 0

    for (const row of rows) {
        // Still-open facts have no duration yet and are left out of the average.
        if (row.valid_to) {
            total_duration += row.valid_to - row.valid_from
            valid_durations++
        }
    }

    const avg_duration_ms = valid_durations > 0 ? total_duration / valid_durations : 0
    // Guard the division: a zero, negative or NaN window would otherwise
    // produce NaN, Infinity or a negative rate.
    const change_rate_per_day = window_days > 0 ? total_changes / window_days : 0

    return {
        predicate,
        total_changes,
        avg_duration_ms,
        change_rate_per_day
    }
}
279
+
280
+
281
/**
 * Ranks (subject, predicate) pairs by how many fact rows they have.
 *
 * Only pairs with more than one row are returned, ordered by descending row
 * count and then ascending average confidence.
 *
 * @param subject - Optional value matched against the `subject` column.
 * @param limit - Maximum number of pairs to return (default 10).
 * @returns Rows with `subject`, `predicate`, `change_count` and `avg_confidence`.
 */
export const get_volatile_facts = async (
    subject?: string,
    limit: number = 10
): Promise<Array<{
    subject: string
    predicate: string
    change_count: number
    avg_confidence: number
}>> => {
    const where = subject ? 'WHERE subject = ?' : ''
    const params = subject ? [subject] : []

    // HAVING repeats the aggregate instead of using the SELECT alias: SQLite
    // accepts the alias there, but PostgreSQL does not, and the aggregate
    // form means the same thing on both.
    const sql = `
        SELECT subject, predicate, COUNT(*) as change_count, AVG(confidence) as avg_confidence
        FROM temporal_facts
        ${where}
        GROUP BY subject, predicate
        HAVING COUNT(*) > 1
        ORDER BY change_count DESC, avg_confidence ASC
        LIMIT ?
    `

    return await all_async(sql, [...params, limit])
}
305
+
306
/**
 * Converts a raw `temporal_facts` row into a `TemporalFact`.
 *
 * Timestamp columns are wrapped in `Date`; a falsy `valid_to` becomes `null`.
 * A truthy `metadata` column is parsed with `JSON.parse`, otherwise the field
 * is `undefined`.
 *
 * @param row - Row carrying the columns selected by the snapshot queries.
 * @throws SyntaxError if `metadata` is truthy but not valid JSON.
 */
function row_to_fact(row: any): TemporalFact {
    const { id, subject, predicate, object, confidence } = row
    const valid_from = new Date(row.valid_from)
    const valid_to = row.valid_to ? new Date(row.valid_to) : null
    const last_updated = new Date(row.last_updated)
    const metadata = row.metadata ? JSON.parse(row.metadata) : undefined

    return {
        id,
        subject,
        predicate,
        object,
        valid_from,
        valid_to,
        confidence,
        last_updated,
        metadata
    }
}
@@ -0,0 +1,41 @@
1
/**
 * A subject–predicate–object statement together with the interval during
 * which it is considered valid.
 */
export interface TemporalFact {
    /** Row identifier of the fact. */
    id: string;
    subject: string;
    predicate: string;
    object: string;
    /** Instant from which the fact is valid. */
    valid_from: Date;
    /** Instant at which the fact stopped being valid; `null` while it has no end. */
    valid_to: Date | null;
    confidence: number;
    last_updated: Date;
    /** Optional free-form data attached to the fact. */
    metadata?: Record<string, any>;
}
12
+
13
/**
 * A weighted, typed link between two identifiers with a validity interval.
 *
 * NOTE(review): no code visible alongside this declaration reads the type;
 * presumably `source_id` / `target_id` refer to `TemporalFact.id` values —
 * confirm against the store module.
 */
export interface TemporalEdge {
    id: string;
    source_id: string;
    target_id: string;
    relation_type: string;
    valid_from: Date;
    /** `null` when the edge has no recorded end of validity. */
    valid_to: Date | null;
    weight: number;
    /** Optional free-form data attached to the edge. */
    metadata?: Record<string, any>;
}
23
+
24
/** One dated event in a fact timeline. */
export interface TimelineEntry {
    /** Instant at which the event happened. */
    timestamp: Date;
    subject: string;
    predicate: string;
    object: string;
    confidence: number;
    /** What happened to the fact at `timestamp`. */
    change_type: 'created' | 'updated' | 'invalidated';
}
32
+
33
/**
 * Optional filters for looking up temporal facts; every field may be omitted.
 *
 * NOTE(review): the consumer of this type is not visible here. Presumably `at`
 * selects a single instant while `from` / `to` select a range — confirm
 * against the query module.
 */
export interface TemporalQuery {
    subject?: string;
    predicate?: string;
    object?: string;
    at?: Date;
    from?: Date;
    to?: Date;
    min_confidence?: number;
}
@@ -0,0 +1,66 @@
1
/** One slice of a longer text, as produced by `chunk_text`. */
export type chunk = {
    /** Text of the slice. */
    text: string;
    /** Offset of the first character of `text`. */
    start: number;
    /** Offset one past the last character of `text`. */
    end: number;
    /** Estimated token count of `text`. */
    tokens: number;
};

// Heuristic used for token estimates: four characters per token.
const cpt = 4;
const est = (t: string) => Math.ceil(t.length / cpt);

/**
 * Splits text into chunks of roughly `tgt` estimated tokens, breaking only at
 * sentence boundaries and carrying a tail of each chunk into the next one.
 *
 * Text that already fits in `tgt` tokens is returned as a single chunk.
 * Otherwise paragraphs (split on blank lines) are split into sentences, which
 * are packed greedily and joined by single spaces. A single sentence longer
 * than the budget is kept whole.
 *
 * `start` / `end` are offsets into the space-joined sentence stream. They match
 * offsets into `txt` exactly only when sentences in `txt` are themselves
 * separated by single spaces.
 *
 * @param txt - Text to split.
 * @param tgt - Target chunk size in estimated tokens (default 768).
 * @param ovr - Fraction of the character budget repeated at the start of the
 *   next chunk (default 0.1). Zero, negative or NaN disables overlap.
 * @returns Chunks in document order.
 */
export const chunk_text = (txt: string, tgt = 768, ovr = 0.1): chunk[] => {
    const tot = est(txt);
    if (tot <= tgt) {
        return [{ text: txt, start: 0, end: txt.length, tokens: tot }];
    }

    const tch = tgt * cpt;
    const och = Math.max(0, Math.floor(tch * ovr));

    const chks: chunk[] = [];
    let cur = "";
    let cs = 0;

    for (const p of txt.split(/\n\n+/)) {
        for (const s of p.split(/(?<=[.!?])\s+/)) {
            const pot = cur + (cur ? " " : "") + s;
            if (pot.length > tch && cur.length > 0) {
                chks.push({
                    text: cur,
                    start: cs,
                    end: cs + cur.length,
                    tokens: est(cur),
                });
                // slice(-0) would return the whole string, so a zero overlap
                // must be handled explicitly.
                const ovt = och > 0 ? cur.slice(-och) : "";
                // Compute the next start from the chunk just emitted, before
                // `cur` is replaced. With overlap it is where the repeated tail
                // began; without overlap it is just past the joining space.
                cs = ovt ? cs + cur.length - ovt.length : cs + cur.length + 1;
                cur = ovt ? ovt + " " + s : s;
            } else {
                cur = pot;
            }
        }
    }

    if (cur.length > 0) {
        chks.push({
            text: cur,
            start: cs,
            end: cs + cur.length,
            tokens: est(cur),
        });
    }
    return chks;
};
51
+
52
/**
 * Averages a list of vectors component-wise.
 *
 * The output has the dimension of the first vector. A single input vector is
 * returned as a copy.
 *
 * @param vecs - Vectors to average.
 * @returns The component-wise mean.
 * @throws Error with message "no vecs" when the list is empty.
 */
export const agg_vec = (vecs: number[][]): number[] => {
    const count = vecs.length;
    if (count === 0) throw new Error("no vecs");
    if (count === 1) return vecs[0].slice();

    const dim = vecs[0].length;
    const totals: number[] = new Array(dim).fill(0);
    vecs.forEach((vec) => {
        for (let k = 0; k < dim; k++) totals[k] += vec[k];
    });

    // Multiply by the reciprocal once rather than dividing per component.
    const scale = 1 / count;
    for (let k = 0; k < dim; k++) totals[k] *= scale;
    return totals;
};
64
+
65
/**
 * Concatenates the text of the given chunks, separated by single spaces.
 *
 * @param cks - Chunks to join, in order.
 * @returns The joined text, or an empty string for an empty list.
 */
export const join_chunks = (cks: chunk[]) => {
    const texts: string[] = [];
    for (const piece of cks) {
        texts.push(piece.text);
    }
    return texts.join(" ");
};
@@ -0,0 +1,25 @@
1
/** Returns the current time as milliseconds since the Unix epoch. */
export const now = (): number => {
    return Date.now();
};

/** Returns a freshly generated random UUID string from the global `crypto` object. */
export const rid = (): string => {
    return crypto.randomUUID();
};
3
/**
 * Computes the cosine similarity of two vectors.
 *
 * Iterates over the length of `a` and reads `b` at the same indices.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 when that denominator is zero or NaN.
 */
export const cos_sim = (a: Float32Array, b: Float32Array): number => {
    let dot = 0;
    let norm_a = 0;
    let norm_b = 0;

    let idx = 0;
    while (idx < a.length) {
        const x = a[idx];
        const y = b[idx];
        dot += x * y;
        norm_a += x * x;
        norm_b += y * y;
        idx += 1;
    }

    const denom = Math.sqrt(norm_a) * Math.sqrt(norm_b);
    if (!denom) return 0;
    return dot / denom;
};
17
/** Shorthand alias for `JSON.stringify`. */
export const j: typeof JSON.stringify = JSON.stringify;

/**
 * Shorthand for `JSON.parse` with a caller-chosen result type.
 *
 * The type parameter is an unchecked assertion; nothing validates the parsed
 * value against it.
 *
 * @throws SyntaxError if `x` is not valid JSON.
 */
export const p = <t = any>(x: string): t => {
    return JSON.parse(x);
};
19
/**
 * Packs a numeric vector into a Buffer of 32-bit floats.
 *
 * Values are narrowed to float32 and laid out in platform byte order, four
 * bytes per element.
 *
 * @param v - Vector to pack.
 * @returns A Buffer of `v.length * 4` bytes.
 */
export const vec_to_buf = (v: number[]): Buffer => {
    const floats = new Float32Array(v);
    const raw = floats.buffer;
    return Buffer.from(raw);
};
23
/**
 * Reinterprets a Buffer of 32-bit floats as a `Float32Array`.
 *
 * When the Buffer starts on a 4-byte boundary of its backing ArrayBuffer, the
 * result is a zero-copy view sharing memory with it. Otherwise the bytes are
 * copied, because a `Float32Array` view cannot start at an unaligned offset —
 * the constructor throws RangeError — and sliced or pooled Buffers can have
 * such offsets.
 *
 * @param buf - Buffer holding floats in platform byte order.
 * @returns `floor(buf.byteLength / 4)` floats; trailing bytes are ignored.
 */
export const buf_to_vec = (buf: Buffer): Float32Array => {
    const count = Math.floor(buf.byteLength / 4);

    if (buf.byteOffset % Float32Array.BYTES_PER_ELEMENT === 0) {
        return new Float32Array(buf.buffer, buf.byteOffset, count);
    }

    // Unaligned: copy into a fresh, aligned buffer.
    const aligned = new Uint8Array(count * 4);
    aligned.set(buf.subarray(0, count * 4));
    return new Float32Array(aligned.buffer);
};
@@ -0,0 +1,137 @@
1
+ import { canonical_tokens_from_text } from "./text";
2
+ import { env } from "../core/cfg";
3
+
4
/**
 * Result record for a keyword match.
 *
 * NOTE(review): nothing visible alongside this declaration produces the type;
 * presumably `id` is a memory id as in `keyword_filter_memories` — confirm.
 */
export interface keyword_match {
    id: string;
    score: number;
    /** Terms that matched. */
    matched_terms: string[];
}
9
+
10
/**
 * Derives a keyword set from text for fuzzy overlap scoring.
 *
 * Working from the canonical tokens of `text`, the set receives:
 *  - each token at least `min_length` long, plus every 3-character window of it;
 *  - each adjacent token pair joined by `_`, if the joined string is at least
 *    `min_length` long;
 *  - each adjacent token triple joined by `_`, unconditionally.
 *
 * @param text - Source text.
 * @param min_length - Minimum length for tokens and pairs (default 3).
 * @returns The set of keywords.
 */
export function extract_keywords(
    text: string,
    min_length: number = 3,
): Set<string> {
    const tokens = canonical_tokens_from_text(text);
    const found = new Set<string>();

    // Whole tokens and their character trigrams.
    for (const word of tokens) {
        if (word.length < min_length) continue;
        found.add(word);
        for (let end = 3; end <= word.length; end++) {
            found.add(word.slice(end - 3, end));
        }
    }

    // Word bigrams.
    for (let k = 1; k < tokens.length; k++) {
        const pair = `${tokens[k - 1]}_${tokens[k]}`;
        if (pair.length >= min_length) {
            found.add(pair);
        }
    }

    // Word trigrams.
    for (let k = 2; k < tokens.length; k++) {
        found.add(`${tokens[k - 2]}_${tokens[k - 1]}_${tokens[k]}`);
    }

    return found;
}
43
+
44
/**
 * Scores how much of the query's keyword set appears in the content's.
 *
 * Keywords containing `_` weigh 2.0, all others 1.0. The score is the matched
 * weight divided by the total query weight.
 *
 * @param query_keywords - Keywords derived from the query.
 * @param content_keywords - Keywords derived from the content.
 * @returns A value in [0, 1]; 0 when the query set is empty.
 */
export function compute_keyword_overlap(
    query_keywords: Set<string>,
    content_keywords: Set<string>,
): number {
    const weight_of = (kw: string): number => (kw.includes("_") ? 2.0 : 1.0);

    let matched = 0;
    let possible = 0;
    query_keywords.forEach((kw) => {
        const w = weight_of(kw);
        possible += w;
        if (content_keywords.has(kw)) {
            matched += w;
        }
    });

    return possible === 0 ? 0 : matched / possible;
}
62
+
63
/**
 * Tells whether the query occurs verbatim in the content, ignoring case.
 *
 * The query is lower-cased and trimmed; the content is only lower-cased.
 * A query that is empty after trimming never matches. Without that check,
 * `includes("")` would report a match against every content string.
 *
 * @param query - Phrase to look for.
 * @param content - Text to search in.
 * @returns `true` when the normalised query is a non-empty substring of the content.
 */
export function exact_phrase_match(query: string, content: string): boolean {
    const q_norm = query.toLowerCase().trim();
    if (q_norm.length === 0) return false;
    return content.toLowerCase().includes(q_norm);
}
68
+
69
/**
 * Computes a BM25-style relevance score of one document for a query.
 *
 * Uses k1 = 1.5 and b = 0.75. Query terms absent from the document add
 * nothing; a term repeated in the query is counted each time it occurs.
 *
 * NOTE(review): the IDF factor here is `log((corpus_size + 1) / (tf + 0.5))`,
 * built from the in-document term frequency, not from a document frequency
 * across a corpus.
 *
 * @param query_terms - Tokens of the query.
 * @param content_terms - Tokens of the document.
 * @param corpus_size - Corpus size fed into the IDF factor (default 10000).
 * @param avg_doc_length - Average document length in tokens (default 100).
 * @returns The summed score; 0 when no query term occurs in the document.
 */
export function compute_bm25_score(
    query_terms: string[],
    content_terms: string[],
    corpus_size: number = 10000,
    avg_doc_length: number = 100,
): number {
    const k1 = 1.5;
    const b = 0.75;

    const counts = new Map<string, number>();
    content_terms.forEach((term) => {
        counts.set(term, (counts.get(term) || 0) + 1);
    });

    // Length normalisation is the same for every term of this document.
    const length_norm = k1 * (1 - b + b * (content_terms.length / avg_doc_length));

    return query_terms.reduce((total, term) => {
        const tf = counts.get(term) || 0;
        if (tf === 0) return total;

        const idf = Math.log((corpus_size + 1) / (tf + 0.5));
        return total + idf * ((tf * (k1 + 1)) / (tf + length_norm));
    }, 0);
}
100
+
101
/**
 * Scores memories against a query with three lexical signals and keeps those
 * above a threshold.
 *
 * Per memory the score is the sum of:
 *  - 1.0 if the query occurs verbatim in the content (`exact_phrase_match`);
 *  - 0.8 × keyword overlap (`compute_keyword_overlap`);
 *  - 0.5 × `min(1, bm25 / 10)` (`compute_bm25_score` with default corpus settings).
 *
 * @param query - Search query.
 * @param all_memories - Candidate memories with their text content.
 * @param threshold - Scores must be strictly greater than this (default 0.1).
 * @returns Map from memory id to score, in input order.
 */
export async function keyword_filter_memories(
    query: string,
    all_memories: Array<{ id: string; content: string }>,
    threshold: number = 0.1,
): Promise<Map<string, number>> {
    // Query-side features are computed once and reused for every memory.
    const query_keywords = extract_keywords(query, env.keyword_min_length);
    const query_terms = canonical_tokens_from_text(query);

    const kept = new Map<string, number>();

    all_memories.forEach(({ id, content }) => {
        let total = 0;

        if (exact_phrase_match(query, content)) {
            total += 1.0;
        }

        const overlap = compute_keyword_overlap(
            query_keywords,
            extract_keywords(content, env.keyword_min_length),
        );
        total += overlap * 0.8;

        const bm25 = compute_bm25_score(query_terms, canonical_tokens_from_text(content));
        total += Math.min(1.0, bm25 / 10) * 0.5;

        if (total > threshold) {
            kept.set(id, total);
        }
    });

    return kept;
}
@@ -0,0 +1,115 @@
1
// Synonym groups. The first word of each group is its canonical form.
const syn_grps = [
    ["prefer", "like", "love", "enjoy", "favor"],
    ["theme", "mode", "style", "layout"],
    ["meeting", "meet", "session", "call", "sync"],
    ["dark", "night", "black"],
    ["light", "bright", "day"],
    ["user", "person", "people", "customer"],
    ["task", "todo", "job"],
    ["note", "memo", "reminder"],
    ["time", "schedule", "when", "date"],
    ["project", "initiative", "plan"],
    ["issue", "problem", "bug"],
    ["document", "doc", "file"],
    ["question", "query", "ask"],
];

// cmap: every group member -> the group's canonical word.
const cmap = new Map<string, string>();
// slook: canonical word -> the full set of words in its group.
const slook = new Map<string, Set<string>>();

syn_grps.forEach((members) => {
    const head = members[0];
    slook.set(head, new Set(members));
    members.forEach((word) => {
        cmap.set(word, head);
    });
});
27
+
28
// Suffix rewrite rules for `stem`, tried in order; each pairs a suffix
// pattern with its replacement.
const stem_rules: [RegExp, string][] = [
    [/ies$/, "y"],
    [/ing$/, ""],
    [/ers?$/, "er"],
    [/ed$/, ""],
    [/s$/, ""],
];

// Matches one run of ASCII letters or digits. The pattern is global, so it
// carries `lastIndex` state between `exec` calls.
const tok_pat = /[a-z0-9]+/gi;
36
+
37
/**
 * Splits text into lower-cased alphanumeric tokens.
 *
 * Every maximal run matched by `tok_pat` becomes one token; all other
 * characters act as separators.
 *
 * @param text - Text to tokenize.
 * @returns Tokens in order of appearance.
 */
export const tokenize = (text: string): string[] => {
    const found: string[] = [];
    // `tok_pat` is global: exec advances through `text` and returns null at
    // the end, which also resets its position for the next call.
    for (let hit = tok_pat.exec(text); hit !== null; hit = tok_pat.exec(text)) {
        found.push(hit[0].toLowerCase());
    }
    return found;
};
45
+
46
/**
 * Strips a common English suffix from a token using `stem_rules`.
 *
 * Tokens of three characters or fewer are returned untouched. Rules are tried
 * in order, and the first one that matches and leaves at least three
 * characters wins. If none does, the token is returned unchanged.
 *
 * @param tok - Token to stem.
 * @returns The stemmed token.
 */
const stem = (tok: string): string => {
    if (tok.length > 3) {
        for (const rule of stem_rules) {
            const [suffix, replacement] = rule;
            if (!suffix.test(tok)) continue;

            const candidate = tok.replace(suffix, replacement);
            if (candidate.length >= 3) return candidate;
        }
    }
    return tok;
};
56
+
57
/**
 * Maps a token to its canonical form.
 *
 * The lower-cased token is looked up in the synonym map first. If it is not
 * there, it is stemmed and the stem is looked up; when the stem has no entry
 * either, the stem itself is the result.
 *
 * @param tok - Token to canonicalize.
 * @returns The canonical token, or an empty string for an empty input.
 */
export const canonicalize_token = (tok: string): string => {
    if (!tok) return "";

    const lowered = tok.toLowerCase();
    const direct = cmap.get(lowered);
    if (direct !== undefined) return direct;

    const stemmed = stem(lowered);
    const via_stem = cmap.get(stemmed);
    return via_stem ? via_stem : stemmed;
};
64
+
65
/**
 * Tokenizes text and canonicalizes every token.
 *
 * Canonical forms of one character or less are dropped. Duplicates and order
 * are preserved.
 *
 * @param text - Text to process.
 * @returns Canonical tokens in order of appearance.
 */
export const canonical_tokens_from_text = (text: string): string[] => {
    return tokenize(text)
        .map((raw) => canonicalize_token(raw))
        .filter((canonical) => Boolean(canonical) && canonical.length > 1);
};
75
+
76
/**
 * Returns the synonym group a token belongs to.
 *
 * @param tok - Token to look up; it is canonicalized first.
 * @returns The group's shared set when the canonical form heads a synonym
 *   group, otherwise a new single-element set holding the canonical form.
 */
export const synonyms_for = (tok: string): Set<string> => {
    const canonical = canonicalize_token(tok);
    const group = slook.get(canonical);
    if (group) return group;
    return new Set([canonical]);
};
80
+
81
/**
 * Builds a synonym-expanded, de-duplicated search document from text.
 *
 * Each canonical token is emitted once. A token that heads a synonym group is
 * followed by the other members of that group.
 *
 * @param text - Source text.
 * @returns The expanded terms joined by single spaces.
 */
export const build_search_doc = (text: string): string => {
    const expanded = new Set<string>();
    canonical_tokens_from_text(text).forEach((term) => {
        expanded.add(term);
        const group = slook.get(term);
        if (!group) return;
        for (const member of group) {
            expanded.add(member);
        }
    });
    return [...expanded].join(" ");
};
93
+
94
/**
 * Builds a query string that ORs together the canonical tokens of the text.
 *
 * Tokens are de-duplicated and each is wrapped in double quotes.
 *
 * @param text - Raw query text.
 * @returns The `"a" OR "b"` style expression, or an empty string when the
 *   text yields no tokens.
 */
export const build_fts_query = (text: string): string => {
    const terms = canonical_tokens_from_text(text);
    if (terms.length === 0) return "";

    const distinct = new Set<string>();
    for (const term of terms) {
        if (term.length > 1) distinct.add(term);
    }

    const quoted: string[] = [];
    distinct.forEach((term) => quoted.push(`"${term}"`));
    return quoted.join(" OR ");
};
100
+
101
/**
 * Returns the distinct canonical tokens of the text.
 *
 * @param text - Source text.
 * @returns A set of canonical tokens in first-appearance order.
 */
export const canonical_token_set = (text: string): Set<string> => {
    const distinct = new Set<string>();
    for (const term of canonical_tokens_from_text(text)) {
        distinct.add(term);
    }
    return distinct;
};
104
+
105
/**
 * Copies tokens into a set, adding the canonical form of each synonym-group
 * member for tokens that head a group.
 *
 * NOTE(review): every group member is canonicalized before insertion, so for a
 * token that heads a group the additions appear to collapse back to that same
 * token — confirm whether raw synonyms were intended.
 *
 * @param toks - Tokens to expand.
 * @returns A new set containing the input tokens and the canonicalized
 *   group members.
 */
export const add_synonym_tokens = (toks: Iterable<string>): Set<string> => {
    const expanded = new Set<string>();
    for (const term of toks) {
        expanded.add(term);
        const group = slook.get(term);
        if (!group) continue;
        for (const member of group) {
            expanded.add(canonicalize_token(member));
        }
    }
    return expanded;
};