@teammates/recall 0.4.1 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/embeddings.test.d.ts +1 -0
- package/dist/embeddings.test.js +81 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +3 -1
- package/dist/indexer.d.ts +5 -0
- package/dist/indexer.js +24 -3
- package/dist/indexer.test.js +65 -3
- package/dist/memory-index.d.ts +34 -0
- package/dist/memory-index.js +118 -0
- package/dist/memory-index.test.d.ts +1 -0
- package/dist/memory-index.test.js +96 -0
- package/dist/query-expansion.d.ts +20 -0
- package/dist/query-expansion.js +92 -0
- package/dist/query-expansion.test.d.ts +1 -0
- package/dist/query-expansion.test.js +79 -0
- package/dist/search.d.ts +20 -0
- package/dist/search.js +50 -1
- package/dist/search.test.js +263 -19
- package/package.json +1 -1
- package/src/embeddings.test.ts +106 -0
- package/src/index.ts +9 -1
- package/src/indexer.test.ts +78 -3
- package/src/indexer.ts +26 -3
- package/src/memory-index.test.ts +149 -0
- package/src/memory-index.ts +151 -0
- package/src/query-expansion.test.ts +90 -0
- package/src/query-expansion.ts +105 -0
- package/src/search.test.ts +386 -49
- package/src/search.ts +67 -1
package/src/indexer.test.ts
CHANGED
|
@@ -107,9 +107,11 @@ describe("Indexer", () => {
|
|
|
107
107
|
expect(uris).toContain("beacon/memory/project_goals.md");
|
|
108
108
|
});
|
|
109
109
|
|
|
110
|
-
it("
|
|
110
|
+
it("includes older daily logs but skips today's", async () => {
|
|
111
111
|
const memDir = join(testDir, "beacon", "memory");
|
|
112
112
|
await mkdir(memDir, { recursive: true });
|
|
113
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
114
|
+
await writeFile(join(memDir, `${today}.md`), "# Today");
|
|
113
115
|
await writeFile(join(memDir, "2026-03-14.md"), "# Day 1");
|
|
114
116
|
await writeFile(join(memDir, "2026-03-15.md"), "# Day 2");
|
|
115
117
|
await writeFile(join(memDir, "feedback_testing.md"), "# Feedback");
|
|
@@ -118,8 +120,9 @@ describe("Indexer", () => {
|
|
|
118
120
|
const { files } = await indexer.collectFiles("beacon");
|
|
119
121
|
|
|
120
122
|
const uris = files.map((f) => f.uri);
|
|
121
|
-
expect(uris).not.toContain(
|
|
122
|
-
expect(uris).
|
|
123
|
+
expect(uris).not.toContain(`beacon/memory/${today}.md`);
|
|
124
|
+
expect(uris).toContain("beacon/memory/2026-03-14.md");
|
|
125
|
+
expect(uris).toContain("beacon/memory/2026-03-15.md");
|
|
123
126
|
expect(uris).toContain("beacon/memory/feedback_testing.md");
|
|
124
127
|
});
|
|
125
128
|
|
|
@@ -230,6 +233,78 @@ describe("Indexer", () => {
|
|
|
230
233
|
});
|
|
231
234
|
});
|
|
232
235
|
|
|
236
|
+
describe("upsertFile", () => {
  it("upserts a single file into a new index", async () => {
    // Arrange: one teammate folder holding a single WISDOM.md.
    const teammateDir = join(testDir, "beacon");
    const wisdomPath = join(teammateDir, "WISDOM.md");
    await mkdir(teammateDir, { recursive: true });
    await writeFile(wisdomPath, "# Upsert test wisdom");

    const indexer = createIndexer(testDir);
    await indexer.upsertFile("beacon", wisdomPath);

    // Syncing reads the index back, so a positive count shows the upsert created it.
    const synced = await indexer.syncTeammate("beacon");
    expect(synced).toBeGreaterThanOrEqual(1);
  });

  it("upserts into an existing index without rebuilding", async () => {
    const teammateDir = join(testDir, "beacon");
    const memoryDir = join(teammateDir, "memory");
    await mkdir(memoryDir, { recursive: true });
    await writeFile(join(teammateDir, "WISDOM.md"), "# Wisdom");

    // Start from a fully built index containing only WISDOM.md.
    const indexer = createIndexer(testDir);
    await indexer.indexTeammate("beacon");

    // Add a second document through the incremental path.
    const feedbackPath = join(memoryDir, "feedback_test.md");
    await writeFile(feedbackPath, "# New feedback content");
    await indexer.upsertFile("beacon", feedbackPath);

    // Both the original and the upserted file should be counted by a sync.
    const synced = await indexer.syncTeammate("beacon");
    expect(synced).toBe(2);
  });

  it("skips empty files", async () => {
    const teammateDir = join(testDir, "beacon");
    const blankPath = join(teammateDir, "WISDOM.md");
    await mkdir(teammateDir, { recursive: true });
    await writeFile(blankPath, " "); // whitespace only

    // The call must resolve rather than reject; a rejection fails the test.
    const indexer = createIndexer(testDir);
    await indexer.upsertFile("beacon", blankPath);
  });
});
|
|
282
|
+
|
|
283
|
+
describe("syncAll", () => {
  it("syncs all discovered teammates", async () => {
    // Each teammate gets a SOUL.md and a WISDOM.md; the test expects a count of 1 per teammate.
    const seedTeammate = async (name: string, title: string): Promise<void> => {
      const dir = join(testDir, name);
      await mkdir(dir, { recursive: true });
      await writeFile(join(dir, "SOUL.md"), `# ${title}`);
      await writeFile(join(dir, "WISDOM.md"), `# ${title} wisdom`);
    };
    await seedTeammate("beacon", "Beacon");
    await seedTeammate("scribe", "Scribe");

    const counts = await createIndexer(testDir).syncAll();

    expect(counts.get("beacon")).toBe(1);
    expect(counts.get("scribe")).toBe(1);
  });

  it("returns empty map when no teammates exist", async () => {
    // Nothing has been written under testDir, so there is nothing to discover.
    const counts = await createIndexer(testDir).syncAll();
    expect(counts.size).toBe(0);
  });
});
|
|
307
|
+
|
|
233
308
|
describe("syncTeammate", () => {
|
|
234
309
|
it("falls back to full index when no index exists", async () => {
|
|
235
310
|
const beacon = join(testDir, "beacon");
|
package/src/indexer.ts
CHANGED
|
@@ -74,15 +74,19 @@ export class Indexer {
|
|
|
74
74
|
// No WISDOM.md
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
-
// memory/*.md — typed memories
|
|
77
|
+
// memory/*.md — typed memories + daily logs (day 2+)
|
|
78
|
+
// Today's daily log is excluded (still being written). Older dailies are
|
|
79
|
+
// indexed so recall can surface high-resolution episodic context beyond
|
|
80
|
+
// the 7-day prompt window. Dailies older than 30 days are purged elsewhere.
|
|
78
81
|
const memoryDir = path.join(teammateDir, "memory");
|
|
82
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
79
83
|
try {
|
|
80
84
|
const memoryEntries = await fs.readdir(memoryDir);
|
|
81
85
|
for (const entry of memoryEntries) {
|
|
82
86
|
if (!entry.endsWith(".md")) continue;
|
|
83
87
|
const stem = path.basename(entry, ".md");
|
|
84
|
-
// Skip daily
|
|
85
|
-
if (
|
|
88
|
+
// Skip today's daily log — it's still being written and already in prompt context
|
|
89
|
+
if (stem === today) continue;
|
|
86
90
|
files.push({
|
|
87
91
|
uri: `${teammate}/memory/${entry}`,
|
|
88
92
|
absolutePath: path.join(memoryDir, entry),
|
|
@@ -222,6 +226,25 @@ export class Indexer {
|
|
|
222
226
|
return count;
|
|
223
227
|
}
|
|
224
228
|
|
|
229
|
+
/**
|
|
230
|
+
* Delete a document from a teammate's index by URI.
|
|
231
|
+
* Used to purge stale daily logs after they age out on disk.
|
|
232
|
+
*/
|
|
233
|
+
async deleteDocument(teammate: string, uri: string): Promise<void> {
  const store = new LocalDocumentIndex({
    folderPath: this.indexPath(teammate),
    embeddings: this._embeddings,
  });

  // Nothing to purge when this teammate has no index on disk yet.
  const indexExists = await store.isIndexCreated();
  if (!indexExists) return;

  // Only issue the delete when the URI maps to a stored document.
  const existingId = await store.getDocumentId(uri);
  if (!existingId) return;

  await store.deleteDocument(uri);
}
|
|
247
|
+
|
|
225
248
|
/**
|
|
226
249
|
* Sync indexes for all teammates.
|
|
227
250
|
*/
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import * as fs from "node:fs/promises";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { afterAll, beforeAll, describe, expect, it } from "vitest";
|
|
4
|
+
import { matchMemoryCatalog, scanMemoryCatalog } from "./memory-index.js";
|
|
5
|
+
|
|
6
|
+
const TEST_DIR = path.join(process.cwd(), ".test-memory-index");
const TEAMMATE = "testmate";

// Build the on-disk fixture once: two typed memories with frontmatter, one plain
// note without frontmatter, and one date-named daily log.
beforeAll(async () => {
  const memoryDir = path.join(TEST_DIR, TEAMMATE, "memory");
  await fs.mkdir(memoryDir, { recursive: true });

  const fixtures: ReadonlyArray<readonly [string, string]> = [
    // Typed memory files with frontmatter
    [
      "project_goals.md",
      `---
name: project_goals
description: Stack-ranked feature goals for the teammates project
type: project
---

## Goals

1. Recall query architecture
2. CLI improvements
`,
    ],
    [
      "feedback_testing.md",
      `---
name: feedback_testing
description: Integration tests must hit a real database, not mocks
type: feedback
---

Use real databases in tests. Mocks hide migration bugs.
`,
    ],
    // No frontmatter (should be skipped)
    ["notes.md", "Just some notes without frontmatter.\n"],
    // Daily log (should be skipped)
    ["2026-03-21.md", "# 2026-03-21\nDaily log content.\n"],
  ];

  for (const [fileName, body] of fixtures) {
    await fs.writeFile(path.join(memoryDir, fileName), body);
  }
});

// Remove the whole fixture tree; `force` keeps cleanup quiet if setup never ran.
afterAll(async () => {
  await fs.rm(TEST_DIR, { recursive: true, force: true });
});
|
|
57
|
+
|
|
58
|
+
describe("scanMemoryCatalog", () => {
  // Scan the fixture teammate and project the catalog down to its URIs.
  const scanUris = async (): Promise<string[]> => {
    const catalog = await scanMemoryCatalog(TEST_DIR, TEAMMATE);
    return catalog.map((entry) => entry.uri);
  };

  it("returns entries with frontmatter", async () => {
    const catalog = await scanMemoryCatalog(TEST_DIR, TEAMMATE);
    expect(catalog.length).toBe(2);

    const catalogNames = catalog.map((entry) => entry.name);
    expect(catalogNames).toContain("project_goals");
    expect(catalogNames).toContain("feedback_testing");
  });

  it("skips files without frontmatter", async () => {
    expect(await scanUris()).not.toContain(`${TEAMMATE}/memory/notes.md`);
  });

  it("skips daily logs", async () => {
    expect(await scanUris()).not.toContain(`${TEAMMATE}/memory/2026-03-21.md`);
  });

  it("returns empty array for nonexistent teammate", async () => {
    const catalog = await scanMemoryCatalog(TEST_DIR, "nobody");
    expect(catalog).toEqual([]);
  });
});
|
|
84
|
+
|
|
85
|
+
describe("matchMemoryCatalog", () => {
  it("matches files whose frontmatter overlaps with the query", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "what are our project goals and features?",
    );
    expect(results.length).toBeGreaterThanOrEqual(1);
    expect(results[0].uri).toContain("project_goals");
  });

  it("matches on description keywords", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "database testing integration",
    );
    expect(results.length).toBeGreaterThanOrEqual(1);
    const uris = results.map((r) => r.uri);
    expect(uris).toContain(`${TEAMMATE}/memory/feedback_testing.md`);
  });

  it("returns empty for unrelated queries", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "quantum physics dark matter",
    );
    expect(results.length).toBe(0);
  });

  it("strips frontmatter from result text", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "project goals features",
    );
    expect(results.length).toBeGreaterThanOrEqual(1);
    expect(results[0].text).not.toContain("---");
    expect(results[0].text).toContain("Goals");
  });

  it("assigns scores in the 0.85-0.95 range", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "project goals features teammates",
    );
    // Guard against a vacuous pass: the loop below asserts nothing on an empty list.
    expect(results.length).toBeGreaterThanOrEqual(1);
    for (const r of results) {
      expect(r.score).toBeGreaterThanOrEqual(0.85);
      expect(r.score).toBeLessThanOrEqual(0.95);
    }
  });

  it("sets contentType to typed_memory", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "project goals",
    );
    // Guard against a vacuous pass: the loop below asserts nothing on an empty list.
    expect(results.length).toBeGreaterThanOrEqual(1);
    for (const r of results) {
      expect(r.contentType).toBe("typed_memory");
    }
  });
});
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory frontmatter scanning for Pass 1 recall queries.
|
|
3
|
+
*
|
|
4
|
+
* Reads the teammate's memory file catalog (name + description from frontmatter)
|
|
5
|
+
* and does fast text matching against the task prompt. This is a lightweight,
|
|
6
|
+
* no-embedding relevance signal — "here's a menu of what I might know about."
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import * as fs from "node:fs/promises";
|
|
10
|
+
import * as path from "node:path";
|
|
11
|
+
import type { SearchResult } from "./search.js";
|
|
12
|
+
|
|
13
|
+
// One catalog record per memory markdown file whose frontmatter could be parsed.
interface MemoryEntry {
  /** URI of the form "<teammate>/memory/<file>" (e.g. "beacon/memory/project_goals.md") */
  uri: string;
  /** Absolute path of the file on disk, used to re-read its full content on a match */
  absolutePath: string;
  /** Value of the frontmatter `name:` field */
  name: string;
  /** Value of the frontmatter `description:` field ("" when the field is absent) */
  description: string;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Parse YAML-ish frontmatter from the top of a markdown document.
 *
 * Only the leading `---` … `---` block is inspected, and only the single-line
 * `name:` and `description:` fields are read.
 *
 * @param content Full text of the markdown file.
 * @returns The trimmed `name` and `description` (description is "" when absent
 *   or empty), or null when there is no frontmatter block or no non-empty name.
 */
function parseFrontmatter(content: string): { name: string; description: string } | null {
  const block = content.match(/^---\s*\n([\s\S]*?)\n---/);
  if (!block) return null;

  const fm = block[1] ?? "";

  // Use [ \t]* rather than \s* after the colon: \s also matches newlines, so an
  // empty field ("description:") would otherwise swallow the line break and
  // capture the NEXT frontmatter line (e.g. "type: project") as its value.
  const name = fm.match(/^name:[ \t]*(.+)$/m)?.[1]?.trim();
  if (!name) return null;

  const description = fm.match(/^description:[ \t]*(.+)$/m)?.[1]?.trim() ?? "";

  return { name, description };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Scan a teammate's `memory/` directory and build a catalog of the markdown
 * files that carry parseable frontmatter.
 *
 * Files are skipped when they do not end in `.md`, when their stem looks like
 * a daily log (`YYYY-MM-DD`), when they cannot be read, or when they have no
 * frontmatter `name`.
 *
 * @param teammatesDir Directory that contains one folder per teammate.
 * @param teammate Teammate folder name; also used as the URI prefix.
 * @returns Catalog entries in directory-listing order; an empty array when the
 *   `memory/` directory does not exist or cannot be listed.
 */
export async function scanMemoryCatalog(
  teammatesDir: string,
  teammate: string,
): Promise<MemoryEntry[]> {
  const memoryDir = path.join(teammatesDir, teammate, "memory");

  let files: string[];
  try {
    files = await fs.readdir(memoryDir);
  } catch {
    // No memory/ directory (or it is not listable) — nothing to catalog.
    return [];
  }

  const entries: MemoryEntry[] = [];
  for (const file of files) {
    if (!file.endsWith(".md")) continue;

    // Skip daily logs (YYYY-MM-DD.md)
    const stem = path.basename(file, ".md");
    if (/^\d{4}-\d{2}-\d{2}$/.test(stem)) continue;

    const absolutePath = path.join(memoryDir, file);

    // Isolate read failures per file: one unreadable entry (for example a
    // directory named "x.md", or a file removed mid-scan) must not discard
    // the rest of the catalog.
    let content: string;
    try {
      content = await fs.readFile(absolutePath, "utf-8");
    } catch {
      continue;
    }

    const fm = parseFrontmatter(content);
    if (!fm) continue;

    entries.push({
      uri: `${teammate}/memory/${file}`,
      absolutePath,
      name: fm.name,
      description: fm.description,
    });
  }

  return entries;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Match task prompt text against the teammate's memory catalog.
 *
 * Both the prompt and each entry's `name + description` are lowercased and
 * split into words of 3+ characters. An entry scores 1 point for every catalog
 * word that also appears in the prompt, plus 0.5 for every prompt word that
 * occurs only as a substring of the catalog text (e.g. "goals" inside
 * "project_goals"). Entries with a total of at least 1 are returned.
 *
 * @param teammatesDir Directory that contains one folder per teammate.
 * @param teammate Teammate whose memory catalog is searched.
 * @param taskPrompt Free-text prompt to match against.
 * @param maxTokens Rough cap on returned text; the body is cut to
 *   `maxTokens * 4` characters.
 * @returns Matches sorted by descending score, each carrying the file body
 *   with its frontmatter stripped and `contentType: "typed_memory"`. Entries
 *   whose file can no longer be read are omitted.
 */
export async function matchMemoryCatalog(
  teammatesDir: string,
  teammate: string,
  taskPrompt: string,
  maxTokens = 500,
): Promise<SearchResult[]> {
  const catalog = await scanMemoryCatalog(teammatesDir, teammate);
  if (catalog.length === 0) return [];

  // Tokenize the task prompt into lowercase words (3+ chars)
  const promptWords = new Set(
    taskPrompt
      .toLowerCase()
      .replace(/[^\w\s@/-]/g, " ")
      .split(/\s+/)
      .filter((w) => w.length > 2),
  );

  const results: SearchResult[] = [];

  for (const entry of catalog) {
    const catalogText = `${entry.name} ${entry.description}`.toLowerCase();
    const catalogWords = catalogText
      .replace(/[^\w\s@/_-]/g, " ")
      .split(/\s+/)
      .filter((w) => w.length > 2);
    // Set for O(1) membership tests in the substring pass below.
    const catalogWordSet = new Set(catalogWords);

    // Whole-word overlap: every catalog word occurrence found in the prompt counts 1.
    let overlap = 0;
    for (const w of catalogWords) {
      if (promptWords.has(w)) overlap++;
    }

    // Partial overlap: prompt words that appear only inside a longer catalog
    // token (e.g. "goals" in "project_goals") count half.
    for (const pw of promptWords) {
      if (!catalogWordSet.has(pw) && catalogText.includes(pw)) {
        overlap += 0.5;
      }
    }

    if (overlap < 1) continue;

    // Read full file content for matched entries. The file may have been
    // removed since the catalog scan; skip it rather than failing every match.
    let content: string;
    try {
      content = await fs.readFile(entry.absolutePath, "utf-8");
    } catch {
      continue;
    }

    // Strip frontmatter from the content
    const body = content.replace(/^---\s*\n[\s\S]*?\n---\s*\n?/, "").trim();

    results.push({
      teammate,
      uri: entry.uri,
      text: body.slice(0, maxTokens * 4), // rough token limit
      // Base 0.85 plus 0.02 per overlap point, capped at +0.1 (so at most 0.95).
      score: 0.85 + Math.min(overlap * 0.02, 0.1),
      contentType: "typed_memory",
    });
  }

  // Sort by score descending
  results.sort((a, b) => b.score - a.score);
  return results;
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { buildQueryVariations, extractKeywords } from "./query-expansion.js";
|
|
3
|
+
|
|
4
|
+
describe("extractKeywords", () => {
  it("removes stopwords", () => {
    const result = extractKeywords("the quick brown fox jumps over the lazy dog");
    expect(result).toContain("quick");
    expect(result).toContain("brown");
    expect(result).toContain("fox");
    expect(result).toContain("jumps");
    expect(result).toContain("lazy");
    expect(result).toContain("dog");
    expect(result).not.toContain("the");
    expect(result).not.toContain("over");
  });

  it("filters short tokens (length <= 2)", () => {
    const result = extractKeywords("an AI is a type of ML system");
    expect(result).not.toContain("an");
    expect(result).not.toContain("is");
    // "AI" and "ML" are lowercased to two-character tokens and must be dropped too.
    expect(result).not.toContain("ai");
    expect(result).not.toContain("ml");
    // Only "type" and "system" survive.
    expect(result).toEqual(["type", "system"]);
  });

  it("deduplicates while preserving order", () => {
    const result = extractKeywords("recall search recall index search");
    expect(result).toEqual(["recall", "search", "index"]);
  });

  it("lowercases all output", () => {
    const result = extractKeywords("Update the HOOKS spec");
    expect(result).toContain("update");
    expect(result).toContain("hooks");
    expect(result).toContain("spec");
    // The original casing must not leak through.
    expect(result).not.toContain("Update");
    expect(result).not.toContain("HOOKS");
  });

  it("returns empty array for all-stopword input", () => {
    const result = extractKeywords("the is a");
    expect(result).toEqual([]);
  });

  it("preserves @mentions and paths", () => {
    const result = extractKeywords("deploy @pipeline src/hooks");
    expect(result).toContain("deploy");
    expect(result).toContain("@pipeline");
    expect(result).toContain("src/hooks");
  });
});
|
|
50
|
+
|
|
51
|
+
describe("buildQueryVariations", () => {
  it("always includes the original prompt as the first query", () => {
    const result = buildQueryVariations("fix the authentication bug");
    expect(result[0]).toBe("fix the authentication bug");
  });

  it("generates a keyword-focused query when prompt is verbose", () => {
    const verbose = "I want you to please update the recall search system so that it handles multiple queries at the same time and deduplicates the results properly";
    const result = buildQueryVariations(verbose);
    expect(result.length).toBeGreaterThanOrEqual(2);
    // The keyword query should be shorter than the original. The length
    // assertion above already guarantees result[1] exists.
    expect(result[1].length).toBeLessThan(verbose.length);
  });

  it("adds a conversation-derived query when context is provided", () => {
    const conversationContext = `## Conversation History

**stevenic:** lets talk about the CI pipeline and hooks

**pipeline:** CI Pipeline Hooks — Analysis`;
    const result = buildQueryVariations("what should we do next?", conversationContext);
    // Should have at least the original + conversation query
    expect(result.length).toBeGreaterThanOrEqual(2);
    expect(result[0]).toBe("what should we do next?");
    // The conversation query is appended last and is built from the recent entries.
    expect(result[result.length - 1]).toContain("pipeline");
  });

  it("skips conversation query when no context", () => {
    const result = buildQueryVariations("short task");
    // No context and no shorter keyword form: only the original prompt remains.
    expect(result).toEqual(["short task"]);
  });

  it("does not generate keyword query when prompt is already concise", () => {
    const result = buildQueryVariations("recall search");
    // Very short — the keyword query would equal the prompt, so it is not added.
    expect(result).toEqual(["recall search"]);
  });
});
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight query expansion for Pass 1 recall queries.
|
|
3
|
+
*
|
|
4
|
+
* No LLM needed — uses stopword removal and basic text analysis
|
|
5
|
+
* to generate multiple query variations from a task prompt.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Common English stopwords to filter from queries.
 *
 * Entries are compared against tokens produced by `extractKeywords`, which are
 * lowercase and never contain an apostrophe (apostrophes are replaced by spaces
 * before lookup), so contractions such as "let's" are covered by "let".
 */
const STOPWORDS = new Set([
  "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
  "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
  "being", "have", "has", "had", "do", "does", "did", "will", "would",
  "could", "should", "may", "might", "shall", "can", "need", "must",
  "it", "its", "this", "that", "these", "those", "i", "you", "he", "she",
  "we", "they", "me", "him", "her", "us", "them", "my", "your", "his",
  "our", "their", "what", "which", "who", "whom", "where", "when", "how",
  "why", "if", "then", "so", "not", "no", "just", "also", "very", "too",
  "some", "any", "all", "each", "every", "both", "few", "more", "most",
  "other", "into", "over", "after", "before", "between", "through",
  "about", "up", "out", "off", "down", "here", "there", "again", "once",
  "let", "lets", "get", "got", "go", "going", "make", "made",
  "take", "took", "come", "came", "see", "saw", "know", "knew", "think",
  "thought", "say", "said", "tell", "told", "ask", "asked", "want",
  "wanted", "like", "look", "use", "used", "find", "give", "work",
]);

/**
 * Extract meaningful keywords from text.
 *
 * The text is lowercased, every character other than word characters,
 * whitespace, `@`, `/` and `-` is replaced by a space, and the result is split
 * on whitespace. Tokens are dropped when they are 2 characters or shorter, are
 * stopwords, or contain no letter or digit at all (e.g. a markdown `---` rule).
 *
 * @param text Arbitrary prompt or conversation text.
 * @returns Unique lowercase keywords in order of first appearance.
 */
export function extractKeywords(text: string): string[] {
  const tokens = text
    .toLowerCase()
    .replace(/[^\w\s@/-]/g, " ")
    .split(/\s+/);

  // A Set iterates in insertion order, so it deduplicates while preserving order.
  const keywords = new Set<string>();
  for (const token of tokens) {
    if (token.length <= 2 || STOPWORDS.has(token)) continue;
    // "-", "/", "@" and "_" survive the replace above, so runs such as "---",
    // "///" or "@@@" would otherwise be returned as keywords.
    if (!/[a-z0-9]/.test(token)) continue;
    keywords.add(token);
  }
  return [...keywords];
}
|
|
49
|
+
|
|
50
|
+
/**
 * Derive up to three search queries from a task prompt.
 *
 * The list always starts with the prompt itself. A condensed keyword query
 * (first 8 keywords) follows when the prompt yields 2–20 keywords and the
 * condensed form is under 70% of the prompt's length. A query built from the
 * recent conversation is appended last when context is supplied and a topic
 * can be extracted from it.
 *
 * @param taskPrompt The prompt to search for; returned verbatim as the first query.
 * @param conversationContext Optional conversation transcript text.
 * @returns Between one and three queries, in the order described above.
 */
export function buildQueryVariations(
  taskPrompt: string,
  conversationContext?: string,
): string[] {
  const variations: string[] = [taskPrompt];

  // Keyword variation: only worthwhile when it is clearly shorter than the prompt.
  const promptKeywords = extractKeywords(taskPrompt);
  const keywordCountInRange = promptKeywords.length >= 2 && promptKeywords.length <= 20;
  if (keywordCountInRange) {
    const condensed = promptKeywords.slice(0, 8).join(" ");
    const shortEnough = condensed.length < taskPrompt.length * 0.7;
    if (shortEnough) variations.push(condensed);
  }

  // Conversation variation: skipped when no context is given or no topic is found.
  const topic = conversationContext ? extractRecentTopic(conversationContext) : null;
  if (topic) variations.push(topic);

  return variations;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Extract the most recent topic from conversation context.
 *
 * The context is split on speaker markers of the form `**name:**` that start a
 * line (including the very first line). Fragments of 10 or fewer non-blank
 * characters are discarded, the last two remaining fragments are joined, and
 * their keywords are extracted.
 *
 * @param conversationContext Conversation transcript text.
 * @returns Up to 6 keywords joined by spaces, or null when no fragment is long
 *   enough or fewer than 2 keywords are found.
 */
function extractRecentTopic(conversationContext: string): string | null {
  // (?:^|\n) also matches a marker at the start of the string; without it the
  // first speaker's "**name:**" prefix stays in the text and the speaker name
  // leaks into the keywords. Names may contain "-" and "." after the first
  // word character (e.g. "code-reviewer").
  const entries = conversationContext
    .split(/(?:^|\n)\*\*\w[\w.-]*:\*\*\s*/)
    .filter((e) => e.trim().length > 10);

  if (entries.length === 0) return null;

  // Take the last 1-2 entries (most recent conversation)
  const recent = entries.slice(-2).join(" ");
  const keywords = extractKeywords(recent);

  if (keywords.length < 2) return null;

  // Build a focused query from the recent conversation keywords
  return keywords.slice(0, 6).join(" ");
}
|