@teammates/recall 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +3 -1
- package/dist/index.js +3 -1
- package/dist/memory-index.d.ts +34 -0
- package/dist/memory-index.js +118 -0
- package/dist/memory-index.test.d.ts +1 -0
- package/dist/memory-index.test.js +96 -0
- package/dist/query-expansion.d.ts +20 -0
- package/dist/query-expansion.js +92 -0
- package/dist/query-expansion.test.d.ts +1 -0
- package/dist/query-expansion.test.js +79 -0
- package/dist/search.d.ts +16 -0
- package/dist/search.js +49 -0
- package/package.json +1 -1
- package/src/index.ts +9 -1
- package/src/memory-index.test.ts +149 -0
- package/src/memory-index.ts +151 -0
- package/src/query-expansion.test.ts +90 -0
- package/src/query-expansion.ts +105 -0
- package/src/search.ts +66 -0
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
1
|
export { LocalEmbeddings } from "./embeddings.js";
|
|
2
2
|
export { Indexer, type IndexerConfig } from "./indexer.js";
|
|
3
|
-
export {
|
|
3
|
+
export { matchMemoryCatalog, scanMemoryCatalog } from "./memory-index.js";
|
|
4
|
+
export { buildQueryVariations, extractKeywords } from "./query-expansion.js";
|
|
5
|
+
export { type MultiSearchOptions, type SearchOptions, type SearchResult, multiSearch, search, } from "./search.js";
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
1
|
export { LocalEmbeddings } from "./embeddings.js";
|
|
2
2
|
export { Indexer } from "./indexer.js";
|
|
3
|
-
export {
|
|
3
|
+
export { matchMemoryCatalog, scanMemoryCatalog } from "./memory-index.js";
|
|
4
|
+
export { buildQueryVariations, extractKeywords } from "./query-expansion.js";
|
|
5
|
+
export { multiSearch, search, } from "./search.js";
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory frontmatter scanning for Pass 1 recall queries.
|
|
3
|
+
*
|
|
4
|
+
* Reads the teammate's memory file catalog (name + description from frontmatter)
|
|
5
|
+
* and does fast text matching against the task prompt. This is a lightweight,
|
|
6
|
+
* no-embedding relevance signal — "here's a menu of what I might know about."
|
|
7
|
+
*/
|
|
8
|
+
import type { SearchResult } from "./search.js";
|
|
9
|
+
interface MemoryEntry {
|
|
10
|
+
/** Relative URI (e.g. "beacon/memory/project_goals.md") */
|
|
11
|
+
uri: string;
|
|
12
|
+
/** Absolute file path */
|
|
13
|
+
absolutePath: string;
|
|
14
|
+
/** Frontmatter name field */
|
|
15
|
+
name: string;
|
|
16
|
+
/** Frontmatter description field */
|
|
17
|
+
description: string;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Scan a teammate's memory directory and build a catalog of memory entries
|
|
21
|
+
* with their frontmatter metadata.
|
|
22
|
+
*/
|
|
23
|
+
export declare function scanMemoryCatalog(teammatesDir: string, teammate: string): Promise<MemoryEntry[]>;
|
|
24
|
+
/**
|
|
25
|
+
* Match task prompt text against memory catalog entries.
|
|
26
|
+
* Returns memory files whose name or description has significant word overlap
|
|
27
|
+
* with the task prompt. Each match is returned as a SearchResult with the
|
|
28
|
+
* file's full content.
|
|
29
|
+
*
|
|
30
|
+
* Matching is case-insensitive. A match requires at least one word from the
|
|
31
|
+
* task prompt appearing in the name or description.
|
|
32
|
+
*/
|
|
33
|
+
export declare function matchMemoryCatalog(teammatesDir: string, teammate: string, taskPrompt: string, maxTokens?: number): Promise<SearchResult[]>;
|
|
34
|
+
export {};
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory frontmatter scanning for Pass 1 recall queries.
|
|
3
|
+
*
|
|
4
|
+
* Reads the teammate's memory file catalog (name + description from frontmatter)
|
|
5
|
+
* and does fast text matching against the task prompt. This is a lightweight,
|
|
6
|
+
* no-embedding relevance signal — "here's a menu of what I might know about."
|
|
7
|
+
*/
|
|
8
|
+
import * as fs from "node:fs/promises";
|
|
9
|
+
import * as path from "node:path";
|
|
10
|
+
/**
|
|
11
|
+
* Parse YAML-ish frontmatter from a markdown file's content.
|
|
12
|
+
* Returns name and description fields, or null if no frontmatter found.
|
|
13
|
+
*/
|
|
14
|
+
function parseFrontmatter(content) {
|
|
15
|
+
const match = content.match(/^---\s*\n([\s\S]*?)\n---/);
|
|
16
|
+
if (!match)
|
|
17
|
+
return null;
|
|
18
|
+
const fm = match[1];
|
|
19
|
+
const nameMatch = fm.match(/^name:\s*(.+)$/m);
|
|
20
|
+
const descMatch = fm.match(/^description:\s*(.+)$/m);
|
|
21
|
+
if (!nameMatch)
|
|
22
|
+
return null;
|
|
23
|
+
return {
|
|
24
|
+
name: nameMatch[1].trim(),
|
|
25
|
+
description: descMatch?.[1]?.trim() ?? "",
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Scan a teammate's memory directory and build a catalog of memory entries
|
|
30
|
+
* with their frontmatter metadata.
|
|
31
|
+
*/
|
|
32
|
+
export async function scanMemoryCatalog(teammatesDir, teammate) {
|
|
33
|
+
const memoryDir = path.join(teammatesDir, teammate, "memory");
|
|
34
|
+
const entries = [];
|
|
35
|
+
try {
|
|
36
|
+
const files = await fs.readdir(memoryDir);
|
|
37
|
+
for (const file of files) {
|
|
38
|
+
if (!file.endsWith(".md"))
|
|
39
|
+
continue;
|
|
40
|
+
// Skip daily logs (YYYY-MM-DD.md)
|
|
41
|
+
const stem = path.basename(file, ".md");
|
|
42
|
+
if (/^\d{4}-\d{2}-\d{2}$/.test(stem))
|
|
43
|
+
continue;
|
|
44
|
+
const absolutePath = path.join(memoryDir, file);
|
|
45
|
+
const content = await fs.readFile(absolutePath, "utf-8");
|
|
46
|
+
const fm = parseFrontmatter(content);
|
|
47
|
+
if (!fm)
|
|
48
|
+
continue;
|
|
49
|
+
entries.push({
|
|
50
|
+
uri: `${teammate}/memory/${file}`,
|
|
51
|
+
absolutePath,
|
|
52
|
+
name: fm.name,
|
|
53
|
+
description: fm.description,
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
catch {
|
|
58
|
+
// No memory/ directory
|
|
59
|
+
}
|
|
60
|
+
return entries;
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Match task prompt text against memory catalog entries.
|
|
64
|
+
* Returns memory files whose name or description has significant word overlap
|
|
65
|
+
* with the task prompt. Each match is returned as a SearchResult with the
|
|
66
|
+
* file's full content.
|
|
67
|
+
*
|
|
68
|
+
* Matching is case-insensitive. A match requires at least one word from the
|
|
69
|
+
* task prompt appearing in the name or description.
|
|
70
|
+
*/
|
|
71
|
+
export async function matchMemoryCatalog(teammatesDir, teammate, taskPrompt, maxTokens = 500) {
|
|
72
|
+
const catalog = await scanMemoryCatalog(teammatesDir, teammate);
|
|
73
|
+
if (catalog.length === 0)
|
|
74
|
+
return [];
|
|
75
|
+
// Tokenize the task prompt into lowercase words (3+ chars)
|
|
76
|
+
const promptWords = new Set(taskPrompt
|
|
77
|
+
.toLowerCase()
|
|
78
|
+
.replace(/[^\w\s@/-]/g, " ")
|
|
79
|
+
.split(/\s+/)
|
|
80
|
+
.filter((w) => w.length > 2));
|
|
81
|
+
const results = [];
|
|
82
|
+
for (const entry of catalog) {
|
|
83
|
+
const catalogText = `${entry.name} ${entry.description}`.toLowerCase();
|
|
84
|
+
const catalogWords = catalogText
|
|
85
|
+
.replace(/[^\w\s@/_-]/g, " ")
|
|
86
|
+
.split(/\s+/)
|
|
87
|
+
.filter((w) => w.length > 2);
|
|
88
|
+
// Count overlapping words
|
|
89
|
+
let overlap = 0;
|
|
90
|
+
for (const w of catalogWords) {
|
|
91
|
+
if (promptWords.has(w))
|
|
92
|
+
overlap++;
|
|
93
|
+
}
|
|
94
|
+
// Also check if prompt words appear as substrings in the catalog text
|
|
95
|
+
// (e.g., "goal" matches "project_goals")
|
|
96
|
+
for (const pw of promptWords) {
|
|
97
|
+
if (catalogText.includes(pw) && !catalogWords.includes(pw)) {
|
|
98
|
+
overlap += 0.5;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
if (overlap >= 1) {
|
|
102
|
+
// Read full file content for matched entries
|
|
103
|
+
const content = await fs.readFile(entry.absolutePath, "utf-8");
|
|
104
|
+
// Strip frontmatter from the content
|
|
105
|
+
const body = content.replace(/^---\s*\n[\s\S]*?\n---\s*\n?/, "").trim();
|
|
106
|
+
results.push({
|
|
107
|
+
teammate,
|
|
108
|
+
uri: entry.uri,
|
|
109
|
+
text: body.slice(0, maxTokens * 4), // rough token limit
|
|
110
|
+
score: 0.85 + Math.min(overlap * 0.02, 0.1), // 0.85-0.95 range
|
|
111
|
+
contentType: "typed_memory",
|
|
112
|
+
});
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
// Sort by score descending
|
|
116
|
+
results.sort((a, b) => b.score - a.score);
|
|
117
|
+
return results;
|
|
118
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import * as fs from "node:fs/promises";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { afterAll, beforeAll, describe, expect, it } from "vitest";
|
|
4
|
+
import { matchMemoryCatalog, scanMemoryCatalog } from "./memory-index.js";
|
|
5
|
+
const TEST_DIR = path.join(process.cwd(), ".test-memory-index");
|
|
6
|
+
const TEAMMATE = "testmate";
|
|
7
|
+
beforeAll(async () => {
|
|
8
|
+
const memoryDir = path.join(TEST_DIR, TEAMMATE, "memory");
|
|
9
|
+
await fs.mkdir(memoryDir, { recursive: true });
|
|
10
|
+
// Create typed memory files with frontmatter
|
|
11
|
+
await fs.writeFile(path.join(memoryDir, "project_goals.md"), `---
|
|
12
|
+
name: project_goals
|
|
13
|
+
description: Stack-ranked feature goals for the teammates project
|
|
14
|
+
type: project
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Goals
|
|
18
|
+
|
|
19
|
+
1. Recall query architecture
|
|
20
|
+
2. CLI improvements
|
|
21
|
+
`);
|
|
22
|
+
await fs.writeFile(path.join(memoryDir, "feedback_testing.md"), `---
|
|
23
|
+
name: feedback_testing
|
|
24
|
+
description: Integration tests must hit a real database, not mocks
|
|
25
|
+
type: feedback
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
Use real databases in tests. Mocks hide migration bugs.
|
|
29
|
+
`);
|
|
30
|
+
// Create a file without frontmatter (should be skipped)
|
|
31
|
+
await fs.writeFile(path.join(memoryDir, "notes.md"), "Just some notes without frontmatter.\n");
|
|
32
|
+
// Create a daily log (should be skipped)
|
|
33
|
+
await fs.writeFile(path.join(memoryDir, "2026-03-21.md"), "# 2026-03-21\nDaily log content.\n");
|
|
34
|
+
});
|
|
35
|
+
afterAll(async () => {
|
|
36
|
+
await fs.rm(TEST_DIR, { recursive: true, force: true });
|
|
37
|
+
});
|
|
38
|
+
describe("scanMemoryCatalog", () => {
|
|
39
|
+
it("returns entries with frontmatter", async () => {
|
|
40
|
+
const entries = await scanMemoryCatalog(TEST_DIR, TEAMMATE);
|
|
41
|
+
expect(entries.length).toBe(2);
|
|
42
|
+
const names = entries.map((e) => e.name);
|
|
43
|
+
expect(names).toContain("project_goals");
|
|
44
|
+
expect(names).toContain("feedback_testing");
|
|
45
|
+
});
|
|
46
|
+
it("skips files without frontmatter", async () => {
|
|
47
|
+
const entries = await scanMemoryCatalog(TEST_DIR, TEAMMATE);
|
|
48
|
+
const uris = entries.map((e) => e.uri);
|
|
49
|
+
expect(uris).not.toContain(`${TEAMMATE}/memory/notes.md`);
|
|
50
|
+
});
|
|
51
|
+
it("skips daily logs", async () => {
|
|
52
|
+
const entries = await scanMemoryCatalog(TEST_DIR, TEAMMATE);
|
|
53
|
+
const uris = entries.map((e) => e.uri);
|
|
54
|
+
expect(uris).not.toContain(`${TEAMMATE}/memory/2026-03-21.md`);
|
|
55
|
+
});
|
|
56
|
+
it("returns empty array for nonexistent teammate", async () => {
|
|
57
|
+
const entries = await scanMemoryCatalog(TEST_DIR, "nobody");
|
|
58
|
+
expect(entries).toEqual([]);
|
|
59
|
+
});
|
|
60
|
+
});
|
|
61
|
+
describe("matchMemoryCatalog", () => {
|
|
62
|
+
it("matches files whose frontmatter overlaps with the query", async () => {
|
|
63
|
+
const results = await matchMemoryCatalog(TEST_DIR, TEAMMATE, "what are our project goals and features?");
|
|
64
|
+
expect(results.length).toBeGreaterThanOrEqual(1);
|
|
65
|
+
expect(results[0].uri).toContain("project_goals");
|
|
66
|
+
});
|
|
67
|
+
it("matches on description keywords", async () => {
|
|
68
|
+
const results = await matchMemoryCatalog(TEST_DIR, TEAMMATE, "database testing integration");
|
|
69
|
+
expect(results.length).toBeGreaterThanOrEqual(1);
|
|
70
|
+
const uris = results.map((r) => r.uri);
|
|
71
|
+
expect(uris).toContain(`${TEAMMATE}/memory/feedback_testing.md`);
|
|
72
|
+
});
|
|
73
|
+
it("returns empty for unrelated queries", async () => {
|
|
74
|
+
const results = await matchMemoryCatalog(TEST_DIR, TEAMMATE, "quantum physics dark matter");
|
|
75
|
+
expect(results.length).toBe(0);
|
|
76
|
+
});
|
|
77
|
+
it("strips frontmatter from result text", async () => {
|
|
78
|
+
const results = await matchMemoryCatalog(TEST_DIR, TEAMMATE, "project goals features");
|
|
79
|
+
expect(results.length).toBeGreaterThanOrEqual(1);
|
|
80
|
+
expect(results[0].text).not.toContain("---");
|
|
81
|
+
expect(results[0].text).toContain("Goals");
|
|
82
|
+
});
|
|
83
|
+
it("assigns scores in the 0.85-0.95 range", async () => {
|
|
84
|
+
const results = await matchMemoryCatalog(TEST_DIR, TEAMMATE, "project goals features teammates");
|
|
85
|
+
for (const r of results) {
|
|
86
|
+
expect(r.score).toBeGreaterThanOrEqual(0.85);
|
|
87
|
+
expect(r.score).toBeLessThanOrEqual(0.95);
|
|
88
|
+
}
|
|
89
|
+
});
|
|
90
|
+
it("sets contentType to typed_memory", async () => {
|
|
91
|
+
const results = await matchMemoryCatalog(TEST_DIR, TEAMMATE, "project goals");
|
|
92
|
+
for (const r of results) {
|
|
93
|
+
expect(r.contentType).toBe("typed_memory");
|
|
94
|
+
}
|
|
95
|
+
});
|
|
96
|
+
});
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight query expansion for Pass 1 recall queries.
|
|
3
|
+
*
|
|
4
|
+
* No LLM needed — uses stopword removal and basic text analysis
|
|
5
|
+
* to generate multiple query variations from a task prompt.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Extract meaningful keywords from text by removing stopwords and short tokens.
|
|
9
|
+
* Returns lowercase keywords in order of appearance.
|
|
10
|
+
*/
|
|
11
|
+
export declare function extractKeywords(text: string): string[];
|
|
12
|
+
/**
|
|
13
|
+
* Build multiple query variations from a task prompt and optional conversation context.
|
|
14
|
+
*
|
|
15
|
+
* Returns 1-3 queries:
|
|
16
|
+
* 1. The original task prompt (always)
|
|
17
|
+
* 2. A focused keyword query (if keywords differ meaningfully from the original)
|
|
18
|
+
* 3. A conversation-derived query (if recent conversation context is provided)
|
|
19
|
+
*/
|
|
20
|
+
export declare function buildQueryVariations(taskPrompt: string, conversationContext?: string): string[];
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight query expansion for Pass 1 recall queries.
|
|
3
|
+
*
|
|
4
|
+
* No LLM needed — uses stopword removal and basic text analysis
|
|
5
|
+
* to generate multiple query variations from a task prompt.
|
|
6
|
+
*/
|
|
7
|
+
/** Common English stopwords to filter from queries. */
|
|
8
|
+
const STOPWORDS = new Set([
|
|
9
|
+
"a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
|
|
10
|
+
"of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
|
|
11
|
+
"being", "have", "has", "had", "do", "does", "did", "will", "would",
|
|
12
|
+
"could", "should", "may", "might", "shall", "can", "need", "must",
|
|
13
|
+
"it", "its", "this", "that", "these", "those", "i", "you", "he", "she",
|
|
14
|
+
"we", "they", "me", "him", "her", "us", "them", "my", "your", "his",
|
|
15
|
+
"our", "their", "what", "which", "who", "whom", "where", "when", "how",
|
|
16
|
+
"why", "if", "then", "so", "not", "no", "just", "also", "very", "too",
|
|
17
|
+
"some", "any", "all", "each", "every", "both", "few", "more", "most",
|
|
18
|
+
"other", "into", "over", "after", "before", "between", "through",
|
|
19
|
+
"about", "up", "out", "off", "down", "here", "there", "again", "once",
|
|
20
|
+
"let", "lets", "let's", "get", "got", "go", "going", "make", "made",
|
|
21
|
+
"take", "took", "come", "came", "see", "saw", "know", "knew", "think",
|
|
22
|
+
"thought", "say", "said", "tell", "told", "ask", "asked", "want",
|
|
23
|
+
"wanted", "like", "look", "use", "used", "find", "give", "work",
|
|
24
|
+
]);
|
|
25
|
+
/**
|
|
26
|
+
* Extract meaningful keywords from text by removing stopwords and short tokens.
|
|
27
|
+
* Returns lowercase keywords in order of appearance.
|
|
28
|
+
*/
|
|
29
|
+
export function extractKeywords(text) {
|
|
30
|
+
const words = text
|
|
31
|
+
.toLowerCase()
|
|
32
|
+
.replace(/[^\w\s@/-]/g, " ")
|
|
33
|
+
.split(/\s+/)
|
|
34
|
+
.filter((w) => w.length > 2 && !STOPWORDS.has(w));
|
|
35
|
+
// Deduplicate while preserving order
|
|
36
|
+
const seen = new Set();
|
|
37
|
+
const result = [];
|
|
38
|
+
for (const w of words) {
|
|
39
|
+
if (!seen.has(w)) {
|
|
40
|
+
seen.add(w);
|
|
41
|
+
result.push(w);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
return result;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Build multiple query variations from a task prompt and optional conversation context.
|
|
48
|
+
*
|
|
49
|
+
* Returns 1-3 queries:
|
|
50
|
+
* 1. The original task prompt (always)
|
|
51
|
+
* 2. A focused keyword query (if keywords differ meaningfully from the original)
|
|
52
|
+
* 3. A conversation-derived query (if recent conversation context is provided)
|
|
53
|
+
*/
|
|
54
|
+
export function buildQueryVariations(taskPrompt, conversationContext) {
|
|
55
|
+
const queries = [taskPrompt];
|
|
56
|
+
// Query 2: Focused keywords from the task prompt
|
|
57
|
+
const keywords = extractKeywords(taskPrompt);
|
|
58
|
+
if (keywords.length >= 2 && keywords.length <= 20) {
|
|
59
|
+
const keywordQuery = keywords.slice(0, 8).join(" ");
|
|
60
|
+
// Only add if meaningfully different from original
|
|
61
|
+
if (keywordQuery.length < taskPrompt.length * 0.7) {
|
|
62
|
+
queries.push(keywordQuery);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
// Query 3: Recent conversation topic
|
|
66
|
+
if (conversationContext) {
|
|
67
|
+
const recentTopic = extractRecentTopic(conversationContext);
|
|
68
|
+
if (recentTopic) {
|
|
69
|
+
queries.push(recentTopic);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
return queries;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Extract the most recent topic/theme from conversation context.
|
|
76
|
+
* Takes the last 1-2 meaningful entries and extracts keywords.
|
|
77
|
+
*/
|
|
78
|
+
function extractRecentTopic(conversationContext) {
|
|
79
|
+
// Split on common conversation entry patterns
|
|
80
|
+
const entries = conversationContext
|
|
81
|
+
.split(/\n\*\*\w+:\*\*\s*/g)
|
|
82
|
+
.filter((e) => e.trim().length > 10);
|
|
83
|
+
if (entries.length === 0)
|
|
84
|
+
return null;
|
|
85
|
+
// Take the last 1-2 entries (most recent conversation)
|
|
86
|
+
const recent = entries.slice(-2).join(" ");
|
|
87
|
+
const keywords = extractKeywords(recent);
|
|
88
|
+
if (keywords.length < 2)
|
|
89
|
+
return null;
|
|
90
|
+
// Build a focused query from the recent conversation keywords
|
|
91
|
+
return keywords.slice(0, 6).join(" ");
|
|
92
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { buildQueryVariations, extractKeywords } from "./query-expansion.js";
|
|
3
|
+
describe("extractKeywords", () => {
|
|
4
|
+
it("removes stopwords", () => {
|
|
5
|
+
const result = extractKeywords("the quick brown fox jumps over the lazy dog");
|
|
6
|
+
expect(result).toContain("quick");
|
|
7
|
+
expect(result).toContain("brown");
|
|
8
|
+
expect(result).toContain("fox");
|
|
9
|
+
expect(result).toContain("jumps");
|
|
10
|
+
expect(result).toContain("lazy");
|
|
11
|
+
expect(result).toContain("dog");
|
|
12
|
+
expect(result).not.toContain("the");
|
|
13
|
+
expect(result).not.toContain("over");
|
|
14
|
+
});
|
|
15
|
+
it("filters short tokens (length <= 2)", () => {
|
|
16
|
+
const result = extractKeywords("an AI is a type of ML system");
|
|
17
|
+
expect(result).not.toContain("an");
|
|
18
|
+
expect(result).not.toContain("is");
|
|
19
|
+
// "type" and "system" stay, "AI" and "ML" filtered (length 2)
|
|
20
|
+
expect(result).toContain("type");
|
|
21
|
+
expect(result).toContain("system");
|
|
22
|
+
});
|
|
23
|
+
it("deduplicates while preserving order", () => {
|
|
24
|
+
const result = extractKeywords("recall search recall index search");
|
|
25
|
+
expect(result).toEqual(["recall", "search", "index"]);
|
|
26
|
+
});
|
|
27
|
+
it("lowercases all output", () => {
|
|
28
|
+
const result = extractKeywords("Update the HOOKS spec");
|
|
29
|
+
expect(result).toContain("update");
|
|
30
|
+
expect(result).toContain("hooks");
|
|
31
|
+
expect(result).toContain("spec");
|
|
32
|
+
});
|
|
33
|
+
it("returns empty array for all-stopword input", () => {
|
|
34
|
+
const result = extractKeywords("the is a");
|
|
35
|
+
expect(result).toEqual([]);
|
|
36
|
+
});
|
|
37
|
+
it("preserves @mentions and paths", () => {
|
|
38
|
+
const result = extractKeywords("deploy @pipeline src/hooks");
|
|
39
|
+
expect(result).toContain("deploy");
|
|
40
|
+
expect(result).toContain("@pipeline");
|
|
41
|
+
expect(result).toContain("src/hooks");
|
|
42
|
+
});
|
|
43
|
+
});
|
|
44
|
+
describe("buildQueryVariations", () => {
|
|
45
|
+
it("always includes the original prompt as the first query", () => {
|
|
46
|
+
const result = buildQueryVariations("fix the authentication bug");
|
|
47
|
+
expect(result[0]).toBe("fix the authentication bug");
|
|
48
|
+
});
|
|
49
|
+
it("generates a keyword-focused query when prompt is verbose", () => {
|
|
50
|
+
const verbose = "I want you to please update the recall search system so that it handles multiple queries at the same time and deduplicates the results properly";
|
|
51
|
+
const result = buildQueryVariations(verbose);
|
|
52
|
+
expect(result.length).toBeGreaterThanOrEqual(2);
|
|
53
|
+
// The keyword query should be shorter than the original
|
|
54
|
+
if (result.length > 1) {
|
|
55
|
+
expect(result[1].length).toBeLessThan(verbose.length);
|
|
56
|
+
}
|
|
57
|
+
});
|
|
58
|
+
it("adds a conversation-derived query when context is provided", () => {
|
|
59
|
+
const conversationContext = `## Conversation History
|
|
60
|
+
|
|
61
|
+
**stevenic:** lets talk about the CI pipeline and hooks
|
|
62
|
+
|
|
63
|
+
**pipeline:** CI Pipeline Hooks — Analysis`;
|
|
64
|
+
const result = buildQueryVariations("what should we do next?", conversationContext);
|
|
65
|
+
// Should have at least the original + conversation query
|
|
66
|
+
expect(result.length).toBeGreaterThanOrEqual(2);
|
|
67
|
+
});
|
|
68
|
+
it("skips conversation query when no context", () => {
|
|
69
|
+
const result = buildQueryVariations("short task");
|
|
70
|
+
// Short prompts with few keywords may only produce 1 query
|
|
71
|
+
expect(result.length).toBeGreaterThanOrEqual(1);
|
|
72
|
+
expect(result[0]).toBe("short task");
|
|
73
|
+
});
|
|
74
|
+
it("does not generate keyword query when prompt is already concise", () => {
|
|
75
|
+
const result = buildQueryVariations("recall search");
|
|
76
|
+
// Very short — keyword query wouldn't differ meaningfully
|
|
77
|
+
expect(result[0]).toBe("recall search");
|
|
78
|
+
});
|
|
79
|
+
});
|
package/dist/search.d.ts
CHANGED
|
@@ -18,6 +18,13 @@ export interface SearchOptions {
|
|
|
18
18
|
/** Relevance boost multiplier for typed memories over episodic summaries (default: 1.2) */
|
|
19
19
|
typedMemoryBoost?: number;
|
|
20
20
|
}
|
|
21
|
+
/** Options for multi-query search with deduplication. */
|
|
22
|
+
export interface MultiSearchOptions extends SearchOptions {
|
|
23
|
+
/** Additional queries beyond the primary (keyword-focused, conversation-derived, etc.) */
|
|
24
|
+
additionalQueries?: string[];
|
|
25
|
+
/** Pre-matched memory catalog results to merge into the final set */
|
|
26
|
+
catalogMatches?: SearchResult[];
|
|
27
|
+
}
|
|
21
28
|
export interface SearchResult {
|
|
22
29
|
teammate: string;
|
|
23
30
|
uri: string;
|
|
@@ -34,3 +41,12 @@ export interface SearchResult {
|
|
|
34
41
|
* Results are merged, deduped, and typed memories get a relevance boost.
|
|
35
42
|
*/
|
|
36
43
|
export declare function search(query: string, options: SearchOptions): Promise<SearchResult[]>;
|
|
44
|
+
/**
|
|
45
|
+
* Multi-query search with deduplication and catalog merge.
|
|
46
|
+
*
|
|
47
|
+
* Fires the primary query plus any additional queries (keyword-focused,
|
|
48
|
+
* conversation-derived) and merges results. Catalog matches (from frontmatter
|
|
49
|
+
* text matching) are also merged. Deduplication is by URI — when the same
|
|
50
|
+
* URI appears from multiple queries, the highest score wins.
|
|
51
|
+
*/
|
|
52
|
+
export declare function multiSearch(primaryQuery: string, options: MultiSearchOptions): Promise<SearchResult[]>;
|
package/dist/search.js
CHANGED
|
@@ -132,3 +132,52 @@ export async function search(query, options) {
|
|
|
132
132
|
allResults.sort((a, b) => b.score - a.score);
|
|
133
133
|
return allResults.slice(0, maxResults + recencyDepth); // allow extra slots for recency results
|
|
134
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Multi-query search with deduplication and catalog merge.
|
|
137
|
+
*
|
|
138
|
+
* Fires the primary query plus any additional queries (keyword-focused,
|
|
139
|
+
* conversation-derived) and merges results. Catalog matches (from frontmatter
|
|
140
|
+
* text matching) are also merged. Deduplication is by URI — when the same
|
|
141
|
+
* URI appears from multiple queries, the highest score wins.
|
|
142
|
+
*/
|
|
143
|
+
export async function multiSearch(primaryQuery, options) {
|
|
144
|
+
const additionalQueries = options.additionalQueries ?? [];
|
|
145
|
+
const catalogMatches = options.catalogMatches ?? [];
|
|
146
|
+
const maxResults = options.maxResults ?? 5;
|
|
147
|
+
const recencyDepth = options.recencyDepth ?? 2;
|
|
148
|
+
// Fire all queries — primary gets full treatment (recency pass + semantic)
|
|
149
|
+
// Additional queries get semantic only (skipRecency to avoid duplicate weeklies)
|
|
150
|
+
const primaryResults = await search(primaryQuery, options);
|
|
151
|
+
// Collect all results keyed by URI, keeping highest score
|
|
152
|
+
const bestByUri = new Map();
|
|
153
|
+
for (const r of primaryResults) {
|
|
154
|
+
const existing = bestByUri.get(r.uri);
|
|
155
|
+
if (!existing || r.score > existing.score) {
|
|
156
|
+
bestByUri.set(r.uri, r);
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
// Fire additional queries (reuse same search options minus recency to avoid dupes)
|
|
160
|
+
for (const query of additionalQueries) {
|
|
161
|
+
const results = await search(query, {
|
|
162
|
+
...options,
|
|
163
|
+
recencyDepth: 0, // primary already got the weekly summaries
|
|
164
|
+
});
|
|
165
|
+
for (const r of results) {
|
|
166
|
+
const existing = bestByUri.get(r.uri);
|
|
167
|
+
if (!existing || r.score > existing.score) {
|
|
168
|
+
bestByUri.set(r.uri, r);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
// Merge catalog matches (frontmatter text-matched results)
|
|
173
|
+
for (const r of catalogMatches) {
|
|
174
|
+
const existing = bestByUri.get(r.uri);
|
|
175
|
+
if (!existing || r.score > existing.score) {
|
|
176
|
+
bestByUri.set(r.uri, r);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
// Sort by score descending, return top results
|
|
180
|
+
const merged = [...bestByUri.values()];
|
|
181
|
+
merged.sort((a, b) => b.score - a.score);
|
|
182
|
+
return merged.slice(0, maxResults + recencyDepth);
|
|
183
|
+
}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
1
|
export { LocalEmbeddings } from "./embeddings.js";
|
|
2
2
|
export { Indexer, type IndexerConfig } from "./indexer.js";
|
|
3
|
-
export {
|
|
3
|
+
export { matchMemoryCatalog, scanMemoryCatalog } from "./memory-index.js";
|
|
4
|
+
export { buildQueryVariations, extractKeywords } from "./query-expansion.js";
|
|
5
|
+
export {
|
|
6
|
+
type MultiSearchOptions,
|
|
7
|
+
type SearchOptions,
|
|
8
|
+
type SearchResult,
|
|
9
|
+
multiSearch,
|
|
10
|
+
search,
|
|
11
|
+
} from "./search.js";
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import * as fs from "node:fs/promises";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { afterAll, beforeAll, describe, expect, it } from "vitest";
|
|
4
|
+
import { matchMemoryCatalog, scanMemoryCatalog } from "./memory-index.js";
|
|
5
|
+
|
|
6
|
+
const TEST_DIR = path.join(process.cwd(), ".test-memory-index");
|
|
7
|
+
const TEAMMATE = "testmate";
|
|
8
|
+
|
|
9
|
+
beforeAll(async () => {
|
|
10
|
+
const memoryDir = path.join(TEST_DIR, TEAMMATE, "memory");
|
|
11
|
+
await fs.mkdir(memoryDir, { recursive: true });
|
|
12
|
+
|
|
13
|
+
// Create typed memory files with frontmatter
|
|
14
|
+
await fs.writeFile(
|
|
15
|
+
path.join(memoryDir, "project_goals.md"),
|
|
16
|
+
`---
|
|
17
|
+
name: project_goals
|
|
18
|
+
description: Stack-ranked feature goals for the teammates project
|
|
19
|
+
type: project
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Goals
|
|
23
|
+
|
|
24
|
+
1. Recall query architecture
|
|
25
|
+
2. CLI improvements
|
|
26
|
+
`,
|
|
27
|
+
);
|
|
28
|
+
|
|
29
|
+
await fs.writeFile(
|
|
30
|
+
path.join(memoryDir, "feedback_testing.md"),
|
|
31
|
+
`---
|
|
32
|
+
name: feedback_testing
|
|
33
|
+
description: Integration tests must hit a real database, not mocks
|
|
34
|
+
type: feedback
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
Use real databases in tests. Mocks hide migration bugs.
|
|
38
|
+
`,
|
|
39
|
+
);
|
|
40
|
+
|
|
41
|
+
// Create a file without frontmatter (should be skipped)
|
|
42
|
+
await fs.writeFile(
|
|
43
|
+
path.join(memoryDir, "notes.md"),
|
|
44
|
+
"Just some notes without frontmatter.\n",
|
|
45
|
+
);
|
|
46
|
+
|
|
47
|
+
// Create a daily log (should be skipped)
|
|
48
|
+
await fs.writeFile(
|
|
49
|
+
path.join(memoryDir, "2026-03-21.md"),
|
|
50
|
+
"# 2026-03-21\nDaily log content.\n",
|
|
51
|
+
);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
afterAll(async () => {
|
|
55
|
+
await fs.rm(TEST_DIR, { recursive: true, force: true });
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
describe("scanMemoryCatalog", () => {
|
|
59
|
+
it("returns entries with frontmatter", async () => {
|
|
60
|
+
const entries = await scanMemoryCatalog(TEST_DIR, TEAMMATE);
|
|
61
|
+
expect(entries.length).toBe(2);
|
|
62
|
+
const names = entries.map((e) => e.name);
|
|
63
|
+
expect(names).toContain("project_goals");
|
|
64
|
+
expect(names).toContain("feedback_testing");
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
it("skips files without frontmatter", async () => {
|
|
68
|
+
const entries = await scanMemoryCatalog(TEST_DIR, TEAMMATE);
|
|
69
|
+
const uris = entries.map((e) => e.uri);
|
|
70
|
+
expect(uris).not.toContain(`${TEAMMATE}/memory/notes.md`);
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
it("skips daily logs", async () => {
|
|
74
|
+
const entries = await scanMemoryCatalog(TEST_DIR, TEAMMATE);
|
|
75
|
+
const uris = entries.map((e) => e.uri);
|
|
76
|
+
expect(uris).not.toContain(`${TEAMMATE}/memory/2026-03-21.md`);
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
it("returns empty array for nonexistent teammate", async () => {
|
|
80
|
+
const entries = await scanMemoryCatalog(TEST_DIR, "nobody");
|
|
81
|
+
expect(entries).toEqual([]);
|
|
82
|
+
});
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
// Frontmatter-vs-prompt matching: word overlap selects files, results carry
// stripped content, clamped scores, and the typed_memory content type.
describe("matchMemoryCatalog", () => {
  it("matches files whose frontmatter overlaps with the query", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "what are our project goals and features?",
    );
    expect(results.length).toBeGreaterThanOrEqual(1);
    // Highest-scoring match should be the goals file.
    expect(results[0].uri).toContain("project_goals");
  });

  it("matches on description keywords", async () => {
    // These words appear only in the description, not the name.
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "database testing integration",
    );
    expect(results.length).toBeGreaterThanOrEqual(1);
    const uris = results.map((r) => r.uri);
    expect(uris).toContain(`${TEAMMATE}/memory/feedback_testing.md`);
  });

  it("returns empty for unrelated queries", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "quantum physics dark matter",
    );
    expect(results.length).toBe(0);
  });

  it("strips frontmatter from result text", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "project goals features",
    );
    expect(results.length).toBeGreaterThanOrEqual(1);
    // The "---" delimiters must be gone; the body heading must survive.
    expect(results[0].text).not.toContain("---");
    expect(results[0].text).toContain("Goals");
  });

  it("assigns scores in the 0.85-0.95 range", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "project goals features teammates",
    );
    // Score formula is 0.85 + min(overlap * 0.02, 0.1) — always clamped.
    for (const r of results) {
      expect(r.score).toBeGreaterThanOrEqual(0.85);
      expect(r.score).toBeLessThanOrEqual(0.95);
    }
  });

  it("sets contentType to typed_memory", async () => {
    const results = await matchMemoryCatalog(
      TEST_DIR,
      TEAMMATE,
      "project goals",
    );
    for (const r of results) {
      expect(r.contentType).toBe("typed_memory");
    }
  });
});
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory frontmatter scanning for Pass 1 recall queries.
|
|
3
|
+
*
|
|
4
|
+
* Reads the teammate's memory file catalog (name + description from frontmatter)
|
|
5
|
+
* and does fast text matching against the task prompt. This is a lightweight,
|
|
6
|
+
* no-embedding relevance signal — "here's a menu of what I might know about."
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import * as fs from "node:fs/promises";
|
|
10
|
+
import * as path from "node:path";
|
|
11
|
+
import type { SearchResult } from "./search.js";
|
|
12
|
+
|
|
13
|
+
interface MemoryEntry {
|
|
14
|
+
/** Relative URI (e.g. "beacon/memory/project_goals.md") */
|
|
15
|
+
uri: string;
|
|
16
|
+
/** Absolute file path */
|
|
17
|
+
absolutePath: string;
|
|
18
|
+
/** Frontmatter name field */
|
|
19
|
+
name: string;
|
|
20
|
+
/** Frontmatter description field */
|
|
21
|
+
description: string;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Parse YAML-ish frontmatter from a markdown file's content.
|
|
26
|
+
* Returns name and description fields, or null if no frontmatter found.
|
|
27
|
+
*/
|
|
28
|
+
function parseFrontmatter(content: string): { name: string; description: string } | null {
|
|
29
|
+
const match = content.match(/^---\s*\n([\s\S]*?)\n---/);
|
|
30
|
+
if (!match) return null;
|
|
31
|
+
|
|
32
|
+
const fm = match[1];
|
|
33
|
+
const nameMatch = fm.match(/^name:\s*(.+)$/m);
|
|
34
|
+
const descMatch = fm.match(/^description:\s*(.+)$/m);
|
|
35
|
+
|
|
36
|
+
if (!nameMatch) return null;
|
|
37
|
+
|
|
38
|
+
return {
|
|
39
|
+
name: nameMatch[1].trim(),
|
|
40
|
+
description: descMatch?.[1]?.trim() ?? "",
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Scan a teammate's memory directory and build a catalog of memory entries
|
|
46
|
+
* with their frontmatter metadata.
|
|
47
|
+
*/
|
|
48
|
+
export async function scanMemoryCatalog(
|
|
49
|
+
teammatesDir: string,
|
|
50
|
+
teammate: string,
|
|
51
|
+
): Promise<MemoryEntry[]> {
|
|
52
|
+
const memoryDir = path.join(teammatesDir, teammate, "memory");
|
|
53
|
+
const entries: MemoryEntry[] = [];
|
|
54
|
+
|
|
55
|
+
try {
|
|
56
|
+
const files = await fs.readdir(memoryDir);
|
|
57
|
+
for (const file of files) {
|
|
58
|
+
if (!file.endsWith(".md")) continue;
|
|
59
|
+
// Skip daily logs (YYYY-MM-DD.md)
|
|
60
|
+
const stem = path.basename(file, ".md");
|
|
61
|
+
if (/^\d{4}-\d{2}-\d{2}$/.test(stem)) continue;
|
|
62
|
+
|
|
63
|
+
const absolutePath = path.join(memoryDir, file);
|
|
64
|
+
const content = await fs.readFile(absolutePath, "utf-8");
|
|
65
|
+
const fm = parseFrontmatter(content);
|
|
66
|
+
if (!fm) continue;
|
|
67
|
+
|
|
68
|
+
entries.push({
|
|
69
|
+
uri: `${teammate}/memory/${file}`,
|
|
70
|
+
absolutePath,
|
|
71
|
+
name: fm.name,
|
|
72
|
+
description: fm.description,
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
} catch {
|
|
76
|
+
// No memory/ directory
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
return entries;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Match task prompt text against memory catalog entries.
|
|
84
|
+
* Returns memory files whose name or description has significant word overlap
|
|
85
|
+
* with the task prompt. Each match is returned as a SearchResult with the
|
|
86
|
+
* file's full content.
|
|
87
|
+
*
|
|
88
|
+
* Matching is case-insensitive. A match requires at least one word from the
|
|
89
|
+
* task prompt appearing in the name or description.
|
|
90
|
+
*/
|
|
91
|
+
export async function matchMemoryCatalog(
|
|
92
|
+
teammatesDir: string,
|
|
93
|
+
teammate: string,
|
|
94
|
+
taskPrompt: string,
|
|
95
|
+
maxTokens = 500,
|
|
96
|
+
): Promise<SearchResult[]> {
|
|
97
|
+
const catalog = await scanMemoryCatalog(teammatesDir, teammate);
|
|
98
|
+
if (catalog.length === 0) return [];
|
|
99
|
+
|
|
100
|
+
// Tokenize the task prompt into lowercase words (3+ chars)
|
|
101
|
+
const promptWords = new Set(
|
|
102
|
+
taskPrompt
|
|
103
|
+
.toLowerCase()
|
|
104
|
+
.replace(/[^\w\s@/-]/g, " ")
|
|
105
|
+
.split(/\s+/)
|
|
106
|
+
.filter((w) => w.length > 2),
|
|
107
|
+
);
|
|
108
|
+
|
|
109
|
+
const results: SearchResult[] = [];
|
|
110
|
+
|
|
111
|
+
for (const entry of catalog) {
|
|
112
|
+
const catalogText = `${entry.name} ${entry.description}`.toLowerCase();
|
|
113
|
+
const catalogWords = catalogText
|
|
114
|
+
.replace(/[^\w\s@/_-]/g, " ")
|
|
115
|
+
.split(/\s+/)
|
|
116
|
+
.filter((w) => w.length > 2);
|
|
117
|
+
|
|
118
|
+
// Count overlapping words
|
|
119
|
+
let overlap = 0;
|
|
120
|
+
for (const w of catalogWords) {
|
|
121
|
+
if (promptWords.has(w)) overlap++;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Also check if prompt words appear as substrings in the catalog text
|
|
125
|
+
// (e.g., "goal" matches "project_goals")
|
|
126
|
+
for (const pw of promptWords) {
|
|
127
|
+
if (catalogText.includes(pw) && !catalogWords.includes(pw)) {
|
|
128
|
+
overlap += 0.5;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
if (overlap >= 1) {
|
|
133
|
+
// Read full file content for matched entries
|
|
134
|
+
const content = await fs.readFile(entry.absolutePath, "utf-8");
|
|
135
|
+
// Strip frontmatter from the content
|
|
136
|
+
const body = content.replace(/^---\s*\n[\s\S]*?\n---\s*\n?/, "").trim();
|
|
137
|
+
|
|
138
|
+
results.push({
|
|
139
|
+
teammate,
|
|
140
|
+
uri: entry.uri,
|
|
141
|
+
text: body.slice(0, maxTokens * 4), // rough token limit
|
|
142
|
+
score: 0.85 + Math.min(overlap * 0.02, 0.1), // 0.85-0.95 range
|
|
143
|
+
contentType: "typed_memory",
|
|
144
|
+
});
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Sort by score descending
|
|
149
|
+
results.sort((a, b) => b.score - a.score);
|
|
150
|
+
return results;
|
|
151
|
+
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { buildQueryVariations, extractKeywords } from "./query-expansion.js";
|
|
3
|
+
|
|
4
|
+
// Keyword extraction: stopword removal, short-token filtering, order-preserving
// dedupe, lowercasing, and preservation of @mentions / path-like tokens.
describe("extractKeywords", () => {
  it("removes stopwords", () => {
    const result = extractKeywords("the quick brown fox jumps over the lazy dog");
    expect(result).toContain("quick");
    expect(result).toContain("brown");
    expect(result).toContain("fox");
    expect(result).toContain("jumps");
    expect(result).toContain("lazy");
    expect(result).toContain("dog");
    expect(result).not.toContain("the");
    expect(result).not.toContain("over");
  });

  it("filters short tokens (length <= 2)", () => {
    const result = extractKeywords("an AI is a type of ML system");
    expect(result).not.toContain("an");
    expect(result).not.toContain("is");
    // "type" and "system" stay, "AI" and "ML" filtered (length 2)
    expect(result).toContain("type");
    expect(result).toContain("system");
  });

  it("deduplicates while preserving order", () => {
    const result = extractKeywords("recall search recall index search");
    // First occurrence wins; later repeats are dropped.
    expect(result).toEqual(["recall", "search", "index"]);
  });

  it("lowercases all output", () => {
    const result = extractKeywords("Update the HOOKS spec");
    expect(result).toContain("update");
    expect(result).toContain("hooks");
    expect(result).toContain("spec");
  });

  it("returns empty array for all-stopword input", () => {
    const result = extractKeywords("the is a");
    expect(result).toEqual([]);
  });

  it("preserves @mentions and paths", () => {
    // @, / and - are kept by the tokenizer, so these stay single tokens.
    const result = extractKeywords("deploy @pipeline src/hooks");
    expect(result).toContain("deploy");
    expect(result).toContain("@pipeline");
    expect(result).toContain("src/hooks");
  });
});
|
|
50
|
+
|
|
51
|
+
// Query variation building: original prompt always first, optional condensed
// keyword query for verbose prompts, optional conversation-derived query.
describe("buildQueryVariations", () => {
  it("always includes the original prompt as the first query", () => {
    const result = buildQueryVariations("fix the authentication bug");
    expect(result[0]).toBe("fix the authentication bug");
  });

  it("generates a keyword-focused query when prompt is verbose", () => {
    const verbose = "I want you to please update the recall search system so that it handles multiple queries at the same time and deduplicates the results properly";
    const result = buildQueryVariations(verbose);
    expect(result.length).toBeGreaterThanOrEqual(2);
    // The keyword query should be shorter than the original
    if (result.length > 1) {
      expect(result[1].length).toBeLessThan(verbose.length);
    }
  });

  it("adds a conversation-derived query when context is provided", () => {
    // Transcript entries are delimited by "**speaker:**" markers.
    const conversationContext = `## Conversation History

**stevenic:** lets talk about the CI pipeline and hooks

**pipeline:** CI Pipeline Hooks — Analysis`;
    const result = buildQueryVariations("what should we do next?", conversationContext);
    // Should have at least the original + conversation query
    expect(result.length).toBeGreaterThanOrEqual(2);
  });

  it("skips conversation query when no context", () => {
    const result = buildQueryVariations("short task");
    // Short prompts with few keywords may only produce 1 query
    expect(result.length).toBeGreaterThanOrEqual(1);
    expect(result[0]).toBe("short task");
  });

  it("does not generate keyword query when prompt is already concise", () => {
    const result = buildQueryVariations("recall search");
    // Very short — keyword query wouldn't differ meaningfully
    expect(result[0]).toBe("recall search");
  });
});
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight query expansion for Pass 1 recall queries.
|
|
3
|
+
*
|
|
4
|
+
* No LLM needed — uses stopword removal and basic text analysis
|
|
5
|
+
* to generate multiple query variations from a task prompt.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** Common English stopwords to filter from queries. */
|
|
9
|
+
const STOPWORDS = new Set([
|
|
10
|
+
"a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
|
|
11
|
+
"of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
|
|
12
|
+
"being", "have", "has", "had", "do", "does", "did", "will", "would",
|
|
13
|
+
"could", "should", "may", "might", "shall", "can", "need", "must",
|
|
14
|
+
"it", "its", "this", "that", "these", "those", "i", "you", "he", "she",
|
|
15
|
+
"we", "they", "me", "him", "her", "us", "them", "my", "your", "his",
|
|
16
|
+
"our", "their", "what", "which", "who", "whom", "where", "when", "how",
|
|
17
|
+
"why", "if", "then", "so", "not", "no", "just", "also", "very", "too",
|
|
18
|
+
"some", "any", "all", "each", "every", "both", "few", "more", "most",
|
|
19
|
+
"other", "into", "over", "after", "before", "between", "through",
|
|
20
|
+
"about", "up", "out", "off", "down", "here", "there", "again", "once",
|
|
21
|
+
"let", "lets", "let's", "get", "got", "go", "going", "make", "made",
|
|
22
|
+
"take", "took", "come", "came", "see", "saw", "know", "knew", "think",
|
|
23
|
+
"thought", "say", "said", "tell", "told", "ask", "asked", "want",
|
|
24
|
+
"wanted", "like", "look", "use", "used", "find", "give", "work",
|
|
25
|
+
]);
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Extract meaningful keywords from text by removing stopwords and short tokens.
|
|
29
|
+
* Returns lowercase keywords in order of appearance.
|
|
30
|
+
*/
|
|
31
|
+
export function extractKeywords(text: string): string[] {
|
|
32
|
+
const words = text
|
|
33
|
+
.toLowerCase()
|
|
34
|
+
.replace(/[^\w\s@/-]/g, " ")
|
|
35
|
+
.split(/\s+/)
|
|
36
|
+
.filter((w) => w.length > 2 && !STOPWORDS.has(w));
|
|
37
|
+
|
|
38
|
+
// Deduplicate while preserving order
|
|
39
|
+
const seen = new Set<string>();
|
|
40
|
+
const result: string[] = [];
|
|
41
|
+
for (const w of words) {
|
|
42
|
+
if (!seen.has(w)) {
|
|
43
|
+
seen.add(w);
|
|
44
|
+
result.push(w);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
return result;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Build multiple query variations from a task prompt and optional conversation context.
|
|
52
|
+
*
|
|
53
|
+
* Returns 1-3 queries:
|
|
54
|
+
* 1. The original task prompt (always)
|
|
55
|
+
* 2. A focused keyword query (if keywords differ meaningfully from the original)
|
|
56
|
+
* 3. A conversation-derived query (if recent conversation context is provided)
|
|
57
|
+
*/
|
|
58
|
+
export function buildQueryVariations(
|
|
59
|
+
taskPrompt: string,
|
|
60
|
+
conversationContext?: string,
|
|
61
|
+
): string[] {
|
|
62
|
+
const queries: string[] = [taskPrompt];
|
|
63
|
+
|
|
64
|
+
// Query 2: Focused keywords from the task prompt
|
|
65
|
+
const keywords = extractKeywords(taskPrompt);
|
|
66
|
+
if (keywords.length >= 2 && keywords.length <= 20) {
|
|
67
|
+
const keywordQuery = keywords.slice(0, 8).join(" ");
|
|
68
|
+
// Only add if meaningfully different from original
|
|
69
|
+
if (keywordQuery.length < taskPrompt.length * 0.7) {
|
|
70
|
+
queries.push(keywordQuery);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// Query 3: Recent conversation topic
|
|
75
|
+
if (conversationContext) {
|
|
76
|
+
const recentTopic = extractRecentTopic(conversationContext);
|
|
77
|
+
if (recentTopic) {
|
|
78
|
+
queries.push(recentTopic);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return queries;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Extract the most recent topic/theme from conversation context.
|
|
87
|
+
* Takes the last 1-2 meaningful entries and extracts keywords.
|
|
88
|
+
*/
|
|
89
|
+
function extractRecentTopic(conversationContext: string): string | null {
|
|
90
|
+
// Split on common conversation entry patterns
|
|
91
|
+
const entries = conversationContext
|
|
92
|
+
.split(/\n\*\*\w+:\*\*\s*/g)
|
|
93
|
+
.filter((e) => e.trim().length > 10);
|
|
94
|
+
|
|
95
|
+
if (entries.length === 0) return null;
|
|
96
|
+
|
|
97
|
+
// Take the last 1-2 entries (most recent conversation)
|
|
98
|
+
const recent = entries.slice(-2).join(" ");
|
|
99
|
+
const keywords = extractKeywords(recent);
|
|
100
|
+
|
|
101
|
+
if (keywords.length < 2) return null;
|
|
102
|
+
|
|
103
|
+
// Build a focused query from the recent conversation keywords
|
|
104
|
+
return keywords.slice(0, 6).join(" ");
|
|
105
|
+
}
|
package/src/search.ts
CHANGED
|
@@ -25,6 +25,14 @@ export interface SearchOptions {
|
|
|
25
25
|
typedMemoryBoost?: number;
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
/** Options for multi-query search with deduplication (see multiSearch). */
export interface MultiSearchOptions extends SearchOptions {
  /**
   * Additional queries beyond the primary (keyword-focused,
   * conversation-derived, etc.). These are run without the recency pass;
   * their results are deduplicated by URI against the primary results.
   */
  additionalQueries?: string[];
  /** Pre-matched memory catalog results to merge into the final set */
  catalogMatches?: SearchResult[];
}
|
|
35
|
+
|
|
28
36
|
export interface SearchResult {
|
|
29
37
|
teammate: string;
|
|
30
38
|
uri: string;
|
|
@@ -176,3 +184,61 @@ export async function search(
|
|
|
176
184
|
allResults.sort((a, b) => b.score - a.score);
|
|
177
185
|
return allResults.slice(0, maxResults + recencyDepth); // allow extra slots for recency results
|
|
178
186
|
}
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Multi-query search with deduplication and catalog merge.
|
|
190
|
+
*
|
|
191
|
+
* Fires the primary query plus any additional queries (keyword-focused,
|
|
192
|
+
* conversation-derived) and merges results. Catalog matches (from frontmatter
|
|
193
|
+
* text matching) are also merged. Deduplication is by URI — when the same
|
|
194
|
+
* URI appears from multiple queries, the highest score wins.
|
|
195
|
+
*/
|
|
196
|
+
export async function multiSearch(
|
|
197
|
+
primaryQuery: string,
|
|
198
|
+
options: MultiSearchOptions,
|
|
199
|
+
): Promise<SearchResult[]> {
|
|
200
|
+
const additionalQueries = options.additionalQueries ?? [];
|
|
201
|
+
const catalogMatches = options.catalogMatches ?? [];
|
|
202
|
+
const maxResults = options.maxResults ?? 5;
|
|
203
|
+
const recencyDepth = options.recencyDepth ?? 2;
|
|
204
|
+
|
|
205
|
+
// Fire all queries — primary gets full treatment (recency pass + semantic)
|
|
206
|
+
// Additional queries get semantic only (skipRecency to avoid duplicate weeklies)
|
|
207
|
+
const primaryResults = await search(primaryQuery, options);
|
|
208
|
+
|
|
209
|
+
// Collect all results keyed by URI, keeping highest score
|
|
210
|
+
const bestByUri = new Map<string, SearchResult>();
|
|
211
|
+
for (const r of primaryResults) {
|
|
212
|
+
const existing = bestByUri.get(r.uri);
|
|
213
|
+
if (!existing || r.score > existing.score) {
|
|
214
|
+
bestByUri.set(r.uri, r);
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
// Fire additional queries (reuse same search options minus recency to avoid dupes)
|
|
219
|
+
for (const query of additionalQueries) {
|
|
220
|
+
const results = await search(query, {
|
|
221
|
+
...options,
|
|
222
|
+
recencyDepth: 0, // primary already got the weekly summaries
|
|
223
|
+
});
|
|
224
|
+
for (const r of results) {
|
|
225
|
+
const existing = bestByUri.get(r.uri);
|
|
226
|
+
if (!existing || r.score > existing.score) {
|
|
227
|
+
bestByUri.set(r.uri, r);
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
// Merge catalog matches (frontmatter text-matched results)
|
|
233
|
+
for (const r of catalogMatches) {
|
|
234
|
+
const existing = bestByUri.get(r.uri);
|
|
235
|
+
if (!existing || r.score > existing.score) {
|
|
236
|
+
bestByUri.set(r.uri, r);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// Sort by score descending, return top results
|
|
241
|
+
const merged = [...bestByUri.values()];
|
|
242
|
+
merged.sort((a, b) => b.score - a.score);
|
|
243
|
+
return merged.slice(0, maxResults + recencyDepth);
|
|
244
|
+
}
|