@teammates/recall 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,21 +7,142 @@
7
7
 
8
8
  /** Common English stopwords to filter from queries. */
9
9
  const STOPWORDS = new Set([
10
- "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
11
- "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
12
- "being", "have", "has", "had", "do", "does", "did", "will", "would",
13
- "could", "should", "may", "might", "shall", "can", "need", "must",
14
- "it", "its", "this", "that", "these", "those", "i", "you", "he", "she",
15
- "we", "they", "me", "him", "her", "us", "them", "my", "your", "his",
16
- "our", "their", "what", "which", "who", "whom", "where", "when", "how",
17
- "why", "if", "then", "so", "not", "no", "just", "also", "very", "too",
18
- "some", "any", "all", "each", "every", "both", "few", "more", "most",
19
- "other", "into", "over", "after", "before", "between", "through",
20
- "about", "up", "out", "off", "down", "here", "there", "again", "once",
21
- "let", "lets", "let's", "get", "got", "go", "going", "make", "made",
22
- "take", "took", "come", "came", "see", "saw", "know", "knew", "think",
23
- "thought", "say", "said", "tell", "told", "ask", "asked", "want",
24
- "wanted", "like", "look", "use", "used", "find", "give", "work",
10
+ "a",
11
+ "an",
12
+ "the",
13
+ "and",
14
+ "or",
15
+ "but",
16
+ "in",
17
+ "on",
18
+ "at",
19
+ "to",
20
+ "for",
21
+ "of",
22
+ "with",
23
+ "by",
24
+ "from",
25
+ "is",
26
+ "are",
27
+ "was",
28
+ "were",
29
+ "be",
30
+ "been",
31
+ "being",
32
+ "have",
33
+ "has",
34
+ "had",
35
+ "do",
36
+ "does",
37
+ "did",
38
+ "will",
39
+ "would",
40
+ "could",
41
+ "should",
42
+ "may",
43
+ "might",
44
+ "shall",
45
+ "can",
46
+ "need",
47
+ "must",
48
+ "it",
49
+ "its",
50
+ "this",
51
+ "that",
52
+ "these",
53
+ "those",
54
+ "i",
55
+ "you",
56
+ "he",
57
+ "she",
58
+ "we",
59
+ "they",
60
+ "me",
61
+ "him",
62
+ "her",
63
+ "us",
64
+ "them",
65
+ "my",
66
+ "your",
67
+ "his",
68
+ "our",
69
+ "their",
70
+ "what",
71
+ "which",
72
+ "who",
73
+ "whom",
74
+ "where",
75
+ "when",
76
+ "how",
77
+ "why",
78
+ "if",
79
+ "then",
80
+ "so",
81
+ "not",
82
+ "no",
83
+ "just",
84
+ "also",
85
+ "very",
86
+ "too",
87
+ "some",
88
+ "any",
89
+ "all",
90
+ "each",
91
+ "every",
92
+ "both",
93
+ "few",
94
+ "more",
95
+ "most",
96
+ "other",
97
+ "into",
98
+ "over",
99
+ "after",
100
+ "before",
101
+ "between",
102
+ "through",
103
+ "about",
104
+ "up",
105
+ "out",
106
+ "off",
107
+ "down",
108
+ "here",
109
+ "there",
110
+ "again",
111
+ "once",
112
+ "let",
113
+ "lets",
114
+ "let's",
115
+ "get",
116
+ "got",
117
+ "go",
118
+ "going",
119
+ "make",
120
+ "made",
121
+ "take",
122
+ "took",
123
+ "come",
124
+ "came",
125
+ "see",
126
+ "saw",
127
+ "know",
128
+ "knew",
129
+ "think",
130
+ "thought",
131
+ "say",
132
+ "said",
133
+ "tell",
134
+ "told",
135
+ "ask",
136
+ "asked",
137
+ "want",
138
+ "wanted",
139
+ "like",
140
+ "look",
141
+ "use",
142
+ "used",
143
+ "find",
144
+ "give",
145
+ "work",
25
146
  ]);
26
147
 
27
148
  /**
@@ -3,8 +3,8 @@ import { tmpdir } from "node:os";
3
3
  import { join } from "node:path";
4
4
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
5
5
  import { Indexer } from "./indexer.js";
6
- import { classifyUri, multiSearch, search } from "./search.js";
7
6
  import type { SearchResult } from "./search.js";
7
+ import { classifyUri, multiSearch, search } from "./search.js";
8
8
 
9
9
  // Deterministic stub embeddings based on text content
10
10
  function stubCreateEmbeddings(inputs: string | string[]) {
@@ -138,10 +138,7 @@ describe("search", () => {
138
138
  join(weeklyDir, "2026-W11.md"),
139
139
  "# Week 11\nWorked on indexer.",
140
140
  );
141
- await writeFile(
142
- join(weeklyDir, "2026-W09.md"),
143
- "# Week 9\nOld stuff.",
144
- );
141
+ await writeFile(join(weeklyDir, "2026-W09.md"), "# Week 9\nOld stuff.");
145
142
 
146
143
  const results = await search("anything", {
147
144
  teammatesDir: testDir,
@@ -162,10 +159,7 @@ describe("search", () => {
162
159
  const weeklyDir = join(beacon, "memory", "weekly");
163
160
  await mkdir(weeklyDir, { recursive: true });
164
161
  await writeFile(join(beacon, "SOUL.md"), "# Beacon");
165
- await writeFile(
166
- join(weeklyDir, "2026-W11.md"),
167
- "# Week 11\nContent here.",
168
- );
162
+ await writeFile(join(weeklyDir, "2026-W11.md"), "# Week 11\nContent here.");
169
163
 
170
164
  const results = await search("anything", {
171
165
  teammatesDir: testDir,
package/src/search.ts CHANGED
@@ -1,244 +1,244 @@
1
- import * as fs from "node:fs/promises";
2
- import * as path from "node:path";
3
- import { LocalDocumentIndex } from "vectra";
4
- import { LocalEmbeddings } from "./embeddings.js";
5
- import { Indexer } from "./indexer.js";
6
-
7
- export interface SearchOptions {
8
- /** Path to the .teammates directory */
9
- teammatesDir: string;
10
- /** Teammate name to search (searches all if omitted) */
11
- teammate?: string;
12
- /** Max results per teammate (default: 5) */
13
- maxResults?: number;
14
- /** Max chunks per document (default: 3) */
15
- maxChunks?: number;
16
- /** Max tokens per section (default: 500) */
17
- maxTokens?: number;
18
- /** Embedding model name */
19
- model?: string;
20
- /** Skip auto-sync before searching (default: false) */
21
- skipSync?: boolean;
22
- /** Number of recent weekly summaries to always include (default: 2) */
23
- recencyDepth?: number;
24
- /** Relevance boost multiplier for typed memories over episodic summaries (default: 1.2) */
25
- typedMemoryBoost?: number;
26
- }
27
-
28
- /** Options for multi-query search with deduplication. */
29
- export interface MultiSearchOptions extends SearchOptions {
30
- /** Additional queries beyond the primary (keyword-focused, conversation-derived, etc.) */
31
- additionalQueries?: string[];
32
- /** Pre-matched memory catalog results to merge into the final set */
33
- catalogMatches?: SearchResult[];
34
- }
35
-
36
- export interface SearchResult {
37
- teammate: string;
38
- uri: string;
39
- text: string;
40
- score: number;
41
- /** Content type: "typed_memory", "weekly", "monthly", or "other" */
42
- contentType?: string;
43
- }
44
-
45
- /**
46
- * Classify a URI into a content type for priority scoring.
47
- */
48
- export function classifyUri(uri: string): string {
49
- if (uri.includes("/memory/weekly/")) return "weekly";
50
- if (uri.includes("/memory/monthly/")) return "monthly";
51
- // Typed memories are in memory/ but not daily logs (YYYY-MM-DD) and not in subdirs
52
- const memoryMatch = uri.match(/\/memory\/([^/]+)\.md$/);
53
- if (memoryMatch) {
54
- const stem = memoryMatch[1];
55
- if (/^\d{4}-\d{2}-\d{2}$/.test(stem)) return "daily";
56
- return "typed_memory";
57
- }
58
- return "other";
59
- }
60
-
61
- /**
62
- * Search teammate memories using multi-pass retrieval.
63
- *
64
- * Pass 1 (Recency): Always returns the N most recent weekly summaries.
65
- * Pass 2 (Semantic): Query-driven search across all indexed content.
66
- * Results are merged, deduped, and typed memories get a relevance boost.
67
- */
68
- export async function search(
69
- query: string,
70
- options: SearchOptions,
71
- ): Promise<SearchResult[]> {
72
- const embeddings = new LocalEmbeddings(options.model);
73
- const indexer = new Indexer({
74
- teammatesDir: options.teammatesDir,
75
- model: options.model,
76
- });
77
- const maxResults = options.maxResults ?? 5;
78
- const maxChunks = options.maxChunks ?? 3;
79
- const maxTokens = options.maxTokens ?? 500;
80
- const recencyDepth = options.recencyDepth ?? 2;
81
- const typedMemoryBoost = options.typedMemoryBoost ?? 1.2;
82
-
83
- // Auto-sync: upsert any new/changed files before searching
84
- if (!options.skipSync) {
85
- if (options.teammate) {
86
- await indexer.syncTeammate(options.teammate);
87
- } else {
88
- await indexer.syncAll();
89
- }
90
- }
91
-
92
- // Determine which teammates to search
93
- let teammates: string[];
94
- if (options.teammate) {
95
- teammates = [options.teammate];
96
- } else {
97
- teammates = await indexer.discoverTeammates();
98
- }
99
-
100
- const allResults: SearchResult[] = [];
101
- const seenUris = new Set<string>();
102
-
103
- // ── Pass 1: Recency (recent weekly summaries, always included) ───
104
- for (const teammate of teammates) {
105
- const weeklyDir = path.join(
106
- options.teammatesDir,
107
- teammate,
108
- "memory",
109
- "weekly",
110
- );
111
- try {
112
- const entries = await fs.readdir(weeklyDir);
113
- const weeklyFiles = entries
114
- .filter((e) => e.endsWith(".md"))
115
- .sort()
116
- .reverse()
117
- .slice(0, recencyDepth);
118
-
119
- for (const file of weeklyFiles) {
120
- const uri = `${teammate}/memory/weekly/${file}`;
121
- const text = await fs.readFile(path.join(weeklyDir, file), "utf-8");
122
- if (text.trim().length === 0) continue;
123
- seenUris.add(uri);
124
- allResults.push({
125
- teammate,
126
- uri,
127
- text: text.slice(0, maxTokens * 4), // rough token estimate
128
- score: 0.9, // high base score for recency results
129
- contentType: "weekly",
130
- });
131
- }
132
- } catch {
133
- // No weekly/ directory for this teammate
134
- }
135
- }
136
-
137
- // ── Pass 2: Semantic (query-driven across all indexed content) ───
138
- for (const teammate of teammates) {
139
- const indexPath = indexer.indexPath(teammate);
140
- try {
141
- await fs.access(indexPath);
142
- } catch {
143
- continue;
144
- }
145
-
146
- const index = new LocalDocumentIndex({
147
- folderPath: indexPath,
148
- embeddings,
149
- });
150
-
151
- if (!(await index.isIndexCreated())) continue;
152
-
153
- const docs = await index.queryDocuments(query, {
154
- maxDocuments: maxResults,
155
- maxChunks,
156
- });
157
-
158
- for (const doc of docs) {
159
- if (seenUris.has(doc.uri)) continue; // dedup with recency pass
160
- seenUris.add(doc.uri);
161
-
162
- const sections = await doc.renderSections(maxTokens, 1);
163
- const contentType = classifyUri(doc.uri);
164
-
165
- for (const section of sections) {
166
- let score = section.score;
167
- // Apply type-based priority boost for typed memories
168
- if (contentType === "typed_memory") {
169
- score *= typedMemoryBoost;
170
- }
171
-
172
- allResults.push({
173
- teammate,
174
- uri: doc.uri,
175
- text: section.text,
176
- score,
177
- contentType,
178
- });
179
- }
180
- }
181
- }
182
-
183
- // Sort by score descending, return top results
184
- allResults.sort((a, b) => b.score - a.score);
185
- return allResults.slice(0, maxResults + recencyDepth); // allow extra slots for recency results
186
- }
187
-
188
- /**
189
- * Multi-query search with deduplication and catalog merge.
190
- *
191
- * Fires the primary query plus any additional queries (keyword-focused,
192
- * conversation-derived) and merges results. Catalog matches (from frontmatter
193
- * text matching) are also merged. Deduplication is by URI — when the same
194
- * URI appears from multiple queries, the highest score wins.
195
- */
196
- export async function multiSearch(
197
- primaryQuery: string,
198
- options: MultiSearchOptions,
199
- ): Promise<SearchResult[]> {
200
- const additionalQueries = options.additionalQueries ?? [];
201
- const catalogMatches = options.catalogMatches ?? [];
202
- const maxResults = options.maxResults ?? 5;
203
- const recencyDepth = options.recencyDepth ?? 2;
204
-
205
- // Fire all queries — primary gets full treatment (recency pass + semantic)
206
- // Additional queries get semantic only (skipRecency to avoid duplicate weeklies)
207
- const primaryResults = await search(primaryQuery, options);
208
-
209
- // Collect all results keyed by URI, keeping highest score
210
- const bestByUri = new Map<string, SearchResult>();
211
- for (const r of primaryResults) {
212
- const existing = bestByUri.get(r.uri);
213
- if (!existing || r.score > existing.score) {
214
- bestByUri.set(r.uri, r);
215
- }
216
- }
217
-
218
- // Fire additional queries (reuse same search options minus recency to avoid dupes)
219
- for (const query of additionalQueries) {
220
- const results = await search(query, {
221
- ...options,
222
- recencyDepth: 0, // primary already got the weekly summaries
223
- });
224
- for (const r of results) {
225
- const existing = bestByUri.get(r.uri);
226
- if (!existing || r.score > existing.score) {
227
- bestByUri.set(r.uri, r);
228
- }
229
- }
230
- }
231
-
232
- // Merge catalog matches (frontmatter text-matched results)
233
- for (const r of catalogMatches) {
234
- const existing = bestByUri.get(r.uri);
235
- if (!existing || r.score > existing.score) {
236
- bestByUri.set(r.uri, r);
237
- }
238
- }
239
-
240
- // Sort by score descending, return top results
241
- const merged = [...bestByUri.values()];
242
- merged.sort((a, b) => b.score - a.score);
243
- return merged.slice(0, maxResults + recencyDepth);
244
- }
1
+ import * as fs from "node:fs/promises";
2
+ import * as path from "node:path";
3
+ import { LocalDocumentIndex } from "vectra";
4
+ import { LocalEmbeddings } from "./embeddings.js";
5
+ import { Indexer } from "./indexer.js";
6
+
7
+ export interface SearchOptions {
8
+ /** Path to the .teammates directory (each teammate's files live in a subdirectory of it) */
9
+ teammatesDir: string;
10
+ /** Teammate name to search (all discovered teammates are searched if omitted) */
11
+ teammate?: string;
12
+ /** Max documents fetched per teammate by the semantic pass (default: 5); the returned list is capped at maxResults + recencyDepth */
13
+ maxResults?: number;
14
+ /** Max chunks per document passed to the index query (default: 3) */
15
+ maxChunks?: number;
16
+ /** Max tokens per rendered section (default: 500); recency-pass text is cut at maxTokens * 4 characters */
17
+ maxTokens?: number;
18
+ /** Embedding model name, forwarded to LocalEmbeddings and Indexer */
19
+ model?: string;
20
+ /** Skip auto-sync (upsert of new/changed files) before searching (default: false) */
21
+ skipSync?: boolean;
22
+ /** Number of most recent weekly summaries collected per teammate by the recency pass (default: 2) */
23
+ recencyDepth?: number;
24
+ /** Score multiplier applied to results classified as "typed_memory" (default: 1.2) */
25
+ typedMemoryBoost?: number;
26
+ }
27
+
28
+ /** Options for multi-query search (multiSearch) with deduplication by URI. */
29
+ export interface MultiSearchOptions extends SearchOptions {
30
+ /** Additional queries beyond the primary (keyword-focused, conversation-derived, etc.); each is searched with recencyDepth: 0 */
31
+ additionalQueries?: string[];
32
+ /** Pre-matched memory catalog results to merge into the final set (per URI, the highest score wins) */
33
+ catalogMatches?: SearchResult[];
34
+ }
35
+
36
+ export interface SearchResult {
37
+ teammate: string;
38
+ uri: string;
39
+ text: string;
40
+ score: number;
41
+ /** Content type as produced by classifyUri: "typed_memory", "daily", "weekly", "monthly", or "other" */
42
+ contentType?: string;
43
+ }
44
+
45
+ /**
46
+ * Classify a URI as "weekly", "monthly", "daily", "typed_memory", or "other" for priority scoring.
47
+ */
48
+ export function classifyUri(uri: string): string {
49
+ if (uri.includes("/memory/weekly/")) return "weekly";
50
+ if (uri.includes("/memory/monthly/")) return "monthly";
51
+ // A .md file directly under memory/ (no subdirectory) is a daily log when its stem is YYYY-MM-DD, otherwise a typed memory
52
+ const memoryMatch = uri.match(/\/memory\/([^/]+)\.md$/);
53
+ if (memoryMatch) {
54
+ const stem = memoryMatch[1];
55
+ if (/^\d{4}-\d{2}-\d{2}$/.test(stem)) return "daily";
56
+ return "typed_memory";
57
+ }
58
+ return "other";
59
+ }
60
+
61
+ /**
62
+ * Search teammate memories using multi-pass retrieval.
63
+ *
64
+ * Pass 1 (Recency): Collects the N most recent non-empty weekly summaries per teammate at a fixed score of 0.9.
65
+ * Pass 2 (Semantic): Query-driven search across each teammate's index, skipping URIs already collected.
66
+ * Results are merged, sorted by score, and capped at maxResults + recencyDepth; typed memories get a relevance boost.
67
+ */
68
+ export async function search(
69
+ query: string,
70
+ options: SearchOptions,
71
+ ): Promise<SearchResult[]> {
72
+ const embeddings = new LocalEmbeddings(options.model);
73
+ const indexer = new Indexer({
74
+ teammatesDir: options.teammatesDir,
75
+ model: options.model,
76
+ });
77
+ const maxResults = options.maxResults ?? 5;
78
+ const maxChunks = options.maxChunks ?? 3;
79
+ const maxTokens = options.maxTokens ?? 500;
80
+ const recencyDepth = options.recencyDepth ?? 2;
81
+ const typedMemoryBoost = options.typedMemoryBoost ?? 1.2;
82
+
83
+ // Auto-sync: upsert any new/changed files before searching (one teammate, or all when none is named)
84
+ if (!options.skipSync) {
85
+ if (options.teammate) {
86
+ await indexer.syncTeammate(options.teammate);
87
+ } else {
88
+ await indexer.syncAll();
89
+ }
90
+ }
91
+
92
+ // Determine which teammates to search
93
+ let teammates: string[];
94
+ if (options.teammate) {
95
+ teammates = [options.teammate];
96
+ } else {
97
+ teammates = await indexer.discoverTeammates();
98
+ }
99
+
100
+ const allResults: SearchResult[] = [];
101
+ const seenUris = new Set<string>();
102
+
103
+ // ── Pass 1: Recency (most recent weekly summaries per teammate) ───
104
+ for (const teammate of teammates) {
105
+ const weeklyDir = path.join(
106
+ options.teammatesDir,
107
+ teammate,
108
+ "memory",
109
+ "weekly",
110
+ );
111
+ try {
112
+ const entries = await fs.readdir(weeklyDir);
113
+ const weeklyFiles = entries
114
+ .filter((e) => e.endsWith(".md"))
115
+ .sort()
116
+ .reverse()
117
+ .slice(0, recencyDepth);
118
+
119
+ for (const file of weeklyFiles) {
120
+ const uri = `${teammate}/memory/weekly/${file}`;
121
+ const text = await fs.readFile(path.join(weeklyDir, file), "utf-8");
122
+ if (text.trim().length === 0) continue;
123
+ seenUris.add(uri);
124
+ allResults.push({
125
+ teammate,
126
+ uri,
127
+ text: text.slice(0, maxTokens * 4), // rough token estimate (about 4 characters per token)
128
+ score: 0.9, // high base score for recency results
129
+ contentType: "weekly",
130
+ });
131
+ }
132
+ } catch {
133
+ // Best effort: a missing weekly/ directory or an unreadable file ends this teammate's recency pass
134
+ }
135
+ }
136
+
137
+ // ── Pass 2: Semantic (query-driven across all indexed content) ───
138
+ for (const teammate of teammates) {
139
+ const indexPath = indexer.indexPath(teammate);
140
+ try {
141
+ await fs.access(indexPath);
142
+ } catch {
143
+ continue;
144
+ }
145
+
146
+ const index = new LocalDocumentIndex({
147
+ folderPath: indexPath,
148
+ embeddings,
149
+ });
150
+
151
+ if (!(await index.isIndexCreated())) continue;
152
+
153
+ const docs = await index.queryDocuments(query, {
154
+ maxDocuments: maxResults,
155
+ maxChunks,
156
+ });
157
+
158
+ for (const doc of docs) {
159
+ if (seenUris.has(doc.uri)) continue; // dedup with recency pass (assumes index URIs use the same teammate/memory/weekly/file form — TODO confirm)
160
+ seenUris.add(doc.uri);
161
+
162
+ const sections = await doc.renderSections(maxTokens, 1);
163
+ const contentType = classifyUri(doc.uri);
164
+
165
+ for (const section of sections) {
166
+ let score = section.score;
167
+ // Apply type-based priority boost for typed memories
168
+ if (contentType === "typed_memory") {
169
+ score *= typedMemoryBoost;
170
+ }
171
+
172
+ allResults.push({
173
+ teammate,
174
+ uri: doc.uri,
175
+ text: section.text,
176
+ score,
177
+ contentType,
178
+ });
179
+ }
180
+ }
181
+ }
182
+
183
+ // Sort by score descending, return top results
184
+ allResults.sort((a, b) => b.score - a.score);
185
+ return allResults.slice(0, maxResults + recencyDepth); // allow extra slots for recency results
186
+ }
187
+
188
+ /**
189
+ * Multi-query search with deduplication and catalog merge.
190
+ *
191
+ * Fires the primary query plus any additional queries (keyword-focused,
192
+ * conversation-derived) and merges results. Catalog matches (from frontmatter
193
+ * text matching) are also merged. Deduplication is by URI — when the same
194
+ * URI appears from multiple sources, the highest score wins.
195
+ */
196
+ export async function multiSearch(
197
+ primaryQuery: string,
198
+ options: MultiSearchOptions,
199
+ ): Promise<SearchResult[]> {
200
+ const additionalQueries = options.additionalQueries ?? [];
201
+ const catalogMatches = options.catalogMatches ?? [];
202
+ const maxResults = options.maxResults ?? 5;
203
+ const recencyDepth = options.recencyDepth ?? 2;
204
+
205
+ // Fire all queries — primary gets full treatment (recency pass + semantic)
206
+ // Additional queries get semantic only (recencyDepth: 0 to avoid duplicate weeklies)
207
+ const primaryResults = await search(primaryQuery, options);
208
+
209
+ // Collect all results keyed by URI, keeping highest score
210
+ const bestByUri = new Map<string, SearchResult>();
211
+ for (const r of primaryResults) {
212
+ const existing = bestByUri.get(r.uri);
213
+ if (!existing || r.score > existing.score) {
214
+ bestByUri.set(r.uri, r);
215
+ }
216
+ }
217
+
218
+ // Fire additional queries one at a time (same options minus recency); each call re-runs auto-sync unless skipSync is set
219
+ for (const query of additionalQueries) {
220
+ const results = await search(query, {
221
+ ...options,
222
+ recencyDepth: 0, // primary already got the weekly summaries
223
+ });
224
+ for (const r of results) {
225
+ const existing = bestByUri.get(r.uri);
226
+ if (!existing || r.score > existing.score) {
227
+ bestByUri.set(r.uri, r);
228
+ }
229
+ }
230
+ }
231
+
232
+ // Merge catalog matches (frontmatter text-matched results)
233
+ for (const r of catalogMatches) {
234
+ const existing = bestByUri.get(r.uri);
235
+ if (!existing || r.score > existing.score) {
236
+ bestByUri.set(r.uri, r);
237
+ }
238
+ }
239
+
240
+ // Sort by score descending, return the top maxResults + recencyDepth results
241
+ const merged = [...bestByUri.values()];
242
+ merged.sort((a, b) => b.score - a.score);
243
+ return merged.slice(0, maxResults + recencyDepth);
244
+ }