@yeaft/webchat-agent 0.1.408 → 0.1.410

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,243 @@
1
+ /**
2
+ * recall.js — 3-step memory recall with fingerprint cache
3
+ *
4
+ * Recall flow (per design doc):
5
+ * Step 1: Keyword extraction (pure rules, <1ms)
6
+ * Step 2: Scope + Tags filter (read scopes.md, <5ms) → top 15 candidates
7
+ * Step 3: LLM select (side-query via adapter.call) → ≤7 most relevant
8
+ *
9
+ * Fingerprint cache:
10
+ * fingerprint = hash(scope, top 5 keywords, task_id)
11
+ * Same fingerprint → skip recall, reuse last result
12
+ *
13
+ * Reference: yeaft-unify-core-systems.md §3.2, yeaft-unify-design.md §5.1
14
+ */
15
+
16
+ import { createHash } from 'crypto';
17
+
18
+ // ─── Constants ──────────────────────────────────────────────────
19
+
20
/** Upper bound on the number of entries a single recall hands back. */
const MAX_RECALL_RESULTS = 7;

/** Upper bound on the candidates forwarded from Step 2 to the LLM select in Step 3. */
const MAX_CANDIDATES = 15;

// ─── Step 1: Keyword Extraction (pure rules, <1ms) ──────────────

/** English function words that carry no retrieval signal. */
const ENGLISH_STOP_WORDS = `
  the a an is are was were be been being
  have has had do does did will would could
  should may might can shall to of in for
  on with at by from as into through during
  before after above below between out off over
  under again further then once here there when
  where why how all both each few more most
  other some such no nor not only own same
  so than too very just because but and or
  if while about up it its my me i you
  your we our they them their this that what
  which who whom these those
`.trim().split(/\s+/);

/** Chinese stop words. */
const CHINESE_STOP_WORDS = `
  的 了 在 是 我 有 和 就 不 人 都
  一 一个 上 也 很 到 说 要 去 你 会
  着 没有 看 好 自己 这 他 她 吗 呢 吧
  把 被 那 它 让 给 可以 什么 怎么 帮
  帮我 请 能 想
`.trim().split(/\s+/);

/** Common stop words to filter out (English first, then Chinese). */
const STOP_WORDS = new Set([...ENGLISH_STOP_WORDS, ...CHINESE_STOP_WORDS]);
49
+
50
/**
 * Extract keywords from a prompt (pure rules, no LLM).
 *
 * The prompt is lower-cased, every run of characters that is neither a word
 * character nor in the CJK / kana ranges is turned into a separator, and the
 * remaining tokens are kept when they are longer than one character and not
 * in STOP_WORDS. An unbroken CJK run stays a single token (no segmentation).
 *
 * @param {string} prompt
 * @returns {string[]} — unique keywords, most frequent first; ties are ordered
 *   with String.prototype.localeCompare. Empty for blank or non-string input.
 */
export function extractKeywords(prompt) {
  // Anything that is not a string cannot be tokenized; report "no keywords"
  // rather than throwing on .trim().
  if (typeof prompt !== 'string' || !prompt.trim()) return [];

  // Tokenize: split on whitespace and punctuation (keep CJK chars)
  const tokens = prompt
    .toLowerCase()
    .replace(/[^\w\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff]+/g, ' ')
    .split(/\s+/)
    .filter((token) => token.length > 1 && !STOP_WORDS.has(token));

  // Count occurrences; Map insertion order is irrelevant because of the sort below.
  const counts = new Map();
  for (const token of tokens) {
    counts.set(token, (counts.get(token) || 0) + 1);
  }

  // Sort by frequency descending, then alphabetically
  return [...counts.entries()]
    .sort(([wordA, countA], [wordB, countB]) => countB - countA || wordA.localeCompare(wordB))
    .map(([word]) => word);
}
77
+
78
+ // ─── Fingerprint Cache ──────────────────────────────────────────
79
+
80
/**
 * Derive the cache key for a recall request.
 *
 * The key is the first 16 hex characters of the SHA-256 digest of
 * `scope|k1,k2,...|taskId`, where only the first five keywords take part.
 *
 * @param {{ scope?: string, keywords: string[], taskId?: string }} params
 * @returns {string} — 16-character hex string
 */
export function computeFingerprint({ scope = '', keywords, taskId = '' }) {
  // Keywords past the fifth never influence the fingerprint.
  const leadingKeywords = keywords.slice(0, 5);

  const hasher = createHash('sha256');
  hasher.update(`${scope}|${leadingKeywords.join(',')}|${taskId}`);

  const digest = hasher.digest('hex');
  return digest.slice(0, 16);
}
91
+
92
+ // ─── Step 2: Scope + Tags Filter ────────────────────────────────
93
+
94
/**
 * Step 2 of recall: ask the store for candidate entries (no LLM involved).
 *
 * Delegates to `memoryStore.findByFilter`, passing the extracted keywords as
 * the `tags` filter and MAX_CANDIDATES as the `limit`.
 *
 * @param {import('./store.js').MemoryStore} memoryStore
 * @param {{ scope?: string, keywords: string[] }} params
 * @returns {object[]} — whatever findByFilter returns for that query
 */
function filterCandidates(memoryStore, { scope, keywords }) {
  const query = { scope, tags: keywords, limit: MAX_CANDIDATES };
  return memoryStore.findByFilter(query);
}
109
+
110
+ // ─── Step 3: LLM Select ────────────────────────────────────────
111
+
112
/**
 * Use LLM side-query to select the most relevant entries.
 *
 * When there are at most MAX_RECALL_RESULTS candidates, every candidate name
 * is returned and the adapter is not called. Otherwise the candidates'
 * frontmatter is sent to the LLM, which is asked for a JSON array of names.
 *
 * The answer is validated before it is returned:
 *   - only names that belong to `candidates` are kept (no hallucinated names),
 *   - duplicates are dropped,
 *   - at most MAX_RECALL_RESULTS names are returned.
 * An explicit empty array from the LLM is honored as "nothing relevant".
 * If the call fails, the reply cannot be parsed, or no returned name is a
 * known candidate, the first MAX_RECALL_RESULTS candidates are used instead.
 *
 * @param {object} adapter — LLM adapter with .call() method
 * @param {object} config — { model }
 * @param {string} prompt — user's prompt
 * @param {object[]} candidates — entries with frontmatter
 * @returns {Promise<string[]>} — selected entry names
 */
async function llmSelect(adapter, config, prompt, candidates) {
  if (candidates.length <= MAX_RECALL_RESULTS) {
    // No need to filter if already under limit
    return candidates.map((c) => c.name);
  }

  const candidateList = candidates.map((c, i) =>
    `${i + 1}. [${c.name}] kind=${c.kind}, scope=${c.scope}, tags=[${(c.tags || []).join(', ')}]`
  ).join('\n');

  const system = `You are a memory retrieval assistant. Given a user's prompt and a list of memory entries, select the most relevant ones (up to ${MAX_RECALL_RESULTS}).
Return ONLY a JSON array of entry names, like: ["entry-name-1", "entry-name-2"]
No explanation, just the JSON array.`;

  const messages = [{
    role: 'user',
    content: `User prompt: "${prompt}"

Memory entries:
${candidateList}

Select the ${MAX_RECALL_RESULTS} most relevant entries. Return a JSON array of entry names.`,
  }];

  const knownNames = new Set(candidates.map((c) => c.name));

  try {
    const result = await adapter.call({
      model: config.model,
      system,
      messages,
      maxTokens: 512,
    });

    // Parse the JSON array from the response (tolerates surrounding prose).
    const text = result.text.trim();
    const jsonMatch = text.match(/\[[\s\S]*\]/);
    if (jsonMatch) {
      const parsed = JSON.parse(jsonMatch[0]);

      // An explicit empty selection means "nothing is relevant".
      if (parsed.length === 0) return [];

      const picked = [];
      for (const raw of parsed) {
        if (typeof raw !== 'string') continue;
        const name = knownNames.has(raw) ? raw : raw.trim();
        if (!knownNames.has(name) || picked.includes(name)) continue;
        picked.push(name);
        if (picked.length === MAX_RECALL_RESULTS) break;
      }

      // A non-empty answer with no usable name is treated like a failed call.
      if (picked.length > 0) return picked;
    }
  } catch {
    // Best effort: adapter or JSON failures fall through to the default below.
  }

  // Fallback: the first MAX_RECALL_RESULTS candidates in filter order.
  return candidates.slice(0, MAX_RECALL_RESULTS).map((c) => c.name);
}
166
+
167
+ // ─── Main Recall Function ───────────────────────────────────────
168
+
169
/**
 * Fingerprint → last recall result, private to this module.
 * @type {Map<string, { entries: object[], timestamp: number }>}
 */
const _cache = new Map();

/** Cache TTL — 5 minutes. */
const CACHE_TTL = 5 * 60 * 1000;

/**
 * Remove every cache record whose age has reached CACHE_TTL.
 * Without this, records for fingerprints that are never requested again
 * would stay in the Map for the lifetime of the process.
 *
 * @param {number} now — current time in ms since epoch
 */
function pruneExpiredCache(now) {
  for (const [key, record] of _cache) {
    if (now - record.timestamp >= CACHE_TTL) _cache.delete(key);
  }
}

/**
 * Recall relevant memory entries for a given prompt.
 *
 * 3-step process:
 *   1. Extract keywords (rules, <1ms)
 *   2. Scope + Tags filter → top 15 candidates
 *   3. LLM select → ≤7 entries (skipped if ≤7 candidates)
 *
 * Uses fingerprint cache to skip repeat recalls: a live cache hit returns the
 * stored entries as-is (no store reads, no frequency bump). On a miss, expired
 * cache records are evicted before the new result is stored.
 *
 * Each selected name is looked up first by its slug and then by the raw name;
 * the frequency bump uses whichever key actually resolved the entry, and an
 * entry reached through several names is loaded and bumped only once.
 *
 * @param {{ prompt: string, adapter: object, config: object, memoryStore: import('./store.js').MemoryStore, scope?: string, taskId?: string }} params
 * @returns {Promise<{ entries: object[], keywords: string[], fingerprint: string, cached: boolean }>}
 */
export async function recall({ prompt, adapter, config, memoryStore, scope, taskId }) {
  // Step 1: Extract keywords
  const keywords = extractKeywords(prompt);

  if (keywords.length === 0) {
    return { entries: [], keywords: [], fingerprint: '', cached: false };
  }

  // Check fingerprint cache
  const fingerprint = computeFingerprint({ scope, keywords, taskId });

  const cached = _cache.get(fingerprint);
  if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
    return { entries: cached.entries, keywords, fingerprint, cached: true };
  }

  // Cache miss: take the opportunity to drop records that have expired.
  pruneExpiredCache(Date.now());

  // Step 2: Scope + Tags filter
  const candidates = filterCandidates(memoryStore, { scope, keywords });

  if (candidates.length === 0) {
    _cache.set(fingerprint, { entries: [], timestamp: Date.now() });
    return { entries: [], keywords, fingerprint, cached: false };
  }

  // Step 3: LLM select (only if > MAX_RECALL_RESULTS candidates)
  const selectedNames = candidates.length <= MAX_RECALL_RESULTS
    ? candidates.map((c) => c.name)
    : await llmSelect(adapter, config, prompt, candidates);

  // Load full entries for selected names
  const entries = [];
  const loadedKeys = new Set();
  for (const name of selectedNames) {
    if (typeof name !== 'string') continue;

    const slug = name.toLowerCase().replace(/[^a-z0-9\u4e00-\u9fff-]+/g, '-').replace(/^-+|-+$/g, '');

    // Remember which key resolved the entry so the bump targets the same one.
    let key = slug;
    let entry = slug ? memoryStore.readEntry(slug) : null;
    if (!entry) {
      key = name;
      entry = memoryStore.readEntry(name);
    }
    if (!entry || loadedKeys.has(key)) continue;

    loadedKeys.add(key);
    entries.push(entry);
    // Bump frequency
    memoryStore.bumpFrequency(key);
  }

  // Update cache
  _cache.set(fingerprint, { entries, timestamp: Date.now() });

  return { entries, keywords, fingerprint, cached: false };
}

/**
 * Clear the recall cache. Useful for testing.
 */
export function clearRecallCache() {
  _cache.clear();
}
@@ -0,0 +1,273 @@
1
+ /**
2
+ * scan.js — Memory header scanning and scope/tag matching
3
+ *
4
+ * Fast in-memory scanning of entry frontmatter for:
5
+ * - Scope tree traversal
6
+ * - Tag overlap scoring
7
+ * - Kind-based filtering
8
+ * - Stale entry detection (for Dream)
9
+ *
10
+ * Reference: yeaft-unify-core-systems.md §3.3, yeaft-unify-design.md §5.1
11
+ */
12
+
13
+ import { KINDS, KIND_PRIORITY, IMPORTANCE_WEIGHT, getAncestorScopes } from './types.js';
14
+
15
+ // ─── Scan Results ──────────────────────────────────────────
16
+
17
+ /**
18
+ * @typedef {Object} ScanResult
19
+ * @property {object[]} entries — all parsed entries
20
+ * @property {Map<string, number>} scopeCount — scope → entry count
21
+ * @property {Map<string, number>} kindCount — kind → entry count
22
+ * @property {Map<string, Set<string>>} tagIndex — tag → set of entry names
23
+ * @property {number} totalEntries — total count
24
+ */
25
+
26
/**
 * Read every entry from a MemoryStore and index it three ways.
 *
 * - scopeCount: entries per scope (a missing scope counts as 'global')
 * - kindCount:  entries per kind (a missing kind counts as 'fact')
 * - tagIndex:   lower-cased tag → names of the entries carrying it
 *
 * @param {import('./store.js').MemoryStore} memoryStore
 * @returns {ScanResult}
 */
export function scanEntries(memoryStore) {
  const entries = memoryStore.listEntries();

  const scopeCount = new Map();
  const kindCount = new Map();
  const tagIndex = new Map();

  // Shared "+1" for both counters.
  const increment = (counter, key) => {
    counter.set(key, (counter.get(key) || 0) + 1);
  };

  for (const entry of entries) {
    increment(scopeCount, entry.scope || 'global');
    increment(kindCount, entry.kind || 'fact');

    for (const tag of entry.tags || []) {
      const normalized = tag.toLowerCase();
      const names = tagIndex.get(normalized);
      if (names) {
        names.add(entry.name);
      } else {
        tagIndex.set(normalized, new Set([entry.name]));
      }
    }
  }

  return { entries, scopeCount, kindCount, tagIndex, totalEntries: entries.length };
}
65
+
66
+ // ─── Scoring Functions ─────────────────────────────────────
67
+
68
/**
 * Compute a relevance score for one entry against a query context.
 *
 * Components, added in this order:
 *   - Scope (only when both scopes are set): exact match 5; entry scope is an
 *     ancestor of, or nested under, the context scope 3; entry scope is
 *     'global' 1; otherwise 0
 *   - Tags: 2 for every context tag the entry also has (case-insensitive)
 *   - Kind: KIND_PRIORITY[kind] × 0.5 (unknown kind → 0)
 *   - Preferred kind: +2 when the entry's kind is listed in preferKinds
 *   - Importance: IMPORTANCE_WEIGHT[importance] × 0.5 (falls back to `normal`)
 *   - Frequency: log2(frequency), with frequency treated as at least 1
 *   - Recency: +2 if updated within 7 days, +1 if within 30 days
 *
 * @param {object} entry — memory entry
 * @param {{ scope?: string, tags?: string[], preferKinds?: string[] }} context
 * @returns {number} — relevance score
 */
export function scoreEntry(entry, context = {}) {
  const { scope: wantedScope, tags: wantedTags, preferKinds } = context;
  let total = 0;

  // Scope
  if (wantedScope && entry.scope) {
    if (entry.scope === wantedScope) {
      total += 5;
    } else if (
      getAncestorScopes(wantedScope).includes(entry.scope) ||
      entry.scope.startsWith(wantedScope + '/')
    ) {
      total += 3;
    } else if (entry.scope === 'global') {
      total += 1;
    }
  }

  // Tags
  if (wantedTags && wantedTags.length > 0 && entry.tags) {
    const owned = new Set(entry.tags.map((tag) => tag.toLowerCase()));
    let matches = 0;
    for (const wanted of wantedTags) {
      if (owned.has(wanted.toLowerCase())) matches += 1;
    }
    total += matches * 2;
  }

  // Kind
  total += (KIND_PRIORITY[entry.kind] || 0) * 0.5;
  if (preferKinds && preferKinds.includes(entry.kind)) {
    total += 2;
  }

  // Importance
  total += (IMPORTANCE_WEIGHT[entry.importance] || IMPORTANCE_WEIGHT.normal) * 0.5;

  // Frequency (logarithmic, never negative)
  total += Math.log2(Math.max(entry.frequency || 1, 1));

  // Recency
  if (entry.updated_at) {
    const msPerDay = 1000 * 60 * 60 * 24;
    const ageDays = (Date.now() - new Date(entry.updated_at).getTime()) / msPerDay;
    if (ageDays <= 7) {
      total += 2;
    } else if (ageDays <= 30) {
      total += 1;
    }
  }

  return total;
}
138
+
139
+ // ─── Stale Detection (for Dream) ────────────────────────────
140
+
141
/**
 * Collect the entries that look stale.
 *
 * An entry is stale when any of these holds (age = days since `updated_at`;
 * a missing `updated_at` is measured from the Unix epoch):
 *   - kind is 'context' and age > 30 days
 *   - frequency is at most 1 (missing counts as 1) and age > 60 days
 *   - kind is 'relation' and age > 90 days
 *
 * @param {object[]} entries
 * @returns {object[]} — shallow copies of the stale entries, each with an
 *   added `_daysSinceUpdate` (age rounded to whole days)
 */
export function findStaleEntries(entries) {
  const msPerDay = 1000 * 60 * 60 * 24;
  const now = Date.now();
  const flagged = [];

  for (const entry of entries) {
    const stamp = entry.updated_at ? new Date(entry.updated_at).getTime() : 0;
    const ageDays = (now - stamp) / msPerDay;

    const expiredContext = entry.kind === 'context' && ageDays > 30;
    const neverRecalled = (entry.frequency || 1) <= 1 && ageDays > 60;
    const expiredRelation = entry.kind === 'relation' && ageDays > 90;

    if (expiredContext || neverRecalled || expiredRelation) {
      flagged.push({ ...entry, _daysSinceUpdate: Math.round(ageDays) });
    }
  }

  return flagged;
}
184
+
185
+ // ─── Duplicate Detection (for Dream Merge) ──────────────────
186
+
187
/**
 * Find groups of entries that are potentially duplicates.
 *
 * Walking the list in order, each entry that is not already in a group acts
 * as a seed; every later ungrouped entry with the same kind that shares ≥2
 * tags (case-insensitive) with the seed joins the seed's group. Membership is
 * decided against the seed only, so it is not transitive. Seeds that attract
 * nobody produce no group.
 *
 * @param {object[]} entries
 * @returns {object[][]} — groups of potentially duplicate entries (seed first)
 */
export function findDuplicateGroups(entries) {
  // Normalize each entry's tags once, instead of once per compared pair.
  const tagSets = Array.from(
    entries,
    (entry) => new Set((entry.tags || []).map((tag) => tag.toLowerCase())),
  );

  const groups = [];
  const grouped = new Set();

  for (let i = 0; i < entries.length; i++) {
    if (grouped.has(i)) continue;

    const group = [entries[i]];

    for (let j = i + 1; j < entries.length; j++) {
      if (grouped.has(j)) continue;
      if (entries[i].kind !== entries[j].kind) continue;

      let shared = 0;
      for (const tag of tagSets[i]) {
        if (tagSets[j].has(tag)) {
          shared++;
          if (shared >= 2) break; // threshold reached, no need to keep counting
        }
      }

      if (shared >= 2) {
        group.push(entries[j]);
        grouped.add(j);
      }
    }

    if (group.length > 1) {
      grouped.add(i);
      groups.push(group);
    }
  }

  return groups;
}
228
+
229
+ // ─── Stats Summary ──────────────────────────────────────────
230
+
231
/**
 * Render a scan as a short plain-text report (for Dream prompts).
 *
 * Lines, in order: total entry count; per-kind counts for the kinds listed in
 * KINDS that occur at least once; the ten most populated scopes; the twenty
 * most used tags. Sections with nothing to show are omitted.
 *
 * @param {ScanResult} scan
 * @returns {string}
 */
export function summarizeScan(scan) {
  const byCountDescending = (left, right) => right[1] - left[1];
  const report = [`Total entries: ${scan.totalEntries}`];

  // Kind breakdown, in KINDS order
  const kindParts = [...KINDS]
    .map((kind) => [kind, scan.kindCount.get(kind) || 0])
    .filter(([, count]) => count > 0)
    .map(([kind, count]) => `${kind}: ${count}`);
  if (kindParts.length > 0) {
    report.push(`Kinds: ${kindParts.join(', ')}`);
  }

  // Scope breakdown (top 10)
  const topScopes = [...scan.scopeCount.entries()].sort(byCountDescending).slice(0, 10);
  if (topScopes.length > 0) {
    report.push('Top scopes:');
    topScopes.forEach(([scope, count]) => {
      report.push(` ${scope}: ${count}`);
    });
  }

  // Tag cloud (top 20)
  const topTags = [...scan.tagIndex.entries()]
    .map(([tag, names]) => [tag, names.size])
    .sort(byCountDescending)
    .slice(0, 20);
  if (topTags.length > 0) {
    const cloud = topTags.map(([tag, count]) => `${tag}(${count})`).join(', ');
    report.push(`Top tags: ${cloud}`);
  }

  return report.join('\n');
}