agentic-kdd 3.5.6 → 3.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,361 @@
1
+ 'use strict';
2
+ /**
3
+ * Agentic KDD — Memory Curator v2.0
4
+ * Curación automática real: deduplicación, scoring por relevancia, expiración.
5
+ *
6
+ * Principio: 30 entradas precisas > 300 entradas ruidosas.
7
+ *
8
+ * Uso:
9
+ * node .agentic/grafo/mem-curator.cjs run → cura completa
10
+ * node .agentic/grafo/mem-curator.cjs report → solo reporte, no modifica
11
+ * node .agentic/grafo/mem-curator.cjs dedup → solo deduplicar
12
+ * node .agentic/grafo/mem-curator.cjs score → solo recalcular scores
13
+ * node .agentic/grafo/mem-curator.cjs expire → solo expirar entradas viejas
14
+ */
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+
19
// All memory files live under <current working directory>/.agentic/memoria.
const ROOT = process.cwd();
const MEMORIA_DIR = path.join(ROOT, '.agentic', 'memoria');

// One markdown file per memory category (errors, patterns, decisions).
const [ERRORES_FILE, PATRONES_FILE, DECISIONES_FILE] = [
  'errores.md',
  'patrones.md',
  'decisiones.md',
].map((fileName) => path.join(MEMORIA_DIR, fileName));
24
+
25
+ // ── Configuración de curación ───────────────────────────────────────────────
26
+
27
/**
 * Tuning values for the curation pipeline.
 */
const CONFIG = {
  // Maximum entry age in days used by expireEntries: the LOW limit applies to
  // entries scoring below 40, the HIGH limit to entries scoring 40 or more.
  EXPIRY_DAYS_LOW_SCORE: 90,
  EXPIRY_DAYS_HIGH_SCORE: 365,

  // Per-file entry caps; above the cap the lowest-scored entries are dropped.
  MAX_ENTRIES_ERRORS: 50,
  MAX_ENTRIES_PATTERNS: 40,
  MAX_ENTRIES_DECISIONS: 30,

  // Cosine similarity (0-1) at or above which two entries count as duplicates.
  SIMILARITY_THRESHOLD: 0.75,
};
38
+
39
+ // ── Parser de archivos de memoria ───────────────────────────────────────────
40
+
41
/**
 * Parses a markdown memory file into scored entry objects.
 *
 * The file is split before every line that starts a `###` heading (optionally
 * preceded by a `---` separator line). Chunks without a `###` heading, such as
 * the file header, are ignored.
 *
 * @param {string} filePath - Path of the markdown file to read.
 * @returns {Array<{title: string, raw: string, confidence: string,
 *   date: (string|null), references: number, module: string, score: number}>}
 *   One object per entry; an empty array when the file does not exist.
 */
function parseMemoryFile(filePath) {
  if (!fs.existsSync(filePath)) return [];

  const content = fs.readFileSync(filePath, 'utf8');
  const entries = [];

  // Split on the newline that precedes an entry heading or a `---` + heading pair.
  const blocks = content.split(/\n(?=###\s|---\s*\n###)/);

  for (const block of blocks) {
    const titleMatch = block.match(/###\s+(.+)/);
    if (!titleMatch) continue;

    // A non-numeric reference count must not become NaN: NaN would propagate
    // into the score and break sorting and the score-based thresholds.
    const parsedReferences = Number.parseInt(extractField(block, 'referencias') || '0', 10);

    const rawConfidence =
      extractField(block, 'confianza') || extractField(block, 'confidence') || 'MEDIA';

    const entry = {
      title: titleMatch[1].trim(),
      raw: block,
      // Scoring and expiry compare against upper-case labels, so normalise here
      // to treat "alta" / "Alta" the same as "ALTA".
      confidence: rawConfidence.toUpperCase(),
      date: extractField(block, 'fecha') || extractField(block, 'date') || null,
      references: Number.isNaN(parsedReferences) ? 0 : parsedReferences,
      module: extractField(block, 'módulo') || extractField(block, 'module') || 'global',
      score: 0,
    };

    entry.score = computeScore(entry);
    entries.push(entry);
  }

  return entries;
}
70
+
71
/**
 * Extracts the value of a bold markdown field written as `**field**: value`.
 *
 * The field name is matched literally and case-insensitively. The value is the
 * rest of the same line, trimmed. A field with nothing after the colon on its
 * own line yields `null` instead of swallowing the following line.
 *
 * @param {string} text - Markdown text to search.
 * @param {string} field - Field label, without the surrounding asterisks.
 * @returns {string|null} The trimmed value of the first match, or `null`.
 */
function extractField(text, field) {
  // Escape regex metacharacters so labels such as "c++" or "a.b" are matched
  // as plain text instead of throwing or matching unrelated labels.
  const literalField = String(field).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // `[^\S\r\n]*` skips spaces/tabs only: unlike `\s*` it never crosses a line
  // break, so an empty field cannot capture the next line as its value.
  const pattern = new RegExp(`\\*\\*${literalField}\\*\\*:[^\\S\\r\\n]*(.+)`, 'i');
  const match = text.match(pattern);
  return match ? match[1].trim() : null;
}
76
+
77
+ // ── Scoring ──────────────────────────────────────────────────────────────────
78
+
79
/**
 * Computes the relevance score of a memory entry.
 *
 * Points: confidence (ALTA 40, MEDIA 20, BAJA 5), plus 5 per reference capped
 * at 30, minus 10 when older than 180 days and a further 20 when older than
 * 365 days, plus 5 for entries of the `global` module. Never below 0.
 *
 * @param {{confidence: string, references: number, date: (string|null), module: string}} entry
 * @returns {number} Score, floored at 0.
 */
function computeScore(entry) {
  let score = 0;

  // Base points from the confidence label.
  switch (entry.confidence) {
    case 'ALTA':
      score += 40;
      break;
    case 'MEDIA':
      score += 20;
      break;
    case 'BAJA':
      score += 5;
      break;
    default:
      break;
  }

  // Accumulated references. A missing, non-numeric or negative count is
  // treated as 0 so it can neither produce NaN nor subtract points.
  const references = Number(entry.references);
  const usableReferences = Number.isNaN(references) || references < 0 ? 0 : references;
  score += Math.min(usableReferences * 5, 30);

  // Age penalty; both steps apply to entries older than a year.
  if (entry.date) {
    const daysSince = daysSinceDate(entry.date);
    if (daysSince > 180) score -= 10;
    if (daysSince > 365) score -= 20;
  }

  // Bonus for entries that are not tied to a specific module.
  if (entry.module === 'global') score += 5;

  return Math.max(0, score);
}
102
+
103
/**
 * Returns the number of whole days elapsed since the given date.
 *
 * @param {string} dateStr - Date in any format accepted by the `Date` constructor.
 * @returns {number} Whole days between the date and now (negative for future
 *   dates); 0 when the value cannot be interpreted as a date.
 */
function daysSinceDate(dateStr) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  try {
    const timestamp = new Date(dateStr).getTime();
    // An unparseable string does not throw: it yields an Invalid Date whose
    // timestamp is NaN, so the fallback has to be applied explicitly.
    if (Number.isNaN(timestamp)) return 0;
    return Math.floor((Date.now() - timestamp) / MS_PER_DAY);
  } catch {
    // Values that cannot be converted at all (e.g. a Symbol) do throw.
    return 0;
  }
}
109
+
110
+ // ── Deduplicación ────────────────────────────────────────────────────────────
111
+
112
/**
 * Cosine similarity between the word-frequency vectors of two texts.
 *
 * Both texts are tokenised with `tokenize`; each distinct word is one
 * dimension and its occurrence count is the coordinate.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Similarity in [0, 1]; 0 when either text has no tokens.
 */
function cosineSimilarity(a, b) {
  // Count occurrences once per text instead of re-scanning the token list for
  // every vocabulary word (the scan made the comparison quadratic).
  const countWords = (words) => {
    const counts = new Map();
    for (const word of words) {
      counts.set(word, (counts.get(word) || 0) + 1);
    }
    return counts;
  };

  const countsA = countWords(tokenize(a));
  const countsB = countWords(tokenize(b));

  let dot = 0;
  let squaresA = 0;
  for (const [word, count] of countsA) {
    squaresA += count * count;
    // Words missing from the other text contribute 0 to the dot product.
    dot += count * (countsB.get(word) || 0);
  }

  let squaresB = 0;
  for (const count of countsB.values()) {
    squaresB += count * count;
  }

  const magA = Math.sqrt(squaresA);
  const magB = Math.sqrt(squaresB);

  return (magA && magB) ? dot / (magA * magB) : 0;
}
126
+
127
/**
 * Splits text into lower-case words for similarity comparison.
 *
 * Every character other than a-z, 0-9, the accented letters áéíóúñü and
 * whitespace is replaced by a space; words of three characters or fewer are
 * discarded.
 *
 * @param {string} text - Text to tokenise.
 * @returns {string[]} Words longer than three characters, in order of appearance.
 */
function tokenize(text) {
  const lowered = text.toLowerCase();
  const cleaned = lowered.replace(/[^a-z0-9áéíóúñü\s]/g, ' ');

  const words = [];
  for (const candidate of cleaned.split(/\s+/)) {
    if (candidate.length > 3) {
      words.push(candidate);
    }
  }
  return words;
}
133
+
134
/**
 * Removes near-duplicate entries.
 *
 * Each entry is compared, by cosine similarity of its title plus raw text,
 * with the entries kept so far. On the first kept entry that reaches
 * `CONFIG.SIMILARITY_THRESHOLD`, the one with the strictly higher score
 * survives in that position and the other is reported as removed.
 *
 * @param {Array<{title: string, raw: string, score: number}>} entries
 * @returns {{kept: Array<object>, removed: Array<object>}}
 */
function deduplicateEntries(entries) {
  const comparableText = (item) => item.title + ' ' + item.raw;
  const kept = [];
  const removed = [];

  for (let index = 0; index < entries.length; index += 1) {
    const candidate = entries[index];

    // Position of the first surviving entry that is similar enough, or -1.
    const twinAt = kept.findIndex(
      (survivor) =>
        cosineSimilarity(comparableText(candidate), comparableText(survivor)) >=
        CONFIG.SIMILARITY_THRESHOLD,
    );

    if (twinAt === -1) {
      kept.push(candidate);
      continue;
    }

    if (candidate.score > kept[twinAt].score) {
      // The newcomer outranks its twin and takes over its slot.
      removed.push(kept[twinAt]);
      kept[twinAt] = candidate;
    } else {
      removed.push(candidate);
    }
  }

  return { kept, removed };
}
162
+
163
+ // ── Expiración ───────────────────────────────────────────────────────────────
164
+
165
/**
 * Separates entries that have outlived their age limit.
 *
 * An entry expires only when it has a date, its confidence is `BAJA`, and its
 * age in days exceeds the limit for its score: `EXPIRY_DAYS_HIGH_SCORE` for
 * scores of 40 or more, `EXPIRY_DAYS_LOW_SCORE` otherwise.
 *
 * @param {Array<{date: (string|null), score: number, confidence: string}>} entries
 * @returns {{kept: Array<object>, expired: Array<object>}}
 */
function expireEntries(entries) {
  const hasExpired = (entry) => {
    if (!entry.date) return false;
    const limitDays =
      entry.score >= 40 ? CONFIG.EXPIRY_DAYS_HIGH_SCORE : CONFIG.EXPIRY_DAYS_LOW_SCORE;
    return daysSinceDate(entry.date) > limitDays && entry.confidence === 'BAJA';
  };

  const kept = [];
  const expired = [];
  for (const entry of entries) {
    (hasExpired(entry) ? expired : kept).push(entry);
  }

  return { kept, expired };
}
188
+
189
+ // ── Reconstruir archivo ──────────────────────────────────────────────────────
190
+
191
/**
 * Rewrites a memory file with the given entries ordered by score, highest first.
 *
 * The header is written first, followed by exactly one blank line, then each
 * entry's trimmed raw text followed by a blank line.
 *
 * @param {string} filePath - File to overwrite.
 * @param {Array<{raw: string, score: number}>} entries - Entries to write.
 * @param {string[]} [headerLines] - Lines to place before the first entry.
 * @returns {void}
 */
function rebuildFile(filePath, entries, headerLines) {
  // Sort a copy so the caller's array keeps its order.
  const sorted = [...entries].sort((a, b) => b.score - a.score);

  // Drop trailing blank header lines before adding the separator. Without
  // this, every rewrite re-emits the previous separator as part of the header
  // and adds one more, so the gap before the first entry grows on each run.
  const header = Array.isArray(headerLines) ? [...headerLines] : [];
  while (header.length > 0 && String(header[header.length - 1]).trim() === '') {
    header.pop();
  }

  const lines = [];
  if (header.length > 0) lines.push(...header, '');

  for (const entry of sorted) {
    lines.push(entry.raw.trim());
    lines.push('');
  }

  fs.writeFileSync(filePath, lines.join('\n'), 'utf8');
}
205
+
206
/**
 * Returns the lines of a memory file that precede its first entry.
 *
 * The first entry is the first line starting with `### `. When no line does,
 * every line of the file is returned.
 *
 * @param {string} filePath - File to read.
 * @returns {string[]} Header lines; an empty array when the file does not exist.
 */
function extractHeader(filePath) {
  if (!fs.existsSync(filePath)) return [];

  const allLines = fs.readFileSync(filePath, 'utf8').split('\n');
  const firstEntryAt = allLines.findIndex((line) => line.startsWith('### '));

  return firstEntryAt === -1 ? allLines : allLines.slice(0, firstEntryAt);
}
217
+
218
+ // ── Curación principal ───────────────────────────────────────────────────────
219
+
220
/**
 * Runs the full curation pipeline on one memory file and rewrites it.
 *
 * Steps, in order: recompute scores, remove duplicates, expire old entries,
 * drop the lowest-scored entries above `maxEntries`, then rewrite the file
 * sorted by score. A missing file is reported and left untouched.
 *
 * @param {string} filePath - Memory file to curate.
 * @param {number} maxEntries - Maximum number of entries to keep.
 * @param {string} label - Name reported in the `file` field of the result.
 * @returns {{file: string, before: number, after: number, deduped: number,
 *   expired: number, sorted: boolean, changes: string[]}} Summary of the run.
 */
function curateFile(filePath, maxEntries, label) {
  const result = {
    file: label,
    before: 0,
    after: 0,
    deduped: 0,
    expired: 0,
    sorted: true,
    changes: [],
  };
  const note = (message) => result.changes.push(message);

  if (!fs.existsSync(filePath)) {
    note('archivo no encontrado — sin cambios');
    return result;
  }

  const header = extractHeader(filePath);
  const parsed = parseMemoryFile(filePath);
  result.before = parsed.length;

  // Step 1: refresh every score.
  for (const entry of parsed) {
    entry.score = computeScore(entry);
  }

  // Step 2: duplicates.
  const dedupOutcome = deduplicateEntries(parsed);
  result.deduped = dedupOutcome.removed.length;
  if (result.deduped > 0) {
    note(`${result.deduped} duplicados eliminados`);
  }

  // Step 3: expiry.
  const expiryOutcome = expireEntries(dedupOutcome.kept);
  result.expired = expiryOutcome.expired.length;
  if (result.expired > 0) {
    note(`${result.expired} entradas expiradas (BAJA confianza, sin uso)`);
  }

  // Step 4: enforce the cap, keeping the best-scored entries.
  let survivors = expiryOutcome.kept;
  if (survivors.length > maxEntries) {
    const overflow = survivors.length - maxEntries;
    note(`${overflow} entradas de bajo score descartadas (límite: ${maxEntries})`);
    survivors = survivors.sort((a, b) => b.score - a.score).slice(0, maxEntries);
  }
  result.after = survivors.length;

  // Step 5: write back, ordered by score.
  rebuildFile(filePath, survivors, header);

  if (result.changes.length === 0) {
    note('sin cambios necesarios');
  }
  return result;
}
273
+
274
+ // ── Report ───────────────────────────────────────────────────────────────────
275
+
276
/**
 * Prints a read-only analysis of the three memory files to the console.
 *
 * For each file it shows the entry count, how many entries fall in each score
 * band, a warning when the count exceeds the file's configured maximum, and
 * the number of entries deduplication would remove. No file is modified.
 *
 * @returns {void}
 */
function report() {
  console.log('\n══════════════════════════════════════════════════');
  console.log(' 🧠 Memory Curator — Análisis');
  console.log('══════════════════════════════════════════════════');

  const targets = [
    { path: ERRORES_FILE, label: 'errores.md', max: CONFIG.MAX_ENTRIES_ERRORS },
    { path: PATRONES_FILE, label: 'patrones.md', max: CONFIG.MAX_ENTRIES_PATTERNS },
    { path: DECISIONES_FILE, label: 'decisiones.md', max: CONFIG.MAX_ENTRIES_DECISIONS },
  ];

  targets.forEach((target) => {
    const entries = parseMemoryFile(target.path);
    console.log(`\n 📄 ${target.label}: ${entries.length} entradas`);

    if (entries.length === 0) {
      console.log(' (vacío)');
      return;
    }

    // Tally the score bands in a single pass.
    let high = 0;
    let med = 0;
    let low = 0;
    for (const { score } of entries) {
      if (score >= 40) high += 1;
      else if (score >= 20) med += 1;
      else if (score < 20) low += 1;
    }

    console.log(` Score ALTO (≥40): ${high} | MEDIO (20-39): ${med} | BAJO (<20): ${low}`);
    if (entries.length > target.max) {
      console.log(` ⚠️ Supera límite (${target.max}) — curación recomendada`);
    }

    // Preview only: the deduplication result is counted, never written.
    const duplicateCount = deduplicateEntries(entries).removed.length;
    if (duplicateCount > 0) {
      console.log(` 🔁 Posibles duplicados: ${duplicateCount}`);
    }
  });

  console.log('\n══════════════════════════════════════════════════\n');
}
315
+
316
+ // ── CLI ──────────────────────────────────────────────────────────────────────
317
+
318
// Command-line entry point. Commands:
//   run    → full curation (scores, dedup, expiry, entry cap)
//   report → read-only analysis
//   dedup  → remove duplicates only
//   score  → recompute scores only (the file is rewritten ordered by score)
//   expire → expire old entries only
if (require.main === module) {
  const cmd = process.argv[2] || 'run';

  if (cmd === 'report') {
    report();
    process.exit(0);
  }

  if (cmd === 'run' || cmd === 'dedup' || cmd === 'score' || cmd === 'expire') {
    console.log('\n══════════════════════════════════════════════════');
    console.log(' 🧠 Memory Curator — Curación');
    console.log('══════════════════════════════════════════════════\n');

    const files = [
      { path: ERRORES_FILE, max: CONFIG.MAX_ENTRIES_ERRORS, label: 'errores.md' },
      { path: PATRONES_FILE, max: CONFIG.MAX_ENTRIES_PATTERNS, label: 'patrones.md' },
      { path: DECISIONES_FILE, max: CONFIG.MAX_ENTRIES_DECISIONS, label: 'decisiones.md' },
    ];

    // Applies only the step named by `cmd` to one file. Previously every
    // command ran the full pipeline, so `score` or `dedup` could also expire
    // and cap entries, deleting more than the command name promises.
    const runSingleStep = (f) => {
      const result = {
        file: f.label,
        before: 0, after: 0,
        deduped: 0, expired: 0, sorted: true,
        changes: [],
      };

      if (!fs.existsSync(f.path)) {
        result.changes.push('archivo no encontrado — sin cambios');
        return result;
      }

      const header = extractHeader(f.path);
      let entries = parseMemoryFile(f.path); // scores are computed while parsing
      result.before = entries.length;

      if (cmd === 'dedup') {
        const { kept, removed } = deduplicateEntries(entries);
        entries = kept;
        result.deduped = removed.length;
        if (removed.length > 0) {
          result.changes.push(`${removed.length} duplicados eliminados`);
        }
      } else if (cmd === 'expire') {
        const { kept, expired } = expireEntries(entries);
        entries = kept;
        result.expired = expired.length;
        if (expired.length > 0) {
          result.changes.push(`${expired.length} entradas expiradas (BAJA confianza, sin uso)`);
        }
      }
      // `score` removes nothing: rewriting the file ordered by the fresh
      // scores is its only effect.

      result.after = entries.length;
      rebuildFile(f.path, entries, header);

      if (result.changes.length === 0) {
        result.changes.push('sin cambios necesarios');
      }
      return result;
    };

    let totalRemoved = 0;

    for (const f of files) {
      const result = cmd === 'run' ? curateFile(f.path, f.max, f.label) : runSingleStep(f);

      // Count every dropped entry, including those discarded by the entry
      // cap, which `deduped + expired` left out of the total.
      const delta = result.before - result.after;
      totalRemoved += delta;

      console.log(` ${f.label}:`);
      console.log(` Antes: ${result.before} → Después: ${result.after} (${delta > 0 ? '-' + delta : 'sin cambio'})`);
      for (const c of result.changes) {
        console.log(` ✓ ${c}`);
      }
      console.log('');
    }

    console.log(` Total eliminadas: ${totalRemoved}`);
    console.log('══════════════════════════════════════════════════\n');

    process.exit(0);
  }

  console.log('Uso: node mem-curator.cjs [run|report|dedup|score|expire]');
}
360
+
361
+ module.exports = { curateFile, parseMemoryFile, computeScore, deduplicateEntries, expireEntries };