agentic-kdd 3.0.4 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/akdd.js +69 -0
- package/contract-guard.cjs +851 -0
- package/creative-engine.cjs +560 -0
- package/embeddings-v2.cjs +320 -0
- package/llms-generator.cjs +425 -0
- package/mem-curator.cjs +584 -0
- package/package.json +1 -1
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Agentic KDD — Embeddings Engine v2.0
|
|
6
|
+
*
|
|
7
|
+
* Modelo DEFAULT: jina-embeddings-v2-base-code (Jina AI)
|
|
8
|
+
* - 137M parámetros entrenados en CODE + texto natural (bimodal NL-PL)
|
|
9
|
+
* - 768 dimensiones vs 384 de all-MiniLM
|
|
10
|
+
* - Entiende relaciones lógicas de tipos, AST, control de flujo
|
|
11
|
+
* - ~500MB instalado, 100% offline
|
|
12
|
+
*
|
|
13
|
+
* Fallback automático si jina no está: all-MiniLM-L6-v2 (384 dims, ~23MB)
|
|
14
|
+
*
|
|
15
|
+
* Gap cerrado: all-MiniLM-L6-v2 fue entrenado en NL natural, no en código.
|
|
16
|
+
* UniXcoder/jina mapean correctamente la semántica formal de lenguajes de programación.
|
|
17
|
+
*
|
|
18
|
+
* Uso:
|
|
19
|
+
* node embeddings.cjs embed "function calculateTotal(price, qty)"
|
|
20
|
+
* node embeddings.cjs status
|
|
21
|
+
* node embeddings.cjs install-jina
|
|
22
|
+
* node embeddings.cjs install-mini (fallback ligero)
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
const path = require('path');
|
|
26
|
+
const fs = require('fs');
|
|
27
|
+
const { execSync } = require('child_process');
|
|
28
|
+
|
|
29
|
+
// ─── MODELOS ──────────────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
/**
 * Catalogue of the embedding models this module can load.
 *
 * Every entry has the same shape:
 *   - id:          identifier passed to the transformers `pipeline()` loader
 *   - name:        short display name
 *   - dims:        width of the vectors the model produces
 *   - size:        approximate download size (display only)
 *   - type:        coarse model family tag
 *   - description: human-readable blurb (display only)
 */
const MODELS = {
  // Primary model: bimodal (natural language + programming language), code-specific.
  JINA_CODE: {
    id: 'jinaai/jina-embeddings-v2-base-code',
    name: 'jina-embeddings-v2-base-code',
    dims: 768,
    size: '~500MB',
    type: 'bimodal_nlpl',
    description: 'Entrenado en código + texto. Entiende relaciones de tipos, AST, control de flujo.',
  },

  // Fallback: lightweight, general-purpose natural-language model.
  MINI_LM: {
    id: 'Xenova/all-MiniLM-L6-v2',
    name: 'all-MiniLM-L6-v2',
    dims: 384,
    size: '~23MB',
    type: 'natural_language',
    description: 'Modelo NL general. Fallback cuando jina no está instalado.',
  },
};
|
|
51
|
+
|
|
52
|
+
// ─── ESTADO INTERNO ───────────────────────────────────────────────────────────
|
|
53
|
+
|
|
54
|
+
// Process-wide memoisation shared by the functions in this file.

// Loaded feature-extraction pipeline; null until one has been created.
let _pipeline = null;

// MODELS entry that backs `_pipeline`.
let _activeModel = null;

// Cached detection result: 'jina' | 'mini' | false. null means "not probed yet".
let _available = null;
|
|
57
|
+
|
|
58
|
+
// ─── DETECCIÓN DE MODELO DISPONIBLE ──────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/**
 * Work out which embedding model can be used and memoise the answer in
 * `_available` for the rest of the process.
 *
 * Probe order:
 *   1. jina inside the project cache (`<projectRoot>/.agentic/.model_cache`)
 *   2. jina inside the global Hugging Face hub cache (`~/.cache/huggingface/hub`)
 *   3. `@xenova/transformers` is resolvable, so MiniLM can be fetched on demand
 *
 * @param {string} [projectRoot] - project directory; defaults to `process.cwd()`.
 * @returns {'jina'|'mini'|false} key of the detected model, or `false` if none.
 */
function detectAvailableModel(projectRoot) {
  if (_available !== null) return _available;

  const HUB_STYLE_DIR = 'models--jinaai--jina-embeddings-v2-base-code';

  // 1. Project-local cache. Two layouts are accepted: the hub-style flat
  //    folder, and the nested `<org>/<model>` folder that the transformers.js
  //    file cache creates when `env.cacheDir` points at this directory
  //    (which is what the installers in this file do).
  const localCache = path.join(projectRoot || process.cwd(), '.agentic', '.model_cache');
  if (fs.existsSync(localCache)) {
    const localCandidates = [
      path.join(localCache, HUB_STYLE_DIR),
      path.join(localCache, 'jinaai', 'jina-embeddings-v2-base-code'),
    ];
    if (localCandidates.some((dir) => fs.existsSync(dir))) {
      _available = 'jina';
      return 'jina';
    }
  }

  // 2. Global Hugging Face hub cache (hub-style layout only).
  const hfCache = path.join(require('os').homedir(), '.cache', 'huggingface', 'hub');
  if (fs.existsSync(hfCache) && fs.existsSync(path.join(hfCache, HUB_STYLE_DIR))) {
    _available = 'jina';
    return 'jina';
  }

  // 3. With the runtime library present, MiniLM can always be downloaded.
  try {
    require.resolve('@xenova/transformers');
    _available = 'mini';
    return 'mini';
  } catch {
    // Library not installed: fall through to "nothing available".
  }

  _available = false;
  return false;
}
|
|
94
|
+
|
|
95
|
+
// ─── CARGAR PIPELINE ─────────────────────────────────────────────────────────
|
|
96
|
+
|
|
97
|
+
/**
 * Lazily create (and memoise) the feature-extraction pipeline.
 *
 * The model is chosen from `detectAvailableModel()`. When jina was detected
 * but cannot be loaded, MiniLM is tried before giving up. Failures are
 * deliberately silent: callers receive `{ pipeline: null, model: null }`.
 *
 * @param {string} [projectRoot] - project directory; defaults to `process.cwd()`.
 * @returns {Promise<{pipeline: Function|null, model: object|null}>}
 *   the loaded pipeline together with its MODELS entry, or nulls.
 */
async function getPipeline(projectRoot) {
  if (_pipeline) return { pipeline: _pipeline, model: _activeModel };

  const root = projectRoot || process.cwd();
  const available = detectAvailableModel(root);
  if (!available) {
    return { pipeline: null, model: null };
  }

  let createPipeline;
  try {
    process.env.TRANSFORMERS_VERBOSITY = 'error';
    // @xenova/transformers is published as an ES module; a dynamic import()
    // loads it from CommonJS on every Node version, unlike require().
    const { pipeline, env } = await import('@xenova/transformers');

    // Prefer the project-local model cache when it exists.
    const localCache = path.join(root, '.agentic', '.model_cache');
    if (fs.existsSync(localCache)) env.cacheDir = localCache;

    createPipeline = pipeline;
  } catch {
    _available = false;
    return { pipeline: null, model: null };
  }

  // Try the detected model first; jina degrades to MiniLM.
  const candidates = available === 'jina'
    ? [['jina', MODELS.JINA_CODE], ['mini', MODELS.MINI_LM]]
    : [['mini', MODELS.MINI_LM]];

  for (const [key, model] of candidates) {
    try {
      const loaded = await createPipeline('feature-extraction', model.id, { quantized: true });
      // Publish state only after a successful load so the memoised values
      // never describe a model that failed to initialise.
      _pipeline = loaded;
      _activeModel = model;
      _available = key;
      return { pipeline: loaded, model };
    } catch {
      // Best-effort: move on to the next candidate, if any.
    }
  }

  _available = false;
  return { pipeline: null, model: null };
}
|
|
125
|
+
|
|
126
|
+
// ─── GENERAR EMBEDDING ────────────────────────────────────────────────────────
|
|
127
|
+
|
|
128
|
+
/**
 * Produce an embedding vector for a piece of code or text.
 *
 * @param {string} text - input to embed.
 * @param {string} [projectRoot] - forwarded to `getPipeline()`.
 * @returns {Promise<number[]|null>} mean-pooled, normalised vector as a plain
 *   array, or `null` when no pipeline is available or inference fails.
 */
async function embed(text, projectRoot) {
  const loaded = await getPipeline(projectRoot);
  const extractor = loaded.pipeline;
  if (!extractor) {
    return null;
  }

  let vector;
  try {
    const result = await extractor(text, { pooling: 'mean', normalize: true });
    // Copy the tensor data into an ordinary array.
    vector = Array.from(result.data);
  } catch {
    vector = null;
  }
  return vector;
}
|
|
140
|
+
|
|
141
|
+
// ─── SIMILITUD COSENO ────────────────────────────────────────────────────────
|
|
142
|
+
|
|
143
|
+
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} similarity in [-1, 1]; 0 when either input is missing,
 *   the lengths differ, a vector has zero magnitude, or the result is not a
 *   finite number (e.g. a component is NaN/undefined).
 */
function cosineSim(a, b) {
  if (!a || !b || a.length !== b.length) return 0;

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) return 0;

  // A NaN score would make sort comparators that use it inconsistent,
  // so treat any non-finite result as "no similarity".
  const sim = dot / denom;
  return Number.isFinite(sim) ? sim : 0;
}
|
|
154
|
+
|
|
155
|
+
// ─── BÚSQUEDA SEMÁNTICA ───────────────────────────────────────────────────────
|
|
156
|
+
|
|
157
|
+
/**
 * Rank `items` by cosine similarity between their stored embedding and the
 * embedding of `query`.
 *
 * @param {string} query - natural-language or code query.
 * @param {Array<{embedding?: number[]}>} items - candidates; entries without
 *   an array `embedding` are skipped when ranking.
 * @param {number} [topK=10] - maximum number of results.
 * @param {string} [projectRoot] - forwarded to `embed()`.
 * @returns {Promise<Array>} up to `topK` items, each with an added `score`,
 *   best first. When the query cannot be embedded, the first `topK` items are
 *   returned unchanged (no `score`). A non-array `items` yields `[]`.
 */
async function semanticSearch(query, items, topK = 10, projectRoot) {
  // Nothing to rank: avoid loading a model just to throw on `items.slice`.
  if (!Array.isArray(items)) return [];

  const queryEmbed = await embed(query, projectRoot);
  if (!queryEmbed) return items.slice(0, topK); // fallback without embeddings

  return items
    // Optional chaining tolerates null/undefined entries in the list.
    .filter((item) => Array.isArray(item?.embedding))
    .map((item) => ({
      ...item,
      score: cosineSim(queryEmbed, item.embedding),
    }))
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
}
|
|
178
|
+
|
|
179
|
+
// ─── STATUS ───────────────────────────────────────────────────────────────────
|
|
180
|
+
|
|
181
|
+
/**
 * Describe the current embeddings setup for display purposes.
 *
 * @param {string} [projectRoot] - project directory; defaults to `process.cwd()`.
 * @returns {Promise<object>} summary with `available`, `active_model`,
 *   `model_type`, `dims`, `size`, `description`, `recommended`,
 *   `install_command` and `gap_status`.
 */
async function getStatus(projectRoot) {
  const detected = detectAvailableModel(projectRoot || process.cwd());
  const hasJina = detected === 'jina';
  const hasMini = detected === 'mini';

  // Map the detection key onto its MODELS entry (null when nothing was found).
  let model = null;
  if (hasJina) {
    model = MODELS.JINA_CODE;
  } else if (hasMini) {
    model = MODELS.MINI_LM;
  }

  let gapStatus;
  if (hasJina) {
    gapStatus = '✅ Modelo bimodal NL-PL activo — semántica de código precisa';
  } else if (hasMini) {
    gapStatus = '⚠️  Usando all-MiniLM-L6-v2 — no optimizado para código. Ejecutar: akdd jina-install';
  } else {
    gapStatus = '❌ Sin embeddings — búsqueda semántica desactivada. Ejecutar: akdd embed-install';
  }

  const installCommand = hasJina ? 'Ya instalado ✅' : 'akdd jina-install';

  return {
    available: Boolean(detected),
    active_model: model?.name || 'none',
    model_type: model?.type || 'none',
    dims: model?.dims || 0,
    size: model?.size || 'N/A',
    description: model?.description || 'Sin modelo de embeddings instalado',
    recommended: 'jina-embeddings-v2-base-code',
    install_command: installCommand,
    gap_status: gapStatus,
  };
}
|
|
201
|
+
|
|
202
|
+
// ─── INSTALACIÓN ─────────────────────────────────────────────────────────────
|
|
203
|
+
|
|
204
|
+
/**
 * Install jina-embeddings-v2-base-code (the recommended primary model).
 *
 * Installs `@xenova/transformers` into the project when it cannot be
 * resolved, downloads the model into `<projectRoot>/.agentic/.model_cache`,
 * runs a one-sentence smoke test and, on success, publishes the pipeline in
 * the module-level cache. Download/load errors are reported on the console,
 * not thrown; a failing `npm install` does propagate.
 *
 * @param {string} [projectRoot] - project directory; defaults to `process.cwd()`.
 * @returns {Promise<void>}
 */
async function installJina(projectRoot) {
  const root = projectRoot || process.cwd();
  console.log('\n[EMBEDDINGS] Instalando jina-embeddings-v2-base-code...');
  console.log('[EMBEDDINGS] Tamaño: ~500MB. Puede tomar 5-10 minutos.');
  console.log('[EMBEDDINGS] Modelo bimodal NL-PL — entrenado específicamente en código.\n');

  // Make sure the runtime library is present before touching the model.
  try {
    require.resolve('@xenova/transformers');
  } catch {
    console.log('[EMBEDDINGS] Instalando @xenova/transformers primero...');
    execSync('npm install @xenova/transformers --save-dev', {
      stdio: 'inherit',
      cwd: root,
    });
  }

  // Download the model.
  try {
    process.env.TRANSFORMERS_VERBOSITY = 'info';
    // The package is an ES module; import() works from CommonJS where
    // require() may not.
    const { pipeline, env } = await import('@xenova/transformers');
    const localCache = path.join(root, '.agentic', '.model_cache');
    fs.mkdirSync(localCache, { recursive: true });
    env.cacheDir = localCache;

    console.log('[EMBEDDINGS] Descargando modelo...');
    const pipe = await pipeline('feature-extraction', MODELS.JINA_CODE.id, { quantized: true });

    // Smoke test: the model must produce a non-empty vector.
    const testEmbed = await pipe('function test() { return 1; }', { pooling: 'mean', normalize: true });
    if (testEmbed && testEmbed.data.length > 0) {
      console.log(`\n[EMBEDDINGS] ✅ jina-embeddings-v2-base-code instalado.`);
      console.log(`[EMBEDDINGS] Dimensiones: ${testEmbed.data.length}`);
      console.log(`[EMBEDDINGS] Búsqueda semántica de código ahora es precisa.\n`);
      _available = 'jina';
      _pipeline = pipe;
      // Keep the model record in step with the cached pipeline so that
      // getPipeline() does not hand back `model: null`.
      _activeModel = MODELS.JINA_CODE;
    }
  } catch (e) {
    console.error('[EMBEDDINGS] Error instalando jina:', e.message);
    console.log('[EMBEDDINGS] Alternativa: akdd embed-install (all-MiniLM-L6-v2, 23MB)\n');
  }
}
|
|
250
|
+
|
|
251
|
+
/**
 * Install all-MiniLM-L6-v2 (lightweight fallback model, ~23MB).
 *
 * Installs `@xenova/transformers` into the project when it cannot be
 * resolved, downloads the model into `<projectRoot>/.agentic/.model_cache`
 * and publishes the pipeline in the module-level cache. Download/load errors
 * are reported on the console, not thrown; a failing `npm install` does
 * propagate.
 *
 * @param {string} [projectRoot] - project directory; defaults to `process.cwd()`.
 * @returns {Promise<void>}
 */
async function installMini(projectRoot) {
  const root = projectRoot || process.cwd();
  console.log('\n[EMBEDDINGS] Instalando all-MiniLM-L6-v2 (modelo ligero, 23MB)...');
  console.log('[EMBEDDINGS] Nota: este modelo es para texto natural, no optimizado para código.');
  console.log('[EMBEDDINGS] Para precisión máxima en código: akdd jina-install\n');

  // Make sure the runtime library is present before touching the model.
  try {
    require.resolve('@xenova/transformers');
  } catch {
    execSync('npm install @xenova/transformers --save-dev', { stdio: 'inherit', cwd: root });
  }

  try {
    // The package is an ES module; import() works from CommonJS where
    // require() may not.
    const { pipeline, env } = await import('@xenova/transformers');
    const localCache = path.join(root, '.agentic', '.model_cache');
    fs.mkdirSync(localCache, { recursive: true });
    env.cacheDir = localCache;

    const pipe = await pipeline('feature-extraction', MODELS.MINI_LM.id, { quantized: true });
    console.log('\n[EMBEDDINGS] ✅ all-MiniLM-L6-v2 instalado como fallback.\n');
    _available = 'mini';
    _pipeline = pipe;
    // Keep the model record in step with the cached pipeline so that
    // getPipeline() does not hand back `model: null`.
    _activeModel = MODELS.MINI_LM;
  } catch (e) {
    console.error('[EMBEDDINGS] Error:', e.message);
  }
}
|
|
280
|
+
|
|
281
|
+
// ─── CLI ──────────────────────────────────────────────────────────────────────
|
|
282
|
+
|
|
283
|
+
// Command-line entry point: runs only when this file is executed directly.
if (require.main === module) {
  const [,, cmd, ...args] = process.argv;
  const projectRoot = process.cwd();

  // Report an unexpected failure and make the process exit non-zero
  // without cutting off pending output.
  const fail = (err) => {
    console.error(err);
    process.exitCode = 1;
  };

  switch (cmd) {
    case 'embed':
      if (!args[0]) {
        console.error('Uso: embeddings.cjs embed "<texto>"');
        process.exitCode = 1;
        break;
      }
      // All remaining arguments are joined so unquoted input still works.
      embed(args.join(' '), projectRoot)
        .then((v) => {
          if (!v) console.log('Sin embeddings disponibles. Ejecutar: akdd jina-install');
          else console.log(`Vector [${v.length} dims]: [${v.slice(0,4).map(x=>x.toFixed(4)).join(', ')}...]`);
        })
        .catch(fail);
      break;

    case 'status':
      getStatus(projectRoot)
        .then((s) => {
          console.log('\n=== Embeddings Status ===');
          console.log(`Modelo activo: ${s.active_model}`);
          console.log(`Tipo: ${s.model_type}`);
          console.log(`Dimensiones: ${s.dims}`);
          console.log(`Gap: ${s.gap_status}`);
          console.log(`Instalar: ${s.install_command}\n`);
        })
        .catch(fail);
      break;

    case 'install-jina':
      installJina(projectRoot).catch(fail);
      break;

    case 'install-mini':
      installMini(projectRoot).catch(fail);
      break;

    default:
      console.log('Uso: embeddings.cjs [embed <text> | status | install-jina | install-mini]');
  }
}
|
|
319
|
+
|
|
320
|
+
// Public API: embedding and search helpers, installers, model detection and the model catalogue.
module.exports = { embed, cosineSim, semanticSearch, getStatus, installJina, installMini, detectAvailableModel, MODELS };
|