@stevenvo780/st-lang 4.14.1 → 4.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/format/stnb/__tests__/executor.test.d.ts +2 -0
- package/dist/format/stnb/__tests__/executor.test.d.ts.map +1 -0
- package/dist/format/stnb/__tests__/executor.test.js +140 -0
- package/dist/format/stnb/__tests__/executor.test.js.map +1 -0
- package/dist/format/stnb/__tests__/parser.test.d.ts +2 -0
- package/dist/format/stnb/__tests__/parser.test.d.ts.map +1 -0
- package/dist/format/stnb/__tests__/parser.test.js +119 -0
- package/dist/format/stnb/__tests__/parser.test.js.map +1 -0
- package/dist/format/stnb/__tests__/renderer.test.d.ts +2 -0
- package/dist/format/stnb/__tests__/renderer.test.d.ts.map +1 -0
- package/dist/format/stnb/__tests__/renderer.test.js +109 -0
- package/dist/format/stnb/__tests__/renderer.test.js.map +1 -0
- package/dist/format/stnb/__tests__/roundtrip.test.d.ts +2 -0
- package/dist/format/stnb/__tests__/roundtrip.test.d.ts.map +1 -0
- package/dist/format/stnb/__tests__/roundtrip.test.js +91 -0
- package/dist/format/stnb/__tests__/roundtrip.test.js.map +1 -0
- package/dist/format/stnb/__tests__/serializer.test.d.ts +2 -0
- package/dist/format/stnb/__tests__/serializer.test.d.ts.map +1 -0
- package/dist/format/stnb/__tests__/serializer.test.js +60 -0
- package/dist/format/stnb/__tests__/serializer.test.js.map +1 -0
- package/dist/format/stnb/executor.d.ts +29 -0
- package/dist/format/stnb/executor.d.ts.map +1 -0
- package/dist/format/stnb/executor.js +139 -0
- package/dist/format/stnb/executor.js.map +1 -0
- package/dist/format/stnb/index.d.ts +19 -0
- package/dist/format/stnb/index.d.ts.map +1 -0
- package/dist/format/stnb/index.js +28 -0
- package/dist/format/stnb/index.js.map +1 -0
- package/dist/format/stnb/renderer-html.d.ts +11 -0
- package/dist/format/stnb/renderer-html.d.ts.map +1 -0
- package/dist/format/stnb/renderer-html.js +180 -0
- package/dist/format/stnb/renderer-html.js.map +1 -0
- package/dist/format/stnb/renderer-markdown.d.ts +13 -0
- package/dist/format/stnb/renderer-markdown.d.ts.map +1 -0
- package/dist/format/stnb/renderer-markdown.js +92 -0
- package/dist/format/stnb/renderer-markdown.js.map +1 -0
- package/dist/format/stnb/serializer.d.ts +14 -0
- package/dist/format/stnb/serializer.d.ts.map +1 -0
- package/dist/format/stnb/serializer.js +21 -0
- package/dist/format/stnb/serializer.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/logic/profiles/dl-hybrid/__tests__/differential.test.d.ts +2 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/differential.test.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/differential.test.js +141 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/differential.test.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/examples.test.d.ts +2 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/examples.test.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/examples.test.js +55 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/examples.test.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/parser.test.d.ts +2 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/parser.test.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/parser.test.js +149 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/parser.test.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/tableau.test.d.ts +2 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/tableau.test.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/tableau.test.js +105 -0
- package/dist/logic/profiles/dl-hybrid/__tests__/tableau.test.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/ast.d.ts +160 -0
- package/dist/logic/profiles/dl-hybrid/ast.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/ast.js +261 -0
- package/dist/logic/profiles/dl-hybrid/ast.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/differential.d.ts +61 -0
- package/dist/logic/profiles/dl-hybrid/differential.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/differential.js +291 -0
- package/dist/logic/profiles/dl-hybrid/differential.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/index.d.ts +10 -0
- package/dist/logic/profiles/dl-hybrid/index.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/index.js +90 -0
- package/dist/logic/profiles/dl-hybrid/index.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/parser.d.ts +8 -0
- package/dist/logic/profiles/dl-hybrid/parser.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/parser.js +410 -0
- package/dist/logic/profiles/dl-hybrid/parser.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/profile.d.ts +23 -0
- package/dist/logic/profiles/dl-hybrid/profile.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/profile.js +122 -0
- package/dist/logic/profiles/dl-hybrid/profile.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/semantics.d.ts +16 -0
- package/dist/logic/profiles/dl-hybrid/semantics.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/semantics.js +181 -0
- package/dist/logic/profiles/dl-hybrid/semantics.js.map +1 -0
- package/dist/logic/profiles/dl-hybrid/tableau.d.ts +38 -0
- package/dist/logic/profiles/dl-hybrid/tableau.d.ts.map +1 -0
- package/dist/logic/profiles/dl-hybrid/tableau.js +289 -0
- package/dist/logic/profiles/dl-hybrid/tableau.js.map +1 -0
- package/dist/reasoning/dl-hybrid/__tests__/invariant.test.d.ts +2 -0
- package/dist/reasoning/dl-hybrid/__tests__/invariant.test.d.ts.map +1 -0
- package/dist/reasoning/dl-hybrid/__tests__/invariant.test.js +74 -0
- package/dist/reasoning/dl-hybrid/__tests__/invariant.test.js.map +1 -0
- package/dist/reasoning/dl-hybrid/index.d.ts +3 -0
- package/dist/reasoning/dl-hybrid/index.d.ts.map +1 -0
- package/dist/reasoning/dl-hybrid/index.js +16 -0
- package/dist/reasoning/dl-hybrid/index.js.map +1 -0
- package/dist/reasoning/dl-hybrid/invariant-search.d.ts +39 -0
- package/dist/reasoning/dl-hybrid/invariant-search.d.ts.map +1 -0
- package/dist/reasoning/dl-hybrid/invariant-search.js +188 -0
- package/dist/reasoning/dl-hybrid/invariant-search.js.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/benchmarks.test.d.ts +2 -0
- package/dist/reasoning/lemma-rag/__tests__/benchmarks.test.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/benchmarks.test.js +72 -0
- package/dist/reasoning/lemma-rag/__tests__/benchmarks.test.js.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/embedding.test.d.ts +2 -0
- package/dist/reasoning/lemma-rag/__tests__/embedding.test.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/embedding.test.js +102 -0
- package/dist/reasoning/lemma-rag/__tests__/embedding.test.js.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/integration.test.d.ts +2 -0
- package/dist/reasoning/lemma-rag/__tests__/integration.test.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/integration.test.js +40 -0
- package/dist/reasoning/lemma-rag/__tests__/integration.test.js.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/query.test.d.ts +2 -0
- package/dist/reasoning/lemma-rag/__tests__/query.test.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/query.test.js +136 -0
- package/dist/reasoning/lemma-rag/__tests__/query.test.js.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/retrieval.test.d.ts +2 -0
- package/dist/reasoning/lemma-rag/__tests__/retrieval.test.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/__tests__/retrieval.test.js +147 -0
- package/dist/reasoning/lemma-rag/__tests__/retrieval.test.js.map +1 -0
- package/dist/reasoning/lemma-rag/benchmarks.d.ts +30 -0
- package/dist/reasoning/lemma-rag/benchmarks.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/benchmarks.js +177 -0
- package/dist/reasoning/lemma-rag/benchmarks.js.map +1 -0
- package/dist/reasoning/lemma-rag/embedding.d.ts +26 -0
- package/dist/reasoning/lemma-rag/embedding.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/embedding.js +243 -0
- package/dist/reasoning/lemma-rag/embedding.js.map +1 -0
- package/dist/reasoning/lemma-rag/index-store.d.ts +35 -0
- package/dist/reasoning/lemma-rag/index-store.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/index-store.js +105 -0
- package/dist/reasoning/lemma-rag/index-store.js.map +1 -0
- package/dist/reasoning/lemma-rag/index.d.ts +9 -0
- package/dist/reasoning/lemma-rag/index.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/index.js +34 -0
- package/dist/reasoning/lemma-rag/index.js.map +1 -0
- package/dist/reasoning/lemma-rag/query.d.ts +48 -0
- package/dist/reasoning/lemma-rag/query.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/query.js +92 -0
- package/dist/reasoning/lemma-rag/query.js.map +1 -0
- package/dist/reasoning/lemma-rag/retrieval.d.ts +33 -0
- package/dist/reasoning/lemma-rag/retrieval.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/retrieval.js +123 -0
- package/dist/reasoning/lemma-rag/retrieval.js.map +1 -0
- package/dist/reasoning/lemma-rag/types.d.ts +52 -0
- package/dist/reasoning/lemma-rag/types.d.ts.map +1 -0
- package/dist/reasoning/lemma-rag/types.js +13 -0
- package/dist/reasoning/lemma-rag/types.js.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/extractor.test.d.ts +2 -0
- package/dist/reasoning/proof-mining/__tests__/extractor.test.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/extractor.test.js +263 -0
- package/dist/reasoning/proof-mining/__tests__/extractor.test.js.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/generalizer.test.d.ts +2 -0
- package/dist/reasoning/proof-mining/__tests__/generalizer.test.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/generalizer.test.js +123 -0
- package/dist/reasoning/proof-mining/__tests__/generalizer.test.js.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/integration.test.d.ts +2 -0
- package/dist/reasoning/proof-mining/__tests__/integration.test.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/integration.test.js +128 -0
- package/dist/reasoning/proof-mining/__tests__/integration.test.js.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/persistence.test.d.ts +2 -0
- package/dist/reasoning/proof-mining/__tests__/persistence.test.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/persistence.test.js +119 -0
- package/dist/reasoning/proof-mining/__tests__/persistence.test.js.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/ranker.test.d.ts +2 -0
- package/dist/reasoning/proof-mining/__tests__/ranker.test.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/__tests__/ranker.test.js +93 -0
- package/dist/reasoning/proof-mining/__tests__/ranker.test.js.map +1 -0
- package/dist/reasoning/proof-mining/extractor.d.ts +73 -0
- package/dist/reasoning/proof-mining/extractor.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/extractor.js +170 -0
- package/dist/reasoning/proof-mining/extractor.js.map +1 -0
- package/dist/reasoning/proof-mining/generalizer.d.ts +46 -0
- package/dist/reasoning/proof-mining/generalizer.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/generalizer.js +245 -0
- package/dist/reasoning/proof-mining/generalizer.js.map +1 -0
- package/dist/reasoning/proof-mining/index.d.ts +62 -0
- package/dist/reasoning/proof-mining/index.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/index.js +235 -0
- package/dist/reasoning/proof-mining/index.js.map +1 -0
- package/dist/reasoning/proof-mining/persistence.d.ts +35 -0
- package/dist/reasoning/proof-mining/persistence.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/persistence.js +126 -0
- package/dist/reasoning/proof-mining/persistence.js.map +1 -0
- package/dist/reasoning/proof-mining/ranker.d.ts +29 -0
- package/dist/reasoning/proof-mining/ranker.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/ranker.js +71 -0
- package/dist/reasoning/proof-mining/ranker.js.map +1 -0
- package/dist/reasoning/proof-mining/statistics.d.ts +11 -0
- package/dist/reasoning/proof-mining/statistics.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/statistics.js +44 -0
- package/dist/reasoning/proof-mining/statistics.js.map +1 -0
- package/dist/reasoning/proof-mining/types.d.ts +117 -0
- package/dist/reasoning/proof-mining/types.d.ts.map +1 -0
- package/dist/reasoning/proof-mining/types.js +24 -0
- package/dist/reasoning/proof-mining/types.js.map +1 -0
- package/package.json +26 -1
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// HashEmbedding — embedding determinístico R^256 sobre features AST
|
|
4
|
+
//
|
|
5
|
+
// Sin dependencias de red ni modelos externos. Features extraídas
|
|
6
|
+
// directamente de la representación string de la fórmula:
|
|
7
|
+
// 1. Frecuencias de conectivos top-level (∧ ∨ ¬ → ↔ □ ◇ ∀ ∃ = ⊥ ⊤)
|
|
8
|
+
// 2. Átomos/identificadores hasheados a posiciones del vector
|
|
9
|
+
// 3. Profundidad de cuantificadores (∀ ∃)
|
|
10
|
+
// 4. Tipos de operadores (modal, aritmético, set-theory, etc.)
|
|
11
|
+
// 5. Firma de esqueleto: secuencia de categorías de tokens
|
|
12
|
+
//
|
|
13
|
+
// Propiedades garantizadas:
|
|
14
|
+
// - Determinístico: misma entrada → mismo vector
|
|
15
|
+
// - α-invariante por canonicalización previa
|
|
16
|
+
// - Dimensión fija: EMBEDDING_DIM (256)
|
|
17
|
+
// - Sin NaN/Infinity
|
|
18
|
+
// ============================================================
|
|
19
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
20
|
+
exports.defaultProvider = exports.HashEmbeddingProvider = void 0;
|
|
21
|
+
exports.normalizeEmbedding = normalizeEmbedding;
|
|
22
|
+
exports.hashEmbed = hashEmbed;
|
|
23
|
+
const types_1 = require("./types");
|
|
24
|
+
// --------------- FNV-1a 32-bit hashing (deterministic) ----------------
/**
 * Computes the 32-bit FNV-1a hash of a string.
 *
 * Every UTF-16 code unit of `str` is XOR-ed into the running state, which is
 * then multiplied by the FNV prime modulo 2^32.
 *
 * @param str Text to hash. The empty string yields the offset basis.
 * @returns The hash as an unsigned 32-bit integer in [0, 2^32).
 */
function fnv1a(str) {
    // FNV-1a 32-bit offset basis (2166136261).
    let hash = 0x811c9dc5;
    for (let i = 0; i < str.length; i++) {
        hash ^= str.charCodeAt(i);
        // Math.imul multiplies exactly in 32-bit integer arithmetic. A plain
        // `*` by the FNV prime (16777619) can exceed 2^53, where doubles are
        // no longer exact, and would round away the low bits of the hash.
        hash = Math.imul(hash, 0x01000193) >>> 0;
    }
    return hash;
}
|
|
33
|
+
/**
 * Projects a string onto a bucket index.
 *
 * @param str Text to hash with fnv1a.
 * @param dim Number of buckets; the hash is reduced modulo this value.
 * @returns The hash of `str` modulo `dim`.
 */
function strToIndex(str, dim) {
    const hashed = fnv1a(str);
    return hashed % dim;
}
|
|
37
|
+
/**
 * Projects a string onto a bucket index, with a salt mixed into the hash.
 *
 * @param str Text to hash.
 * @param salt Value prepended to `str` before hashing with fnv1a.
 * @param dim Number of buckets; the hash is reduced modulo this value.
 * @returns The hash of `salt + str` modulo `dim`.
 */
function strToIndexSalted(str, salt, dim) {
    const saltedKey = salt + str;
    const hashed = fnv1a(saltedKey);
    return hashed % dim;
}
|
|
41
|
+
// --------------- Feature extraction --------------------------------
/** Symbols whose occurrences are tallied as connectives. */
const CONNECTIVE_SYMBOLS = [
    '∧', '∨', '¬', '→', '↔',
    '□', '◇',
    '∀', '∃',
    '=', '⊥', '⊤', '⊢', '⇒',
];
/** Connectives counted as modal operators. */
const MODAL_OPS = new Set([
    '□',
    '◇',
]);
/** Connectives counted as quantifiers. */
const QUANTIFIER_OPS = new Set([
    '∀',
    '∃',
]);
/** Characters counted as arithmetic operators (note the plain letter 'S'). */
const ARITH_OPS = new Set([
    '+', '·',
    '<', '≤', '>', '≠',
    'S',
]);
/** Characters counted as set-theory operators. */
const SET_OPS = new Set([
    '∈', '∉',
    '⊆', '⊂',
    '∪', '∩',
    '∅',
]);
/** Global matcher for identifiers: a letter or underscore, then word characters. */
const IDENT_RE = /[A-Za-z_][A-Za-z0-9_]*/g;
/** Identifiers that are logic keywords or truth constants, not atoms. */
const LOGIC_RESERVED = new Set([
    // Connective and quantifier keywords.
    'not', 'and', 'or', 'implies', 'iff', 'forall', 'exists',
    // Truth constants in their accepted spellings.
    'true', 'false', 'True', 'False', 'TRUE', 'FALSE',
]);
|
|
52
|
+
/**
 * Collects the raw counts that the embedding is built from.
 *
 * The text is scanned in three passes: symbols and parentheses, then
 * identifiers, then arithmetic operators. Each pass appends category letters
 * to a "skeleton" that is capped at 16 entries and finally hashed.
 *
 * @param formula Formula or free text to analyse.
 * @returns An object with:
 *   - `connectiveFreq`: occurrences of each symbol of CONNECTIVE_SYMBOLS;
 *   - `atomFreq`: occurrences of each identifier not in LOGIC_RESERVED;
 *   - `quantifierDepth`: number of quantifier symbols seen (scopes are not
 *     tracked, so this is an upper bound on the real nesting depth);
 *   - `modalCount`, `arithCount`, `setCount`: operator tallies;
 *   - `totalLen`: `formula.length`;
 *   - `parenDepth`: deepest parenthesis nesting reached;
 *   - `skeletonHash`: fnv1a hash of the joined skeleton letters.
 */
function extractFeatures(formula) {
    const SKELETON_LIMIT = 16;
    // Null-prototype maps: identifiers such as "constructor" or "toString"
    // must not read inherited Object.prototype members. With a plain `{}`
    // the `?? 0` default is skipped for those keys, the counter turns into a
    // string, and the resulting vector ends up filled with NaN.
    const connectiveFreq = Object.create(null);
    const atomFreq = Object.create(null);
    let quantifierDepth = 0;
    let modalCount = 0;
    let arithCount = 0;
    let setCount = 0;
    let parenDepth = 0;
    let maxParenDepth = 0;
    // Skeleton: category letters of the first SKELETON_LIMIT classified tokens.
    const skeleton = [];
    const mark = (category) => {
        if (skeleton.length < SKELETON_LIMIT)
            skeleton.push(category);
    };
    // Pass 1: parentheses, connectives and set operators.
    for (const ch of formula) {
        if (ch === '(') {
            parenDepth++;
            if (parenDepth > maxParenDepth)
                maxParenDepth = parenDepth;
        }
        else if (ch === ')') {
            // Clamp at zero so unbalanced ")" cannot drive the depth negative.
            parenDepth = Math.max(0, parenDepth - 1);
        }
        if (CONNECTIVE_SYMBOLS.includes(ch)) {
            connectiveFreq[ch] = (connectiveFreq[ch] ?? 0) + 1;
            if (QUANTIFIER_OPS.has(ch)) {
                // Simplified: quantifiers are never "closed", so this counts them.
                quantifierDepth++;
                mark('q');
            }
            else if (MODAL_OPS.has(ch)) {
                modalCount++;
                mark('m');
            }
            else {
                mark('c');
            }
        }
        else if (SET_OPS.has(ch)) {
            // Set operators are not connectives, so they must be classified
            // outside the connective branch or they are never counted.
            setCount++;
            mark('s');
        }
    }
    // Pass 2: identifiers that are not reserved logic keywords.
    let match;
    IDENT_RE.lastIndex = 0; // the regex is global and shared; always restart
    while ((match = IDENT_RE.exec(formula)) !== null) {
        const tok = match[0];
        if (LOGIC_RESERVED.has(tok))
            continue;
        atomFreq[tok] = (atomFreq[tok] ?? 0) + 1;
        mark('a');
    }
    // Pass 3: arithmetic operators, counted per character. Their 'n' markers
    // therefore come after the atom markers in the skeleton.
    // NOTE(review): ARITH_OPS contains the letter 'S', so every capital S,
    // including those inside identifiers, is counted here.
    for (const ch of formula) {
        if (ARITH_OPS.has(ch)) {
            arithCount++;
            mark('n');
        }
    }
    const skeletonHash = fnv1a(skeleton.join(''));
    return {
        connectiveFreq,
        atomFreq,
        quantifierDepth,
        modalCount,
        arithCount,
        setCount,
        totalLen: formula.length,
        parenDepth: maxParenDepth,
        skeletonHash,
    };
}
|
|
137
|
+
// --------------- Embedding vector construction ---------------------
/**
 * Builds the deterministic, un-normalised embedding for a formula.
 * Normalisation is a separate step (`normalizeEmbedding`).
 *
 * Vector layout (indices written by this function):
 *   0..13     log-scaled connective frequencies, one fixed slot per symbol
 *   16..127   hashed atoms
 *   128..133  structural metrics
 *   144..159  skeleton signature
 *   160..191  hashed pairs of distinct connectives
 *   192..255  hashed character bigrams
 *
 * @param features Counts for `formula`, shaped like the result of extractFeatures.
 * @param formula The text the features were extracted from.
 * @returns A Float32Array of length EMBEDDING_DIM (the indices above assume
 *   it is at least 256).
 */
function buildRawEmbedding(features, formula) {
    const vec = new Float32Array(types_1.EMBEDDING_DIM);
    // ---- Block 1: connectives (dim 0..15) ----
    // Every connective owns a fixed position.
    const connectiveSlots = {
        '∧': 0, '∨': 1, '¬': 2, '→': 3, '↔': 4,
        '□': 5, '◇': 6, '∀': 7, '∃': 8, '=': 9,
        '⊥': 10, '⊤': 11, '⊢': 12, '⇒': 13,
    };
    for (const [sym, slot] of Object.entries(connectiveSlots)) {
        const freq = features.connectiveFreq[sym] ?? 0;
        if (freq > 0)
            vec[slot] += Math.log1p(freq);
    }
    // ---- Block 2: hashed atoms (dim 16..127) ----
    for (const [atom, freq] of Object.entries(features.atomFreq)) {
        // Skip anything that is not a positive count: a non-numeric value
        // would make Math.log1p return NaN and poison the whole vector.
        if (typeof freq !== 'number' || !(freq > 0))
            continue;
        const weight = Math.log1p(freq);
        // Three projections per atom; the salted one lowers the impact of
        // collisions in the unsalted hash.
        // NOTE(review): idx2 reuses the unsalted hash, so it is always
        // idx1 + 56 and adds no independent information. Confirm whether a
        // second salt was intended.
        const idx1 = 16 + strToIndex(atom, 56);
        const idx2 = 72 + strToIndex(atom, 56);
        const idx3 = 16 + strToIndexSalted(atom, 'b', 56);
        vec[idx1] += weight * 0.5;
        vec[idx2] += weight * 0.3;
        vec[idx3] += weight * 0.2;
    }
    // ---- Block 3: depth and structural metrics (dim 128..143) ----
    vec[128] = features.quantifierDepth * 0.5;
    vec[129] = Math.log1p(features.modalCount);
    vec[130] = Math.log1p(features.arithCount);
    vec[131] = Math.log1p(features.setCount);
    vec[132] = Math.log1p(features.totalLen) * 0.1;
    vec[133] = features.parenDepth * 0.3;
    // ---- Block 4: skeleton signature (dim 144..159) ----
    // Spread the skeleton hash over 16 positions; sin(...) * 0.5 + 0.5 keeps
    // every contribution inside [0, 1].
    for (let i = 0; i < 16; i++) {
        vec[144 + i] += Math.sin(features.skeletonHash * (i + 1)) * 0.5 + 0.5;
    }
    // ---- Block 5: hashed connective pairs (dim 160..191) ----
    // Captures co-occurrence between every pair of distinct connectives. The
    // pair is weighted by the rarer of the two. Key order follows the order
    // in which the connectives were recorded in connectiveFreq.
    const connKeys = Object.keys(features.connectiveFreq);
    for (let i = 0; i < connKeys.length; i++) {
        for (let j = i + 1; j < connKeys.length; j++) {
            const first = connKeys[i];
            const second = connKeys[j];
            const pairIdx = 160 + (fnv1a(first + second) % 32);
            const freq = Math.min(features.connectiveFreq[first] ?? 0, features.connectiveFreq[second] ?? 0);
            vec[pairIdx] += Math.log1p(freq) * 0.4;
        }
    }
    // ---- Block 6: character bigrams of the formula (dim 192..255) ----
    // Captures the local "texture" of the text, one UTF-16 code-unit pair at a time.
    for (let i = 0; i < formula.length - 1; i++) {
        const bigram = formula[i] + formula[i + 1];
        vec[192 + (fnv1a(bigram) % 64)] += 0.1;
    }
    return vec;
}
|
|
201
|
+
/**
 * Scales a vector to unit L2 norm.
 *
 * @param vec Vector to normalise; it is never written to.
 * @returns A new Float32Array holding `vec` scaled by 1 / ||vec||, or `vec`
 *   itself (same reference) when its squared norm is exactly 0.
 */
function normalizeEmbedding(vec) {
    const size = vec.length;
    let squaredNorm = 0;
    for (let k = 0; k < size; k++) {
        const component = vec[k] ?? 0;
        squaredNorm += component * component;
    }
    // An all-zero vector has no direction to preserve; hand it back untouched.
    if (squaredNorm === 0) {
        return vec;
    }
    // Multiply by the reciprocal instead of dividing each component.
    const scale = 1 / Math.sqrt(squaredNorm);
    const unit = new Float32Array(size);
    for (let k = 0; k < size; k++) {
        unit[k] = (vec[k] ?? 0) * scale;
    }
    return unit;
}
|
|
214
|
+
/**
 * Produces the embedding of a formula or of a free-text query.
 *
 * The text goes through three steps: feature extraction, construction of
 * the raw vector, and L2 normalisation. Free text is handled exactly like a
 * formula; atoms and operators are extracted with the same rules.
 *
 * @param text Formula or query to embed.
 * @returns The result of normalizeEmbedding applied to the raw embedding.
 */
function hashEmbed(text) {
    return normalizeEmbedding(buildRawEmbedding(extractFeatures(text), text));
}
|
|
230
|
+
/**
 * Embedding provider backed by the hash embedding.
 * Implements the EmbeddingProvider interface for Stage 2.
 */
class HashEmbeddingProvider {
    /** Vector dimensionality, taken from EMBEDDING_DIM in ./types. */
    dim = types_1.EMBEDDING_DIM;
    /**
     * Embeds a piece of text.
     *
     * @param text Formula or query to embed.
     * @returns Whatever hashEmbed returns for `text`.
     */
    embed(text) {
        const vector = hashEmbed(text);
        return vector;
    }
}
exports.HashEmbeddingProvider = HashEmbeddingProvider;
/** Reusable singleton instance. */
exports.defaultProvider = new HashEmbeddingProvider();
|
|
243
|
+
//# sourceMappingURL=embedding.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding.js","sourceRoot":"","sources":["../../../src/reasoning/lemma-rag/embedding.ts"],"names":[],"mappings":";AAAA,+DAA+D;AAC/D,oEAAoE;AACpE,EAAE;AACF,kEAAkE;AAClE,0DAA0D;AAC1D,qEAAqE;AACrE,gEAAgE;AAChE,4CAA4C;AAC5C,iEAAiE;AACjE,6DAA6D;AAC7D,EAAE;AACF,4BAA4B;AAC5B,mDAAmD;AACnD,+CAA+C;AAC/C,0CAA0C;AAC1C,uBAAuB;AACvB,+DAA+D;;;AAwN/D,gDAQC;AAaD,8BAIC;AA/OD,mCAAwC;AAGxC,0EAA0E;AAE1E,SAAS,KAAK,CAAC,GAAW;IACxB,IAAI,IAAI,GAAG,UAAU,CAAC;IACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,IAAI,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,GAAG,CAAC,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IACjC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,+CAA+C;AAC/C,SAAS,UAAU,CAAC,GAAW,EAAE,GAAW;IAC1C,OAAO,KAAK,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;AAC1B,CAAC;AAED,iEAAiE;AACjE,SAAS,gBAAgB,CAAC,GAAW,EAAE,IAAY,EAAE,GAAW;IAC9D,OAAO,KAAK,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;AACjC,CAAC;AAED,0EAA0E;AAE1E,MAAM,kBAAkB,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;AAClG,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACtC,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAC3C,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAC/D,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAC7D,MAAM,QAAQ,GAAG,yBAAyB,CAAC;AAC3C,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC;IAC7B,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,QAAQ;IACxD,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO;CAClD,CAAC,CAAC;AAuBH,SAAS,eAAe,CAAC,OAAe;IACtC,MAAM,cAAc,GAA2B,EAAE,CAAC;IAClD,MAAM,QAAQ,GAA2B,EAAE,CAAC;IAC5C,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,aAA
a,GAAG,CAAC,CAAC;IAEtB,6CAA6C;IAC7C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,KAAK,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QACzB,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,UAAU,EAAE,CAAC;YACb,IAAI,UAAU,GAAG,aAAa;gBAAE,aAAa,GAAG,UAAU,CAAC;QAC7D,CAAC;aAAM,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACtB,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,kBAAkB,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;YACpC,cAAc,CAAC,EAAE,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACnD,IAAI,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC3B,aAAa,EAAE,CAAC;gBAChB,IAAI,aAAa,GAAG,eAAe;oBAAE,eAAe,GAAG,aAAa,CAAC;gBACrE,IAAI,QAAQ,CAAC,MAAM,GAAG,EAAE;oBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/C,CAAC;iBAAM,IAAI,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7B,UAAU,EAAE,CAAC;gBACb,IAAI,QAAQ,CAAC,MAAM,GAAG,EAAE;oBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/C,CAAC;iBAAM,IAAI,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7B,UAAU,EAAE,CAAC;gBACb,IAAI,QAAQ,CAAC,MAAM,GAAG,EAAE;oBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/C,CAAC;iBAAM,IAAI,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC3B,QAAQ,EAAE,CAAC;gBACX,IAAI,QAAQ,CAAC,MAAM,GAAG,EAAE;oBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,IAAI,QAAQ,CAAC,MAAM,GAAG,EAAE;oBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/C,CAAC;QACH,CAAC;IACH,CAAC;IAED,0DAA0D;IAC1D,4DAA4D;IAE5D,2BAA2B;IAC3B,IAAI,CAAyB,CAAC;IAC9B,QAAQ,CAAC,SAAS,GAAG,CAAC,CAAC;IACvB,OAAO,CAAC,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACjB,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC7B,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACzC,IAAI,QAAQ,CAAC,MAAM,GAAG,EAAE;gBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,KAAK,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QACzB,IAAI,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YACtB,UAAU,EAAE,CAAC;YACb,IAAI,QAAQ,CAAC,MAAM,GAAG,EAAE;gBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,KAA
K,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IAE9C,OAAO;QACL,cAAc;QACd,QAAQ;QACR,eAAe;QACf,UAAU;QACV,UAAU;QACV,QAAQ;QACR,QAAQ,EAAE,OAAO,CAAC,MAAM;QACxB,UAAU,EAAE,aAAa;QACzB,YAAY;KACb,CAAC;AACJ,CAAC;AAED,0EAA0E;AAE1E;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,QAAqB,EAAE,OAAe;IAC/D,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,qBAAa,CAAC,CAAC;IAE5C,6CAA6C;IAC7C,yCAAyC;IACzC,MAAM,eAAe,GAA2B;QAC9C,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;QACtC,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC;QACtC,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE;KACnC,CAAC;IACF,KAAK,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,GAAG,QAAQ,CAAC,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC/C,IAAI,IAAI,GAAG,CAAC;YAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAChE,CAAC;IAED,qDAAqD;IACrD,KAAK,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC7D,qCAAqC;QACrC,MAAM,IAAI,GAAG,EAAE,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;QACzC,MAAM,IAAI,GAAG,EAAE,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;QACzC,MAAM,IAAI,GAAG,EAAE,GAAG,CAAC,gBAAgB,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC;QACpD,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC;QACtD,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC;QACtD,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC;IACxD,CAAC;IAED,0EAA0E;IAC1E,GAAG,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,eAAe,GAAG,GAAG,CAAC;IAC1C,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;IAC3C,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;IAC3C,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACzC,GA
AG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC;IAC/C,GAAG,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,UAAU,GAAG,GAAG,CAAC;IAErC,wDAAwD;IACxD,wEAAwE;IACxE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC;QACpB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,YAAY,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC;QACtE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC;IACvC,CAAC;IAED,qEAAqE;IACrE,2DAA2D;IAC3D,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC;IACtD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACvD,MAAM,OAAO,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;YACzC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CACnB,QAAQ,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,EAC/C,QAAQ,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAChD,CAAC;YACF,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,yEAAyE;IACzE,2CAA2C;IAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAE,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC;QAC7C,MAAM,GAAG,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;QACvC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC;IACnC,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED,wFAAwF;AACxF,SAAgB,kBAAkB,CAAC,GAAiB;IAClD,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC3E,IAAI,IAAI,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAC3B,MAAM,
OAAO,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC;IACtE,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAgB,SAAS,CAAC,IAAY;IACpC,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,GAAG,GAAG,iBAAiB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAC9C,OAAO,kBAAkB,CAAC,GAAG,CAAC,CAAC;AACjC,CAAC;AAED;;;GAGG;AACH,MAAa,qBAAqB;IACvB,GAAG,GAAG,qBAAa,CAAC;IAE7B,KAAK,CAAC,IAAY;QAChB,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;CACF;AAND,sDAMC;AAED,wCAAwC;AAC3B,QAAA,eAAe,GAAG,IAAI,qBAAqB,EAAE,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import type { CuratedLemma } from '../../tooling/lemma-library/types';
|
|
2
|
+
import type { EmbeddedLemma, Embedding, EmbeddingProvider } from './types';
|
|
3
|
+
/** In-memory index of embedded lemmas, with an inverted token index and document frequencies for BM25. */
export declare class IndexStore {
|
|
4
|
+
private readonly provider;
|
|
5
|
+
private entries;
|
|
6
|
+
private byId;
|
|
7
|
+
/** Inverted index: token → set of lemma IDs (used for BM25). */
|
|
8
|
+
private invertedIndex;
|
|
9
|
+
/** Document frequency: token → number of documents that contain it. */
|
|
10
|
+
private docFreq;
|
|
11
|
+
constructor(provider: EmbeddingProvider);
|
|
12
|
+
/** Adds a lemma to the index. Idempotent when a lemma with the same id is already indexed. */
|
|
13
|
+
add(lemma: CuratedLemma): EmbeddedLemma;
|
|
14
|
+
/** Adds multiple lemmas. */
|
|
15
|
+
addAll(lemmas: CuratedLemma[]): void;
|
|
16
|
+
/** Returns every EmbeddedLemma. */
|
|
17
|
+
all(): EmbeddedLemma[];
|
|
18
|
+
/** Looks up an entry by ID. */
|
|
19
|
+
getById(id: string): EmbeddedLemma | undefined;
|
|
20
|
+
/** Number of indexed lemmas. */
|
|
21
|
+
size(): number;
|
|
22
|
+
/** Returns the document frequency of a token (for the BM25 IDF term). */
|
|
23
|
+
getDocFreq(token: string): number;
|
|
24
|
+
/** Total number of documents. */
|
|
25
|
+
totalDocs(): number;
|
|
26
|
+
/** Average document length (for BM25). */
|
|
27
|
+
avgDocLength(): number;
|
|
28
|
+
/** Filters the entries by domain. */
|
|
29
|
+
byDomain(domain: string): EmbeddedLemma[];
|
|
30
|
+
/** Returns the IDs of the lemmas that contain the token (inverted index). */
|
|
31
|
+
docsForToken(token: string): Set<string>;
|
|
32
|
+
/** Returns the embedding the provider generates for a query text. */
|
|
33
|
+
embedQuery(text: string): Embedding;
|
|
34
|
+
}
|
|
35
|
+
//# sourceMappingURL=index-store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index-store.d.ts","sourceRoot":"","sources":["../../../src/reasoning/lemma-rag/index-store.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mCAAmC,CAAC;AACtE,OAAO,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAiB3E,qBAAa,UAAU;IAQT,OAAO,CAAC,QAAQ,CAAC,QAAQ;IAPrC,OAAO,CAAC,OAAO,CAAuB;IACtC,OAAO,CAAC,IAAI,CAAyC;IACrD,4DAA4D;IAC5D,OAAO,CAAC,aAAa,CAAuC;IAC5D,yEAAyE;IACzE,OAAO,CAAC,OAAO,CAAkC;gBAEpB,QAAQ,EAAE,iBAAiB;IAExD,oEAAoE;IACpE,GAAG,CAAC,KAAK,EAAE,YAAY,GAAG,aAAa;IAyBvC,6BAA6B;IAC7B,MAAM,CAAC,MAAM,EAAE,YAAY,EAAE,GAAG,IAAI;IAIpC,wCAAwC;IACxC,GAAG,IAAI,aAAa,EAAE;IAItB,oBAAoB;IACpB,OAAO,CAAC,EAAE,EAAE,MAAM,GAAG,aAAa,GAAG,SAAS;IAI9C,iCAAiC;IACjC,IAAI,IAAI,MAAM;IAId,kEAAkE;IAClE,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAIjC,2BAA2B;IAC3B,SAAS,IAAI,MAAM;IAInB,2CAA2C;IAC3C,YAAY,IAAI,MAAM;IAOtB,sCAAsC;IACtC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,aAAa,EAAE;IAIzC,yEAAyE;IACzE,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;IAIxC,+EAA+E;IAC/E,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS;CAGpC"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// IndexStore — índice in-memory de lemas embebidos
|
|
4
|
+
//
|
|
5
|
+
// Almacena EmbeddedLemma[] + inverted index de tokens para BM25.
|
|
6
|
+
// Soporta inserción (idempotente por id) y consulta por id, dominio o token; no implementa borrado.
|
|
7
|
+
// ============================================================
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.IndexStore = void 0;
|
|
10
|
+
const tokenize_1 = require("../../tooling/lemma-library/tokenize");
|
|
11
|
+
/**
 * Builds the term-frequency bag for a lemma.
 *
 * Tokens are taken, in order, from the lemma's name, from each of its tags
 * and from its statement.
 *
 * @param lemma - Lemma exposing `name`, `tags` and `statement`.
 * @returns `{ termFreq, docLength }`: `termFreq` maps each token to how many
 *   times it occurs, `docLength` is the total token count (duplicates included).
 */
function buildTermFreq(lemma) {
    const toTokens = (text) => (0, tokenize_1.tokenize)(text);
    // Evaluated in the same order the tokens are counted: name, tags, statement.
    const tokenGroups = [
        toTokens(lemma.name),
        lemma.tags.flatMap(toTokens),
        toTokens(lemma.statement),
    ];
    const termFreq = new Map();
    let docLength = 0;
    for (const group of tokenGroups) {
        for (const token of group) {
            termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
            docLength += 1;
        }
    }
    return { termFreq, docLength };
}
|
|
24
|
+
/**
 * In-memory index of embedded lemmas.
 *
 * Keeps the entries in insertion order, a lookup by lemma id, an inverted
 * index (token → lemma ids) and per-token document frequencies for BM25.
 */
class IndexStore {
    /** Embedding provider used for lemma statements and for query text. */
    provider;
    /** Indexed entries, in insertion order. */
    entries = [];
    /** Entry lookup keyed by lemma id. */
    byId = new Map();
    /** Inverted index: token → set of lemma IDs (used for BM25). */
    invertedIndex = new Map();
    /** Document frequency: token → number of documents that contain it. */
    docFreq = new Map();
    /**
     * @param provider - Object whose `embed(text)` produces an embedding.
     */
    constructor(provider) {
        this.provider = provider;
    }
    /**
     * Adds a lemma to the index and returns its entry.
     * Idempotent by id: when the id is already indexed, the existing entry is
     * returned and nothing is recomputed.
     */
    add(lemma) {
        const existing = this.byId.get(lemma.id);
        if (existing !== undefined) {
            return existing;
        }
        // Only the statement is embedded; name and tags feed the token bag.
        const embedding = this.provider.embed(lemma.statement);
        const stats = buildTermFreq(lemma);
        const entry = {
            lemma,
            embedding,
            termFreq: stats.termFreq,
            docLength: stats.docLength,
        };
        this.entries.push(entry);
        this.byId.set(lemma.id, entry);
        // Each distinct token counts once per document towards docFreq.
        stats.termFreq.forEach((_count, token) => {
            const ids = this.invertedIndex.get(token);
            if (ids === undefined) {
                this.invertedIndex.set(token, new Set([lemma.id]));
            }
            else {
                ids.add(lemma.id);
            }
            const seen = this.docFreq.get(token);
            this.docFreq.set(token, (seen === undefined ? 0 : seen) + 1);
        });
        return entry;
    }
    /** Adds every lemma of `lemmas`, in order. */
    addAll(lemmas) {
        for (const lemma of lemmas) {
            this.add(lemma);
        }
    }
    /** Returns all entries. This is the internal array itself, not a copy. */
    all() {
        return this.entries;
    }
    /** Returns the entry for `id`, or `undefined` when it is not indexed. */
    getById(id) {
        return this.byId.get(id);
    }
    /** Number of indexed lemmas. */
    size() {
        return this.entries.length;
    }
    /** Document frequency of `token` (for the BM25 IDF term); 0 when unseen. */
    getDocFreq(token) {
        const freq = this.docFreq.get(token);
        return freq === undefined ? 0 : freq;
    }
    /** Total number of documents. */
    totalDocs() {
        return this.entries.length;
    }
    /** Average document length (for BM25); 1 when the index is empty. */
    avgDocLength() {
        const count = this.entries.length;
        if (count === 0) {
            return 1;
        }
        const total = this.entries.reduce((sum, entry) => sum + entry.docLength, 0);
        return total / count;
    }
    /** Returns the entries whose lemma has exactly the given domain. */
    byDomain(domain) {
        const matches = [];
        for (const entry of this.entries) {
            if (entry.lemma.domain === domain) {
                matches.push(entry);
            }
        }
        return matches;
    }
    /**
     * Returns the IDs of the lemmas that contain `token`.
     * A known token yields the index's own set; an unknown one a new empty set.
     */
    docsForToken(token) {
        const ids = this.invertedIndex.get(token);
        return ids === undefined ? new Set() : ids;
    }
    /** Returns the embedding the provider generates for a query text. */
    embedQuery(text) {
        return this.provider.embed(text);
    }
}
|
|
104
|
+
exports.IndexStore = IndexStore;
|
|
105
|
+
//# sourceMappingURL=index-store.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index-store.js","sourceRoot":"","sources":["../../../src/reasoning/lemma-rag/index-store.ts"],"names":[],"mappings":";AAAA,+DAA+D;AAC/D,mDAAmD;AACnD,EAAE;AACF,iEAAiE;AACjE,mEAAmE;AACnE,+DAA+D;;;AAI/D,mEAAgE;AAEhE,8EAA8E;AAC9E,SAAS,aAAa,CAAC,KAAmB;IACxC,MAAM,MAAM,GAAG;QACb,GAAG,IAAA,mBAAQ,EAAC,KAAK,CAAC,IAAI,CAAC;QACvB,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAA,mBAAQ,EAAC,CAAC,CAAC,CAAC;QACzC,GAAG,IAAA,mBAAQ,EAAC,KAAK,CAAC,SAAS,CAAC;KAC7B,CAAC;IACF,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC3C,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;AAChD,CAAC;AAED,MAAa,UAAU;IAQQ;IAPrB,OAAO,GAAoB,EAAE,CAAC;IAC9B,IAAI,GAA+B,IAAI,GAAG,EAAE,CAAC;IACrD,4DAA4D;IACpD,aAAa,GAA6B,IAAI,GAAG,EAAE,CAAC;IAC5D,yEAAyE;IACjE,OAAO,GAAwB,IAAI,GAAG,EAAE,CAAC;IAEjD,YAA6B,QAA2B;QAA3B,aAAQ,GAAR,QAAQ,CAAmB;IAAG,CAAC;IAE5D,oEAAoE;IACpE,GAAG,CAAC,KAAmB;QACrB,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAE,CAAC;QAClC,CAAC;QACD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACvD,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;QACrD,MAAM,KAAK,GAAkB,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;QAEvE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QAE/B,sCAAsC;QACtC,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;YAClC,IAAI,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACzC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,GAAG,IAAI,GAAG,EAAE,CAAC;gBACnB,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YACtC,CAAC;YACD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACrB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC1D,CAAC;QAED,OAAO,KAAK,CAAC;I
ACf,CAAC;IAED,6BAA6B;IAC7B,MAAM,CAAC,MAAsB;QAC3B,KAAK,MAAM,CAAC,IAAI,MAAM;YAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC;IAED,wCAAwC;IACxC,GAAG;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,oBAAoB;IACpB,OAAO,CAAC,EAAU;QAChB,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC3B,CAAC;IAED,iCAAiC;IACjC,IAAI;QACF,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;IAC7B,CAAC;IAED,kEAAkE;IAClE,UAAU,CAAC,KAAa;QACtB,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACtC,CAAC;IAED,2BAA2B;IAC3B,SAAS;QACP,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;IAC7B,CAAC;IAED,2CAA2C;IAC3C,YAAY;QACV,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QACxC,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO;YAAE,KAAK,IAAI,CAAC,CAAC,SAAS,CAAC;QACnD,OAAO,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;IACrC,CAAC;IAED,sCAAsC;IACtC,QAAQ,CAAC,MAAc;QACrB,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC;IAC/D,CAAC;IAED,yEAAyE;IACzE,YAAY,CAAC,KAAa;QACxB,OAAO,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,IAAI,GAAG,EAAE,CAAC;IACpD,CAAC;IAED,+EAA+E;IAC/E,UAAU,CAAC,IAAY;QACrB,OAAO,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,CAAC;CACF;AAxFD,gCAwFC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export type { Embedding, EmbeddedLemma, QueryResult, RAGOptions, EmbeddingProvider, RecallResult } from './types';
|
|
2
|
+
export { EMBEDDING_DIM } from './types';
|
|
3
|
+
export { hashEmbed, normalizeEmbedding, HashEmbeddingProvider, defaultProvider } from './embedding';
|
|
4
|
+
export { IndexStore } from './index-store';
|
|
5
|
+
export { cosineSimilarity, bm25Score, hybridScore, retrieveTopK } from './retrieval';
|
|
6
|
+
export { LemmaRAG, queryLemmas, globalRAG, resetGlobalRAG } from './query';
|
|
7
|
+
export { runBenchmark, formatBenchmarkResults, BENCHMARK_PAIRS, } from './benchmarks';
|
|
8
|
+
export type { BenchmarkPair } from './benchmarks';
|
|
9
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/reasoning/lemma-rag/index.ts"],"names":[],"mappings":"AAQA,YAAY,EAAE,SAAS,EAAE,aAAa,EAAE,WAAW,EAAE,UAAU,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAClH,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAExC,OAAO,EAAE,SAAS,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEpG,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,SAAS,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAErF,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAE3E,OAAO,EACL,YAAY,EACZ,sBAAsB,EACtB,eAAe,GAChB,MAAM,cAAc,CAAC;AACtB,YAAY,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// lemma-rag — entrypoint público
|
|
4
|
+
//
|
|
5
|
+
// Sistema RAG semántico sobre la biblioteca de lemas ST.
|
|
6
|
+
// Stage 1: HashEmbedding determinístico R^256 (sin deps de red).
|
|
7
|
+
// Stage 2 (pendiente): ONNX transformers (MiniLM-L6).
|
|
8
|
+
// ============================================================
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.BENCHMARK_PAIRS = exports.formatBenchmarkResults = exports.runBenchmark = exports.resetGlobalRAG = exports.globalRAG = exports.queryLemmas = exports.LemmaRAG = exports.retrieveTopK = exports.hybridScore = exports.bm25Score = exports.cosineSimilarity = exports.IndexStore = exports.defaultProvider = exports.HashEmbeddingProvider = exports.normalizeEmbedding = exports.hashEmbed = exports.EMBEDDING_DIM = void 0;
|
|
11
|
+
var types_1 = require("./types");
|
|
12
|
+
Object.defineProperty(exports, "EMBEDDING_DIM", { enumerable: true, get: function () { return types_1.EMBEDDING_DIM; } });
|
|
13
|
+
var embedding_1 = require("./embedding");
|
|
14
|
+
Object.defineProperty(exports, "hashEmbed", { enumerable: true, get: function () { return embedding_1.hashEmbed; } });
|
|
15
|
+
Object.defineProperty(exports, "normalizeEmbedding", { enumerable: true, get: function () { return embedding_1.normalizeEmbedding; } });
|
|
16
|
+
Object.defineProperty(exports, "HashEmbeddingProvider", { enumerable: true, get: function () { return embedding_1.HashEmbeddingProvider; } });
|
|
17
|
+
Object.defineProperty(exports, "defaultProvider", { enumerable: true, get: function () { return embedding_1.defaultProvider; } });
|
|
18
|
+
var index_store_1 = require("./index-store");
|
|
19
|
+
Object.defineProperty(exports, "IndexStore", { enumerable: true, get: function () { return index_store_1.IndexStore; } });
|
|
20
|
+
var retrieval_1 = require("./retrieval");
|
|
21
|
+
Object.defineProperty(exports, "cosineSimilarity", { enumerable: true, get: function () { return retrieval_1.cosineSimilarity; } });
|
|
22
|
+
Object.defineProperty(exports, "bm25Score", { enumerable: true, get: function () { return retrieval_1.bm25Score; } });
|
|
23
|
+
Object.defineProperty(exports, "hybridScore", { enumerable: true, get: function () { return retrieval_1.hybridScore; } });
|
|
24
|
+
Object.defineProperty(exports, "retrieveTopK", { enumerable: true, get: function () { return retrieval_1.retrieveTopK; } });
|
|
25
|
+
var query_1 = require("./query");
|
|
26
|
+
Object.defineProperty(exports, "LemmaRAG", { enumerable: true, get: function () { return query_1.LemmaRAG; } });
|
|
27
|
+
Object.defineProperty(exports, "queryLemmas", { enumerable: true, get: function () { return query_1.queryLemmas; } });
|
|
28
|
+
Object.defineProperty(exports, "globalRAG", { enumerable: true, get: function () { return query_1.globalRAG; } });
|
|
29
|
+
Object.defineProperty(exports, "resetGlobalRAG", { enumerable: true, get: function () { return query_1.resetGlobalRAG; } });
|
|
30
|
+
var benchmarks_1 = require("./benchmarks");
|
|
31
|
+
Object.defineProperty(exports, "runBenchmark", { enumerable: true, get: function () { return benchmarks_1.runBenchmark; } });
|
|
32
|
+
Object.defineProperty(exports, "formatBenchmarkResults", { enumerable: true, get: function () { return benchmarks_1.formatBenchmarkResults; } });
|
|
33
|
+
Object.defineProperty(exports, "BENCHMARK_PAIRS", { enumerable: true, get: function () { return benchmarks_1.BENCHMARK_PAIRS; } });
|
|
34
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/reasoning/lemma-rag/index.ts"],"names":[],"mappings":";AAAA,+DAA+D;AAC/D,iCAAiC;AACjC,EAAE;AACF,yDAAyD;AACzD,iEAAiE;AACjE,sDAAsD;AACtD,+DAA+D;;;AAG/D,iCAAwC;AAA/B,sGAAA,aAAa,OAAA;AAEtB,yCAAoG;AAA3F,sGAAA,SAAS,OAAA;AAAE,+GAAA,kBAAkB,OAAA;AAAE,kHAAA,qBAAqB,OAAA;AAAE,4GAAA,eAAe,OAAA;AAE9E,6CAA2C;AAAlC,yGAAA,UAAU,OAAA;AAEnB,yCAAqF;AAA5E,6GAAA,gBAAgB,OAAA;AAAE,sGAAA,SAAS,OAAA;AAAE,wGAAA,WAAW,OAAA;AAAE,yGAAA,YAAY,OAAA;AAE/D,iCAA2E;AAAlE,iGAAA,QAAQ,OAAA;AAAE,oGAAA,WAAW,OAAA;AAAE,kGAAA,SAAS,OAAA;AAAE,uGAAA,cAAc,OAAA;AAEzD,2CAIsB;AAHpB,0GAAA,YAAY,OAAA;AACZ,oHAAA,sBAAsB,OAAA;AACtB,6GAAA,eAAe,OAAA"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import type { CuratedLemma } from '../../tooling/lemma-library/types';
|
|
2
|
+
import type { EmbeddingProvider, QueryResult, RAGOptions } from './types';
|
|
3
|
+
import { IndexStore } from './index-store';
|
|
4
|
+
/**
|
|
5
|
+
* LemmaRAG — main entry point of the RAG system.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const rag = new LemmaRAG();
|
|
9
|
+
* rag.index(lemmas);
|
|
10
|
+
* const results = rag.query('negation implies double negation', { k: 3 });
|
|
11
|
+
*/
|
|
12
|
+
export declare class LemmaRAG {
|
|
13
|
+
private store;
|
|
14
|
+
constructor(provider?: EmbeddingProvider);
|
|
15
|
+
/** Indexes a list of lemmas. Idempotent by id. */
|
|
16
|
+
index(lemmas: CuratedLemma[]): void;
|
|
17
|
+
/** Number of indexed lemmas. */
|
|
18
|
+
size(): number;
|
|
19
|
+
/**
|
|
20
|
+
* Runs a hybrid semantic search.
|
|
21
|
+
*
|
|
22
|
+
* Steps:
|
|
23
|
+
* 1. Embed(query) → query_vec
|
|
24
|
+
* 2. Cosine similarity against every vector in the store
|
|
25
|
+
* 3. BM25 over the query tokens
|
|
26
|
+
* 4. Hybrid score: 0.7 * cosine + 0.3 * bm25_norm (default weights; see opts.cosineWeight)
|
|
27
|
+
* 5. Top-k with score ≥ minScore
|
|
28
|
+
*/
|
|
29
|
+
query(queryText: string, opts?: RAGOptions): QueryResult[];
|
|
30
|
+
/** Direct access to the IndexStore (for tests and benchmarks). */
|
|
31
|
+
getStore(): IndexStore;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Returns the global RAG instance, creating it if necessary.
|
|
35
|
+
* Handy for quick-start use without managing the instance's lifecycle.
|
|
36
|
+
*/
|
|
37
|
+
export declare function globalRAG(): LemmaRAG;
|
|
38
|
+
/** Resets the global instance (useful in tests). */
|
|
39
|
+
export declare function resetGlobalRAG(): void;
|
|
40
|
+
/**
|
|
41
|
+
* Convenience helper: indexes `lemmas` into `rag` and runs the query on it.
|
|
42
|
+
* When `rag` is omitted, a new LemmaRAG is created for the call (the global instance is not used).
|
|
43
|
+
*
|
|
44
|
+
* @example
|
|
45
|
+
* const results = queryLemmas(standardLibrary().all(), 'double negation');
|
|
46
|
+
*/
|
|
47
|
+
export declare function queryLemmas(lemmas: CuratedLemma[], queryText: string, opts?: RAGOptions, rag?: LemmaRAG): QueryResult[];
|
|
48
|
+
//# sourceMappingURL=query.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"query.d.ts","sourceRoot":"","sources":["../../../src/reasoning/lemma-rag/query.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mCAAmC,CAAC;AACtE,OAAO,KAAK,EAAE,iBAAiB,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAO3C;;;;;;;GAOG;AACH,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAAa;gBAEd,QAAQ,GAAE,iBAAmC;IAIzD,qDAAqD;IACrD,KAAK,CAAC,MAAM,EAAE,YAAY,EAAE,GAAG,IAAI;IAInC,iCAAiC;IACjC,IAAI,IAAI,MAAM;IAId;;;;;;;;;OASG;IACH,KAAK,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,GAAE,UAAe,GAAG,WAAW,EAAE;IAY9D,8DAA8D;IAC9D,QAAQ,IAAI,UAAU;CAGvB;AAMD;;;GAGG;AACH,wBAAgB,SAAS,IAAI,QAAQ,CAGpC;AAED,mDAAmD;AACnD,wBAAgB,cAAc,IAAI,IAAI,CAErC;AAED;;;;;;GAMG;AACH,wBAAgB,WAAW,CACzB,MAAM,EAAE,YAAY,EAAE,EACtB,SAAS,EAAE,MAAM,EACjB,IAAI,GAAE,UAAe,EACrB,GAAG,CAAC,EAAE,QAAQ,GACb,WAAW,EAAE,CAIf"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// query.ts — API pública del sistema RAG
|
|
4
|
+
//
|
|
5
|
+
// Expone:
|
|
6
|
+
// - LemmaRAG: clase principal que encapsula store + retrieval
|
|
7
|
+
// - queryLemmas: función de conveniencia; crea una instancia nueva por llamada salvo que se pase `rag`
|
|
8
|
+
// ============================================================
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.LemmaRAG = void 0;
|
|
11
|
+
exports.globalRAG = globalRAG;
|
|
12
|
+
exports.resetGlobalRAG = resetGlobalRAG;
|
|
13
|
+
exports.queryLemmas = queryLemmas;
|
|
14
|
+
const index_store_1 = require("./index-store");
|
|
15
|
+
const retrieval_1 = require("./retrieval");
|
|
16
|
+
const embedding_1 = require("./embedding");
|
|
17
|
+
const DEFAULT_K = 5;
|
|
18
|
+
const DEFAULT_COSINE_WEIGHT = 0.7;
|
|
19
|
+
/**
 * LemmaRAG — main entry point of the RAG system.
 *
 * Usage:
 *   const rag = new LemmaRAG();
 *   rag.index(lemmas);
 *   const results = rag.query('negation implies double negation', { k: 3 });
 */
class LemmaRAG {
    /** IndexStore holding the indexed lemmas. */
    store;
    /**
     * @param provider - Embedding provider handed to the IndexStore;
     *   defaults to the module's `defaultProvider`.
     */
    constructor(provider = embedding_1.defaultProvider) {
        this.store = new index_store_1.IndexStore(provider);
    }
    /** Indexes a list of lemmas. Idempotent by id. */
    index(lemmas) {
        this.store.addAll(lemmas);
    }
    /** Number of indexed lemmas. */
    size() {
        return this.store.size();
    }
    /**
     * Runs a hybrid semantic search.
     *
     * Steps:
     *   1. Embed(query) → query_vec
     *   2. Cosine similarity against every vector in the store
     *   3. BM25 over the query tokens
     *   4. Hybrid score: 0.7 * cosine + 0.3 * bm25_norm (with the default weight)
     *   5. Top-k with score ≥ minScore
     *
     * @param queryText - Text to search for.
     * @param opts - Optional `k`, `cosineWeight`, `minScore` and `domain`;
     *   missing (null/undefined) values fall back to 5, 0.7 and 0.
     * @returns The retrieval results, or an empty array when nothing is indexed.
     */
    query(queryText, opts = {}) {
        const topK = opts.k ?? DEFAULT_K;
        const weight = opts.cosineWeight ?? DEFAULT_COSINE_WEIGHT;
        const threshold = opts.minScore ?? 0;
        const { store } = this;
        // Nothing indexed: skip embedding the query altogether.
        if (store.size() === 0) {
            return [];
        }
        const queryEmbedding = store.embedQuery(queryText);
        return (0, retrieval_1.retrieveTopK)(queryText, queryEmbedding, store, topK, weight, opts.domain, threshold);
    }
    /** Direct access to the IndexStore (for tests and benchmarks). */
    getStore() {
        return this.store;
    }
}
|
|
64
|
+
exports.LemmaRAG = LemmaRAG;
|
|
65
|
+
// --------------- Instancia lazy global (stateful) ----------------------
|
|
66
|
+
let _globalRAG;
|
|
67
|
+
/**
 * Returns the global RAG instance, creating it on first use.
 * Handy for quick-start use without managing the instance's lifecycle.
 */
function globalRAG() {
    if (_globalRAG === undefined) {
        _globalRAG = new LemmaRAG();
    }
    return _globalRAG;
}
|
|
76
|
+
/**
 * Drops the global instance so the next globalRAG() call builds a new one
 * (useful in tests).
 */
function resetGlobalRAG() {
    _globalRAG = void 0;
}
|
|
80
|
+
/**
 * Convenience helper: indexes `lemmas` into a LemmaRAG and runs the query on it.
 *
 * When `rag` is omitted (null/undefined), a new LemmaRAG is created for this
 * call; the global instance returned by globalRAG() is not used.
 *
 * @param lemmas - Lemmas to index before querying.
 * @param queryText - Text to search for.
 * @param opts - Options forwarded to `LemmaRAG.query`.
 * @param rag - Optional instance to index into and query.
 * @returns Whatever `LemmaRAG.query` returns for the query.
 *
 * @example
 *   const results = queryLemmas(standardLibrary().all(), 'double negation');
 */
function queryLemmas(lemmas, queryText, opts = {}, rag) {
    let engine = rag;
    if (engine == null) {
        engine = new LemmaRAG();
    }
    engine.index(lemmas);
    const results = engine.query(queryText, opts);
    return results;
}
|
|
92
|
+
//# sourceMappingURL=query.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"query.js","sourceRoot":"","sources":["../../../src/reasoning/lemma-rag/query.ts"],"names":[],"mappings":";AAAA,+DAA+D;AAC/D,yCAAyC;AACzC,EAAE;AACF,UAAU;AACV,gEAAgE;AAChE,sEAAsE;AACtE,+DAA+D;;;AAwE/D,8BAGC;AAGD,wCAEC;AASD,kCASC;AA9FD,+CAA2C;AAC3C,2CAA2C;AAC3C,2CAA8C;AAE9C,MAAM,SAAS,GAAG,CAAC,CAAC;AACpB,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC;;;;;;;GAOG;AACH,MAAa,QAAQ;IACX,KAAK,CAAa;IAE1B,YAAY,WAA8B,2BAAe;QACvD,IAAI,CAAC,KAAK,GAAG,IAAI,wBAAU,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED,qDAAqD;IACrD,KAAK,CAAC,MAAsB;QAC1B,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED,iCAAiC;IACjC,IAAI;QACF,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IAC3B,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,SAAiB,EAAE,OAAmB,EAAE;QAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,SAAS,CAAC;QAC9B,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,qBAAqB,CAAC;QAChE,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;QAEpC,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEvC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAElD,OAAO,IAAA,wBAAY,EAAC,SAAS,EAAE,QAAQ,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,EAAE,YAAY,EAAE,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAC/F,CAAC;IAED,8DAA8D;IAC9D,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;CACF;AA3CD,4BA2CC;AAED,0EAA0E;AAE1E,IAAI,UAAgC,CAAC;AAErC;;;GAGG;AACH,SAAgB,SAAS;IACvB,IAAI,CAAC,UAAU;QAAE,UAAU,GAAG,IAAI,QAAQ,EAAE,CAAC;IAC7C,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,mDAAmD;AACnD,SAAgB,cAAc;IAC5B,UAAU,GAAG,SAAS,CAAC;AACzB,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,WAAW,CACzB,MAAsB,EACtB,SAAiB,EACjB,OAAmB,EAAE,EACrB,GAAc;IAEd,MAAM,QAAQ,GAAG,GAAG,IAAI,IAAI,QAAQ,EAAE,CAAC;IACvC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,OAAO,QAAQ,CAAC,KAAK,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;AACzC,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { Embedding, EmbeddedLemma, QueryResult } from './types';
|
|
2
|
+
import type { IndexStore } from './index-store';
|
|
3
|
+
/**
|
|
4
|
+
* Cosine similarity between two NORMALIZED vectors (L2 norm = 1).
|
|
5
|
+
* In that case it is simply the dot product.
|
|
6
|
+
* Returns a value in [-1, 1], clamped to [0, 1] so it can be used as a score.
|
|
7
|
+
*/
|
|
8
|
+
export declare function cosineSimilarity(a: Embedding, b: Embedding): number;
|
|
9
|
+
/**
|
|
10
|
+
* Okapi BM25 score of a query against one document in the store.
|
|
11
|
+
* Returns a score ≥ 0.
|
|
12
|
+
*/
|
|
13
|
+
export declare function bm25Score(queryText: string, doc: EmbeddedLemma, store: IndexStore): number;
|
|
14
|
+
/**
|
|
15
|
+
* Hybrid score: `cosineWeight * cosine + (1 - cosineWeight) * bm25_norm`.
|
|
16
|
+
*
|
|
17
|
+
* BM25 is normalized by dividing by `maxBm25` (the maximum observed in the batch).
|
|
18
|
+
* If maxBm25 === 0, the BM25 component contributes 0.
|
|
19
|
+
*/
|
|
20
|
+
export declare function hybridScore(cosine: number, bm25: number, maxBm25: number, cosineWeight: number): number;
|
|
21
|
+
/**
|
|
22
|
+
* Retrieves the top-k lemmas in the store for a given query.
|
|
23
|
+
*
|
|
24
|
+
* @param queryText - Query text (formula or description)
|
|
25
|
+
* @param queryVec - Precomputed embedding of queryText
|
|
26
|
+
* @param store - IndexStore holding the indexed lemmas
|
|
27
|
+
* @param k - Number of results
|
|
28
|
+
* @param cosineWeight - Weight of the cosine component (LemmaRAG.query passes 0.7 by default)
|
|
29
|
+
* @param domain - Optional domain filter
|
|
30
|
+
* @param minScore - Minimum score for a result to be included
|
|
31
|
+
*/
|
|
32
|
+
export declare function retrieveTopK(queryText: string, queryVec: Embedding, store: IndexStore, k: number, cosineWeight: number, domain?: string, minScore?: number): QueryResult[];
|
|
33
|
+
//# sourceMappingURL=retrieval.d.ts.map
|