@stevenvo780/autologic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +166 -0
- package/dist/atoms/coreference.d.ts +29 -0
- package/dist/atoms/coreference.d.ts.map +1 -0
- package/dist/atoms/coreference.js +75 -0
- package/dist/atoms/coreference.js.map +1 -0
- package/dist/atoms/identifier-gen.d.ts +23 -0
- package/dist/atoms/identifier-gen.d.ts.map +1 -0
- package/dist/atoms/identifier-gen.js +83 -0
- package/dist/atoms/identifier-gen.js.map +1 -0
- package/dist/atoms/index.d.ts +20 -0
- package/dist/atoms/index.d.ts.map +1 -0
- package/dist/atoms/index.js +112 -0
- package/dist/atoms/index.js.map +1 -0
- package/dist/atoms/keyword-extractor.d.ts +29 -0
- package/dist/atoms/keyword-extractor.d.ts.map +1 -0
- package/dist/atoms/keyword-extractor.js +73 -0
- package/dist/atoms/keyword-extractor.js.map +1 -0
- package/dist/autologic.d.ts +63 -0
- package/dist/autologic.d.ts.map +1 -0
- package/dist/autologic.js +107 -0
- package/dist/autologic.js.map +1 -0
- package/dist/discourse/index.d.ts +18 -0
- package/dist/discourse/index.d.ts.map +1 -0
- package/dist/discourse/index.js +43 -0
- package/dist/discourse/index.js.map +1 -0
- package/dist/discourse/markers-en.d.ts +7 -0
- package/dist/discourse/markers-en.d.ts.map +1 -0
- package/dist/discourse/markers-en.js +113 -0
- package/dist/discourse/markers-en.js.map +1 -0
- package/dist/discourse/markers-es.d.ts +7 -0
- package/dist/discourse/markers-es.d.ts.map +1 -0
- package/dist/discourse/markers-es.js +134 -0
- package/dist/discourse/markers-es.js.map +1 -0
- package/dist/discourse/pattern-detector.d.ts +16 -0
- package/dist/discourse/pattern-detector.d.ts.map +1 -0
- package/dist/discourse/pattern-detector.js +95 -0
- package/dist/discourse/pattern-detector.js.map +1 -0
- package/dist/discourse/role-classifier.d.ts +12 -0
- package/dist/discourse/role-classifier.d.ts.map +1 -0
- package/dist/discourse/role-classifier.js +141 -0
- package/dist/discourse/role-classifier.js.map +1 -0
- package/dist/formalize.d.ts +25 -0
- package/dist/formalize.d.ts.map +1 -0
- package/dist/formalize.js +123 -0
- package/dist/formalize.js.map +1 -0
- package/dist/formula/connectors.d.ts +31 -0
- package/dist/formula/connectors.d.ts.map +1 -0
- package/dist/formula/connectors.js +90 -0
- package/dist/formula/connectors.js.map +1 -0
- package/dist/formula/first-order.d.ts +11 -0
- package/dist/formula/first-order.d.ts.map +1 -0
- package/dist/formula/first-order.js +156 -0
- package/dist/formula/first-order.js.map +1 -0
- package/dist/formula/index.d.ts +15 -0
- package/dist/formula/index.d.ts.map +1 -0
- package/dist/formula/index.js +49 -0
- package/dist/formula/index.js.map +1 -0
- package/dist/formula/modal.d.ts +11 -0
- package/dist/formula/modal.d.ts.map +1 -0
- package/dist/formula/modal.js +138 -0
- package/dist/formula/modal.js.map +1 -0
- package/dist/formula/propositional.d.ts +11 -0
- package/dist/formula/propositional.d.ts.map +1 -0
- package/dist/formula/propositional.js +241 -0
- package/dist/formula/propositional.js.map +1 -0
- package/dist/formula/temporal.d.ts +11 -0
- package/dist/formula/temporal.d.ts.map +1 -0
- package/dist/formula/temporal.js +134 -0
- package/dist/formula/temporal.js.map +1 -0
- package/dist/generator/index.d.ts +6 -0
- package/dist/generator/index.d.ts.map +1 -0
- package/dist/generator/index.js +12 -0
- package/dist/generator/index.js.map +1 -0
- package/dist/generator/st-emitter.d.ts +23 -0
- package/dist/generator/st-emitter.d.ts.map +1 -0
- package/dist/generator/st-emitter.js +134 -0
- package/dist/generator/st-emitter.js.map +1 -0
- package/dist/generator/validator.d.ts +22 -0
- package/dist/generator/validator.d.ts.map +1 -0
- package/dist/generator/validator.js +53 -0
- package/dist/generator/validator.js.map +1 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +44 -0
- package/dist/index.js.map +1 -0
- package/dist/nlp/index.d.ts +8 -0
- package/dist/nlp/index.d.ts.map +1 -0
- package/dist/nlp/index.js +17 -0
- package/dist/nlp/index.js.map +1 -0
- package/dist/nlp/stemmer-en.d.ts +10 -0
- package/dist/nlp/stemmer-en.d.ts.map +1 -0
- package/dist/nlp/stemmer-en.js +149 -0
- package/dist/nlp/stemmer-en.js.map +1 -0
- package/dist/nlp/stemmer-es.d.ts +10 -0
- package/dist/nlp/stemmer-es.d.ts.map +1 -0
- package/dist/nlp/stemmer-es.js +117 -0
- package/dist/nlp/stemmer-es.js.map +1 -0
- package/dist/nlp/stopwords.d.ts +11 -0
- package/dist/nlp/stopwords.d.ts.map +1 -0
- package/dist/nlp/stopwords.js +73 -0
- package/dist/nlp/stopwords.js.map +1 -0
- package/dist/nlp/tokenizer.d.ts +13 -0
- package/dist/nlp/tokenizer.d.ts.map +1 -0
- package/dist/nlp/tokenizer.js +36 -0
- package/dist/nlp/tokenizer.js.map +1 -0
- package/dist/segmenter/clause-splitter.d.ts +15 -0
- package/dist/segmenter/clause-splitter.d.ts.map +1 -0
- package/dist/segmenter/clause-splitter.js +192 -0
- package/dist/segmenter/clause-splitter.js.map +1 -0
- package/dist/segmenter/index.d.ts +11 -0
- package/dist/segmenter/index.d.ts.map +1 -0
- package/dist/segmenter/index.js +25 -0
- package/dist/segmenter/index.js.map +1 -0
- package/dist/segmenter/sentence-splitter.d.ts +13 -0
- package/dist/segmenter/sentence-splitter.d.ts.map +1 -0
- package/dist/segmenter/sentence-splitter.js +69 -0
- package/dist/segmenter/sentence-splitter.js.map +1 -0
- package/dist/types.d.ts +184 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/package.json +51 -0
- package/readme.md +1 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 stevenvo780
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# @stevenvo780/autologic
|
|
2
|
+
|
|
3
|
+
**Formalizador automático de texto natural → lógica formal ST, sin IA.**
|
|
4
|
+
|
|
5
|
+
Autologic convierte texto en lenguaje natural (español/inglés) en código formal ejecutable en [ST lang](https://github.com/stevenvo780/st-lang), utilizando NLP basado en reglas — sin modelos de lenguaje, sin APIs externas, sin dependencias de runtime.
|
|
6
|
+
|
|
7
|
+
## Características
|
|
8
|
+
|
|
9
|
+
- 🧠 **11 perfiles lógicos**: proposicional, primer orden, modal K, deóntico, epistémico, intuicionista, temporal LTL, paraconsistente Belnap, silogístico aristotélico, probabilístico, aritmético
|
|
10
|
+
- 🌍 **Bilingüe**: español e inglés con ~200 marcadores discursivos
|
|
11
|
+
- 🔍 **Detección de patrones**: modus ponens, modus tollens, silogismo hipotético, cadenas condicionales, generalización/instanciación universal
|
|
12
|
+
- ⚡ **Zero dependencies**: todo el NLP es interno (stemmers Snowball, stopwords, coreferencia)
|
|
13
|
+
- 🎯 **Trazabilidad**: cada fórmula referencia el texto fuente original
|
|
14
|
+
- ✅ **Validación**: valida el ST generado contra `@stevenvo780/st-lang`
|
|
15
|
+
|
|
16
|
+
## Instalación
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install @stevenvo780/autologic @stevenvo780/st-lang
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Uso rápido
|
|
23
|
+
|
|
24
|
+
### Función stateless
|
|
25
|
+
|
|
26
|
+
```typescript
|
|
27
|
+
import { formalize } from '@stevenvo780/autologic';
|
|
28
|
+
|
|
29
|
+
const result = formalize(
|
|
30
|
+
'Si llueve, entonces la calle se moja. Está lloviendo. Por lo tanto, la calle está mojada.',
|
|
31
|
+
{ profile: 'classical.propositional', language: 'es' }
|
|
32
|
+
);
|
|
33
|
+
|
|
34
|
+
console.log(result.stCode);
|
|
35
|
+
// logic classical.propositional
|
|
36
|
+
// interpret "llueve" as LLUEVE
|
|
37
|
+
// interpret "la calle se moja." as CALLE_MOJA
|
|
38
|
+
// axiom regla_1 : LLUEVE -> CALLE_MOJA
|
|
39
|
+
// axiom hecho_2 : LLOVIENDO
|
|
40
|
+
// derive MOJADA_CALLE from {regla_1, hecho_2}
|
|
41
|
+
// check valid (LLUEVE -> CALLE_MOJA)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Clase con estado
|
|
45
|
+
|
|
46
|
+
```typescript
|
|
47
|
+
import { Autologic } from '@stevenvo780/autologic';
|
|
48
|
+
|
|
49
|
+
const al = new Autologic({
|
|
50
|
+
language: 'es',
|
|
51
|
+
defaultProfile: 'classical.propositional',
|
|
52
|
+
includeComments: true,
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
const r1 = al.formalize('Todo humano es mortal. Sócrates es humano.');
|
|
56
|
+
const r2 = al.formalize('Es necesario que las leyes se cumplan.');
|
|
57
|
+
|
|
58
|
+
console.log(al.getHistory()); // últimas 2 formalizaciones
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Perfiles lógicos
|
|
62
|
+
|
|
63
|
+
| Perfil | Descripción |
|
|
64
|
+
|--------|------------|
|
|
65
|
+
| `classical.propositional` | Lógica proposicional clásica (→, &, \|, !, ↔) |
|
|
66
|
+
| `classical.first_order` | Primer orden con cuantificadores (∀, ∃, predicados) |
|
|
67
|
+
| `modal.k` | Lógica modal K (□, ◇) |
|
|
68
|
+
| `deontic.standard` | Lógica deóntica (obligación, permisión) |
|
|
69
|
+
| `epistemic.s5` | Lógica epistémica S5 (conocimiento, creencia) |
|
|
70
|
+
| `intuitionistic.propositional` | Lógica intuicionista |
|
|
71
|
+
| `temporal.ltl` | Lógica temporal lineal (next, until, always, eventually) |
|
|
72
|
+
| `paraconsistent.belnap` | Lógica paraconsistente Belnap 4-valores |
|
|
73
|
+
| `aristotelian.syllogistic` | Silogística aristotélica |
|
|
74
|
+
| `probabilistic.basic` | Lógica probabilística básica |
|
|
75
|
+
| `arithmetic` | Aritmética formal |
|
|
76
|
+
|
|
77
|
+
## API
|
|
78
|
+
|
|
79
|
+
### `formalize(text, options?)`
|
|
80
|
+
|
|
81
|
+
Función principal stateless. Retorna `FormalizationResult`:
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
interface FormalizationResult {
|
|
85
|
+
ok: boolean;
|
|
86
|
+
stCode: string;
|
|
87
|
+
analysis: DiscourseAnalysis;
|
|
88
|
+
atoms: Map<string, string>;
|
|
89
|
+
formulas: FormulaEntry[];
|
|
90
|
+
diagnostics: Diagnostic[];
|
|
91
|
+
}
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Opciones
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
interface FormalizeOptions {
|
|
98
|
+
profile?: LogicProfile; // default: 'classical.propositional'
|
|
99
|
+
language?: 'es' | 'en'; // default: 'es'
|
|
100
|
+
atomStyle?: 'keywords' | 'letters' | 'numbered'; // default: 'keywords'
|
|
101
|
+
includeComments?: boolean; // default: true
|
|
102
|
+
validate?: boolean; // default: false
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Utilidades exportadas
|
|
107
|
+
|
|
108
|
+
```typescript
|
|
109
|
+
import {
|
|
110
|
+
segment, // Segmentar texto en oraciones/cláusulas
|
|
111
|
+
analyzeDiscourse, // Analizar marcadores y patrones
|
|
112
|
+
extractAtoms, // Extraer proposiciones atómicas
|
|
113
|
+
buildFormulas, // Construir fórmulas ST
|
|
114
|
+
emitST, // Generar código ST
|
|
115
|
+
validateST, // Validar ST con st-lang
|
|
116
|
+
} from '@stevenvo780/autologic';
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Pipeline
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
Texto Natural
|
|
123
|
+
│
|
|
124
|
+
▼
|
|
125
|
+
┌──────────┐ Divide por . ; ? !
|
|
126
|
+
│ Segmenter │ Divide cláusulas por marcadores
|
|
127
|
+
└────┬─────┘
|
|
128
|
+
▼
|
|
129
|
+
┌──────────────┐ Detecta marcadores discursivos
|
|
130
|
+
│ Discourse │ Clasifica roles (premisa, condición, conclusión...)
|
|
131
|
+
│ Analyzer │ Detecta patrones (modus ponens, silogismos...)
|
|
132
|
+
└────┬─────────┘
|
|
133
|
+
▼
|
|
134
|
+
┌──────────┐ Extrae keywords / predicados
|
|
135
|
+
│ Atoms │ Genera IDs únicos (LLUEVE, CALLE_MOJA)
|
|
136
|
+
│ Extractor│ Resuelve coreferencia (Dice/Jaccard)
|
|
137
|
+
└────┬─────┘
|
|
138
|
+
▼
|
|
139
|
+
┌──────────┐ Mapea roles → operadores ST
|
|
140
|
+
│ Formula │ Construye fórmulas por perfil lógico
|
|
141
|
+
│ Builder │ (proposicional, 1er orden, modal, temporal)
|
|
142
|
+
└────┬─────┘
|
|
143
|
+
▼
|
|
144
|
+
┌──────────┐ Emite código ST formateado
|
|
145
|
+
│ ST │ Agrega comments/traceability
|
|
146
|
+
│ Generator│ Valida con st-lang (opcional)
|
|
147
|
+
└──────────┘
|
|
148
|
+
│
|
|
149
|
+
▼
|
|
150
|
+
Código .st
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Desarrollo
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
git clone https://github.com/stevenvo780/autologic
|
|
157
|
+
cd autologic
|
|
158
|
+
npm install
|
|
159
|
+
npm run build # Compilar TypeScript
|
|
160
|
+
npm test # 74 tests
|
|
161
|
+
npm run test:coverage # Coverage (~80%)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Licencia
|
|
165
|
+
|
|
166
|
+
MIT
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coreference — Resolución básica de coreferencia por similitud léxica
|
|
3
|
+
*
|
|
4
|
+
* Detecta cuando dos cláusulas se refieren a la misma proposición:
|
|
5
|
+
* "llueve" ≈ "está lloviendo" → mismo átomo
|
|
6
|
+
*
|
|
7
|
+
* Usa coeficiente Dice/Jaccard sobre bags-of-stems.
|
|
8
|
+
*/
|
|
9
|
+
import type { Language } from '../types';
|
|
10
|
+
/**
|
|
11
|
+
* Calcula la similitud Dice entre dos conjuntos de stems.
|
|
12
|
+
* Dice = 2|A∩B| / (|A| + |B|)
|
|
13
|
+
*/
|
|
14
|
+
export declare function diceSimilarity(setA: Set<string>, setB: Set<string>): number;
|
|
15
|
+
/**
|
|
16
|
+
* Calcula la similitud Jaccard entre dos conjuntos.
|
|
17
|
+
* Jaccard = |A∩B| / |A∪B|
|
|
18
|
+
*/
|
|
19
|
+
export declare function jaccardSimilarity(setA: Set<string>, setB: Set<string>): number;
|
|
20
|
+
/**
|
|
21
|
+
* Determina si dos textos son correferentes (hablan de lo mismo).
|
|
22
|
+
*/
|
|
23
|
+
export declare function areCoreferent(textA: string, textB: string, language?: Language, threshold?: number): boolean;
|
|
24
|
+
/**
|
|
25
|
+
* Dado un array de textos, agrupa los correferentes y retorna
|
|
26
|
+
 * un mapa de índice de texto → índice del texto representativo de su grupo.
|
|
27
|
+
*/
|
|
28
|
+
export declare function resolveCoreferenceGroups(texts: string[], language?: Language, threshold?: number): Map<number, number>;
|
|
29
|
+
//# sourceMappingURL=coreference.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"coreference.d.ts","sourceRoot":"","sources":["../../src/atoms/coreference.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AACH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAMzC;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,MAAM,CAU3E;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,MAAM,CAW9E;AAED;;GAEG;AACH,wBAAgB,aAAa,CAC3B,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,EACb,QAAQ,GAAE,QAAe,EACzB,SAAS,GAAE,MAA8B,GACxC,OAAO,CAIT;AAED;;;GAGG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EAAE,EACf,QAAQ,GAAE,QAAe,EACzB,SAAS,GAAE,MAA8B,GACxC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAoBrB"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.diceSimilarity = diceSimilarity;
|
|
4
|
+
exports.jaccardSimilarity = jaccardSimilarity;
|
|
5
|
+
exports.areCoreferent = areCoreferent;
|
|
6
|
+
exports.resolveCoreferenceGroups = resolveCoreferenceGroups;
|
|
7
|
+
const keyword_extractor_1 = require("./keyword-extractor");
|
|
8
|
+
/** Umbral de similitud para considerar coreferencia (70%) */
|
|
9
|
+
const COREFERENCE_THRESHOLD = 0.7;
|
|
10
|
+
/**
 * Sørensen–Dice similarity between two sets of stems.
 *
 *   Dice = 2|A∩B| / (|A| + |B|)
 *
 * Two empty sets count as identical (1); if exactly one set is empty the
 * similarity is 0.
 *
 * @param {Set<string>} setA first set of stems
 * @param {Set<string>} setB second set of stems
 * @returns {number} similarity score between 0 and 1
 */
function diceSimilarity(setA, setB) {
    const sizeA = setA.size;
    const sizeB = setB.size;
    if (sizeA === 0 || sizeB === 0) {
        // Both empty -> 1, only one empty -> 0.
        return sizeA === 0 && sizeB === 0 ? 1 : 0;
    }
    const shared = [...setA].filter(stem => setB.has(stem)).length;
    return (2 * shared) / (sizeA + sizeB);
}
|
|
26
|
+
/**
 * Jaccard similarity between two sets.
 *
 *   Jaccard = |A∩B| / |A∪B|
 *
 * Two empty sets count as identical (1); if exactly one set is empty the
 * similarity is 0.
 *
 * @param {Set<string>} setA first set
 * @param {Set<string>} setB second set
 * @returns {number} similarity score between 0 and 1
 */
function jaccardSimilarity(setA, setB) {
    const sizeA = setA.size;
    const sizeB = setB.size;
    if (sizeA === 0 || sizeB === 0) {
        // Both empty -> 1, only one empty -> 0.
        return sizeA === 0 && sizeB === 0 ? 1 : 0;
    }
    let shared = 0;
    setA.forEach(member => {
        if (setB.has(member)) {
            shared += 1;
        }
    });
    // |A∪B| = |A| + |B| - |A∩B|
    const unionSize = sizeA + sizeB - shared;
    return shared / unionSize;
}
|
|
43
|
+
/**
 * Decides whether two texts are coreferent (talk about the same thing).
 *
 * Each text is reduced to its bag of stems via `bagOfStems`; the texts are
 * coreferent when the Dice similarity of the two bags reaches `threshold`.
 *
 * @param {string} textA first text
 * @param {string} textB second text
 * @param {string} [language='es'] language passed through to `bagOfStems`
 * @param {number} [threshold=COREFERENCE_THRESHOLD] minimum Dice score
 * @returns {boolean} true when the similarity is at least `threshold`
 */
function areCoreferent(textA, textB, language = 'es', threshold = COREFERENCE_THRESHOLD) {
    const [stemsA, stemsB] = [textA, textB].map(text => (0, keyword_extractor_1.bagOfStems)(text, language));
    const score = diceSimilarity(stemsA, stemsB);
    return score >= threshold;
}
|
|
51
|
+
/**
 * Groups coreferent texts.
 *
 * Returns a map from each text's index to the index of its group's
 * representative. The representative of a group is the earliest text in it;
 * a text with no sufficiently similar earlier representative maps to itself.
 *
 * @param {string[]} texts texts to group
 * @param {string} [language='es'] language passed through to `bagOfStems`
 * @param {number} [threshold=COREFERENCE_THRESHOLD] minimum Dice score
 * @returns {Map<number, number>} text index -> representative index
 */
function resolveCoreferenceGroups(texts, language = 'es', threshold = COREFERENCE_THRESHOLD) {
    const stemSets = texts.map(t => (0, keyword_extractor_1.bagOfStems)(t, language));
    const representativeOf = new Map();
    for (let current = 0; current < texts.length; current++) {
        // Default: the text represents itself unless an earlier head matches.
        let representative = current;
        for (let candidate = 0; candidate < current; candidate++) {
            // Only group heads (entries mapped to themselves) are compared.
            const isHead = !(representativeOf.has(candidate) && representativeOf.get(candidate) !== candidate);
            if (isHead && diceSimilarity(stemSets[current], stemSets[candidate]) >= threshold) {
                representative = candidate;
                break;
            }
        }
        representativeOf.set(current, representative);
    }
    return representativeOf;
}
|
|
75
|
+
//# sourceMappingURL=coreference.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"coreference.js","sourceRoot":"","sources":["../../src/atoms/coreference.ts"],"names":[],"mappings":";;AAkBA,wCAUC;AAMD,8CAWC;AAKD,sCASC;AAMD,4DAwBC;AAhFD,2DAAiD;AAEjD,6DAA6D;AAC7D,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC;;;GAGG;AACH,SAAgB,cAAc,CAAC,IAAiB,EAAE,IAAiB;IACjE,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACjD,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjD,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,YAAY,EAAE,CAAC;IACrC,CAAC;IAED,OAAO,CAAC,CAAC,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,SAAgB,iBAAiB,CAAC,IAAiB,EAAE,IAAiB;IACpE,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACjD,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjD,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC;IAC1C,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,YAAY,EAAE,CAAC;IACrC,CAAC;IAED,OAAO,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,SAAgB,aAAa,CAC3B,KAAa,EACb,KAAa,EACb,WAAqB,IAAI,EACzB,YAAoB,qBAAqB;IAEzC,MAAM,MAAM,GAAG,IAAA,8BAAU,EAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAG,IAAA,8BAAU,EAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAC3C,OAAO,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,IAAI,SAAS,CAAC;AACrD,CAAC;AAED;;;GAGG;AACH,SAAgB,wBAAwB,CACtC,KAAe,EACf,WAAqB,IAAI,EACzB,YAAoB,qBAAqB;IAEzC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAA,8BAAU,EAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;IACzD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,2BAA2B;IAErE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,KAAK,GAAG,KAAK,CAAC;QAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3B,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC
nD,IAAI,cAAc,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,SAAS,EAAE,CAAC;gBAC1D,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBACjB,KAAK,GAAG,IAAI,CAAC;gBACb,MAAM;YACR,CAAC;QACH,CAAC;QACD,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,6BAA6B;QACjD,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Identifier Generator — Genera IDs simbólicos ST válidos
|
|
3
|
+
*
|
|
4
|
+
* Convierte palabras clave en identificadores válidos para ST:
|
|
5
|
+
* - Keywords → KEYWORDS_JOINED (e.g., LLUEVE, CALLE_MOJA)
|
|
6
|
+
* - Letters → P, Q, R, S, ...
|
|
7
|
+
* - Numbered → A1, A2, A3, ...
|
|
8
|
+
*/
|
|
9
|
+
import type { AtomStyle } from '../types';
|
|
10
|
+
/**
|
|
11
|
+
* Genera un identificador ST válido a partir de palabras clave.
|
|
12
|
+
*/
|
|
13
|
+
export declare function generateId(keywords: string[], style?: AtomStyle, index?: number): string;
|
|
14
|
+
/**
|
|
15
|
+
* Genera un identificador para predicados (primer orden).
|
|
16
|
+
* Formato: NombrePredicado (CamelCase)
|
|
17
|
+
*/
|
|
18
|
+
export declare function generatePredicateId(words: string[]): string;
|
|
19
|
+
/**
|
|
20
|
+
* Genera un identificador de variable (minúscula).
|
|
21
|
+
*/
|
|
22
|
+
export declare function generateVariableId(name: string): string;
|
|
23
|
+
//# sourceMappingURL=identifier-gen.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"identifier-gen.d.ts","sourceRoot":"","sources":["../../src/atoms/identifier-gen.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AACH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAK1C;;GAEG;AACH,wBAAgB,UAAU,CACxB,QAAQ,EAAE,MAAM,EAAE,EAClB,KAAK,GAAE,SAAsB,EAC7B,KAAK,GAAE,MAAU,GAChB,MAAM,CASR;AA+CD;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAQ3D;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGvD"}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.generateId = generateId;
|
|
4
|
+
exports.generatePredicateId = generatePredicateId;
|
|
5
|
+
exports.generateVariableId = generateVariableId;
|
|
6
|
+
/** Letras disponibles para estilo 'letters' */
|
|
7
|
+
const LETTERS = 'PQRSTUVWXYZABCDEFGHIJKLMNO'.split('');
|
|
8
|
+
/**
 * Builds an ST identifier for an atom, using the requested naming style.
 *
 * - 'keywords' -> identifier derived from `keywords`
 * - 'letters'  -> letter identifier chosen by `index`
 * - 'numbered' -> numbered identifier chosen by `index`
 *
 * Any other style value yields `undefined`.
 *
 * @param {string[]} keywords keywords used by the 'keywords' style
 * @param {string} [style='keywords'] naming style
 * @param {number} [index=0] position used by the 'letters'/'numbered' styles
 * @returns {string|undefined} the generated identifier
 */
function generateId(keywords, style = 'keywords', index = 0) {
    if (style === 'keywords') {
        return keywordsToId(keywords);
    }
    if (style === 'letters') {
        return lettersId(index);
    }
    if (style === 'numbered') {
        return numberedId(index);
    }
    return undefined;
}
|
|
21
|
+
/**
 * Builds an identifier from keywords.
 *
 * Every keyword is sanitised, keywords that end up empty are dropped, and
 * the rest are joined with underscores and upper-cased. Falls back to
 * 'ATOM' when nothing usable remains.
 *
 * @param {string[]} keywords source keywords
 * @returns {string} upper-case identifier, or 'ATOM'
 */
function keywordsToId(keywords) {
    const FALLBACK = 'ATOM';
    if (keywords.length === 0) {
        return FALLBACK;
    }
    const cleaned = keywords.map(keyword => sanitize(keyword));
    const usable = cleaned.filter(part => part.length > 0);
    const joined = usable.join('_').toUpperCase();
    return joined || FALLBACK;
}
|
|
35
|
+
/**
 * Letter-style identifier: the entries of LETTERS in order, then the same
 * letters again with a numeric cycle suffix (e.g. P1, Q1, ...) once the
 * index runs past the end of LETTERS.
 *
 * @param {number} index zero-based atom position
 * @returns {string} letter identifier
 */
function lettersId(index) {
    const alphabetSize = LETTERS.length;
    if (index < alphabetSize) {
        return LETTERS[index];
    }
    const letter = LETTERS[index % alphabetSize];
    const suffix = Math.floor(index / alphabetSize);
    return `${letter}${suffix}`;
}
|
|
45
|
+
/**
 * Numbered-style identifier: A1, A2, A3, ... (one-based).
 *
 * @param {number} index zero-based atom position
 * @returns {string} 'A' followed by `index + 1`
 */
function numberedId(index) {
    const ordinal = index + 1;
    return 'A' + String(ordinal);
}
|
|
51
|
+
/**
 * Sanitises a word so it can be used inside an ST identifier.
 *
 * Diacritics are stripped (NFD decomposition, then removal of combining
 * marks U+0300–U+036F), every character other than ASCII letters, digits
 * and underscore is removed, and a leading digit is prefixed with '_'.
 *
 * @param {string} word raw word
 * @returns {string} sanitised word (may be empty)
 */
function sanitize(word) {
    return word
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '') // strip diacritics
        .replace(/[^a-zA-Z0-9_]/g, '')
        .replace(/^[0-9]/, '_$&'); // must not start with a digit
}
/**
 * Builds a predicate identifier (first-order logic) in CamelCase.
 *
 * Each word is sanitised, its first character upper-cased and the rest
 * lower-cased, and the pieces are concatenated. Falls back to 'Pred' when
 * there are no words or when every word sanitises to an empty string, so the
 * result is never an empty identifier.
 *
 * @param {string[]} words words that name the predicate
 * @returns {string} CamelCase identifier, or 'Pred'
 */
function generatePredicateId(words) {
    const id = words
        .map(w => {
        const s = sanitize(w);
        return s.charAt(0).toUpperCase() + s.slice(1).toLowerCase();
    })
        .join('');
    // Same safeguard as the 'ATOM' / 'x' fallbacks of the sibling generators.
    return id || 'Pred';
}
|
|
76
|
+
/**
 * Builds a variable identifier: the sanitised name in lower case, or 'x'
 * when sanitising leaves nothing.
 *
 * @param {string} name raw variable name
 * @returns {string} lower-case identifier, or 'x'
 */
function generateVariableId(name) {
    const lowered = sanitize(name).toLowerCase();
    return lowered === '' ? 'x' : lowered;
}
|
|
83
|
+
//# sourceMappingURL=identifier-gen.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"identifier-gen.js","sourceRoot":"","sources":["../../src/atoms/identifier-gen.ts"],"names":[],"mappings":";;AAgBA,gCAaC;AAmDD,kDAQC;AAKD,gDAGC;AAtFD,+CAA+C;AAC/C,MAAM,OAAO,GAAG,4BAA4B,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;AAEvD;;GAEG;AACH,SAAgB,UAAU,CACxB,QAAkB,EAClB,QAAmB,UAAU,EAC7B,QAAgB,CAAC;IAEjB,QAAQ,KAAK,EAAE,CAAC;QACd,KAAK,UAAU;YACb,OAAO,YAAY,CAAC,QAAQ,CAAC,CAAC;QAChC,KAAK,SAAS;YACZ,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;QAC1B,KAAK,UAAU;YACb,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,QAAkB;IACtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC;IAEzC,MAAM,EAAE,GAAG,QAAQ;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;SACrB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;SACzB,IAAI,CAAC,GAAG,CAAC;SACT,WAAW,EAAE,CAAC;IAEjB,OAAO,EAAE,IAAI,MAAM,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,KAAa;IAC9B,IAAI,KAAK,GAAG,OAAO,CAAC,MAAM;QAAE,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACjD,MAAM,GAAG,GAAG,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;IACnC,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,EAAE,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,KAAa;IAC/B,OAAO,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;AACzB,CAAC;AAED;;;GAGG;AACH,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI;SACR,SAAS,CAAC,KAAK,CAAC;SAChB,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,uBAAuB;SACvD,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC;SAC7B,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,wBAAwB;AACvD,CAAC;AAED;;;GAGG;AACH,SAAgB,mBAAmB,CAAC,KAAe;IACjD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC;IACtC,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,CAAC,EAAE;QACP,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACtB,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IAC9D,CAAC,CAAC;SACD,IAAI,CAAC,EAAE,CAAC,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAC,IAAY;IAC7C,MAAM,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;IACvC,OAAO,CAAC,IAAI,GAAG,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Atoms — Extractor de átomos proposicionales
|
|
3
|
+
*/
|
|
4
|
+
export { extractKeywords, extractStems, bagOfStems, extractSubjectPredicate } from './keyword-extractor';
|
|
5
|
+
export { generateId, generatePredicateId, generateVariableId } from './identifier-gen';
|
|
6
|
+
export { areCoreferent, resolveCoreferenceGroups, diceSimilarity } from './coreference';
|
|
7
|
+
import type { AtomEntry, AtomStyle, Language, AnalyzedSentence, LogicProfile } from '../types';
|
|
8
|
+
/**
|
|
9
|
+
* Extrae átomos proposicionales de oraciones analizadas.
|
|
10
|
+
* Resuelve coreferencia y asigna IDs simbólicos.
|
|
11
|
+
*/
|
|
12
|
+
export declare function extractAtoms(sentences: AnalyzedSentence[], options?: {
|
|
13
|
+
language?: Language;
|
|
14
|
+
atomStyle?: AtomStyle;
|
|
15
|
+
profile?: LogicProfile;
|
|
16
|
+
}): {
|
|
17
|
+
atoms: Map<string, string>;
|
|
18
|
+
entries: AtomEntry[];
|
|
19
|
+
};
|
|
20
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/atoms/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,UAAU,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AACzG,OAAO,EAAE,UAAU,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACvF,OAAO,EAAE,aAAa,EAAE,wBAAwB,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAExF,OAAO,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,QAAQ,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAK/F;;;GAGG;AACH,wBAAgB,YAAY,CAC1B,SAAS,EAAE,gBAAgB,EAAE,EAC7B,OAAO,GAAE;IACP,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,OAAO,CAAC,EAAE,YAAY,CAAC;CACnB,GACL;IAAE,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAAC,OAAO,EAAE,SAAS,EAAE,CAAA;CAAE,CA0FtD"}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.diceSimilarity = exports.resolveCoreferenceGroups = exports.areCoreferent = exports.generateVariableId = exports.generatePredicateId = exports.generateId = exports.extractSubjectPredicate = exports.bagOfStems = exports.extractStems = exports.extractKeywords = void 0;
|
|
4
|
+
exports.extractAtoms = extractAtoms;
|
|
5
|
+
/**
|
|
6
|
+
* Atoms — Extractor de átomos proposicionales
|
|
7
|
+
*/
|
|
8
|
+
var keyword_extractor_1 = require("./keyword-extractor");
|
|
9
|
+
Object.defineProperty(exports, "extractKeywords", { enumerable: true, get: function () { return keyword_extractor_1.extractKeywords; } });
|
|
10
|
+
Object.defineProperty(exports, "extractStems", { enumerable: true, get: function () { return keyword_extractor_1.extractStems; } });
|
|
11
|
+
Object.defineProperty(exports, "bagOfStems", { enumerable: true, get: function () { return keyword_extractor_1.bagOfStems; } });
|
|
12
|
+
Object.defineProperty(exports, "extractSubjectPredicate", { enumerable: true, get: function () { return keyword_extractor_1.extractSubjectPredicate; } });
|
|
13
|
+
var identifier_gen_1 = require("./identifier-gen");
|
|
14
|
+
Object.defineProperty(exports, "generateId", { enumerable: true, get: function () { return identifier_gen_1.generateId; } });
|
|
15
|
+
Object.defineProperty(exports, "generatePredicateId", { enumerable: true, get: function () { return identifier_gen_1.generatePredicateId; } });
|
|
16
|
+
Object.defineProperty(exports, "generateVariableId", { enumerable: true, get: function () { return identifier_gen_1.generateVariableId; } });
|
|
17
|
+
var coreference_1 = require("./coreference");
|
|
18
|
+
Object.defineProperty(exports, "areCoreferent", { enumerable: true, get: function () { return coreference_1.areCoreferent; } });
|
|
19
|
+
Object.defineProperty(exports, "resolveCoreferenceGroups", { enumerable: true, get: function () { return coreference_1.resolveCoreferenceGroups; } });
|
|
20
|
+
Object.defineProperty(exports, "diceSimilarity", { enumerable: true, get: function () { return coreference_1.diceSimilarity; } });
|
|
21
|
+
const keyword_extractor_2 = require("./keyword-extractor");
|
|
22
|
+
const identifier_gen_2 = require("./identifier-gen");
|
|
23
|
+
const coreference_2 = require("./coreference");
|
|
24
|
+
/**
 * Extracts propositional atoms from analyzed sentences.
 *
 * Every clause of every sentence is flattened into a single list (sentence
 * order first, then clause order). Coreference groups are resolved over the
 * clause texts, and each group is given one symbolic ID that all of its
 * clauses share.
 *
 * @param sentences - Analyzed sentences; each one must expose a `clauses`
 *   array whose items carry `text` and `role`.
 * @param options - Optional settings: `language` (falls back to 'es'),
 *   `atomStyle` (falls back to 'keywords') and `profile` (falls back to
 *   'classical.propositional').
 * @returns An object with `atoms`, a Map from atom ID to the text of the
 *   clause that created that atom, and `entries`, one record per clause in
 *   flattened order.
 */
function extractAtoms(sentences, options = {}) {
    const language = options.language || 'es';
    const style = options.atomStyle || 'keywords';
    const profile = options.profile || 'classical.propositional';
    // 1. Collect the text and role of every clause.
    const clauses = flattenClauses(sentences);
    // 2. Resolve coreference: clause index -> index of its group representative.
    const corefGroups = (0, coreference_2.resolveCoreferenceGroups)(clauses.map(clause => clause.text), language);
    // 3. Generate one atom per coreference group.
    const atoms = new Map();
    const entries = [];
    const idByRepresentative = new Map(); // representative index -> atom ID
    let atomCounter = 0;
    clauses.forEach((clause, index) => {
        // A clause missing from the coreference map is its own representative.
        const representative = corefGroups.get(index) ?? index;
        const base = { text: clause.text, sourceClause: index, role: clause.role };
        if (idByRepresentative.has(representative)) {
            // The group already owns an atom: reuse its ID for this clause.
            entries.push({ id: idByRepresentative.get(representative), ...base });
            return;
        }
        const { id, ...firstOrderFields } = createAtom(clause.text, language, style, profile, atomCounter);
        // Key order (id, text, sourceClause, role, then predicate/terms) is kept stable.
        entries.push({ id, ...base, ...firstOrderFields });
        // NOTE(review): if two different groups ever produce the same ID, the
        // later text overwrites the earlier one here — confirm IDs are unique.
        atoms.set(id, clause.text);
        idByRepresentative.set(representative, id);
        atomCounter++;
    });
    return { atoms, entries };
}
/** Flattens every sentence's clauses into one ordered list of `{ text, role }`. */
function flattenClauses(sentences) {
    const flat = [];
    for (let sIdx = 0; sIdx < sentences.length; sIdx++) {
        const sentenceClauses = sentences[sIdx].clauses;
        for (let cIdx = 0; cIdx < sentenceClauses.length; cIdx++) {
            flat.push({ text: sentenceClauses[cIdx].text, role: sentenceClauses[cIdx].role });
        }
    }
    return flat;
}
/**
 * Builds the ID (and, for first-order atoms, `predicate` and `terms`) of a new atom.
 * Falls back to a keyword-based ID when the profile is not first-order or when
 * no subject/predicate pair can be extracted from the text.
 */
function createAtom(text, language, style, profile, atomCounter) {
    if (profile === 'classical.first_order') {
        const sp = (0, keyword_extractor_2.extractSubjectPredicate)(text, language);
        if (sp) {
            const predicate = (0, identifier_gen_2.generatePredicateId)(sp.predicate.split('_'));
            const variable = (0, identifier_gen_2.generateVariableId)(sp.subject);
            return { id: `${predicate}(${variable})`, predicate, terms: [variable] };
        }
    }
    const keywords = (0, keyword_extractor_2.extractKeywords)(text, language);
    return { id: (0, identifier_gen_2.generateId)(keywords, style, atomCounter) };
}
|
|
112
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/atoms/index.ts"],"names":[],"mappings":";;;AAgBA,oCAiGC;AAjHD;;GAEG;AACH,yDAAyG;AAAhG,oHAAA,eAAe,OAAA;AAAE,iHAAA,YAAY,OAAA;AAAE,+GAAA,UAAU,OAAA;AAAE,4HAAA,uBAAuB,OAAA;AAC3E,mDAAuF;AAA9E,4GAAA,UAAU,OAAA;AAAE,qHAAA,mBAAmB,OAAA;AAAE,oHAAA,kBAAkB,OAAA;AAC5D,6CAAwF;AAA/E,4GAAA,aAAa,OAAA;AAAE,uHAAA,wBAAwB,OAAA;AAAE,6GAAA,cAAc,OAAA;AAGhE,2DAA+E;AAC/E,qDAAuF;AACvF,+CAAyD;AAEzD;;;GAGG;AACH,SAAgB,YAAY,CAC1B,SAA6B,EAC7B,UAII,EAAE;IAEN,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;IAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,IAAI,UAAU,CAAC;IAC9C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,yBAAyB,CAAC;IAE7D,6CAA6C;IAC7C,MAAM,WAAW,GAA0E,EAAE,CAAC;IAC9F,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;QACnD,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;YACjE,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC7C,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,OAAO,EAAE,IAAI;gBACb,SAAS,EAAE,IAAI;gBACf,IAAI,EAAE,MAAM,CAAC,IAAI;aAClB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,IAAA,sCAAwB,EAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE9D,2BAA2B;IAC3B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;IACxC,MAAM,OAAO,GAAgB,EAAE,CAAC;IAChC,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,oBAAoB;IACzE,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,cAAc,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAE/C,IAAI,iBAAiB,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;YAC1C,qDAAqD;YACrD,MAAM,UAAU,GAAG,iBAAiB,CAAC,GAAG,CAAC,cAAc,CAAE,CAAC;YAC1D,OAAO,CAAC,IAAI,CAAC;gBACX,EAAE,EAAE,UAAU;gBACd,IAAI,EAAE,EAAE,CAAC,IAAI;gBACb,YAAY,EAAE,CAAC;gBACf,IAAI,EAAE,EAAE,CAAC,IAAyB;aACnC,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,oBAAoB;QACpB,IAAI,MAAc,CAAC;QAEnB,IAAI,OAAO,K
AAK,uBAAuB,EAAE,CAAC;YACxC,kDAAkD;YAClD,MAAM,EAAE,GAAG,IAAA,2CAAuB,EAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACtD,IAAI,EAAE,EAAE,CAAC;gBACP,MAAM,SAAS,GAAG,IAAA,oCAAmB,EAAC,EAAE,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC/D,MAAM,QAAQ,GAAG,IAAA,mCAAkB,EAAC,EAAE,CAAC,OAAO,CAAC,CAAC;gBAChD,MAAM,GAAG,GAAG,SAAS,IAAI,QAAQ,GAAG,CAAC;gBACrC,OAAO,CAAC,IAAI,CAAC;oBACX,EAAE,EAAE,MAAM;oBACV,IAAI,EAAE,EAAE,CAAC,IAAI;oBACb,YAAY,EAAE,CAAC;oBACf,IAAI,EAAE,EAAE,CAAC,IAAyB;oBAClC,SAAS;oBACT,KAAK,EAAE,CAAC,QAAQ,CAAC;iBAClB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,MAAM,QAAQ,GAAG,IAAA,mCAAe,EAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;gBACpD,MAAM,GAAG,IAAA,2BAAU,EAAC,QAAQ,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;gBAClD,OAAO,CAAC,IAAI,CAAC;oBACX,EAAE,EAAE,MAAM;oBACV,IAAI,EAAE,EAAE,CAAC,IAAI;oBACb,YAAY,EAAE,CAAC;oBACf,IAAI,EAAE,EAAE,CAAC,IAAyB;iBACnC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,QAAQ,GAAG,IAAA,mCAAe,EAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACpD,MAAM,GAAG,IAAA,2BAAU,EAAC,QAAQ,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC;gBACX,EAAE,EAAE,MAAM;gBACV,IAAI,EAAE,EAAE,CAAC,IAAI;gBACb,YAAY,EAAE,CAAC;gBACf,IAAI,EAAE,EAAE,CAAC,IAAyB;aACnC,CAAC,CAAC;QACL,CAAC;QAED,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;QAC3B,iBAAiB,CAAC,GAAG,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QAC9C,WAAW,EAAE,CAAC;IAChB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Keyword Extractor — Extrae palabras clave de una cláusula
|
|
3
|
+
*
|
|
4
|
+
* Filtra stopwords, extrae 2-4 palabras clave por lema,
|
|
5
|
+
* genera bags-of-words para coreferencia.
|
|
6
|
+
*/
|
|
7
|
+
import type { Language } from '../types';
|
|
8
|
+
/**
|
|
9
|
+
* Extracts the main keywords from a text.
|
|
10
|
+
* Filters out stopwords and selects the most significant words.
|
|
11
|
+
*/
|
|
12
|
+
export declare function extractKeywords(text: string, language?: Language, maxWords?: number): string[];
|
|
13
|
+
/**
|
|
14
|
+
* Returns the stems of the content words of a text, for use in comparisons.
|
|
15
|
+
*/
|
|
16
|
+
export declare function extractStems(text: string, language?: Language): string[];
|
|
17
|
+
/**
|
|
18
|
+
* Computes a bag of stems for similarity comparison.
|
|
19
|
+
*/
|
|
20
|
+
export declare function bagOfStems(text: string, language?: Language): Set<string>;
|
|
21
|
+
/**
|
|
22
|
+
* Extracts subject and predicate for first-order logic; may return null.
|
|
23
|
+
* Heuristic: the first noun is the subject, the verb + complement is the predicate.
|
|
24
|
+
*/
|
|
25
|
+
export declare function extractSubjectPredicate(text: string, language?: Language): {
|
|
26
|
+
subject: string;
|
|
27
|
+
predicate: string;
|
|
28
|
+
} | null;
|
|
29
|
+
//# sourceMappingURL=keyword-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"keyword-extractor.d.ts","sourceRoot":"","sources":["../../src/atoms/keyword-extractor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,KAAK,EAAE,QAAQ,EAAS,MAAM,UAAU,CAAC;AAGhD;;;GAGG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,GAAE,QAAe,EAAE,QAAQ,GAAE,MAAU,GAAG,MAAM,EAAE,CAavG;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,GAAE,QAAe,GAAG,MAAM,EAAE,CAK9E;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,GAAE,QAAe,GAAG,GAAG,CAAC,MAAM,CAAC,CAE/E;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,QAAQ,GAAE,QAAe,GACxB;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAiC/C"}
|