@stevenvo780/autologic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +166 -0
- package/dist/atoms/coreference.d.ts +29 -0
- package/dist/atoms/coreference.d.ts.map +1 -0
- package/dist/atoms/coreference.js +75 -0
- package/dist/atoms/coreference.js.map +1 -0
- package/dist/atoms/identifier-gen.d.ts +23 -0
- package/dist/atoms/identifier-gen.d.ts.map +1 -0
- package/dist/atoms/identifier-gen.js +83 -0
- package/dist/atoms/identifier-gen.js.map +1 -0
- package/dist/atoms/index.d.ts +20 -0
- package/dist/atoms/index.d.ts.map +1 -0
- package/dist/atoms/index.js +112 -0
- package/dist/atoms/index.js.map +1 -0
- package/dist/atoms/keyword-extractor.d.ts +29 -0
- package/dist/atoms/keyword-extractor.d.ts.map +1 -0
- package/dist/atoms/keyword-extractor.js +73 -0
- package/dist/atoms/keyword-extractor.js.map +1 -0
- package/dist/autologic.d.ts +63 -0
- package/dist/autologic.d.ts.map +1 -0
- package/dist/autologic.js +107 -0
- package/dist/autologic.js.map +1 -0
- package/dist/discourse/index.d.ts +18 -0
- package/dist/discourse/index.d.ts.map +1 -0
- package/dist/discourse/index.js +43 -0
- package/dist/discourse/index.js.map +1 -0
- package/dist/discourse/markers-en.d.ts +7 -0
- package/dist/discourse/markers-en.d.ts.map +1 -0
- package/dist/discourse/markers-en.js +113 -0
- package/dist/discourse/markers-en.js.map +1 -0
- package/dist/discourse/markers-es.d.ts +7 -0
- package/dist/discourse/markers-es.d.ts.map +1 -0
- package/dist/discourse/markers-es.js +134 -0
- package/dist/discourse/markers-es.js.map +1 -0
- package/dist/discourse/pattern-detector.d.ts +16 -0
- package/dist/discourse/pattern-detector.d.ts.map +1 -0
- package/dist/discourse/pattern-detector.js +95 -0
- package/dist/discourse/pattern-detector.js.map +1 -0
- package/dist/discourse/role-classifier.d.ts +12 -0
- package/dist/discourse/role-classifier.d.ts.map +1 -0
- package/dist/discourse/role-classifier.js +141 -0
- package/dist/discourse/role-classifier.js.map +1 -0
- package/dist/formalize.d.ts +25 -0
- package/dist/formalize.d.ts.map +1 -0
- package/dist/formalize.js +123 -0
- package/dist/formalize.js.map +1 -0
- package/dist/formula/connectors.d.ts +31 -0
- package/dist/formula/connectors.d.ts.map +1 -0
- package/dist/formula/connectors.js +90 -0
- package/dist/formula/connectors.js.map +1 -0
- package/dist/formula/first-order.d.ts +11 -0
- package/dist/formula/first-order.d.ts.map +1 -0
- package/dist/formula/first-order.js +156 -0
- package/dist/formula/first-order.js.map +1 -0
- package/dist/formula/index.d.ts +15 -0
- package/dist/formula/index.d.ts.map +1 -0
- package/dist/formula/index.js +49 -0
- package/dist/formula/index.js.map +1 -0
- package/dist/formula/modal.d.ts +11 -0
- package/dist/formula/modal.d.ts.map +1 -0
- package/dist/formula/modal.js +138 -0
- package/dist/formula/modal.js.map +1 -0
- package/dist/formula/propositional.d.ts +11 -0
- package/dist/formula/propositional.d.ts.map +1 -0
- package/dist/formula/propositional.js +241 -0
- package/dist/formula/propositional.js.map +1 -0
- package/dist/formula/temporal.d.ts +11 -0
- package/dist/formula/temporal.d.ts.map +1 -0
- package/dist/formula/temporal.js +134 -0
- package/dist/formula/temporal.js.map +1 -0
- package/dist/generator/index.d.ts +6 -0
- package/dist/generator/index.d.ts.map +1 -0
- package/dist/generator/index.js +12 -0
- package/dist/generator/index.js.map +1 -0
- package/dist/generator/st-emitter.d.ts +23 -0
- package/dist/generator/st-emitter.d.ts.map +1 -0
- package/dist/generator/st-emitter.js +134 -0
- package/dist/generator/st-emitter.js.map +1 -0
- package/dist/generator/validator.d.ts +22 -0
- package/dist/generator/validator.d.ts.map +1 -0
- package/dist/generator/validator.js +53 -0
- package/dist/generator/validator.js.map +1 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +44 -0
- package/dist/index.js.map +1 -0
- package/dist/nlp/index.d.ts +8 -0
- package/dist/nlp/index.d.ts.map +1 -0
- package/dist/nlp/index.js +17 -0
- package/dist/nlp/index.js.map +1 -0
- package/dist/nlp/stemmer-en.d.ts +10 -0
- package/dist/nlp/stemmer-en.d.ts.map +1 -0
- package/dist/nlp/stemmer-en.js +149 -0
- package/dist/nlp/stemmer-en.js.map +1 -0
- package/dist/nlp/stemmer-es.d.ts +10 -0
- package/dist/nlp/stemmer-es.d.ts.map +1 -0
- package/dist/nlp/stemmer-es.js +117 -0
- package/dist/nlp/stemmer-es.js.map +1 -0
- package/dist/nlp/stopwords.d.ts +11 -0
- package/dist/nlp/stopwords.d.ts.map +1 -0
- package/dist/nlp/stopwords.js +73 -0
- package/dist/nlp/stopwords.js.map +1 -0
- package/dist/nlp/tokenizer.d.ts +13 -0
- package/dist/nlp/tokenizer.d.ts.map +1 -0
- package/dist/nlp/tokenizer.js +36 -0
- package/dist/nlp/tokenizer.js.map +1 -0
- package/dist/segmenter/clause-splitter.d.ts +15 -0
- package/dist/segmenter/clause-splitter.d.ts.map +1 -0
- package/dist/segmenter/clause-splitter.js +192 -0
- package/dist/segmenter/clause-splitter.js.map +1 -0
- package/dist/segmenter/index.d.ts +11 -0
- package/dist/segmenter/index.d.ts.map +1 -0
- package/dist/segmenter/index.js +25 -0
- package/dist/segmenter/index.js.map +1 -0
- package/dist/segmenter/sentence-splitter.d.ts +13 -0
- package/dist/segmenter/sentence-splitter.d.ts.map +1 -0
- package/dist/segmenter/sentence-splitter.js +69 -0
- package/dist/segmenter/sentence-splitter.js.map +1 -0
- package/dist/types.d.ts +184 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/package.json +51 -0
- package/readme.md +1 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.validateST = validateST;
|
|
4
|
+
exports.validationToDiagnostics = validationToDiagnostics;
|
|
5
|
+
/**
 * Validates ST source code with the st-lang parser.
 *
 * `@stevenvo780/st-lang` is an optional peer dependency and is loaded lazily.
 * When it cannot be loaded, or exposes neither `parse` nor `check`, validation
 * is skipped and the code is reported as valid.
 *
 * @param code ST source text to validate.
 * @returns `{ ok, errors }`. On failure `errors` holds the messages of the
 *   error-severity diagnostics, or the exception message when the parser threw.
 */
function validateST(code) {
    let stLang;
    try {
        // Load st-lang dynamically
        // eslint-disable-next-line @typescript-eslint/no-var-requires
        stLang = require('@stevenvo780/st-lang');
    }
    catch {
        // st-lang is not available — not an error, validation is simply skipped
        return { ok: true, errors: [] };
    }
    // Prefer `parse`; fall back to `check` when `parse` is not exported.
    const entryPoint = ['parse', 'check'].find((name) => stLang && typeof stLang[name] === 'function');
    if (entryPoint === undefined) {
        return { ok: true, errors: [] };
    }
    try {
        const result = stLang[entryPoint](code);
        if (result.ok) {
            return { ok: true, errors: [] };
        }
        const errors = (result.diagnostics || [])
            .filter((d) => d.severity === 'error')
            .map((d) => d.message || 'Error desconocido');
        return { ok: false, errors };
    }
    catch (err) {
        // A parser that throws has rejected the input (or is broken); either
        // way the code was not validated, so it must not be reported as valid.
        const message = err instanceof Error ? err.message : String(err);
        return { ok: false, errors: [message || 'Error desconocido'] };
    }
}
|
|
41
|
+
/**
 * Converts a validation result into a list of diagnostics.
 *
 * @param validation Result object with an `ok` flag and an `errors` list.
 * @returns An empty list when `validation.ok` is truthy; otherwise one
 *   `error`-severity diagnostic with code `ST_VALIDATION` per entry of
 *   `validation.errors`.
 */
function validationToDiagnostics(validation) {
    const toDiagnostic = (error) => {
        const message = `ST Validation: ${error}`;
        return { severity: 'error', message, code: 'ST_VALIDATION' };
    };
    return validation.ok ? [] : validation.errors.map(toDiagnostic);
}
|
|
53
|
+
//# sourceMappingURL=validator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/generator/validator.ts"],"names":[],"mappings":";;AAiBA,gCAiCC;AAKD,0DAOC;AAjDD;;;GAGG;AACH,SAAgB,UAAU,CAAC,IAAY;IACrC,IAAI,CAAC;QACH,iCAAiC;QACjC,8DAA8D;QAC9D,MAAM,MAAM,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;QAE/C,IAAI,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAClC,IAAI,MAAM,CAAC,EAAE,EAAE,CAAC;gBACd,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;YAClC,CAAC;YACD,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC;iBACtC,MAAM,CAAC,CAAC,CAAwB,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC;iBAC5D,GAAG,CAAC,CAAC,CAAuB,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,mBAAmB,CAAC,CAAC;YACtE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QAC/B,CAAC;QAED,IAAI,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAClC,IAAI,MAAM,CAAC,EAAE,EAAE,CAAC;gBACd,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;YAClC,CAAC;YACD,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC;iBACtC,MAAM,CAAC,CAAC,CAAwB,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC;iBAC5D,GAAG,CAAC,CAAC,CAAuB,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,mBAAmB,CAAC,CAAC;YACtE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QAC/B,CAAC;QAED,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IAClC,CAAC;IAAC,MAAM,CAAC;QACP,wDAAwD;QACxD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IAClC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAgB,uBAAuB,CAAC,UAA4B;IAClE,IAAI,UAAU,CAAC,EAAE;QAAE,OAAO,EAAE,CAAC;IAC7B,OAAO,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACrC,QAAQ,EAAE,OAAgB;QAC1B,OAAO,EAAE,kBAAkB,KAAK,EAAE;QAClC,IAAI,EAAE,eAAe;KACtB,CAAC,CAAC,CAAC;AACN,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Autologic — Formalizador automático de texto natural a lógica formal ST
|
|
3
|
+
*
|
|
4
|
+
* @packageDocumentation
|
|
5
|
+
* @module @stevenvo780/autologic
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```ts
|
|
9
|
+
* import { formalize, Autologic } from '@stevenvo780/autologic';
|
|
10
|
+
*
|
|
11
|
+
* // Uso stateless
|
|
12
|
+
* const result = formalize("Si llueve, la calle se moja.", {
|
|
13
|
+
* profile: 'classical.propositional',
|
|
14
|
+
* language: 'es',
|
|
15
|
+
* });
|
|
16
|
+
* console.log(result.stCode);
|
|
17
|
+
*
|
|
18
|
+
* // Uso con estado
|
|
19
|
+
* const al = new Autologic({ language: 'es' });
|
|
20
|
+
* const r = al.formalize("Todo humano es mortal. Sócrates es humano.");
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
export { formalize } from './formalize';
|
|
24
|
+
export { Autologic } from './autologic';
|
|
25
|
+
export type { FormalizeOptions, FormalizationResult, AutologicConfig, LogicProfile, Language, AtomStyle, DiscourseAnalysis, AnalyzedSentence, AnalyzedClause, SentenceType, ClauseRole, ClauseModifier, MarkerRole, MarkerDefinition, DetectedMarker, ArgumentStructure, ArgumentRelation, ArgumentPattern, Sentence, Clause, AtomEntry, FormulaEntry, STStatementType, Diagnostic, Token, } from './types';
|
|
26
|
+
export { segment } from './segmenter';
|
|
27
|
+
export { analyzeDiscourse } from './discourse';
|
|
28
|
+
export { extractAtoms } from './atoms';
|
|
29
|
+
export { buildFormulas } from './formula';
|
|
30
|
+
export { emitST } from './generator/st-emitter';
|
|
31
|
+
export { validateST } from './generator/validator';
|
|
32
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAGH,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAGxC,YAAY,EACV,gBAAgB,EAChB,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,QAAQ,EACR,SAAS,EACT,iBAAiB,EACjB,gBAAgB,EAChB,cAAc,EACd,YAAY,EACZ,UAAU,EACV,cAAc,EACd,UAAU,EACV,gBAAgB,EAChB,cAAc,EACd,iBAAiB,EACjB,gBAAgB,EAChB,eAAe,EACf,QAAQ,EACR,MAAM,EACN,SAAS,EACT,YAAY,EACZ,eAAe,EACf,UAAU,EACV,KAAK,GACN,MAAM,SAAS,CAAC;AAGjB,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Autologic — Formalizador automático de texto natural a lógica formal ST
|
|
4
|
+
*
|
|
5
|
+
* @packageDocumentation
|
|
6
|
+
* @module @stevenvo780/autologic
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```ts
|
|
10
|
+
* import { formalize, Autologic } from '@stevenvo780/autologic';
|
|
11
|
+
*
|
|
12
|
+
* // Uso stateless
|
|
13
|
+
* const result = formalize("Si llueve, la calle se moja.", {
|
|
14
|
+
* profile: 'classical.propositional',
|
|
15
|
+
* language: 'es',
|
|
16
|
+
* });
|
|
17
|
+
* console.log(result.stCode);
|
|
18
|
+
*
|
|
19
|
+
* // Uso con estado
|
|
20
|
+
* const al = new Autologic({ language: 'es' });
|
|
21
|
+
* const r = al.formalize("Todo humano es mortal. Sócrates es humano.");
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
25
|
+
exports.validateST = exports.emitST = exports.buildFormulas = exports.extractAtoms = exports.analyzeDiscourse = exports.segment = exports.Autologic = exports.formalize = void 0;
|
|
26
|
+
// ── API pública principal ─────────────────────
|
|
27
|
+
var formalize_1 = require("./formalize");
|
|
28
|
+
Object.defineProperty(exports, "formalize", { enumerable: true, get: function () { return formalize_1.formalize; } });
|
|
29
|
+
var autologic_1 = require("./autologic");
|
|
30
|
+
Object.defineProperty(exports, "Autologic", { enumerable: true, get: function () { return autologic_1.Autologic; } });
|
|
31
|
+
// ── Utilidades expuestas ──────────────────────
|
|
32
|
+
var segmenter_1 = require("./segmenter");
|
|
33
|
+
Object.defineProperty(exports, "segment", { enumerable: true, get: function () { return segmenter_1.segment; } });
|
|
34
|
+
var discourse_1 = require("./discourse");
|
|
35
|
+
Object.defineProperty(exports, "analyzeDiscourse", { enumerable: true, get: function () { return discourse_1.analyzeDiscourse; } });
|
|
36
|
+
var atoms_1 = require("./atoms");
|
|
37
|
+
Object.defineProperty(exports, "extractAtoms", { enumerable: true, get: function () { return atoms_1.extractAtoms; } });
|
|
38
|
+
var formula_1 = require("./formula");
|
|
39
|
+
Object.defineProperty(exports, "buildFormulas", { enumerable: true, get: function () { return formula_1.buildFormulas; } });
|
|
40
|
+
var st_emitter_1 = require("./generator/st-emitter");
|
|
41
|
+
Object.defineProperty(exports, "emitST", { enumerable: true, get: function () { return st_emitter_1.emitST; } });
|
|
42
|
+
var validator_1 = require("./generator/validator");
|
|
43
|
+
Object.defineProperty(exports, "validateST", { enumerable: true, get: function () { return validator_1.validateST; } });
|
|
44
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;AAEH,iDAAiD;AACjD,yCAAwC;AAA/B,sGAAA,SAAS,OAAA;AAClB,yCAAwC;AAA/B,sGAAA,SAAS,OAAA;AA+BlB,iDAAiD;AACjD,yCAAsC;AAA7B,oGAAA,OAAO,OAAA;AAChB,yCAA+C;AAAtC,6GAAA,gBAAgB,OAAA;AACzB,iCAAuC;AAA9B,qGAAA,YAAY,OAAA;AACrB,qCAA0C;AAAjC,wGAAA,aAAa,OAAA;AACtB,qDAAgD;AAAvC,oGAAA,MAAM,OAAA;AACf,mDAAmD;AAA1C,uGAAA,UAAU,OAAA"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NLP — Utilidades de procesamiento de lenguaje natural
|
|
3
|
+
*/
|
|
4
|
+
export { tokenize } from './tokenizer';
|
|
5
|
+
export { stemEs } from './stemmer-es';
|
|
6
|
+
export { stemEn } from './stemmer-en';
|
|
7
|
+
export { isStopword, STOPWORDS_ES, STOPWORDS_EN } from './stopwords';
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/nlp/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.STOPWORDS_EN = exports.STOPWORDS_ES = exports.isStopword = exports.stemEn = exports.stemEs = exports.tokenize = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* NLP — Utilidades de procesamiento de lenguaje natural
|
|
6
|
+
*/
|
|
7
|
+
var tokenizer_1 = require("./tokenizer");
|
|
8
|
+
Object.defineProperty(exports, "tokenize", { enumerable: true, get: function () { return tokenizer_1.tokenize; } });
|
|
9
|
+
var stemmer_es_1 = require("./stemmer-es");
|
|
10
|
+
Object.defineProperty(exports, "stemEs", { enumerable: true, get: function () { return stemmer_es_1.stemEs; } });
|
|
11
|
+
var stemmer_en_1 = require("./stemmer-en");
|
|
12
|
+
Object.defineProperty(exports, "stemEn", { enumerable: true, get: function () { return stemmer_en_1.stemEn; } });
|
|
13
|
+
var stopwords_1 = require("./stopwords");
|
|
14
|
+
Object.defineProperty(exports, "isStopword", { enumerable: true, get: function () { return stopwords_1.isStopword; } });
|
|
15
|
+
Object.defineProperty(exports, "STOPWORDS_ES", { enumerable: true, get: function () { return stopwords_1.STOPWORDS_ES; } });
|
|
16
|
+
Object.defineProperty(exports, "STOPWORDS_EN", { enumerable: true, get: function () { return stopwords_1.STOPWORDS_EN; } });
|
|
17
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/nlp/index.ts"],"names":[],"mappings":";;;AAAA;;GAEG;AACH,yCAAuC;AAA9B,qGAAA,QAAQ,OAAA;AACjB,2CAAsC;AAA7B,oGAAA,MAAM,OAAA;AACf,2CAAsC;AAA7B,oGAAA,MAAM,OAAA;AACf,yCAAqE;AAA5D,uGAAA,UAAU,OAAA;AAAE,yGAAA,YAAY,OAAA;AAAE,yGAAA,YAAY,OAAA"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stemmer Snowball para inglés
|
|
3
|
+
* Implementación simplificada del algoritmo Snowball English (Porter2).
|
|
4
|
+
* Ref: https://snowballstem.org/algorithms/english/stemmer.html
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Aplica stemming Snowball simplificado al inglés.
|
|
8
|
+
*/
|
|
9
|
+
export declare function stemEn(word: string): string;
|
|
10
|
+
//# sourceMappingURL=stemmer-en.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stemmer-en.d.ts","sourceRoot":"","sources":["../../src/nlp/stemmer-en.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAsBH;;GAEG;AACH,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAuH3C"}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Stemmer Snowball para inglés
|
|
4
|
+
* Implementación simplificada del algoritmo Snowball English (Porter2).
|
|
5
|
+
* Ref: https://snowballstem.org/algorithms/english/stemmer.html
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.stemEn = stemEn;
|
|
9
|
+
const VOWELS = 'aeiouy';
/**
 * Whole-word stems that bypass the rule pipeline. Stored in a Map rather than
 * a plain object so that words naming inherited object members (for example
 * "constructor") are never mistaken for entries.
 */
const EXCEPTIONS = new Map([
    ['skis', 'ski'], ['skies', 'sky'], ['dying', 'die'], ['lying', 'lie'],
    ['tying', 'tie'], ['idly', 'idl'], ['gently', 'gentl'], ['ugly', 'ugli'],
    ['early', 'earli'], ['only', 'onli'], ['singly', 'singl'],
]);
/** Step 2: derivational suffix -> replacement, applied when the suffix starts in R1. */
const STEP2_RULES = [
    ['ational', 'ate'], ['tional', 'tion'], ['enci', 'ence'], ['anci', 'ance'],
    ['izer', 'ize'], ['abli', 'able'], ['alli', 'al'], ['entli', 'ent'],
    ['eli', 'e'], ['ousli', 'ous'], ['ization', 'ize'], ['ation', 'ate'],
    ['ator', 'ate'], ['alism', 'al'], ['iveness', 'ive'], ['fulness', 'ful'],
    ['ousness', 'ous'], ['aliti', 'al'], ['iviti', 'ive'], ['biliti', 'ble'],
    ['fulli', 'ful'], ['lessli', 'less'],
];
/** Step 3: suffix -> replacement, applied when the suffix starts in R1. */
const STEP3_RULES = [
    ['ational', 'ate'], ['tional', 'tion'], ['alize', 'al'], ['icate', 'ic'],
    ['iciti', 'ic'], ['ical', 'ic'], ['ful', ''], ['ness', ''],
];
/** Step 4: suffixes removed outright when they start in R2. */
const STEP4_SUFFIXES = [
    'ement', 'ment', 'ence', 'ance', 'able', 'ible', 'ant', 'ent',
    'ism', 'ate', 'iti', 'ous', 'ive', 'ize', 'al', 'er', 'ic',
];
/** Returns true when `ch` is one of the letters in VOWELS ("y" included). */
function isVowel(ch) {
    return VOWELS.includes(ch);
}
/** Returns true when any character of `text` is a vowel. */
function containsVowel(text) {
    for (const ch of text) {
        if (isVowel(ch))
            return true;
    }
    return false;
}
/**
 * Returns the start offset of region R1: the position just past the first
 * non-vowel that follows a vowel, or the word length when there is none.
 */
function findR1(word) {
    let i = 0;
    while (i < word.length && !isVowel(word[i]))
        i++;
    while (i < word.length && isVowel(word[i]))
        i++;
    if (i < word.length)
        i++;
    return Math.min(i, word.length);
}
/**
 * Returns the start offset of region R2: R1 computed again on the part of the
 * word that lies inside R1.
 */
function findR2(word) {
    const r1 = findR1(word);
    if (r1 >= word.length)
        return word.length;
    return r1 + findR1(word.slice(r1));
}
/**
 * Applies a simplified Snowball (Porter2) stemmer to an English word.
 *
 * Words of three characters or fewer are only lower-cased. Words listed in
 * EXCEPTIONS map straight to their stem. Everything else runs through steps
 * 0, 1a, 1b, 1c, 2, 3, 4 and 5 in that order.
 *
 * @param word The word to stem; case is ignored.
 * @returns The lower-cased stem (always a string).
 */
function stemEn(word) {
    const lower = word.toLowerCase();
    if (word.length <= 3)
        return lower;
    const special = EXCEPTIONS.get(lower);
    if (special !== undefined)
        return special;
    let w = lower;
    // Step 0: strip a trailing apostrophe form ('s', 's or ')
    if (w.endsWith("'s'"))
        w = w.slice(0, -3);
    else if (w.endsWith("'s"))
        w = w.slice(0, -2);
    else if (w.endsWith("'"))
        w = w.slice(0, -1);
    // Step 1a: plural endings
    if (w.endsWith('sses'))
        w = w.slice(0, -2);
    else if (w.endsWith('ied') || w.endsWith('ies')) {
        w = w.length > 4 ? w.slice(0, -2) : w.slice(0, -1);
    }
    else if (w.endsWith('ss') || w.endsWith('us')) {
        // unchanged
    }
    else if (w.endsWith('s') && w.length > 2) {
        const preceding = w.slice(0, -1);
        // Drop the "s" only when there is a vowel that is NOT the letter right
        // before it ("gaps" -> "gap", but "this" stays "this").
        if (containsVowel(preceding.slice(0, -1))) {
            w = preceding;
        }
    }
    // Region offsets are taken once here; later steps only rewrite the tail.
    const r1 = findR1(w);
    const r2 = findR2(w);
    // Step 1b
    let didStep1b = false;
    for (const suf of ['eed', 'eedly']) {
        if (w.endsWith(suf)) {
            if (w.length - suf.length >= r1) {
                w = w.slice(0, -suf.length) + 'ee';
            }
            didStep1b = true;
            break;
        }
    }
    if (!didStep1b) {
        for (const suf of ['ing', 'ingly', 'ed', 'edly']) {
            if (w.endsWith(suf)) {
                const stem = w.slice(0, -suf.length);
                if (containsVowel(stem)) {
                    w = stem;
                    const last = w[w.length - 1];
                    if (w.endsWith('at') || w.endsWith('bl') || w.endsWith('iz')) {
                        w += 'e';
                    }
                    else if (w.length >= 2 && last === w[w.length - 2] &&
                        // only doubled consonants are undoubled ("hopp" -> "hop");
                        // a doubled vowel such as "see" must stay intact
                        !isVowel(last) &&
                        !['l', 's', 'z'].includes(last)) {
                        w = w.slice(0, -1);
                    }
                }
                break;
            }
        }
    }
    // Step 1c: final "y" after a consonant becomes "i", so that "family" meets
    // "families" and the "-li" / "-iti" rules of step 2 can apply.
    if (w.length > 2 && w.endsWith('y') && !isVowel(w[w.length - 2])) {
        w = w.slice(0, -1) + 'i';
    }
    // Step 2: derivational suffixes
    for (const [suf, rep] of STEP2_RULES) {
        if (w.endsWith(suf) && w.length - suf.length >= r1) {
            w = w.slice(0, -suf.length) + rep;
            break;
        }
    }
    // Step 3
    for (const [suf, rep] of STEP3_RULES) {
        if (w.endsWith(suf) && w.length - suf.length >= r1) {
            w = w.slice(0, -suf.length) + rep;
            break;
        }
    }
    // Step 4: remove suffixes
    for (const suf of STEP4_SUFFIXES) {
        if (w.endsWith(suf) && w.length - suf.length >= r2) {
            w = w.slice(0, -suf.length);
            break;
        }
    }
    // Step 5: trailing "e"
    if (w.endsWith('e')) {
        if (w.length - 1 >= r2) {
            w = w.slice(0, -1);
        }
        else if (w.length - 1 >= r1) {
            const ch = w[w.length - 2];
            if (ch && !isVowel(ch))
                w = w.slice(0, -1);
        }
    }
    return w;
}
|
|
149
|
+
//# sourceMappingURL=stemmer-en.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stemmer-en.js","sourceRoot":"","sources":["../../src/nlp/stemmer-en.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;AAyBH,wBAuHC;AA9ID,MAAM,MAAM,GAAG,QAAQ,CAAC;AAExB,SAAS,OAAO,CAAC,EAAU;IACzB,OAAO,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;AAC7B,CAAC;AAED,SAAS,MAAM,CAAC,IAAY;IAC1B,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,EAAE,CAAC;IACjD,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,EAAE,CAAC;IAChD,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM;QAAE,CAAC,EAAE,CAAC;IACzB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;AAClC,CAAC;AAED,SAAS,MAAM,CAAC,IAAY;IAC1B,MAAM,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;IACxB,IAAI,EAAE,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC,MAAM,CAAC;IAC1C,OAAO,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,SAAgB,MAAM,CAAC,IAAY;IACjC,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;IAEhD,IAAI,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAE3B,yBAAyB;IACzB,MAAM,UAAU,GAA2B;QACzC,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK;QAC7D,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM;QAChE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO;KACpD,CAAC;IACF,IAAI,UAAU,CAAC,CAAC,CAAC;QAAE,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC;IAExC,2BAA2B;IAC3B,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;SACrC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;SACzC,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAE7C,UAAU;IACV,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;SACtC,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAChD,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,
CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACrD,CAAC;SAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAChD,YAAY;IACd,CAAC;SAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3C,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5D,CAAC,GAAG,SAAS,CAAC;QAChB,CAAC;IACH,CAAC;IAED,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACrB,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IAErB,UAAU;IACV,MAAM,MAAM,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAChC,IAAI,SAAS,GAAG,KAAK,CAAC;IACtB,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACpB,IAAI,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;gBAChC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;YACrC,CAAC;YACD,SAAS,GAAG,IAAI,CAAC;YACjB,MAAM;QACR,CAAC;IACH,CAAC;IACD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;QAC/C,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;YAC1B,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACpB,MAAM,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;gBACrC,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBACzC,CAAC,GAAG,IAAI,CAAC;oBACT,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;wBAC7D,CAAC,IAAI,GAAG,CAAC;oBACX,CAAC;yBAAM,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;wBAC7D,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC7C,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;oBACrB,CAAC;gBACH,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,iCAAiC;IACjC,MAAM,KAAK,GAAuB;QAChC,CAAC,SAA
S,EAAE,KAAK,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC;QAC1E,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;QACnE,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;QACpE,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,CAAC;QACxE,CAAC,SAAS,EAAE,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,QAAQ,EAAE,KAAK,CAAC;QACxE,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC;KACrC,CAAC;IAEF,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;YACnD,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;YAClC,MAAM;QACR,CAAC;IACH,CAAC;IAED,SAAS;IACT,MAAM,KAAK,GAAuB;QAChC,CAAC,SAAS,EAAE,KAAK,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC;QACxE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC;KAC3D,CAAC;IAEF,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;YACnD,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;YAClC,MAAM;QACR,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,MAAM,KAAK,GAAG;QACZ,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK;QAC7D,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;KAC3D,CAAC;IAEF,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;YACnD,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,SAAS;IACT,IAAI,CAAC,CAAC,QAAQ
,CAAC,GAAG,CAAC,EAAE,CAAC;QACpB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC;YACvB,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACrB,CAAC;aAAM,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC;YAC9B,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAC3B,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,OAAO,CAAC,CAAC;AACX,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stemmer Snowball para español
|
|
3
|
+
* Implementación simplificada del algoritmo Snowball Spanish stemmer.
|
|
4
|
+
* Ref: https://snowballstem.org/algorithms/spanish/stemmer.html
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Aplica stemming Snowball simplificado al español.
|
|
8
|
+
*/
|
|
9
|
+
export declare function stemEs(word: string): string;
|
|
10
|
+
//# sourceMappingURL=stemmer-es.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stemmer-es.d.ts","sourceRoot":"","sources":["../../src/nlp/stemmer-es.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AA4BH;;GAEG;AACH,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAiF3C"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Stemmer Snowball para español
|
|
4
|
+
* Implementación simplificada del algoritmo Snowball Spanish stemmer.
|
|
5
|
+
* Ref: https://snowballstem.org/algorithms/spanish/stemmer.html
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.stemEs = stemEs;
|
|
9
|
+
// Spanish vowels, accented forms included. "ü" belongs to the set as well
// ("pingüino", "vergüenza").
const VOWELS = 'aeiouáéíóúü';
/** Returns true when `ch` is one of the letters in VOWELS. */
function isVowel(ch) {
    return VOWELS.includes(ch);
}
|
|
13
|
+
/**
 * Computes the start offset of region R1: the position just past the first
 * non-vowel that follows a vowel, or the word length when there is none.
 */
function findR1(word) {
    const size = word.length;
    let pos = 0;
    // Skip leading non-vowels to reach the first vowel
    for (; pos < size && !isVowel(word[pos]); pos++) { }
    // Skip that run of vowels to reach the consonant after it
    for (; pos < size && isVowel(word[pos]); pos++) { }
    // R1 starts one position past that consonant, if there is one
    return pos < size ? pos + 1 : pos;
}
|
|
28
|
+
function findR2(word) {
|
|
29
|
+
const r1 = findR1(word);
|
|
30
|
+
if (r1 >= word.length)
|
|
31
|
+
return word.length;
|
|
32
|
+
const sub = word.slice(r1);
|
|
33
|
+
return r1 + findR1(sub);
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Aplica stemming Snowball simplificado al español.
|
|
37
|
+
*/
|
|
38
|
+
function stemEs(word) {
|
|
39
|
+
if (word.length <= 3)
|
|
40
|
+
return word;
|
|
41
|
+
let w = word.toLowerCase()
|
|
42
|
+
.replace(/á/g, 'a')
|
|
43
|
+
.replace(/é/g, 'e')
|
|
44
|
+
.replace(/í/g, 'i')
|
|
45
|
+
.replace(/ó/g, 'o')
|
|
46
|
+
.replace(/ú/g, 'u');
|
|
47
|
+
const r1 = findR1(w);
|
|
48
|
+
const r2 = findR2(w);
|
|
49
|
+
// Paso 1: Sufijos estándar
|
|
50
|
+
const suffixes1 = [
|
|
51
|
+
'imientos', 'imiento',
|
|
52
|
+
'amientos', 'amiento',
|
|
53
|
+
'aciones', 'ación',
|
|
54
|
+
'uciones', 'ución',
|
|
55
|
+
'adoras', 'adora', 'adores', 'ador',
|
|
56
|
+
'ancias', 'ancia', 'encias', 'encia',
|
|
57
|
+
'antes', 'ante',
|
|
58
|
+
'ibles', 'ible',
|
|
59
|
+
'istas', 'ista',
|
|
60
|
+
'mente',
|
|
61
|
+
'idades', 'idad',
|
|
62
|
+
'ivas', 'iva', 'ivos', 'ivo',
|
|
63
|
+
'anzas', 'anza',
|
|
64
|
+
'logías', 'logía', 'logias', 'logia',
|
|
65
|
+
'mente',
|
|
66
|
+
'ables', 'able',
|
|
67
|
+
];
|
|
68
|
+
for (const suf of suffixes1) {
|
|
69
|
+
if (w.endsWith(suf) && w.length - suf.length >= r2) {
|
|
70
|
+
w = w.slice(0, -suf.length);
|
|
71
|
+
return w;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
// Paso 2: Sufijos verbales
|
|
75
|
+
const verbSuffixes = [
|
|
76
|
+
'iéramos', 'aríamos', 'eríamos', 'iríamos',
|
|
77
|
+
'aríais', 'eríais', 'iríais',
|
|
78
|
+
'aremos', 'eremos', 'iremos',
|
|
79
|
+
'ásemos', 'iésemos',
|
|
80
|
+
'aríam', 'eríam', 'iríam',
|
|
81
|
+
'arían', 'erían', 'irían',
|
|
82
|
+
'arías', 'erías', 'irías',
|
|
83
|
+
'iendo', 'ando',
|
|
84
|
+
'ieron', 'aron', 'asen', 'iesen',
|
|
85
|
+
'aban', 'aran', 'irán',
|
|
86
|
+
'ería', 'iría', 'aría',
|
|
87
|
+
'emos', 'amos', 'imos',
|
|
88
|
+
'aste', 'iste',
|
|
89
|
+
'aron', 'aron',
|
|
90
|
+
'aban', 'ían',
|
|
91
|
+
'ado', 'ido',
|
|
92
|
+
'ará', 'erá', 'irá',
|
|
93
|
+
'aré', 'eré', 'iré',
|
|
94
|
+
'aba', 'ase', 'iese',
|
|
95
|
+
'ais', 'áis',
|
|
96
|
+
'ar', 'er', 'ir',
|
|
97
|
+
'an', 'en',
|
|
98
|
+
'as', 'es', 'ís',
|
|
99
|
+
];
|
|
100
|
+
for (const suf of verbSuffixes) {
|
|
101
|
+
if (w.endsWith(suf) && w.length - suf.length >= r1) {
|
|
102
|
+
w = w.slice(0, -suf.length);
|
|
103
|
+
return w;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
// Paso 3: Sufijos residuales
|
|
107
|
+
if (w.endsWith('os') && w.length > 3)
|
|
108
|
+
w = w.slice(0, -2);
|
|
109
|
+
else if (w.endsWith('a') && w.length > 3)
|
|
110
|
+
w = w.slice(0, -1);
|
|
111
|
+
else if (w.endsWith('o') && w.length > 3)
|
|
112
|
+
w = w.slice(0, -1);
|
|
113
|
+
else if (w.endsWith('e') && w.length > 3)
|
|
114
|
+
w = w.slice(0, -1);
|
|
115
|
+
return w;
|
|
116
|
+
}
|
|
117
|
+
//# sourceMappingURL=stemmer-es.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stemmer-es.js","sourceRoot":"","sources":["../../src/nlp/stemmer-es.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;AA+BH,wBAiFC;AA9GD,MAAM,MAAM,GAAG,YAAY,CAAC;AAE5B,SAAS,OAAO,CAAC,EAAU;IACzB,OAAO,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAS,MAAM,CAAC,IAAY;IAC1B,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,uBAAuB;IACvB,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,EAAE,CAAC;IACjD,gDAAgD;IAChD,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,EAAE,CAAC;IAChD,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM;QAAE,CAAC,EAAE,CAAC,CAAC,qBAAqB;IAC/C,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,MAAM,CAAC,IAAY;IAC1B,MAAM,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;IACxB,IAAI,EAAE,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC,MAAM,CAAC;IAC1C,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC3B,OAAO,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAgB,MAAM,CAAC,IAAY;IACjC,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAElC,IAAI,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE;SACvB,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAEtB,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACrB,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IAErB,2BAA2B;IAC3B,MAAM,SAAS,GAAG;QAChB,UAAU,EAAE,SAAS;QACrB,UAAU,EAAE,SAAS;QACrB,SAAS,EAAE,OAAO;QAClB,SAAS,EAAE,OAAO;QAClB,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM;QACnC,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO;QACpC,OAAO,EAAE,MAAM;QACf,OAAO,EAAE,MAAM;QACf,OAAO,EAAE,MAAM;QACf,OAAO;QACP,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK;QAC5B,OAAO,EAAE,MAAM;QACf,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO;QACpC,OAAO;QACP,OAAO,EAAE,MAAM;KAChB,CAAC;IAEF,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;YACnD,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAC5B,OAAO,CAAC,CAAC;QACX,CAAC;IACH,CAAC
;IAED,2BAA2B;IAC3B,MAAM,YAAY,GAAG;QACnB,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS;QAC1C,QAAQ,EAAE,QAAQ,EAAE,QAAQ;QAC5B,QAAQ,EAAE,QAAQ,EAAE,QAAQ;QAC5B,QAAQ,EAAE,SAAS;QACnB,OAAO,EAAE,OAAO,EAAE,OAAO;QACzB,OAAO,EAAE,OAAO,EAAE,OAAO;QACzB,OAAO,EAAE,OAAO,EAAE,OAAO;QACzB,OAAO,EAAE,MAAM;QACf,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;QAChC,MAAM,EAAE,MAAM,EAAE,MAAM;QACtB,MAAM,EAAE,MAAM,EAAE,MAAM;QACtB,MAAM,EAAE,MAAM,EAAE,MAAM;QACtB,MAAM,EAAE,MAAM;QACd,MAAM,EAAE,MAAM;QACd,MAAM,EAAE,KAAK;QACb,KAAK,EAAE,KAAK;QACZ,KAAK,EAAE,KAAK,EAAE,KAAK;QACnB,KAAK,EAAE,KAAK,EAAE,KAAK;QACnB,KAAK,EAAE,KAAK,EAAE,MAAM;QACpB,KAAK,EAAE,KAAK;QACZ,IAAI,EAAE,IAAI,EAAE,IAAI;QAChB,IAAI,EAAE,IAAI;QACV,IAAI,EAAE,IAAI,EAAE,IAAI;KACjB,CAAC;IAEF,KAAK,MAAM,GAAG,IAAI,YAAY,EAAE,CAAC;QAC/B,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;YACnD,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAC5B,OAAO,CAAC,CAAC;QACX,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;SACpD,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;SACxD,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;SACxD,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAE7D,OAAO,CAAC,CAAC;AACX,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stopwords — Listas curadas de palabras vacías en español e inglés
|
|
3
|
+
*/
|
|
4
|
+
import type { Language } from '../types';
|
|
5
|
+
/** Set of Spanish stopwords. */
export declare const STOPWORDS_ES: Set<string>;
|
|
6
|
+
/** Set of English stopwords. */
export declare const STOPWORDS_EN: Set<string>;
|
|
7
|
+
/**
|
|
8
|
+
* Checks whether a word is a stopword in the given language.
* @param word Word to look up.
* @param language Optional language whose stopword set is consulted.
* @returns true when the word is in that language's stopword set.
|
|
9
|
+
*/
|
|
10
|
+
export declare function isStopword(word: string, language?: Language): boolean;
|
|
11
|
+
//# sourceMappingURL=stopwords.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stopwords.d.ts","sourceRoot":"","sources":["../../src/nlp/stopwords.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,eAAO,MAAM,YAAY,aA0BvB,CAAC;AAEH,eAAO,MAAM,YAAY,aAgCvB,CAAC;AAEH;;GAEG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,GAAE,QAAe,GAAG,OAAO,CAI3E"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.STOPWORDS_EN = exports.STOPWORDS_ES = void 0;
|
|
4
|
+
exports.isStopword = isStopword;
|
|
5
|
+
exports.STOPWORDS_ES = new Set([
|
|
6
|
+
// Artículos
|
|
7
|
+
'el', 'la', 'los', 'las', 'un', 'una', 'unos', 'unas',
|
|
8
|
+
// Preposiciones
|
|
9
|
+
'a', 'ante', 'bajo', 'con', 'contra', 'de', 'del', 'desde', 'en', 'entre',
|
|
10
|
+
'hacia', 'hasta', 'para', 'por', 'según', 'sin', 'sobre', 'tras',
|
|
11
|
+
// Pronombres
|
|
12
|
+
'yo', 'tú', 'él', 'ella', 'nosotros', 'nosotras', 'vosotros', 'vosotras',
|
|
13
|
+
'ellos', 'ellas', 'me', 'te', 'se', 'nos', 'os', 'lo', 'le', 'les',
|
|
14
|
+
'mi', 'tu', 'su', 'nuestro', 'nuestra', 'nuestros', 'nuestras',
|
|
15
|
+
'vuestro', 'vuestra', 'vuestros', 'vuestras', 'suyo', 'suya', 'suyos', 'suyas',
|
|
16
|
+
'este', 'esta', 'estos', 'estas', 'ese', 'esa', 'esos', 'esas',
|
|
17
|
+
'aquel', 'aquella', 'aquellos', 'aquellas', 'esto', 'eso', 'aquello',
|
|
18
|
+
// Verbos copulativos / auxiliares
|
|
19
|
+
'es', 'ser', 'está', 'estar', 'son', 'están', 'era', 'fue', 'sido',
|
|
20
|
+
'estaba', 'estaban', 'eran', 'ha', 'han', 'haber', 'he', 'has', 'hay',
|
|
21
|
+
'siendo', 'estando', 'soy', 'eres', 'somos', 'sois',
|
|
22
|
+
// Conjunciones / relativos
|
|
23
|
+
'que', 'quien', 'cual', 'cuyo', 'cuya', 'donde', 'cuando', 'como',
|
|
24
|
+
// Adverbios comunes
|
|
25
|
+
'muy', 'más', 'menos', 'ya', 'aún', 'aquí', 'ahí', 'allí',
|
|
26
|
+
'bien', 'mal', 'así', 'tan', 'tanto', 'mucho', 'poco',
|
|
27
|
+
// Otros
|
|
28
|
+
'al', 'e', 'u', 'o', 'y', 'ni', 'otro', 'otra', 'otros', 'otras',
|
|
29
|
+
'mismo', 'misma', 'mismos', 'mismas', 'propio', 'propia',
|
|
30
|
+
'cada', 'demás', 'ambos', 'ambas',
|
|
31
|
+
]);
|
|
32
|
+
exports.STOPWORDS_EN = new Set([
|
|
33
|
+
// Articles
|
|
34
|
+
'a', 'an', 'the',
|
|
35
|
+
// Prepositions
|
|
36
|
+
'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'about',
|
|
37
|
+
'into', 'through', 'during', 'before', 'after', 'above', 'below',
|
|
38
|
+
'between', 'under', 'over', 'up', 'down', 'out', 'off', 'around',
|
|
39
|
+
// Pronouns
|
|
40
|
+
'i', 'me', 'my', 'mine', 'myself',
|
|
41
|
+
'you', 'your', 'yours', 'yourself',
|
|
42
|
+
'he', 'him', 'his', 'himself',
|
|
43
|
+
'she', 'her', 'hers', 'herself',
|
|
44
|
+
'it', 'its', 'itself',
|
|
45
|
+
'we', 'us', 'our', 'ours', 'ourselves',
|
|
46
|
+
'they', 'them', 'their', 'theirs', 'themselves',
|
|
47
|
+
'this', 'that', 'these', 'those',
|
|
48
|
+
'who', 'whom', 'whose', 'which', 'what',
|
|
49
|
+
// Verbs (copulative / aux)
|
|
50
|
+
'is', 'am', 'are', 'was', 'were', 'be', 'been', 'being',
|
|
51
|
+
'have', 'has', 'had', 'having',
|
|
52
|
+
'do', 'does', 'did', 'doing',
|
|
53
|
+
'will', 'would', 'shall', 'should',
|
|
54
|
+
'can', 'could', 'may', 'might',
|
|
55
|
+
// Conjunctions / relative
|
|
56
|
+
'and', 'or', 'but', 'nor', 'so', 'yet',
|
|
57
|
+
'if', 'then', 'than', 'when', 'where', 'while', 'as',
|
|
58
|
+
// Adverbs
|
|
59
|
+
'very', 'too', 'also', 'just', 'only', 'now', 'here', 'there',
|
|
60
|
+
'well', 'still', 'already', 'even', 'quite', 'rather',
|
|
61
|
+
// Other
|
|
62
|
+
'no', 'not', 'both', 'each', 'few', 'more', 'most', 'other',
|
|
63
|
+
'some', 'such', 'own', 'same',
|
|
64
|
+
]);
|
|
65
|
+
/**
|
|
66
|
+
* Comprueba si una palabra es stopword en el idioma dado.
|
|
67
|
+
*/
|
|
68
|
+
function isStopword(word, language = 'es') {
|
|
69
|
+
const normalized = word.toLowerCase();
|
|
70
|
+
const set = language === 'es' ? exports.STOPWORDS_ES : exports.STOPWORDS_EN;
|
|
71
|
+
return set.has(normalized);
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=stopwords.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stopwords.js","sourceRoot":"","sources":["../../src/nlp/stopwords.ts"],"names":[],"mappings":";;;AAsEA,gCAIC;AArEY,QAAA,YAAY,GAAG,IAAI,GAAG,CAAC;IAClC,YAAY;IACZ,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM;IACrD,gBAAgB;IAChB,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO;IACzE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM;IAChE,aAAa;IACb,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,UAAU,EAAE,UAAU;IACxE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK;IAClE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU;IAC9D,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO;IAC9E,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM;IAC9D,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS;IACpE,kCAAkC;IAClC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM;IAClE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK;IACrE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IACnD,2BAA2B;IAC3B,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM;IACjE,oBAAoB;IACpB,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IACzD,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM;IACrD,QAAQ;IACR,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO;IAChE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ;IACxD,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO;CAClC,CAAC,CAAC;AAEU,QAAA,YAAY,GAAG,IAAI,GAAG,CAAC;IAClC,WAAW;IACX,GAAG,EAAE,IAAI,EAAE,KAAK;IAChB,eAAe;IACf,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO;IAClE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO;IAChE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM
,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ;IAChE,WAAW;IACX,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ;IACjC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU;IAClC,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,SAAS;IAC7B,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS;IAC/B,IAAI,EAAE,KAAK,EAAE,QAAQ;IACrB,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW;IACtC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY;IAC/C,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO;IAChC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM;IACvC,2BAA2B;IAC3B,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO;IACvD,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ;IAC9B,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO;IAC5B,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ;IAClC,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO;IAC9B,0BAA0B;IAC1B,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK;IACtC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI;IACpD,UAAU;IACV,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO;IAC7D,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ;IACrD,QAAQ;IACR,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IAC3D,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;CAC9B,CAAC,CAAC;AAEH;;GAEG;AACH,SAAgB,UAAU,CAAC,IAAY,EAAE,WAAqB,IAAI;IAChE,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACtC,MAAM,GAAG,GAAG,QAAQ,KAAK,IAAI,CAAC,CAAC,CAAC,oBAAY,CAAC,CAAC,CAAC,oBAAY,CAAC;IAC5D,OAAO,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tokenizer — Tokenización básica basada en regex
|
|
3
|
+
*/
|
|
4
|
+
import type { Token, Language } from '../types';
|
|
5
|
+
/**
|
|
6
|
+
* Tokenizes a text into tokens with metadata.
* @param text Text to tokenize.
* @param language Optional language of the text.
* @returns The tokens found in `text`.
|
|
7
|
+
*/
|
|
8
|
+
export declare function tokenize(text: string, language?: Language): Token[];
|
|
9
|
+
/**
|
|
10
|
+
* Extracts the content words (tokens that are not stopwords) from a text.
* @param text Text to process.
* @param language Optional language of the text.
* @returns The content-word tokens of `text`.
|
|
11
|
+
*/
|
|
12
|
+
export declare function contentWords(text: string, language?: Language): Token[];
|
|
13
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/nlp/tokenizer.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAKhD;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,GAAE,QAAe,GAAG,KAAK,EAAE,CAoBzE;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,GAAE,QAAe,GAAG,KAAK,EAAE,CAE7E"}
|