@keymanapp/kmc-model 17.0.155-alpha → 17.0.156-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/src/build-trie.d.ts +42 -40
- package/build/src/build-trie.d.ts.map +1 -1
- package/build/src/build-trie.js +370 -366
- package/build/src/build-trie.js.map +1 -1
- package/build/src/compiler-callbacks.d.ts +6 -4
- package/build/src/compiler-callbacks.d.ts.map +1 -1
- package/build/src/compiler-callbacks.js +7 -5
- package/build/src/compiler-callbacks.js.map +1 -1
- package/build/src/join-word-breaker-decorator.d.ts +12 -10
- package/build/src/join-word-breaker-decorator.d.ts.map +1 -1
- package/build/src/join-word-breaker-decorator.js +123 -121
- package/build/src/join-word-breaker-decorator.js.map +1 -1
- package/build/src/lexical-model-compiler.d.ts +19 -16
- package/build/src/lexical-model-compiler.d.ts.map +1 -1
- package/build/src/lexical-model-compiler.js +153 -150
- package/build/src/lexical-model-compiler.js.map +1 -1
- package/build/src/lexical-model.d.ts +137 -135
- package/build/src/lexical-model.d.ts.map +1 -1
- package/build/src/lexical-model.js +8 -6
- package/build/src/lexical-model.js.map +1 -1
- package/build/src/main.d.ts +19 -17
- package/build/src/main.d.ts.map +1 -1
- package/build/src/main.js +60 -60
- package/build/src/main.js.map +1 -1
- package/build/src/model-compiler-errors.d.ts +53 -51
- package/build/src/model-compiler-errors.d.ts.map +1 -1
- package/build/src/model-compiler-errors.js +58 -56
- package/build/src/model-compiler-errors.js.map +1 -1
- package/build/src/model-defaults.d.ts +58 -56
- package/build/src/model-defaults.d.ts.map +1 -1
- package/build/src/model-defaults.js +108 -106
- package/build/src/model-defaults.js.map +1 -1
- package/build/src/model-definitions.d.ts +73 -71
- package/build/src/model-definitions.d.ts.map +1 -1
- package/build/src/model-definitions.js +191 -189
- package/build/src/model-definitions.js.map +1 -1
- package/build/src/script-overrides-decorator.d.ts +6 -4
- package/build/src/script-overrides-decorator.d.ts.map +1 -1
- package/build/src/script-overrides-decorator.js +66 -64
- package/build/src/script-overrides-decorator.js.map +1 -1
- package/package.json +7 -7
|
@@ -1,150 +1,153 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
import * as ts from "typescript";
|
|
6
|
-
import
|
|
7
|
-
import
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
* @param
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
//
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
case "
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
//
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
func += `
|
|
53
|
-
|
|
54
|
-
func += `
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
}
|
|
83
|
-
;
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
//
|
|
102
|
-
//
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
.
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
1
|
+
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="aa7f9248-c034-571a-ad9c-1e537ecc1597")}catch(e){}}();
|
|
2
|
+
/*
|
|
3
|
+
lexical-model-compiler.ts: base file for lexical model compiler.
|
|
4
|
+
*/
|
|
5
|
+
import * as ts from "typescript";
|
|
6
|
+
import { createTrieDataStructure } from "./build-trie.js";
|
|
7
|
+
import { ModelDefinitions } from "./model-definitions.js";
|
|
8
|
+
import { decorateWithJoin } from "./join-word-breaker-decorator.js";
|
|
9
|
+
import { decorateWithScriptOverrides } from "./script-overrides-decorator.js";
|
|
10
|
+
import { ModelCompilerError, ModelCompilerMessages } from "./model-compiler-errors.js";
|
|
11
|
+
import { callbacks, setCompilerCallbacks } from "./compiler-callbacks.js";
|
|
12
|
+
/**
 * Compiles a lexical model source description into the JavaScript text of a
 * self-contained model file.
 */
export default class LexicalModelCompiler {
  /**
   * Registers the host-provided callbacks by handing them to
   * `setCompilerCallbacks`.
   *
   * @param callbacks Callback object forwarded to `setCompilerCallbacks`.
   */
  constructor(callbacks) {
    setCompilerCallbacks(callbacks);
  }

  /**
   * Returns the generated code for the model that will ultimately be loaded by
   * the LMLayer worker. This code contains all model parameters, and specifies
   * word breakers and auxiliary functions that may be required.
   *
   * @param model_id     The model ID. TODO: not sure if this is actually required!
   *                     (It is not read anywhere in this method.)
   * @param modelSource  A specification of the model to compile
   * @param sourcePath   Where to find auxiliary source files
   * @returns The generated JavaScript, wrapped in a strict-mode IIFE.
   * @throws ModelCompilerError when the format is "fst-foma-1.0" (unimplemented)
   *         or is not one of the recognised formats.
   */
  generateLexicalModelCode(model_id, modelSource, sourcePath) {
    // TODO: add metadata in comment
    const filePrefix = `(function() {\n'use strict';\n`;
    const fileSuffix = `})();`;
    let func = filePrefix;

    //
    // Emit the model as code and data
    //
    // Each case has its own block so its declarations do not leak into the
    // scope shared by the whole switch statement.
    switch (modelSource.format) {
      case "custom-1.0": {
        // Load every source file relative to sourcePath and decode it to text.
        const sources = modelSource.sources.map((source) =>
          new TextDecoder().decode(callbacks.loadFile(callbacks.path.join(sourcePath, source))));
        func += this.transpileSources(sources).join('\n');
        func += `LMLayerWorker.loadModel(new ${modelSource.rootClass}());\n`;
        break;
      }
      case "fst-foma-1.0":
        throw new ModelCompilerError(ModelCompilerMessages.Error_UnimplementedModelFormat({ format: modelSource.format }));
      case "trie-1.0": {
        // Convert all relative path names to paths relative to the enclosing
        // directory. This way, we'll read the files relative to the model.ts
        // file, rather than the current working directory.
        const filenames = modelSource.sources.map(filename => callbacks.path.join(sourcePath, filename));
        const definitions = new ModelDefinitions(modelSource);
        func += definitions.compileDefinitions();
        // Needs the actual searchTermToKey closure...
        // Which needs the actual applyCasing closure as well.
        func += `LMLayerWorker.loadModel(new models.TrieModel(${createTrieDataStructure(filenames, definitions.searchTermToKey)}, {\n`;
        const wordBreakerSourceCode = compileWordBreaker(normalizeWordBreakerSpec(modelSource.wordBreaker));
        func += ` wordBreaker: ${wordBreakerSourceCode},\n`;
        // START - the lexical mapping option block
        func += ` searchTermToKey: ${definitions.compileSearchTermToKey()},\n`;
        if (modelSource.languageUsesCasing != null) {
          func += ` languageUsesCasing: ${modelSource.languageUsesCasing},\n`;
        } // else leave undefined.
        if (modelSource.languageUsesCasing) {
          func += ` applyCasing: ${definitions.compileApplyCasing()},\n`;
        }
        // END - the lexical mapping option block.
        if (modelSource.punctuation) {
          func += ` punctuation: ${JSON.stringify(modelSource.punctuation)},\n`;
        }
        func += `}));\n`;
        break;
      }
      default:
        throw new ModelCompilerError(ModelCompilerMessages.Error_UnknownModelFormat({ format: modelSource.format }));
    }

    func += fileSuffix;
    return func;
  }

  /**
   * Transpiles each source text with `ts.transpileModule`, targeting ES3 with
   * no module system, and returns the resulting output texts in input order.
   *
   * @param sources Array of source texts to transpile.
   * @returns Array of transpiled JavaScript texts.
   */
  transpileSources(sources) {
    return sources.map((source) => ts.transpileModule(source, {
      compilerOptions: {
        target: ts.ScriptTarget.ES3,
        module: ts.ModuleKind.None,
      }
    }).outputText);
  }
}
|
|
85
|
+
;
|
|
86
|
+
/**
 * Produces the source text of a JavaScript expression that evaluates to a
 * word breaking function.
 *
 * The base breaker named by `spec.use` is compiled first; it is then wrapped
 * by the join decorator when `spec.joinWordsAt` is truthy, and after that by
 * the script-overrides decorator when `spec.overrideScriptDefaults` is truthy.
 *
 * @param spec Normalized word breaker specification.
 * @returns Source code of the (possibly decorated) word breaker expression.
 */
function compileWordBreaker(spec) {
  const baseCode = compileInnerWordBreaker(spec.use);
  const joinedCode = spec.joinWordsAt
    ? compileJoinDecorator(spec, baseCode)
    : baseCode;
  return spec.overrideScriptDefaults
    ? compileScriptOverrides(spec, joinedCode)
    : joinedCode;
}
|
|
100
|
+
/**
 * Wraps an existing word breaker expression in the join decorator.
 *
 * The decorator's own source (from `decorateWithJoin.toString()`) is bundled
 * as an immediately-invoked function expression, shaped like
 *   (function join(breaker, joiners) {...}(breaker, joiners))
 * so that the decorated breaker is produced as soon as the generated
 * expression is evaluated.
 *
 * @param spec Word breaker spec; `spec.joinWordsAt` is serialized as JSON.
 * @param existingWordBreakerCode Source of the breaker expression to wrap.
 * @returns Source code of the decorated word breaker expression.
 */
function compileJoinDecorator(spec, existingWordBreakerCode) {
  const joinersLiteral = JSON.stringify(spec.joinWordsAt);
  const decoratorSource = decorateWithJoin.toString();
  return '(' + decoratorSource + '(' + existingWordBreakerCode + ', ' + joinersLiteral + '))';
}
|
|
109
|
+
function compileScriptOverrides(spec, existingWordBreakerCode) {
|
|
110
|
+
return `(${decorateWithScriptOverrides.toString()}(${existingWordBreakerCode}, '${spec.overrideScriptDefaults}'))`;
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* Compiles the base word breaker, that may be decorated later.
|
|
114
|
+
* Returns the source code of a JavaScript expression.
|
|
115
|
+
*/
|
|
116
|
+
function compileInnerWordBreaker(spec) {
|
|
117
|
+
if (typeof spec === "string") {
|
|
118
|
+
// It must be a builtin word breaker, so just instantiate it.
|
|
119
|
+
return `wordBreakers['${spec}']`;
|
|
120
|
+
}
|
|
121
|
+
else {
|
|
122
|
+
// It must be a function:
|
|
123
|
+
return spec.toString()
|
|
124
|
+
// Note: the .toString() might just be the property name, but we want a
|
|
125
|
+
// plain function:
|
|
126
|
+
.replace(/^wordBreak(ing|er)\b/, 'function');
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
/**
 * Converts any accepted form of word breaker specification into the common
 * object form (`{ use: ... }`) that the rest of the compiler works with.
 *
 * @param wordBreakerSpec The specification as supplied by the model source.
 * @returns An object with a `use` property; an object that already has a
 *          truthy `use` is returned as-is.
 * @throws ModelCompilerError when the value is none of the recognised forms.
 */
function normalizeWordBreakerSpec(wordBreakerSpec) {
  // Unspecified (null or undefined): fall back to the default word breaker.
  if (wordBreakerSpec == undefined) {
    return { use: 'default' };
  }
  // A bare function or builtin name: wrap it in the object form.
  if (isSimpleWordBreaker(wordBreakerSpec)) {
    return { use: wordBreakerSpec };
  }
  // Already in object form.
  if (wordBreakerSpec.use) {
    return wordBreakerSpec;
  }
  throw new ModelCompilerError(ModelCompilerMessages.Error_UnknownWordBreaker({ spec: wordBreakerSpec.toString() }));
}
|
|
149
|
+
/**
 * Tells whether a spec is in one of the simple forms: a function, or one of
 * the builtin names "default" and "ascii".
 *
 * @param spec Value to test.
 * @returns true for a function, "default" or "ascii"; false otherwise.
 */
function isSimpleWordBreaker(spec) {
  if (typeof spec === "function") {
    return true;
  }
  return spec === "default" || spec === "ascii";
}
|
|
152
|
+
//# debugId=aa7f9248-c034-571a-ad9c-1e537ecc1597
|
|
153
|
+
//# sourceMappingURL=lexical-model-compiler.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"
|
|
1
|
+
{"debug_id":"aa7f9248-c034-571a-ad9c-1e537ecc1597","file":"lexical-model-compiler.js","mappings":";AAAA;;EAEE;AAEF,OAAO,KAAK,EAAE,MAAM,YAAY,CAAC;AACjC,OAAO,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAC,gBAAgB,EAAC,MAAM,kCAAkC,CAAC;AAClE,OAAO,EAAC,2BAA2B,EAAC,MAAM,iCAAiC,CAAC;AAE5E,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACvF,OAAO,EAAE,SAAS,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAG1E,MAAM,CAAC,OAAO,OAAO,oBAAoB;IAEvC,YAAY,SAA4B;QACtC,oBAAoB,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAED;;;;;;;;OAQG;IACH,wBAAwB,CAAC,QAAgB,EAAE,WAA+B,EAAE,UAAkB;QAC5F,gCAAgC;QAChC,MAAM,UAAU,GAAW,gCAAgC,CAAC;QAC5D,MAAM,UAAU,GAAW,OAAO,CAAC;QACnC,IAAI,IAAI,GAAG,UAAU,CAAC;QAEtB,EAAE;QACF,kCAAkC;QAClC,EAAE;QAEF,QAAO,WAAW,CAAC,MAAM,EAAE;YACzB,KAAK,YAAY;gBACf,IAAI,OAAO,GAAa,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,UAAS,MAAM;oBAC7D,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;gBAC/F,CAAC,CAAC,CAAC;gBACH,IAAI,IAAI,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAClD,IAAI,IAAI,+BAA+B,WAAW,CAAC,SAAS,QAAQ,CAAC;gBACrE,MAAM;YACR,KAAK,cAAc;gBACjB,MAAM,IAAI,kBAAkB,CAAC,qBAAqB,CAAC,8BAA8B,CAAC,EAAC,MAAM,EAAC,WAAW,CAAC,MAAM,EAAC,CAAC,CAAC,CAAC;YAClH,KAAK,UAAU;gBACb,qEAAqE;gBACrE,qEAAqE;gBACrE,mDAAmD;gBACnD,IAAI,SAAS,GAAG,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;gBAE/F,IAAI,WAAW,GAAG,IAAI,gBAAgB,CAAC,WAAW,CAAC,CAAC;gBAEpD,IAAI,IAAI,WAAW,CAAC,kBAAkB,EAAE,CAAC;gBAEzC,8CAA8C;gBAC9C,sDAAsD;gBACtD,IAAI,IAAI,gDACN,uBAAuB,CAAC,SAAS,EAAE,WAAW,CAAC,eAAe,CAChE,OAAO,CAAC;gBAER,IAAI,qBAAqB,GAAG,kBAAkB,CAAC,wBAAwB,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC;gBAClG,IAAI,IAAI,kBAAkB,qBAAqB,KAAK,CAAC;gBAErD,2CAA2C;gBAC3C,IAAI,IAAI,sBAAsB,WAAW,CAAC,sBAAsB,EAAE,KAAK,CAAC;gBAExE,IAAG,WAAW,CAAC,kBAAkB,IAAI,IAAI,EAAE;oBACzC,IAAI,IAAI,yBAAyB,WAAW,CAAC,kBAAkB,KAAK,CAAC;iBACtE,CAAC,wBAAwB;gBAE1B,IAAG,WAAW,CAAC,kBAAkB,EAAE;oBACjC,IAAI,IAAI,kBAAkB,W
AAW,CAAC,kBAAkB,EAAE,KAAK,CAAC;iBACjE;gBACD,0CAA0C;gBAE1C,IAAI,WAAW,CAAC,WAAW,EAAE;oBAC3B,IAAI,IAAI,kBAAkB,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,WAAW,CAAC,KAAK,CAAC;iBACxE;gBACD,IAAI,IAAI,QAAQ,CAAC;gBACjB,MAAM;YACR;gBACE,MAAM,IAAI,kBAAkB,CAAC,qBAAqB,CAAC,wBAAwB,CAAC,EAAC,MAAM,EAAE,WAAW,CAAC,MAAM,EAAC,CAAC,CAAC,CAAC;SAC9G;QAED,IAAI,IAAI,UAAU,CAAC;QAEnB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,gBAAgB,CAAC,OAAsB;QACrC,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,eAAe,CAAC,MAAM,EAAE;YACtD,eAAe,EAAE;gBACf,MAAM,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG;gBAC3B,MAAM,EAAE,EAAE,CAAC,UAAU,CAAC,IAAI;aAC3B;SACF,CAAC,CAAC,UAAU,CACd,CAAC;IACJ,CAAC;IAAA,CAAC;CAEH;AAAA,CAAC;AAEF;;;GAGG;AACH,SAAS,kBAAkB,CAAC,IAAqB;IAC/C,IAAI,eAAe,GAAG,uBAAuB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAExD,IAAI,IAAI,CAAC,WAAW,EAAE;QACpB,eAAe,GAAG,oBAAoB,CAAC,IAAI,EAAE,eAAe,CAAC,CAAC;KAC/D;IAED,IAAI,IAAI,CAAC,sBAAsB,EAAE;QAC/B,eAAe,GAAG,sBAAsB,CAAC,IAAI,EAAE,eAAe,CAAC,CAAC;KACjE;IAED,OAAO,eAAe,CAAC;AACzB,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAqB,EAAE,uBAA+B;IAClF,uDAAuD;IACvD,2EAA2E;IAC3E,+DAA+D;IAC/D,6DAA6D;IAC7D,iBAAiB;IACjB,IAAI,UAAU,GAAW,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;IACzD,OAAO,IAAI,gBAAgB,CAAC,QAAQ,EAAE,IAAI,uBAAuB,KAAK,UAAU,IAAI,CAAC;AACvF,CAAC;AAED,SAAS,sBAAsB,CAAC,IAAqB,EAAE,uBAA+B;IACpF,OAAO,IAAI,2BAA2B,CAAC,QAAQ,EAAE,IAAI,uBAAuB,MAAM,IAAI,CAAC,sBAAsB,KAAK,CAAC;AACrH,CAAC;AAED;;;GAGG;AACH,SAAS,uBAAuB,CAAC,IAA2B;IAC1D,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE;QAC5B,6DAA6D;QAC7D,OAAO,iBAAiB,IAAI,IAAI,CAAC;KAClC;SAAM;QACL,yBAAyB;QACzB,OAAO,IAAI,CAAC,QAAQ,EAAE;YACpB,uEAAuE;YACvE,kBAAkB;aACjB,OAAO,CAAC,sBAAsB,EAAE,UAAU,CAAC,CAAC;KAChD;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,wBAAwB,CAAC,eAAkD;IAClF,IAAI,eAAe,IAAI,SAAS,EAAE;QAChC,qDAAqD;QACrD,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC;KAC3B;SAAM,IAAI,mBAAmB,CAAC,eAAe,CAAC,EAAE;QAC/C,0EAA0E;QAC1E,OAAO,EAAE,GAAG,EAAE,eAAe,EAAE,CAAC;KACjC;SAAM,IAAI,eAAe,CAAC,GAAG,EAAE;QAC9B,OAAO,eAAe,CAAC;KACxB;SAAM;QACL,MAAM,IAAI,kBAAkB,CAAC,qBAAqB,CAAC,wBAAwB,CAAC,EAAC,IAAI,EAAE,eAAe,CAAC,QAAQ,EAAE,EAAC,CAAC,CAAC,CAA
C;KAClH;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,IAA6C;IACxE,OAAO,OAAO,IAAI,KAAK,UAAU,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,OAAO,CAAC;AAC9E,CAAC","names":[],"sourceRoot":"","sources":["../../src/lexical-model-compiler.ts"],"version":3}
|
|
@@ -1,135 +1,137 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
*
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
33
|
-
*
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
*
|
|
40
|
-
*
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
*
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
*
|
|
63
|
-
*
|
|
64
|
-
*
|
|
65
|
-
*
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
*
|
|
69
|
-
*
|
|
70
|
-
* *
|
|
71
|
-
* *
|
|
72
|
-
* *
|
|
73
|
-
*
|
|
74
|
-
*
|
|
75
|
-
*
|
|
76
|
-
*
|
|
77
|
-
*
|
|
78
|
-
*
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
*
|
|
82
|
-
*
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
export
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
*
|
|
94
|
-
*
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
*
|
|
100
|
-
*
|
|
101
|
-
*
|
|
102
|
-
* - '
|
|
103
|
-
*
|
|
104
|
-
*
|
|
105
|
-
*
|
|
106
|
-
*
|
|
107
|
-
*
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
*
|
|
113
|
-
*
|
|
114
|
-
*
|
|
115
|
-
*
|
|
116
|
-
* -
|
|
117
|
-
* -
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
*
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
*
|
|
128
|
-
*
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
1
|
+
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="94f66ee3-becb-53a9-9cc1-28e99f0ada0e")}catch(e){}}();
|
|
2
|
+
/**
|
|
3
|
+
* Interfaces and constants used by the lexical model compiler. These target
|
|
4
|
+
* the LMLayer's internal worker code, so we provide those definitions too.
|
|
5
|
+
*/
|
|
6
|
+
/// <reference types="@keymanapp/models-types" />
|
|
7
|
+
export interface LexicalModelDeclaration {
|
|
8
|
+
readonly format: 'trie-1.0' | 'fst-foma-1.0' | 'custom-1.0';
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Keyman 14.0+ word breaker specification:
|
|
12
|
+
*
|
|
13
|
+
* Can support all old word breaking specifications,
|
|
14
|
+
* but can also be extended with options.
|
|
15
|
+
*
|
|
16
|
+
* @since 14.0
|
|
17
|
+
*/
|
|
18
|
+
export interface WordBreakerSpec {
|
|
19
|
+
readonly use: SimpleWordBreakerSpec;
|
|
20
|
+
/**
|
|
21
|
+
* If present, joins words that were split by the word breaker
|
|
22
|
+
* together at the given strings. e.g.,
|
|
23
|
+
*
|
|
24
|
+
* joinWordsAt: ['-'] // to keep hyphenated items together
|
|
25
|
+
*
|
|
26
|
+
* @since 14.0
|
|
27
|
+
*/
|
|
28
|
+
readonly joinWordsAt?: string[];
|
|
29
|
+
/**
|
|
30
|
+
* Overrides word splitting behaviour for certain scripts.
|
|
31
|
+
* For example, specifying that spaces break words in certain South-East
|
|
32
|
+
* Asian scripts that otherwise do not use spaces.
|
|
33
|
+
*
|
|
34
|
+
* @since 14.0
|
|
35
|
+
*/
|
|
36
|
+
readonly overrideScriptDefaults?: OverrideScriptDefaults;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Simplified word breaker specification.
|
|
40
|
+
*
|
|
41
|
+
* @since 11.0
|
|
42
|
+
*/
|
|
43
|
+
export type SimpleWordBreakerSpec = 'default' | 'ascii' | WordBreakingFunction;
|
|
44
|
+
/**
|
|
45
|
+
* Simplifies input text to facilitate finding entries within a lexical model's
|
|
46
|
+
* lexicon.
|
|
47
|
+
* @since 11.0
|
|
48
|
+
*/
|
|
49
|
+
export type SimpleWordformToKeySpec = (term: string) => string;
|
|
50
|
+
/**
|
|
51
|
+
* Simplifies input text to facilitate finding entries within a lexical model's
|
|
52
|
+
* lexicon, using the model's `applyCasing` function to assist in the keying process.
|
|
53
|
+
* @since 14.0
|
|
54
|
+
*/
|
|
55
|
+
export type CasedWordformToKeySpec = (term: string, applyCasing?: CasingFunction) => string;
|
|
56
|
+
/**
|
|
57
|
+
* Simplifies input text to facilitate finding entries within a lexical model's
|
|
58
|
+
* lexicon.
|
|
59
|
+
*/
|
|
60
|
+
export type WordformToKeySpec = SimpleWordformToKeySpec | CasedWordformToKeySpec;
|
|
61
|
+
/**
|
|
62
|
+
* Override the default word breaking behaviour for some scripts.
|
|
63
|
+
*
|
|
64
|
+
* There is currently only one option:
|
|
65
|
+
*
|
|
66
|
+
* 'break-words-at-spaces'
|
|
67
|
+
* : some South-East Asian scripts conventionally do not use space or any
|
|
68
|
+
* explicit word boundary character to write word breaks. These scripts are:
|
|
69
|
+
*
|
|
70
|
+
* * Burmese
|
|
71
|
+
* * Khmer
|
|
72
|
+
* * Thai
|
|
73
|
+
* * Lao
|
|
74
|
+
*
|
|
75
|
+
* (this list may be incomplete and extended in the future)
|
|
76
|
+
*
|
|
77
|
+
* For these scripts, the default word breaker breaks at **every**
|
|
78
|
+
* letter/syllable/ideograph. However, in languages that use these scripts BUT
|
|
79
|
+
* use spaces (or some other delimiter) as word breaks, enable
|
|
80
|
+
* 'break-words-at-spaces'; enabling 'break-words-at-spaces' prevents the word
|
|
81
|
+
* breaker from making too many breaks in these scripts.
|
|
82
|
+
*
|
|
83
|
+
* @since 14.0
|
|
84
|
+
*/
|
|
85
|
+
export type OverrideScriptDefaults = 'break-words-at-spaces';
|
|
86
|
+
export interface LexicalModelSource extends LexicalModelDeclaration {
|
|
87
|
+
readonly sources: Array<string>;
|
|
88
|
+
/**
|
|
89
|
+
* The name of the type to instantiate (without parameters) as the base object for a custom predictive model.
|
|
90
|
+
*/
|
|
91
|
+
readonly rootClass?: string;
|
|
92
|
+
/**
|
|
93
|
+
* When set to `true`, suggestions will attempt to match the case of the input text even if
|
|
94
|
+
* the lexicon entries use a different casing scheme due to search term keying effects.
|
|
95
|
+
* @since 14.0
|
|
96
|
+
*/
|
|
97
|
+
readonly languageUsesCasing?: boolean;
|
|
98
|
+
/**
|
|
99
|
+
* Specifies the casing rules for a language. Should implement three casing forms:
|
|
100
|
+
* - 'lower' -- a fully-lowercased version of the text appropriate for the language's
|
|
101
|
+
* use of the writing system.
|
|
102
|
+
* - 'upper' -- a fully-uppercased version of the text
|
|
103
|
+
* - 'initial' -- a version preserving the input casing aside from the initial character,
|
|
104
|
+
* which is uppercased (like with proper nouns and sentence-initial words in English
|
|
105
|
+
* sentences.)
|
|
106
|
+
*
|
|
107
|
+
* This is only utilized if `languageUsesCasing` is defined and set to `true`.
|
|
108
|
+
* @since 14.0
|
|
109
|
+
*/
|
|
110
|
+
readonly applyCasing?: CasingFunction;
|
|
111
|
+
/**
|
|
112
|
+
* Which word breaker to use. Choose from:
|
|
113
|
+
*
|
|
114
|
+
* - 'default' -- breaks according to Unicode UAX #29 §4.1 Default Word
|
|
115
|
+
* Boundary Specification, which works well for *most* languages.
|
|
116
|
+
* - 'ascii' -- a very simple word breaker, for demonstration purposes only.
|
|
117
|
+
* - word breaking function -- provide your own function that breaks words.
|
|
118
|
+
* - class-based word-breaker - may be supported in the future.
|
|
119
|
+
*/
|
|
120
|
+
readonly wordBreaker?: WordBreakerSpec | SimpleWordBreakerSpec;
|
|
121
|
+
/**
|
|
122
|
+
* How to simplify words, to convert them into simplified search keys
|
|
123
|
+
* This often involves removing accents, lowercasing, etc.
|
|
124
|
+
*/
|
|
125
|
+
readonly searchTermToKey?: WordformToKeySpec;
|
|
126
|
+
/**
|
|
127
|
+
* Punctuation and spacing suggested by the model.
|
|
128
|
+
*
|
|
129
|
+
* @see LexicalModelPunctuation
|
|
130
|
+
*/
|
|
131
|
+
readonly punctuation?: LexicalModelPunctuation;
|
|
132
|
+
}
|
|
133
|
+
export interface LexicalModelCompiled extends LexicalModelDeclaration {
|
|
134
|
+
readonly id: string;
|
|
135
|
+
}
|
|
136
|
+
//# debugId=94f66ee3-becb-53a9-9cc1-28e99f0ada0e
|
|
137
|
+
//# sourceMappingURL=lexical-model.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"
|
|
1
|
+
{"debug_id":"94f66ee3-becb-53a9-9cc1-28e99f0ada0e","file":"lexical-model.d.ts","mappings":";AAAA;;;GAGG;;AAEH,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,MAAM,EAAE,UAAU,GAAC,cAAc,GAAC,YAAY,CAAC;CAEzD;AAED;;;;;;;GAOG;AACF,MAAM,WAAW,eAAe;IAC/B,QAAQ,CAAC,GAAG,EAAE,qBAAqB,CAAC;IACpC;;;;;;;OAOG;IACH,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAEhC;;;;;;OAMG;IACH,QAAQ,CAAC,sBAAsB,CAAC,EAAE,sBAAsB,CAAC;CAC1D;AAED;;;;GAIG;AACF,MAAM,MAAM,qBAAqB,GAAG,SAAS,GAAG,OAAO,GAAG,oBAAoB,CAAC;AAEhF;;;;GAIG;AACF,MAAM,MAAM,uBAAuB,GAAG,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;AAEhE;;;;GAIG;AACH,MAAM,MAAM,sBAAsB,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,cAAc,KAAK,MAAM,CAAC;AAE5F;;;GAGG;AACF,MAAM,MAAM,iBAAiB,GAAG,uBAAuB,GAAG,sBAAsB,CAAC;AAElF;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,MAAM,sBAAsB,GAAG,uBAAuB,CAAC;AAG7D,MAAM,WAAW,kBAAmB,SAAQ,uBAAuB;IACjE,QAAQ,CAAC,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAChC;;OAEG;IACH,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAA;IAE3B;;;;OAIG;IACH,QAAQ,CAAC,kBAAkB,CAAC,EAAE,OAAO,CAAA;IAErC;;;;;;;;;;;OAWG;IACH,QAAQ,CAAC,WAAW,CAAC,EAAE,cAAc,CAAA;IAErC;;;;;;;;OAQG;IACH,QAAQ,CAAC,WAAW,CAAC,EAAE,eAAe,GAAG,qBAAqB,CAAC;IAE/D;;;OAGG;IACH,QAAQ,CAAC,eAAe,CAAC,EAAE,iBAAiB,CAAC;IAE7C;;;;OAIG;IACH,QAAQ,CAAC,WAAW,CAAC,EAAE,uBAAuB,CAAC;CAChD;AAED,MAAM,WAAW,oBAAqB,SAAQ,uBAAuB;IACnE,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;CACrB","names":[],"sourceRoot":"","sources":["../../src/lexical-model.ts"],"version":3}
|