@keymanapp/kmc-model 17.0.85-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/.nyc_output/coverage-10524-1681239236645-0.json +1 -0
  2. package/Makefile +38 -0
  3. package/build/cjs-src/lexical-model-compiler.cjs +152688 -0
  4. package/build/src/build-trie.d.ts +40 -0
  5. package/build/src/build-trie.d.ts.map +1 -0
  6. package/build/src/build-trie.js +362 -0
  7. package/build/src/build-trie.js.map +1 -0
  8. package/build/src/join-word-breaker-decorator.d.ts +10 -0
  9. package/build/src/join-word-breaker-decorator.d.ts.map +1 -0
  10. package/build/src/join-word-breaker-decorator.js +121 -0
  11. package/build/src/join-word-breaker-decorator.js.map +1 -0
  12. package/build/src/lexical-model-compiler.d.ts +19 -0
  13. package/build/src/lexical-model-compiler.d.ts.map +1 -0
  14. package/build/src/lexical-model-compiler.js +155 -0
  15. package/build/src/lexical-model-compiler.js.map +1 -0
  16. package/build/src/lexical-model.d.ts +135 -0
  17. package/build/src/lexical-model.d.ts.map +1 -0
  18. package/build/src/lexical-model.js +6 -0
  19. package/build/src/lexical-model.js.map +1 -0
  20. package/build/src/main.d.ts +15 -0
  21. package/build/src/main.d.ts.map +1 -0
  22. package/build/src/main.js +46 -0
  23. package/build/src/main.js.map +1 -0
  24. package/build/src/model-compiler-errors.d.ts +77 -0
  25. package/build/src/model-compiler-errors.d.ts.map +1 -0
  26. package/build/src/model-compiler-errors.js +156 -0
  27. package/build/src/model-compiler-errors.js.map +1 -0
  28. package/build/src/model-defaults.d.ts +56 -0
  29. package/build/src/model-defaults.d.ts.map +1 -0
  30. package/build/src/model-defaults.js +106 -0
  31. package/build/src/model-defaults.js.map +1 -0
  32. package/build/src/model-definitions.d.ts +71 -0
  33. package/build/src/model-definitions.d.ts.map +1 -0
  34. package/build/src/model-definitions.js +189 -0
  35. package/build/src/model-definitions.js.map +1 -0
  36. package/build/src/script-overrides-decorator.d.ts +4 -0
  37. package/build/src/script-overrides-decorator.d.ts.map +1 -0
  38. package/build/src/script-overrides-decorator.js +63 -0
  39. package/build/src/script-overrides-decorator.js.map +1 -0
  40. package/build/test/helpers/index.d.ts +69 -0
  41. package/build/test/helpers/index.d.ts.map +1 -0
  42. package/build/test/helpers/index.js +160 -0
  43. package/build/test/helpers/index.js.map +1 -0
  44. package/build/test/test-compile-model-with-pseudoclosure.d.ts +2 -0
  45. package/build/test/test-compile-model-with-pseudoclosure.d.ts.map +1 -0
  46. package/build/test/test-compile-model-with-pseudoclosure.js +200 -0
  47. package/build/test/test-compile-model-with-pseudoclosure.js.map +1 -0
  48. package/build/test/test-compile-model.d.ts +2 -0
  49. package/build/test/test-compile-model.d.ts.map +1 -0
  50. package/build/test/test-compile-model.js +30 -0
  51. package/build/test/test-compile-model.js.map +1 -0
  52. package/build/test/test-compile-trie.d.ts +2 -0
  53. package/build/test/test-compile-trie.d.ts.map +1 -0
  54. package/build/test/test-compile-trie.js +125 -0
  55. package/build/test/test-compile-trie.js.map +1 -0
  56. package/build/test/test-default-apply-case.d.ts +2 -0
  57. package/build/test/test-default-apply-case.d.ts.map +1 -0
  58. package/build/test/test-default-apply-case.js +105 -0
  59. package/build/test/test-default-apply-case.js.map +1 -0
  60. package/build/test/test-default-search-term-to-key.d.ts +2 -0
  61. package/build/test/test-default-search-term-to-key.d.ts.map +1 -0
  62. package/build/test/test-default-search-term-to-key.js +148 -0
  63. package/build/test/test-default-search-term-to-key.js.map +1 -0
  64. package/build/test/test-error-logger.d.ts +2 -0
  65. package/build/test/test-error-logger.d.ts.map +1 -0
  66. package/build/test/test-error-logger.js +26 -0
  67. package/build/test/test-error-logger.js.map +1 -0
  68. package/build/test/test-join-word-breaker.d.ts +2 -0
  69. package/build/test/test-join-word-breaker.d.ts.map +1 -0
  70. package/build/test/test-join-word-breaker.js +84 -0
  71. package/build/test/test-join-word-breaker.js.map +1 -0
  72. package/build/test/test-model-definitions.d.ts +2 -0
  73. package/build/test/test-model-definitions.d.ts.map +1 -0
  74. package/build/test/test-model-definitions.js +165 -0
  75. package/build/test/test-model-definitions.js.map +1 -0
  76. package/build/test/test-override-script-defaults.d.ts +2 -0
  77. package/build/test/test-override-script-defaults.d.ts.map +1 -0
  78. package/build/test/test-override-script-defaults.js +28 -0
  79. package/build/test/test-override-script-defaults.js.map +1 -0
  80. package/build/test/test-parse-wordlist.d.ts +2 -0
  81. package/build/test/test-parse-wordlist.d.ts.map +1 -0
  82. package/build/test/test-parse-wordlist.js +110 -0
  83. package/build/test/test-parse-wordlist.js.map +1 -0
  84. package/build/test/test-punctuation.d.ts +2 -0
  85. package/build/test/test-punctuation.d.ts.map +1 -0
  86. package/build/test/test-punctuation.js +31 -0
  87. package/build/test/test-punctuation.js.map +1 -0
  88. package/build/test/tsconfig.tsbuildinfo +1 -0
  89. package/build/test/wordbreakers/data.d.ts +35 -0
  90. package/build/test/wordbreakers/data.d.ts.map +1 -0
  91. package/build/test/wordbreakers/data.js +1778 -0
  92. package/build/test/wordbreakers/data.js.map +1 -0
  93. package/build/test/wordbreakers/default-wordbreaker-esm.d.ts +10 -0
  94. package/build/test/wordbreakers/default-wordbreaker-esm.d.ts.map +1 -0
  95. package/build/test/wordbreakers/default-wordbreaker-esm.js +354 -0
  96. package/build/test/wordbreakers/default-wordbreaker-esm.js.map +1 -0
  97. package/build/tsconfig.tsbuildinfo +1 -0
  98. package/build.sh +73 -0
  99. package/coverage/lcov-report/base.css +224 -0
  100. package/coverage/lcov-report/block-navigation.js +87 -0
  101. package/coverage/lcov-report/favicon.png +0 -0
  102. package/coverage/lcov-report/index.html +161 -0
  103. package/coverage/lcov-report/prettify.css +1 -0
  104. package/coverage/lcov-report/prettify.js +2 -0
  105. package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
  106. package/coverage/lcov-report/sorter.js +196 -0
  107. package/coverage/lcov-report/src/build-trie.ts.html +1618 -0
  108. package/coverage/lcov-report/src/index.html +221 -0
  109. package/coverage/lcov-report/src/join-word-breaker-decorator.ts.html +487 -0
  110. package/coverage/lcov-report/src/lexical-model-compiler.ts.html +622 -0
  111. package/coverage/lcov-report/src/main.ts.html +271 -0
  112. package/coverage/lcov-report/src/model-compiler-errors.ts.html +691 -0
  113. package/coverage/lcov-report/src/model-defaults.ts.html +415 -0
  114. package/coverage/lcov-report/src/model-definitions.ts.html +748 -0
  115. package/coverage/lcov-report/src/script-overrides-decorator.ts.html +310 -0
  116. package/coverage/lcov-report/test/helpers/index.html +116 -0
  117. package/coverage/lcov-report/test/helpers/index.ts.html +646 -0
  118. package/coverage/lcov-report/test/index.html +266 -0
  119. package/coverage/lcov-report/test/test-compile-model-with-pseudoclosure.ts.html +802 -0
  120. package/coverage/lcov-report/test/test-compile-model.ts.html +187 -0
  121. package/coverage/lcov-report/test/test-compile-trie.ts.html +541 -0
  122. package/coverage/lcov-report/test/test-default-apply-case.ts.html +466 -0
  123. package/coverage/lcov-report/test/test-default-search-term-to-key.ts.html +628 -0
  124. package/coverage/lcov-report/test/test-error-logger.ts.html +196 -0
  125. package/coverage/lcov-report/test/test-join-word-breaker.ts.html +376 -0
  126. package/coverage/lcov-report/test/test-model-definitions.ts.html +676 -0
  127. package/coverage/lcov-report/test/test-override-script-defaults.ts.html +184 -0
  128. package/coverage/lcov-report/test/test-parse-wordlist.ts.html +466 -0
  129. package/coverage/lcov-report/test/test-punctuation.ts.html +190 -0
  130. package/coverage/lcov-report/test/wordbreakers/data.ts.html +5413 -0
  131. package/coverage/lcov-report/test/wordbreakers/default-wordbreaker-esm.ts.html +1234 -0
  132. package/coverage/lcov-report/test/wordbreakers/index.html +131 -0
  133. package/coverage/lcov.info +5969 -0
  134. package/package.json +61 -0
  135. package/src/build-trie.ts +511 -0
  136. package/src/join-word-breaker-decorator.ts +134 -0
  137. package/src/lexical-model-compiler.ts +179 -0
  138. package/src/lexical-model.ts +150 -0
  139. package/src/main.ts +62 -0
  140. package/src/model-compiler-errors.ts +203 -0
  141. package/src/model-defaults.ts +111 -0
  142. package/src/model-definitions.ts +222 -0
  143. package/src/script-overrides-decorator.ts +75 -0
  144. package/test/README.md +15 -0
  145. package/test/fixtures/example.qaa.joinwordbreaker/example.qaa.joinwordbreaker.model.ts +10 -0
  146. package/test/fixtures/example.qaa.joinwordbreaker/wordlist.tsv +3 -0
  147. package/test/fixtures/example.qaa.scriptusesspaces/example.qaa.scriptusesspaces.model.ts +10 -0
  148. package/test/fixtures/example.qaa.scriptusesspaces/wordlist.tsv +8 -0
  149. package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.kmp.json +45 -0
  150. package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.kps +35 -0
  151. package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.ts +6 -0
  152. package/test/fixtures/example.qaa.sencoten/wordlist.tsv +10 -0
  153. package/test/fixtures/example.qaa.smp/example.qaa.smp.model.ts +6 -0
  154. package/test/fixtures/example.qaa.smp/wordlist.tsv +5 -0
  155. package/test/fixtures/example.qaa.trivial/example.qaa.trivial.model.ts +5 -0
  156. package/test/fixtures/example.qaa.trivial/wordlist.tsv +3 -0
  157. package/test/fixtures/example.qaa.utf16be/example.qaa.utf16be.model.ts +5 -0
  158. package/test/fixtures/example.qaa.utf16be/wordlist.txt +0 -0
  159. package/test/fixtures/example.qaa.utf16le/example.qaa.utf16le.model.ts +5 -0
  160. package/test/fixtures/example.qaa.utf16le/wordlist.txt +0 -0
  161. package/test/fixtures/example.qaa.wordbreaker/example.qaa.wordbreaker.model.ts +9 -0
  162. package/test/fixtures/example.qaa.wordbreaker/wordlist.tsv +3 -0
  163. package/test/helpers/index.ts +187 -0
  164. package/test/test-compile-model-with-pseudoclosure.ts +239 -0
  165. package/test/test-compile-model.ts +34 -0
  166. package/test/test-compile-trie.ts +152 -0
  167. package/test/test-default-apply-case.ts +128 -0
  168. package/test/test-default-search-term-to-key.ts +181 -0
  169. package/test/test-error-logger.ts +38 -0
  170. package/test/test-join-word-breaker.ts +97 -0
  171. package/test/test-model-definitions.ts +198 -0
  172. package/test/test-override-script-defaults.ts +33 -0
  173. package/test/test-parse-wordlist.ts +127 -0
  174. package/test/test-punctuation.ts +35 -0
  175. package/test/tsconfig.json +22 -0
  176. package/test/wordbreakers/README.md +3 -0
  177. package/test/wordbreakers/data.ts +1776 -0
  178. package/test/wordbreakers/default-wordbreaker-esm.ts +383 -0
  179. package/tools/create-override-script-regexp.ts +145 -0
  180. package/tsconfig.json +17 -0
@@ -0,0 +1,179 @@
1
+ /*
2
+ lexical-model-compiler.ts: base file for lexical model compiler.
3
+ */
4
+
5
+ /// <reference path="./lexical-model.ts" />
6
+
7
+ import * as ts from "typescript";
8
+ import * as fs from "fs";
9
+ import * as path from "path";
10
+ import { createTrieDataStructure } from "./build-trie.js";
11
+ import { ModelDefinitions } from "./model-definitions.js";
12
+ import {decorateWithJoin} from "./join-word-breaker-decorator.js";
13
+ import {decorateWithScriptOverrides} from "./script-overrides-decorator.js";
14
+ import { LexicalModelSource, WordBreakerSpec, SimpleWordBreakerSpec } from "./lexical-model.js";
15
+
16
+ export default class LexicalModelCompiler {
17
+
18
+ /**
19
+ * Returns the generated code for the model that will ultimately be loaded by
20
+ * the LMLayer worker. This code contains all model parameters, and specifies
21
+ * word breakers and auxiliary functions that may be required.
22
+ *
23
+ * @param model_id The model ID. TODO: not sure if this is actually required!
24
+ * @param modelSource A specification of the model to compile
25
+ * @param sourcePath Where to find auxiliary source files
26
+ */
27
+ generateLexicalModelCode(model_id: string, modelSource: LexicalModelSource, sourcePath: string) {
28
+ // TODO: add metadata in comment
29
+ const filePrefix: string = `(function() {\n'use strict';\n`;
30
+ const fileSuffix: string = `})();`;
31
+ let func = filePrefix;
32
+
33
+ //
34
+ // Emit the model as code and data
35
+ //
36
+
37
+ switch(modelSource.format) {
38
+ case "custom-1.0":
39
+ let sources: string[] = modelSource.sources.map(function(source) {
40
+ return fs.readFileSync(path.join(sourcePath, source), 'utf8');
41
+ });
42
+ func += this.transpileSources(sources).join('\n');
43
+ func += `LMLayerWorker.loadModel(new ${modelSource.rootClass}());\n`;
44
+ break;
45
+ case "fst-foma-1.0":
46
+ throw new ModelSourceError(`Unimplemented model format: ${modelSource.format}`);
47
+ case "trie-1.0":
48
+ // Convert all relative path names to paths relative to the enclosing
49
+ // directory. This way, we'll read the files relative to the model.ts
50
+ // file, rather than the current working directory.
51
+ let filenames = modelSource.sources.map(filename => path.join(sourcePath, filename));
52
+
53
+ let definitions = new ModelDefinitions(modelSource);
54
+
55
+ func += definitions.compileDefinitions();
56
+
57
+ // Needs the actual searchTermToKey closure...
58
+ // Which needs the actual applyCasing closure as well.
59
+ func += `LMLayerWorker.loadModel(new models.TrieModel(${
60
+ createTrieDataStructure(filenames, definitions.searchTermToKey)
61
+ }, {\n`;
62
+
63
+ let wordBreakerSourceCode = compileWordBreaker(normalizeWordBreakerSpec(modelSource.wordBreaker));
64
+ func += ` wordBreaker: ${wordBreakerSourceCode},\n`;
65
+
66
+ // START - the lexical mapping option block
67
+ func += ` searchTermToKey: ${definitions.compileSearchTermToKey()},\n`;
68
+
69
+ if(modelSource.languageUsesCasing != null) {
70
+ func += ` languageUsesCasing: ${modelSource.languageUsesCasing},\n`;
71
+ } // else leave undefined.
72
+
73
+ if(modelSource.languageUsesCasing) {
74
+ func += ` applyCasing: ${definitions.compileApplyCasing()},\n`;
75
+ }
76
+ // END - the lexical mapping option block.
77
+
78
+ if (modelSource.punctuation) {
79
+ func += ` punctuation: ${JSON.stringify(modelSource.punctuation)},\n`;
80
+ }
81
+ func += `}));\n`;
82
+ break;
83
+ default:
84
+ throw new ModelSourceError(`Unknown model format: ${modelSource.format}`);
85
+ }
86
+
87
+ func += fileSuffix;
88
+
89
+ return func;
90
+ }
91
+
92
+ transpileSources(sources: Array<string>): Array<string> {
93
+ return sources.map((source) => ts.transpileModule(source, {
94
+ compilerOptions: {
95
+ target: ts.ScriptTarget.ES3,
96
+ module: ts.ModuleKind.None,
97
+ }
98
+ }).outputText
99
+ );
100
+ };
101
+
102
+ logError(s: string) {
103
+ console.error(require('chalk').red(s));
104
+ };
105
+ };
106
+
107
+ export class ModelSourceError extends Error {
108
+ }
109
+
110
+ /**
111
+ * Returns a JavaScript expression (as a string) that can serve as a word
112
+ * breaking function.
113
+ */
114
+ function compileWordBreaker(spec: WordBreakerSpec): string {
115
+ let wordBreakerCode = compileInnerWordBreaker(spec.use);
116
+
117
+ if (spec.joinWordsAt) {
118
+ wordBreakerCode = compileJoinDecorator(spec, wordBreakerCode);
119
+ }
120
+
121
+ if (spec.overrideScriptDefaults) {
122
+ wordBreakerCode = compileScriptOverrides(spec, wordBreakerCode);
123
+ }
124
+
125
+ return wordBreakerCode;
126
+ }
127
+
128
+ function compileJoinDecorator(spec: WordBreakerSpec, existingWordBreakerCode: string) {
129
+ // Bundle the source of the join decorator, as an IIFE,
130
+ // like this: (function join(breaker, joiners) {/*...*/}(breaker, joiners))
131
+ // The decorator will run IMMEDIATELY when the model is loaded,
132
+ // by the LMLayer returning the decorated word breaker to the
133
+ // LMLayer model.
134
+ let joinerExpr: string = JSON.stringify(spec.joinWordsAt)
135
+ return `(${decorateWithJoin.toString()}(${existingWordBreakerCode}, ${joinerExpr}))`;
136
+ }
137
+
138
+ function compileScriptOverrides(spec: WordBreakerSpec, existingWordBreakerCode: string) {
139
+ return `(${decorateWithScriptOverrides.toString()}(${existingWordBreakerCode}, '${spec.overrideScriptDefaults}'))`;
140
+ }
141
+
142
+ /**
143
+ * Compiles the base word breaker, that may be decorated later.
144
+ * Returns the source code of a JavaScript expression.
145
+ */
146
+ function compileInnerWordBreaker(spec: SimpleWordBreakerSpec): string {
147
+ if (typeof spec === "string") {
148
+ // It must be a builtin word breaker, so just instantiate it.
149
+ return `wordBreakers['${spec}']`;
150
+ } else {
151
+ // It must be a function:
152
+ return spec.toString()
153
+ // Note: the .toString() might just be the property name, but we want a
154
+ // plain function:
155
+ .replace(/^wordBreak(ing|er)\b/, 'function');
156
+ }
157
+ }
158
+
159
/**
 * Given a word breaker specification in any of the messy ways,
 * normalizes it to a common form that the compiler can deal with.
 *
 * @param wordBreakerSpec The `wordBreaker` value from the model source:
 *                        absent, a builtin name, a function, or a full spec
 *                        object with a `use` property.
 * @returns A spec object whose `use` property is always populated.
 * @throws Error if the value is none of the recognised forms.
 */
function normalizeWordBreakerSpec(wordBreakerSpec: LexicalModelSource["wordBreaker"]): WordBreakerSpec {
  if (wordBreakerSpec == undefined) {
    // Use the default word breaker when it's unspecified
    return { use: 'default' };
  } else if (isSimpleWordBreaker(wordBreakerSpec)) {
    // The word breaker was passed as a builtin name or a literal function;
    // wrap it so later stages only deal with the object form.
    return { use: wordBreakerSpec };
  } else if (wordBreakerSpec.use) {
    return wordBreakerSpec;
  } else {
    // Serialise the offending value: interpolating an object directly would
    // only ever print "[object Object]", which is useless for diagnosis.
    throw new Error(`Unknown word breaker: ${JSON.stringify(wordBreakerSpec)}`);
  }
}
176
+
177
+ function isSimpleWordBreaker(spec: WordBreakerSpec | SimpleWordBreakerSpec): spec is SimpleWordBreakerSpec {
178
+ return typeof spec === "function" || spec === "default" || spec === "ascii";
179
+ }
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Interfaces and constants used by the lexical model compiler. These target
3
+ * the LMLayer's internal worker code, so we provide those definitions too.
4
+ */
5
+
6
+ export interface LexicalModelDeclaration {
7
+ readonly format: 'trie-1.0'|'fst-foma-1.0'|'custom-1.0',
8
+ //... metadata ...
9
+ }
10
+
11
+ /**
12
+ * Keyman 14.0+ word breaker specification:
13
+ *
14
+ * Can support all old word breaking specification,
15
+ * but can also be extended with options.
16
+ *
17
+ * @since 14.0
18
+ */
19
+ export interface WordBreakerSpec {
20
+ readonly use: SimpleWordBreakerSpec;
21
+ /**
22
+ * If present, joins words that were split by the word breaker
23
+ * together at the given strings. e.g.,
24
+ *
25
+ * joinWordsAt: ['-'] // to keep hyphenated items together
26
+ *
27
+ * @since 14.0
28
+ */
29
+ readonly joinWordsAt?: string[];
30
+
31
+ /**
32
+ * Overrides word splitting behaviour for certain scripts.
33
+ * For example, specifying that spaces break words in certain South-East
34
+ * Asian scripts that otherwise do not use spaces.
35
+ *
36
+ * @since 14.0
37
+ */
38
+ readonly overrideScriptDefaults?: OverrideScriptDefaults;
39
+ }
40
+
41
+ /**
42
+ * Simplified word breaker specification.
43
+ *
44
+ * @since 11.0
45
+ */
46
+ export type SimpleWordBreakerSpec = 'default' | 'ascii' | WordBreakingFunction;
47
+
48
+ /**
49
+ * Simplifies input text to facilitate finding entries within a lexical model's
50
+ * lexicon.
51
+ * @since 11.0
52
+ */
53
+ export type SimpleWordformToKeySpec = (term: string) => string;
54
+
55
+ /**
56
+ * Simplifies input text to facilitate finding entries within a lexical model's
57
+ * lexicon, using the model's `applyCasing` function to assist in the keying process.
58
+ * @since 14.0
59
+ */
60
+ export type CasedWordformToKeySpec = (term: string, applyCasing?: CasingFunction) => string;
61
+
62
+ /**
63
+ * Simplifies input text to facilitate finding entries within a lexical model's
64
+ * lexicon.
65
+ */
66
+ export type WordformToKeySpec = SimpleWordformToKeySpec | CasedWordformToKeySpec;
67
+
68
+ /**
69
+ * Override the default word breaking behaviour for some scripts.
70
+ *
71
+ * There is currently only one option:
72
+ *
73
+ * 'break-words-at-spaces'
74
+ * : some South-East Asian scripts conventionally do not use space or any
75
+ * explicit word boundary character to write word breaks. These scripts are:
76
+ *
77
+ * * Burmese
78
+ * * Khmer
79
+ * * Thai
80
+ * * Lao
81
+ *
82
+ * (this list may be incomplete and extended in the future)
83
+ *
84
+ * For these scripts, the default word breaker breaks at **every**
85
+ * letter/syllable/ideograph. However, in languages that use these scripts BUT
86
+ * use spaces (or some other delimiter) as word breaks, enable
87
+ * 'break-words-at-spaces'; enabling 'break-words-at-spaces' prevents the word
88
+ * breaker from making too many breaks in these scripts.
89
+ *
90
+ * @since 14.0
91
+ */
92
+ export type OverrideScriptDefaults = 'break-words-at-spaces';
93
+
94
+
95
+ export interface LexicalModelSource extends LexicalModelDeclaration {
96
+ readonly sources: Array<string>;
97
+ /**
98
+ * The name of the type to instantiate (without parameters) as the base object for a custom predictive model.
99
+ */
100
+ readonly rootClass?: string
101
+
102
+ /**
103
+ * When set to `true`, suggestions will attempt to match the case of the input text even if
104
+ * the lexicon entries use a different casing scheme due to search term keying effects.
105
+ * @since 14.0
106
+ */
107
+ readonly languageUsesCasing?: boolean
108
+
109
+ /**
110
+ * Specifies the casing rules for a language. Should implement three casing forms:
111
+ * - 'lower' -- a fully-lowercased version of the text appropriate for the language's
112
+ * use of the writing system.
113
+ * - 'upper' -- a fully-uppercased version of the text
114
+ * - 'initial' -- a version preserving the input casing aside from the initial character,
115
+ * which is uppercased (like with proper nouns and sentence-initial words in English
116
+ * sentences.)
117
+ *
118
+ * This is only utilized if `languageUsesCasing` is defined and set to `true`.
119
+ * @since 14.0
120
+ */
121
+ readonly applyCasing?: CasingFunction
122
+
123
+ /**
124
+ * Which word breaker to use. Choose from:
125
+ *
126
+ * - 'default' -- breaks according to Unicode UAX #29 §4.1 Default Word
127
+ * Boundary Specification, which works well for *most* languages.
128
+ * - 'ascii' -- a very simple word breaker, for demonstration purposes only.
129
+ * - word breaking function -- provide your own function that breaks words.
130
+ * - class-based word-breaker - may be supported in the future.
131
+ */
132
+ readonly wordBreaker?: WordBreakerSpec | SimpleWordBreakerSpec;
133
+
134
+ /**
135
+ * How to simplify words, to convert them into simplified search keys
136
+ * This often involves removing accents, lowercasing, etc.
137
+ */
138
+ readonly searchTermToKey?: WordformToKeySpec;
139
+
140
+ /**
141
+ * Punctuation and spacing suggested by the model.
142
+ *
143
+ * @see LexicalModelPunctuation
144
+ */
145
+ readonly punctuation?: LexicalModelPunctuation;
146
+ }
147
+
148
+ export interface LexicalModelCompiled extends LexicalModelDeclaration {
149
+ readonly id: string;
150
+ }
package/src/main.ts ADDED
@@ -0,0 +1,62 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import ts from 'typescript';
4
+
5
+ import LexicalModelCompiler from './lexical-model-compiler.js';
6
+ import { LexicalModelSource } from './lexical-model.js';
7
+
8
+ /**
9
+ * Compiles a model.ts file, using paths relative to its location.
10
+ *
11
+ * @param filename path to model.ts source.
12
+ * @return model source code
13
+ */
14
+ export function compileModel(filename: string): string {
15
+ let modelSource = loadFromFilename(filename);
16
+ let containingDirectory = path.dirname(filename);
17
+
18
+ return (new LexicalModelCompiler)
19
+ .generateLexicalModelCode('<unknown>', modelSource, containingDirectory);
20
+ }
21
+
22
+ /**
23
+ * An ECMAScript module as emitted by the TypeScript compiler.
24
+ */
25
+ interface ES2015Module {
26
+ /** This is always true. */
27
+ __esModule: boolean;
28
+ 'default'?: unknown;
29
+ }
30
+
31
/**
 * Loads a lexical model's source module from the given filename.
 *
 * The file is read as UTF-8, transpiled from TypeScript to JavaScript
 * (without type checking), wrapped in a function taking `exports`, and
 * evaluated so that its exports can be captured.
 *
 * @param filename path to the model source file.
 * @returns the module's default export, treated as the model source.
 * @throws Error if the evaluated module is not flagged `__esModule` or has
 *         no (truthy) default export.
 */
export function loadFromFilename(filename: string): LexicalModelSource {
  let sourceCode = fs.readFileSync(filename, 'utf8');
  // Compile the module to JavaScript code.
  // NOTE: transpile module does a very simple TS to JS compilation.
  // It DOES NOT check for types!
  let compilationOutput = ts.transpile(sourceCode, {
    // Our runtime only supports ES3 with Node/CommonJS modules on Android 5.0.
    // When we drop Android 5.0 support, we can update this to a `ScriptTarget`
    // matrix against target version of Keyman, here and in
    // lexical-model-compiler.ts.
    target: ts.ScriptTarget.ES3,
    module: ts.ModuleKind.CommonJS,
  });
  // Turn the module into a function in which we can inject a global.
  let moduleCode = '(function(exports){' + compilationOutput + '})';

  // Run the module; its exports will be assigned to `moduleExports`.
  // NOTE(review): `eval` executes the model source with the privileges of
  // this process, so only trusted model files should be compiled.
  let moduleExports: Partial<ES2015Module> = {};
  let module = eval(moduleCode);
  module(moduleExports);

  if (!moduleExports['__esModule'] || !moduleExports['default']) {
    throw new Error(`Model source '${filename}' does not have a default export. Did you remember to write \`export default source;\`?`);
  }

  return moduleExports['default'] as LexicalModelSource;
}
@@ -0,0 +1,203 @@
1
+ // TODO: merge with kmc keyboard-compiler errors
2
+ /**
3
+ * Log levels.
4
+ *
5
+ * Note: Currently, this acts like a bit set, where the upper 4 bits of an
6
+ * unsigned 16 bit value are the log level flags.
7
+ *
8
+ * Warning: even though these look like bitmasks, these flags may not be
9
+ * combined.
10
+ */
11
+ export enum LogLevel {
12
+ CERR_FATAL = 0x8000,
13
+ CERR_ERROR = 0x4000,
14
+ CERR_WARNING = 0x2000,
15
+ CERR_HINT = 0x1000
16
+ };
17
+
18
+ /**
19
+ * Error codes. Use these when logging messages.
20
+ *
21
+ * Extends https://github.com/keymanapp/keyman/blob/99db3c0d2448f448242e6397f9d72e9a7ccee4b9/windows/src/global/inc/Comperr.h
22
+ */
23
+ export enum KeymanCompilerError {
24
+ CERR_LEXICAL_MODEL_MIN = 0x0800,
25
+ CERR_LEXICAL_MODEL_MAX = 0x08FF,
26
+
27
+
28
+ CERR_FATAL_LM = LogLevel.CERR_FATAL | CERR_LEXICAL_MODEL_MIN,
29
+ /* Place all fatal LM compiler errors here! */
30
+
31
+ CERR_ERROR_LM = LogLevel.CERR_ERROR | CERR_LEXICAL_MODEL_MIN,
32
+ /* Place all recoverable LM compiler errors here! */
33
+
34
+ CERR_WARN_LM = LogLevel.CERR_WARNING | CERR_LEXICAL_MODEL_MIN,
35
+ /* Place all LM compiler warnings here! */
36
+ CWARN_MixedNormalizationForms = 0x2801, /* CERR_WARN_LM + 1 */
37
+ CWARN_DuplicateWordInSameFile = 0x2802, /* CERR_WARN_LM + 2 */
38
+
39
+ CERR_HINT_LM = LogLevel.CERR_HINT | CERR_LEXICAL_MODEL_MIN,
40
+ /* Place all LM compiler hints here! */
41
+
42
+ /* Errors that are not specific to the lexical model compiler, from comperr.h: */
43
+ CWARN_TooManyErrorsOrWarnings = 0x20A7,
44
+ }
45
+
46
+ /**
47
+ * Human-readable titles for the various log levels.
48
+ *
49
+ * Taken from https://github.com/keymanapp/keyman/blob/d83cfffe511ce65b781f919e89e3693146844849/windows/src/developer/TIKE/project/Keyman.Developer.System.Project.ProjectLog.pas#L39-L46
50
+ */
51
+ const LOG_LEVEL_TITLE: {[level in LogLevel]: string} = {
52
+ [LogLevel.CERR_HINT]: 'Hint',
53
+ [LogLevel.CERR_WARNING]: 'Warning',
54
+ [LogLevel.CERR_ERROR]: 'Error',
55
+ [LogLevel.CERR_FATAL]: 'Fatal Error',
56
+ };
57
+
58
+ /**
59
+ * How many errors or warnings are too many!
60
+ */
61
+ export const MAX_MESSAGES = 100;
62
+
63
+ /**
64
+ * Direct where log messages go.
65
+ */
66
+ let _logHandler: (log: LogMessage) => void = printLogs;
67
+
68
+ /**
69
+ * How many logs or warnings have been witnessed so far.
70
+ */
71
+ let _messagesSeen = 0;
72
+
73
+ /**
74
+ * Logs compiler messages (warnings, errors, logs).
75
+ *
76
+ * @param code Error code
77
+ * @param message A helpful message!
78
+ * @param source [optional] the filename/line number in the source that induced this error
79
+ *
80
+ * @see https://github.com/keymanapp/keyman/blob/99db3c0d2448f448242e6397f9d72e9a7ccee4b9/windows/src/developer/TIKE/project/Keyman.Developer.System.Project.ProjectLog.pas#L60-L77
81
+ */
82
+ export function log(code: KeymanCompilerError, message: string, source?: FilenameAndLineNo) {
83
+ // Ignore the request if there are too many messages
84
+ if (_messagesSeen > MAX_MESSAGES)
85
+ return;
86
+
87
+ let logMessage = source
88
+ ? new LogMessageFromSource(code, message, source)
89
+ : new OrdinaryLogMessage(code, message);
90
+
91
+ _logHandler(logMessage)
92
+ _messagesSeen++;
93
+
94
+ if (_messagesSeen > MAX_MESSAGES) {
95
+ _logHandler(new OrdinaryLogMessage(
96
+ KeymanCompilerError.CWARN_TooManyErrorsOrWarnings,
97
+ `More than ${MAX_MESSAGES} warnings or errors received; suppressing further messages`
98
+ ));
99
+ }
100
+ }
101
+
102
+ /**
103
+ * Override where log messages go.
104
+ *
105
+ * @param fn The desired log message handler.
106
+ */
107
+ export function redirectLogMessagesTo(fn: (log: LogMessage) => void) {
108
+ _logHandler = fn;
109
+ }
110
+
111
+ /**
112
+ * Reset the log message handler to the default.
113
+ */
114
+ export function resetLogMessageHandler() {
115
+ _logHandler = printLogs;
116
+ _messagesSeen = 0;
117
+ }
118
+
119
+ /**
120
+ * Prints log messages to stdout. The default log action.
121
+ */
122
+ export function printLogs(log: LogMessage): void {
123
+ console.log(log.format());
124
+ }
125
+
126
+ /**
127
+ * Duct tapes together a filename and a line number of a log.
128
+ */
129
+ interface FilenameAndLineNo {
130
+ readonly filename: string;
131
+ readonly lineno: number;
132
+ }
133
+
134
+ /**
135
+ * A log message that knows how to format itself.
136
+ */
137
+ export interface LogMessage {
138
+ readonly code: KeymanCompilerError;
139
+ readonly level: LogLevel;
140
+ readonly message: string;
141
+
142
+ format(): string;
143
+ }
144
+
145
+ /**
146
+ * Concrete implementation of the log message.
147
+ */
148
+ class OrdinaryLogMessage implements LogMessage {
149
+ readonly code: KeymanCompilerError;
150
+ readonly message: string;
151
+
152
+ constructor(code: KeymanCompilerError, message: string) {
153
+ this.code = code;
154
+ this.message = message;
155
+ }
156
+
157
+ get level(): LogLevel {
158
+ return this.code & 0xF000;
159
+ }
160
+
161
+ determineLogLevelTitle(): string {
162
+ return LOG_LEVEL_TITLE[this.level] || '';
163
+ }
164
+
165
+ format(): string {
166
+ let prefix = this.determineLogLevelTitle();
167
+ if (prefix)
168
+ prefix = `${prefix}: `;
169
+
170
+ return `${prefix}${h(this.code)} ${this.message}`
171
+ }
172
+ }
173
+
174
+ /**
175
+ * A log message with a filename and line number.
176
+ */
177
+ class LogMessageFromSource extends OrdinaryLogMessage {
178
+ readonly filename: string;
179
+ readonly lineno: number;
180
+
181
+ constructor(code: KeymanCompilerError, message: string, source: FilenameAndLineNo) {
182
+ super(code, message);
183
+ this.filename = source.filename;
184
+ this.lineno = source.lineno;
185
+ }
186
+
187
+ format(): string {
188
+ let originalMessage = super.format();
189
+ return `${this.filename} (${this.lineno}): ${originalMessage}`;
190
+ }
191
+ }
192
+
193
/**
 * Format a number as an uppercase hexadecimal string, left-padded with
 * zeros to at least 4 digits.
 *
 * Values that need more than 4 hex digits are returned in full, neither
 * padded nor truncated.
 *
 * @param n The number to format.
 * @returns The hexadecimal representation, e.g. `0xA7` becomes `"00A7"`.
 */
function h(n: number) {
  let formatted = n.toString(16).toUpperCase();
  if (formatted.length < 4) {
    // Prepend the missing zeros; the digits themselves must be kept.
    formatted = '0'.repeat(4 - formatted.length) + formatted;
  }

  return formatted;
}