@keymanapp/kmc-model 17.0.85-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/.nyc_output/coverage-10524-1681239236645-0.json +1 -0
  2. package/Makefile +38 -0
  3. package/build/cjs-src/lexical-model-compiler.cjs +152688 -0
  4. package/build/src/build-trie.d.ts +40 -0
  5. package/build/src/build-trie.d.ts.map +1 -0
  6. package/build/src/build-trie.js +362 -0
  7. package/build/src/build-trie.js.map +1 -0
  8. package/build/src/join-word-breaker-decorator.d.ts +10 -0
  9. package/build/src/join-word-breaker-decorator.d.ts.map +1 -0
  10. package/build/src/join-word-breaker-decorator.js +121 -0
  11. package/build/src/join-word-breaker-decorator.js.map +1 -0
  12. package/build/src/lexical-model-compiler.d.ts +19 -0
  13. package/build/src/lexical-model-compiler.d.ts.map +1 -0
  14. package/build/src/lexical-model-compiler.js +155 -0
  15. package/build/src/lexical-model-compiler.js.map +1 -0
  16. package/build/src/lexical-model.d.ts +135 -0
  17. package/build/src/lexical-model.d.ts.map +1 -0
  18. package/build/src/lexical-model.js +6 -0
  19. package/build/src/lexical-model.js.map +1 -0
  20. package/build/src/main.d.ts +15 -0
  21. package/build/src/main.d.ts.map +1 -0
  22. package/build/src/main.js +46 -0
  23. package/build/src/main.js.map +1 -0
  24. package/build/src/model-compiler-errors.d.ts +77 -0
  25. package/build/src/model-compiler-errors.d.ts.map +1 -0
  26. package/build/src/model-compiler-errors.js +156 -0
  27. package/build/src/model-compiler-errors.js.map +1 -0
  28. package/build/src/model-defaults.d.ts +56 -0
  29. package/build/src/model-defaults.d.ts.map +1 -0
  30. package/build/src/model-defaults.js +106 -0
  31. package/build/src/model-defaults.js.map +1 -0
  32. package/build/src/model-definitions.d.ts +71 -0
  33. package/build/src/model-definitions.d.ts.map +1 -0
  34. package/build/src/model-definitions.js +189 -0
  35. package/build/src/model-definitions.js.map +1 -0
  36. package/build/src/script-overrides-decorator.d.ts +4 -0
  37. package/build/src/script-overrides-decorator.d.ts.map +1 -0
  38. package/build/src/script-overrides-decorator.js +63 -0
  39. package/build/src/script-overrides-decorator.js.map +1 -0
  40. package/build/test/helpers/index.d.ts +69 -0
  41. package/build/test/helpers/index.d.ts.map +1 -0
  42. package/build/test/helpers/index.js +160 -0
  43. package/build/test/helpers/index.js.map +1 -0
  44. package/build/test/test-compile-model-with-pseudoclosure.d.ts +2 -0
  45. package/build/test/test-compile-model-with-pseudoclosure.d.ts.map +1 -0
  46. package/build/test/test-compile-model-with-pseudoclosure.js +200 -0
  47. package/build/test/test-compile-model-with-pseudoclosure.js.map +1 -0
  48. package/build/test/test-compile-model.d.ts +2 -0
  49. package/build/test/test-compile-model.d.ts.map +1 -0
  50. package/build/test/test-compile-model.js +30 -0
  51. package/build/test/test-compile-model.js.map +1 -0
  52. package/build/test/test-compile-trie.d.ts +2 -0
  53. package/build/test/test-compile-trie.d.ts.map +1 -0
  54. package/build/test/test-compile-trie.js +125 -0
  55. package/build/test/test-compile-trie.js.map +1 -0
  56. package/build/test/test-default-apply-case.d.ts +2 -0
  57. package/build/test/test-default-apply-case.d.ts.map +1 -0
  58. package/build/test/test-default-apply-case.js +105 -0
  59. package/build/test/test-default-apply-case.js.map +1 -0
  60. package/build/test/test-default-search-term-to-key.d.ts +2 -0
  61. package/build/test/test-default-search-term-to-key.d.ts.map +1 -0
  62. package/build/test/test-default-search-term-to-key.js +148 -0
  63. package/build/test/test-default-search-term-to-key.js.map +1 -0
  64. package/build/test/test-error-logger.d.ts +2 -0
  65. package/build/test/test-error-logger.d.ts.map +1 -0
  66. package/build/test/test-error-logger.js +26 -0
  67. package/build/test/test-error-logger.js.map +1 -0
  68. package/build/test/test-join-word-breaker.d.ts +2 -0
  69. package/build/test/test-join-word-breaker.d.ts.map +1 -0
  70. package/build/test/test-join-word-breaker.js +84 -0
  71. package/build/test/test-join-word-breaker.js.map +1 -0
  72. package/build/test/test-model-definitions.d.ts +2 -0
  73. package/build/test/test-model-definitions.d.ts.map +1 -0
  74. package/build/test/test-model-definitions.js +165 -0
  75. package/build/test/test-model-definitions.js.map +1 -0
  76. package/build/test/test-override-script-defaults.d.ts +2 -0
  77. package/build/test/test-override-script-defaults.d.ts.map +1 -0
  78. package/build/test/test-override-script-defaults.js +28 -0
  79. package/build/test/test-override-script-defaults.js.map +1 -0
  80. package/build/test/test-parse-wordlist.d.ts +2 -0
  81. package/build/test/test-parse-wordlist.d.ts.map +1 -0
  82. package/build/test/test-parse-wordlist.js +110 -0
  83. package/build/test/test-parse-wordlist.js.map +1 -0
  84. package/build/test/test-punctuation.d.ts +2 -0
  85. package/build/test/test-punctuation.d.ts.map +1 -0
  86. package/build/test/test-punctuation.js +31 -0
  87. package/build/test/test-punctuation.js.map +1 -0
  88. package/build/test/tsconfig.tsbuildinfo +1 -0
  89. package/build/test/wordbreakers/data.d.ts +35 -0
  90. package/build/test/wordbreakers/data.d.ts.map +1 -0
  91. package/build/test/wordbreakers/data.js +1778 -0
  92. package/build/test/wordbreakers/data.js.map +1 -0
  93. package/build/test/wordbreakers/default-wordbreaker-esm.d.ts +10 -0
  94. package/build/test/wordbreakers/default-wordbreaker-esm.d.ts.map +1 -0
  95. package/build/test/wordbreakers/default-wordbreaker-esm.js +354 -0
  96. package/build/test/wordbreakers/default-wordbreaker-esm.js.map +1 -0
  97. package/build/tsconfig.tsbuildinfo +1 -0
  98. package/build.sh +73 -0
  99. package/coverage/lcov-report/base.css +224 -0
  100. package/coverage/lcov-report/block-navigation.js +87 -0
  101. package/coverage/lcov-report/favicon.png +0 -0
  102. package/coverage/lcov-report/index.html +161 -0
  103. package/coverage/lcov-report/prettify.css +1 -0
  104. package/coverage/lcov-report/prettify.js +2 -0
  105. package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
  106. package/coverage/lcov-report/sorter.js +196 -0
  107. package/coverage/lcov-report/src/build-trie.ts.html +1618 -0
  108. package/coverage/lcov-report/src/index.html +221 -0
  109. package/coverage/lcov-report/src/join-word-breaker-decorator.ts.html +487 -0
  110. package/coverage/lcov-report/src/lexical-model-compiler.ts.html +622 -0
  111. package/coverage/lcov-report/src/main.ts.html +271 -0
  112. package/coverage/lcov-report/src/model-compiler-errors.ts.html +691 -0
  113. package/coverage/lcov-report/src/model-defaults.ts.html +415 -0
  114. package/coverage/lcov-report/src/model-definitions.ts.html +748 -0
  115. package/coverage/lcov-report/src/script-overrides-decorator.ts.html +310 -0
  116. package/coverage/lcov-report/test/helpers/index.html +116 -0
  117. package/coverage/lcov-report/test/helpers/index.ts.html +646 -0
  118. package/coverage/lcov-report/test/index.html +266 -0
  119. package/coverage/lcov-report/test/test-compile-model-with-pseudoclosure.ts.html +802 -0
  120. package/coverage/lcov-report/test/test-compile-model.ts.html +187 -0
  121. package/coverage/lcov-report/test/test-compile-trie.ts.html +541 -0
  122. package/coverage/lcov-report/test/test-default-apply-case.ts.html +466 -0
  123. package/coverage/lcov-report/test/test-default-search-term-to-key.ts.html +628 -0
  124. package/coverage/lcov-report/test/test-error-logger.ts.html +196 -0
  125. package/coverage/lcov-report/test/test-join-word-breaker.ts.html +376 -0
  126. package/coverage/lcov-report/test/test-model-definitions.ts.html +676 -0
  127. package/coverage/lcov-report/test/test-override-script-defaults.ts.html +184 -0
  128. package/coverage/lcov-report/test/test-parse-wordlist.ts.html +466 -0
  129. package/coverage/lcov-report/test/test-punctuation.ts.html +190 -0
  130. package/coverage/lcov-report/test/wordbreakers/data.ts.html +5413 -0
  131. package/coverage/lcov-report/test/wordbreakers/default-wordbreaker-esm.ts.html +1234 -0
  132. package/coverage/lcov-report/test/wordbreakers/index.html +131 -0
  133. package/coverage/lcov.info +5969 -0
  134. package/package.json +61 -0
  135. package/src/build-trie.ts +511 -0
  136. package/src/join-word-breaker-decorator.ts +134 -0
  137. package/src/lexical-model-compiler.ts +179 -0
  138. package/src/lexical-model.ts +150 -0
  139. package/src/main.ts +62 -0
  140. package/src/model-compiler-errors.ts +203 -0
  141. package/src/model-defaults.ts +111 -0
  142. package/src/model-definitions.ts +222 -0
  143. package/src/script-overrides-decorator.ts +75 -0
  144. package/test/README.md +15 -0
  145. package/test/fixtures/example.qaa.joinwordbreaker/example.qaa.joinwordbreaker.model.ts +10 -0
  146. package/test/fixtures/example.qaa.joinwordbreaker/wordlist.tsv +3 -0
  147. package/test/fixtures/example.qaa.scriptusesspaces/example.qaa.scriptusesspaces.model.ts +10 -0
  148. package/test/fixtures/example.qaa.scriptusesspaces/wordlist.tsv +8 -0
  149. package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.kmp.json +45 -0
  150. package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.kps +35 -0
  151. package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.ts +6 -0
  152. package/test/fixtures/example.qaa.sencoten/wordlist.tsv +10 -0
  153. package/test/fixtures/example.qaa.smp/example.qaa.smp.model.ts +6 -0
  154. package/test/fixtures/example.qaa.smp/wordlist.tsv +5 -0
  155. package/test/fixtures/example.qaa.trivial/example.qaa.trivial.model.ts +5 -0
  156. package/test/fixtures/example.qaa.trivial/wordlist.tsv +3 -0
  157. package/test/fixtures/example.qaa.utf16be/example.qaa.utf16be.model.ts +5 -0
  158. package/test/fixtures/example.qaa.utf16be/wordlist.txt +0 -0
  159. package/test/fixtures/example.qaa.utf16le/example.qaa.utf16le.model.ts +5 -0
  160. package/test/fixtures/example.qaa.utf16le/wordlist.txt +0 -0
  161. package/test/fixtures/example.qaa.wordbreaker/example.qaa.wordbreaker.model.ts +9 -0
  162. package/test/fixtures/example.qaa.wordbreaker/wordlist.tsv +3 -0
  163. package/test/helpers/index.ts +187 -0
  164. package/test/test-compile-model-with-pseudoclosure.ts +239 -0
  165. package/test/test-compile-model.ts +34 -0
  166. package/test/test-compile-trie.ts +152 -0
  167. package/test/test-default-apply-case.ts +128 -0
  168. package/test/test-default-search-term-to-key.ts +181 -0
  169. package/test/test-error-logger.ts +38 -0
  170. package/test/test-join-word-breaker.ts +97 -0
  171. package/test/test-model-definitions.ts +198 -0
  172. package/test/test-override-script-defaults.ts +33 -0
  173. package/test/test-parse-wordlist.ts +127 -0
  174. package/test/test-punctuation.ts +35 -0
  175. package/test/tsconfig.json +22 -0
  176. package/test/wordbreakers/README.md +3 -0
  177. package/test/wordbreakers/data.ts +1776 -0
  178. package/test/wordbreakers/default-wordbreaker-esm.ts +383 -0
  179. package/tools/create-override-script-regexp.ts +145 -0
  180. package/tsconfig.json +17 -0
@@ -0,0 +1,127 @@
1
+ import {parseWordListFromContents, parseWordListFromFilename, WordList} from '../src/build-trie.js';
2
+ import {assert} from 'chai';
3
+ import 'mocha';
4
+ import { makePathToFixture, LogHoarder } from './helpers/index.js';
5
+ import { KeymanCompilerError } from '../src/model-compiler-errors.js';
6
+
7
+ const BOM = '\ufeff';
8
+ const SENCOTEN_WORDLIST = {
9
+ 'TŦE': 13644,
10
+ 'E': 9134,
11
+ 'SEN': 4816,
12
+ 'Ȼ': 3479,
13
+ 'SW̱': 2621,
14
+ 'NIȽ': 2314,
15
+ 'U¸': 2298,
16
+ 'I¸': 1988,
17
+ 'ȻSE': 1925,
18
+ 'I': 1884
19
+ };
20
+
21
+ describe('parsing a word list', function () {
22
+ beforeEach(function () {
23
+ this.logHoarder = (new LogHoarder).install()
24
+ })
25
+
26
+ afterEach(function () {
27
+ this.logHoarder.uninstall();
28
+ delete this.logHoarder;
29
+ })
30
+
31
+ it('should remove the UTF-8 byte order mark from files', function () {
32
+ let word = 'hello';
33
+ let count = 1;
34
+ let expected: WordList = {};
35
+ expected[word] = count;
36
+
37
+ let file = `# this is a comment\n${word}\t${count}`;
38
+ let withoutBOM: WordList = {};
39
+ parseWordListFromContents(withoutBOM, file);
40
+ assert.deepEqual(withoutBOM, expected, "expected regular file to parse properly");
41
+ assert.isFalse(this.logHoarder.hasSeenWarnings());
42
+
43
+ let withBOM: WordList = {};
44
+ parseWordListFromContents(withBOM, `${BOM}${file}`)
45
+ assert.deepEqual(withBOM, expected, "expected BOM to be ignored");
46
+ assert.isFalse(this.logHoarder.hasSeenWarnings());
47
+ });
48
+
49
+ it('should read word lists in UTF-8', function () {
50
+ // N.B.: this is the format exported by Google Drive when selecting "TSV".
51
+ const filename = makePathToFixture('example.qaa.sencoten', 'wordlist.tsv');
52
+ let wordlist: WordList = {};
53
+ parseWordListFromFilename(wordlist, filename);
54
+
55
+ assert.deepEqual(wordlist, SENCOTEN_WORDLIST);
56
+ assert.isFalse(this.logHoarder.hasSeenWarnings());
57
+ });
58
+
59
+ it('should read word lists in UTF-16 little-endian (with BOM)', function () {
60
+ // N.B.: this is the format exported by MS Excel when selecting
61
+ // "UTF-16" text (tested on Excel for macOS).
62
+ const filename = makePathToFixture('example.qaa.utf16le', 'wordlist.txt');
63
+ let wordlist: WordList = {};
64
+ parseWordListFromFilename(wordlist, filename);
65
+
66
+ assert.deepEqual(wordlist, SENCOTEN_WORDLIST);
67
+ assert.isFalse(this.logHoarder.hasSeenWarnings());
68
+ });
69
+
70
+ it('should NOT read word lists in UTF-16 big-endian (with BOM)', function () {
71
+ // N.B.: Does anything output this format...?
72
+ const filename = makePathToFixture('example.qaa.utf16be', 'wordlist.txt');
73
+ let wordlist: WordList = {};
74
+ assert.throws(() => {
75
+ parseWordListFromFilename(wordlist, filename);
76
+ }, 'UTF-16BE is unsupported');
77
+ });
78
+
79
+ it('should merge duplicate entries in a wordlist', function () {
80
+ // Tests that we merge NFC+NFD entries and identical entries, trimming whitespace
81
+ // Note: we build the wordlist from an array to make clear that we have unnormalised inputs
82
+ const words = [
83
+ 'hello', //1
84
+ 'hello\u0301', //2, NFD helló
85
+ 'hell\u00f3', //3, NFC helló
86
+ ' hello ', //4, expect to trim whitespace
87
+ 'hello']; //5
88
+
89
+ const expected: WordList = {
90
+ 'hello': 10, /* 1+4+5 trimmed and identical */
91
+ 'hell\u00f3': 5, /* 2+3 normalised to NFC */
92
+ };
93
+
94
+ // Build a wordlist from the array
95
+ let file = `# this is a comment\n`;
96
+ for(let i = 0; i < words.length; i++) {
97
+ file += `${words[i]}\t${i+1}\n`;
98
+ }
99
+ let repeatedWords: WordList = {};
100
+ parseWordListFromContents(repeatedWords, file);
101
+
102
+ assert.deepEqual(repeatedWords, expected);
103
+
104
+ assert.isTrue(this.logHoarder.hasSeenWarnings());
105
+ // hello has been seen multiple times:
106
+ assert.isTrue(this.logHoarder.hasSeenCode(KeymanCompilerError.CWARN_DuplicateWordInSameFile));
107
+ // helló and hello + U+0301 have both been seen:
108
+ assert.isTrue(this.logHoarder.hasSeenCode(KeymanCompilerError.CWARN_MixedNormalizationForms));
109
+
110
+ // Let's parse another file:
111
+
112
+ this.logHoarder.clear();
113
+ // Now, parse a DIFFERENT file, but with an NFD entry.
114
+ parseWordListFromContents(repeatedWords, "hello\u0301\t5\n");
115
+ assert.isTrue(this.logHoarder.hasSeenWarnings())
116
+ // hello + U+0301 (NFD) has been seen, but...
117
+ assert.isTrue(this.logHoarder.hasSeenCode(KeymanCompilerError.CWARN_MixedNormalizationForms));
118
+ // BUT! We have not seen a duplicate **within the same file**
119
+ assert.isFalse(this.logHoarder.hasSeenCode(KeymanCompilerError.CWARN_DuplicateWordInSameFile));
120
+
121
+ assert.deepEqual(repeatedWords, {
122
+ hello: expected['hello'],
123
+ // should have seen more of this entry:
124
+ "hell\u00f3": expected["hell\u00f3"] + 5,
125
+ });
126
+ });
127
+ });
@@ -0,0 +1,35 @@
1
+ import LexicalModelCompiler from '../src/lexical-model-compiler.js';
2
+ import {assert} from 'chai';
3
+ import 'mocha';
4
+
5
+ import { makePathToFixture, compileModelSourceCode } from './helpers/index.js';
6
+
7
+ describe('LexicalModelCompiler', function () {
8
+ describe('specifying punctuation', function () {
9
+ const MODEL_ID = 'example.qaa.trivial';
10
+ const PATH = makePathToFixture(MODEL_ID);
11
+
12
+ it('should compile punctuation into the generated code', function () {
13
+ let compiler = new LexicalModelCompiler;
14
+ let code = compiler.generateLexicalModelCode(MODEL_ID, {
15
+ format: 'trie-1.0',
16
+ sources: ['wordlist.tsv'],
17
+ punctuation: {
18
+ quotesForKeepSuggestion: { open: `«`, close: `»`},
19
+ insertAfterWord: " " , // OGHAM SPACE MARK
20
+ }
21
+ }, PATH) as string;
22
+
23
+ // Check that the punctuation actually made it into the code:
24
+ assert.match(code, /«/);
25
+ assert.match(code, /»/);
26
+ // Ensure we inserted that OGHAM SPACE MARK!
27
+ assert.match(code, /\u1680/);
28
+
29
+ // Make sure it compiles!
30
+ let compilation = compileModelSourceCode(code);
31
+ assert.isFalse(compilation.hasSyntaxError);
32
+ assert.isNotNull(compilation.exportedModel);
33
+ });
34
+ })
35
+ });
@@ -0,0 +1,22 @@
1
+ {
2
+ "extends": "../../kmc/tsconfig.kmc-base.json",
3
+
4
+ "compilerOptions": {
5
+ "rootDir": ".",
6
+ "rootDirs": ["./", "../src/"],
7
+ "outDir": "../build/test",
8
+ "esModuleInterop": true,
9
+ "moduleResolution": "node16",
10
+ "allowSyntheticDefaultImports": true,
11
+ "baseUrl": ".",
12
+ },
13
+ "include": [
14
+ "**/test-*.ts",
15
+ "*.ts",
16
+ "helpers/*.ts",
17
+ "wordbreakers/*.ts"
18
+ ],
19
+ "references": [
20
+ { "path": "../" },
21
+ ]
22
+ }
@@ -0,0 +1,3 @@
1
+ Wordbreakers ES Module format
2
+
3
+ TODO: once we move common/models/wordbreakers to ESM, eliminate this.