@keymanapp/kmc-model 17.0.85-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.nyc_output/coverage-10524-1681239236645-0.json +1 -0
- package/Makefile +38 -0
- package/build/cjs-src/lexical-model-compiler.cjs +152688 -0
- package/build/src/build-trie.d.ts +40 -0
- package/build/src/build-trie.d.ts.map +1 -0
- package/build/src/build-trie.js +362 -0
- package/build/src/build-trie.js.map +1 -0
- package/build/src/join-word-breaker-decorator.d.ts +10 -0
- package/build/src/join-word-breaker-decorator.d.ts.map +1 -0
- package/build/src/join-word-breaker-decorator.js +121 -0
- package/build/src/join-word-breaker-decorator.js.map +1 -0
- package/build/src/lexical-model-compiler.d.ts +19 -0
- package/build/src/lexical-model-compiler.d.ts.map +1 -0
- package/build/src/lexical-model-compiler.js +155 -0
- package/build/src/lexical-model-compiler.js.map +1 -0
- package/build/src/lexical-model.d.ts +135 -0
- package/build/src/lexical-model.d.ts.map +1 -0
- package/build/src/lexical-model.js +6 -0
- package/build/src/lexical-model.js.map +1 -0
- package/build/src/main.d.ts +15 -0
- package/build/src/main.d.ts.map +1 -0
- package/build/src/main.js +46 -0
- package/build/src/main.js.map +1 -0
- package/build/src/model-compiler-errors.d.ts +77 -0
- package/build/src/model-compiler-errors.d.ts.map +1 -0
- package/build/src/model-compiler-errors.js +156 -0
- package/build/src/model-compiler-errors.js.map +1 -0
- package/build/src/model-defaults.d.ts +56 -0
- package/build/src/model-defaults.d.ts.map +1 -0
- package/build/src/model-defaults.js +106 -0
- package/build/src/model-defaults.js.map +1 -0
- package/build/src/model-definitions.d.ts +71 -0
- package/build/src/model-definitions.d.ts.map +1 -0
- package/build/src/model-definitions.js +189 -0
- package/build/src/model-definitions.js.map +1 -0
- package/build/src/script-overrides-decorator.d.ts +4 -0
- package/build/src/script-overrides-decorator.d.ts.map +1 -0
- package/build/src/script-overrides-decorator.js +63 -0
- package/build/src/script-overrides-decorator.js.map +1 -0
- package/build/test/helpers/index.d.ts +69 -0
- package/build/test/helpers/index.d.ts.map +1 -0
- package/build/test/helpers/index.js +160 -0
- package/build/test/helpers/index.js.map +1 -0
- package/build/test/test-compile-model-with-pseudoclosure.d.ts +2 -0
- package/build/test/test-compile-model-with-pseudoclosure.d.ts.map +1 -0
- package/build/test/test-compile-model-with-pseudoclosure.js +200 -0
- package/build/test/test-compile-model-with-pseudoclosure.js.map +1 -0
- package/build/test/test-compile-model.d.ts +2 -0
- package/build/test/test-compile-model.d.ts.map +1 -0
- package/build/test/test-compile-model.js +30 -0
- package/build/test/test-compile-model.js.map +1 -0
- package/build/test/test-compile-trie.d.ts +2 -0
- package/build/test/test-compile-trie.d.ts.map +1 -0
- package/build/test/test-compile-trie.js +125 -0
- package/build/test/test-compile-trie.js.map +1 -0
- package/build/test/test-default-apply-case.d.ts +2 -0
- package/build/test/test-default-apply-case.d.ts.map +1 -0
- package/build/test/test-default-apply-case.js +105 -0
- package/build/test/test-default-apply-case.js.map +1 -0
- package/build/test/test-default-search-term-to-key.d.ts +2 -0
- package/build/test/test-default-search-term-to-key.d.ts.map +1 -0
- package/build/test/test-default-search-term-to-key.js +148 -0
- package/build/test/test-default-search-term-to-key.js.map +1 -0
- package/build/test/test-error-logger.d.ts +2 -0
- package/build/test/test-error-logger.d.ts.map +1 -0
- package/build/test/test-error-logger.js +26 -0
- package/build/test/test-error-logger.js.map +1 -0
- package/build/test/test-join-word-breaker.d.ts +2 -0
- package/build/test/test-join-word-breaker.d.ts.map +1 -0
- package/build/test/test-join-word-breaker.js +84 -0
- package/build/test/test-join-word-breaker.js.map +1 -0
- package/build/test/test-model-definitions.d.ts +2 -0
- package/build/test/test-model-definitions.d.ts.map +1 -0
- package/build/test/test-model-definitions.js +165 -0
- package/build/test/test-model-definitions.js.map +1 -0
- package/build/test/test-override-script-defaults.d.ts +2 -0
- package/build/test/test-override-script-defaults.d.ts.map +1 -0
- package/build/test/test-override-script-defaults.js +28 -0
- package/build/test/test-override-script-defaults.js.map +1 -0
- package/build/test/test-parse-wordlist.d.ts +2 -0
- package/build/test/test-parse-wordlist.d.ts.map +1 -0
- package/build/test/test-parse-wordlist.js +110 -0
- package/build/test/test-parse-wordlist.js.map +1 -0
- package/build/test/test-punctuation.d.ts +2 -0
- package/build/test/test-punctuation.d.ts.map +1 -0
- package/build/test/test-punctuation.js +31 -0
- package/build/test/test-punctuation.js.map +1 -0
- package/build/test/tsconfig.tsbuildinfo +1 -0
- package/build/test/wordbreakers/data.d.ts +35 -0
- package/build/test/wordbreakers/data.d.ts.map +1 -0
- package/build/test/wordbreakers/data.js +1778 -0
- package/build/test/wordbreakers/data.js.map +1 -0
- package/build/test/wordbreakers/default-wordbreaker-esm.d.ts +10 -0
- package/build/test/wordbreakers/default-wordbreaker-esm.d.ts.map +1 -0
- package/build/test/wordbreakers/default-wordbreaker-esm.js +354 -0
- package/build/test/wordbreakers/default-wordbreaker-esm.js.map +1 -0
- package/build/tsconfig.tsbuildinfo +1 -0
- package/build.sh +73 -0
- package/coverage/lcov-report/base.css +224 -0
- package/coverage/lcov-report/block-navigation.js +87 -0
- package/coverage/lcov-report/favicon.png +0 -0
- package/coverage/lcov-report/index.html +161 -0
- package/coverage/lcov-report/prettify.css +1 -0
- package/coverage/lcov-report/prettify.js +2 -0
- package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
- package/coverage/lcov-report/sorter.js +196 -0
- package/coverage/lcov-report/src/build-trie.ts.html +1618 -0
- package/coverage/lcov-report/src/index.html +221 -0
- package/coverage/lcov-report/src/join-word-breaker-decorator.ts.html +487 -0
- package/coverage/lcov-report/src/lexical-model-compiler.ts.html +622 -0
- package/coverage/lcov-report/src/main.ts.html +271 -0
- package/coverage/lcov-report/src/model-compiler-errors.ts.html +691 -0
- package/coverage/lcov-report/src/model-defaults.ts.html +415 -0
- package/coverage/lcov-report/src/model-definitions.ts.html +748 -0
- package/coverage/lcov-report/src/script-overrides-decorator.ts.html +310 -0
- package/coverage/lcov-report/test/helpers/index.html +116 -0
- package/coverage/lcov-report/test/helpers/index.ts.html +646 -0
- package/coverage/lcov-report/test/index.html +266 -0
- package/coverage/lcov-report/test/test-compile-model-with-pseudoclosure.ts.html +802 -0
- package/coverage/lcov-report/test/test-compile-model.ts.html +187 -0
- package/coverage/lcov-report/test/test-compile-trie.ts.html +541 -0
- package/coverage/lcov-report/test/test-default-apply-case.ts.html +466 -0
- package/coverage/lcov-report/test/test-default-search-term-to-key.ts.html +628 -0
- package/coverage/lcov-report/test/test-error-logger.ts.html +196 -0
- package/coverage/lcov-report/test/test-join-word-breaker.ts.html +376 -0
- package/coverage/lcov-report/test/test-model-definitions.ts.html +676 -0
- package/coverage/lcov-report/test/test-override-script-defaults.ts.html +184 -0
- package/coverage/lcov-report/test/test-parse-wordlist.ts.html +466 -0
- package/coverage/lcov-report/test/test-punctuation.ts.html +190 -0
- package/coverage/lcov-report/test/wordbreakers/data.ts.html +5413 -0
- package/coverage/lcov-report/test/wordbreakers/default-wordbreaker-esm.ts.html +1234 -0
- package/coverage/lcov-report/test/wordbreakers/index.html +131 -0
- package/coverage/lcov.info +5969 -0
- package/package.json +61 -0
- package/src/build-trie.ts +511 -0
- package/src/join-word-breaker-decorator.ts +134 -0
- package/src/lexical-model-compiler.ts +179 -0
- package/src/lexical-model.ts +150 -0
- package/src/main.ts +62 -0
- package/src/model-compiler-errors.ts +203 -0
- package/src/model-defaults.ts +111 -0
- package/src/model-definitions.ts +222 -0
- package/src/script-overrides-decorator.ts +75 -0
- package/test/README.md +15 -0
- package/test/fixtures/example.qaa.joinwordbreaker/example.qaa.joinwordbreaker.model.ts +10 -0
- package/test/fixtures/example.qaa.joinwordbreaker/wordlist.tsv +3 -0
- package/test/fixtures/example.qaa.scriptusesspaces/example.qaa.scriptusesspaces.model.ts +10 -0
- package/test/fixtures/example.qaa.scriptusesspaces/wordlist.tsv +8 -0
- package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.kmp.json +45 -0
- package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.kps +35 -0
- package/test/fixtures/example.qaa.sencoten/example.qaa.sencoten.model.ts +6 -0
- package/test/fixtures/example.qaa.sencoten/wordlist.tsv +10 -0
- package/test/fixtures/example.qaa.smp/example.qaa.smp.model.ts +6 -0
- package/test/fixtures/example.qaa.smp/wordlist.tsv +5 -0
- package/test/fixtures/example.qaa.trivial/example.qaa.trivial.model.ts +5 -0
- package/test/fixtures/example.qaa.trivial/wordlist.tsv +3 -0
- package/test/fixtures/example.qaa.utf16be/example.qaa.utf16be.model.ts +5 -0
- package/test/fixtures/example.qaa.utf16be/wordlist.txt +0 -0
- package/test/fixtures/example.qaa.utf16le/example.qaa.utf16le.model.ts +5 -0
- package/test/fixtures/example.qaa.utf16le/wordlist.txt +0 -0
- package/test/fixtures/example.qaa.wordbreaker/example.qaa.wordbreaker.model.ts +9 -0
- package/test/fixtures/example.qaa.wordbreaker/wordlist.tsv +3 -0
- package/test/helpers/index.ts +187 -0
- package/test/test-compile-model-with-pseudoclosure.ts +239 -0
- package/test/test-compile-model.ts +34 -0
- package/test/test-compile-trie.ts +152 -0
- package/test/test-default-apply-case.ts +128 -0
- package/test/test-default-search-term-to-key.ts +181 -0
- package/test/test-error-logger.ts +38 -0
- package/test/test-join-word-breaker.ts +97 -0
- package/test/test-model-definitions.ts +198 -0
- package/test/test-override-script-defaults.ts +33 -0
- package/test/test-parse-wordlist.ts +127 -0
- package/test/test-punctuation.ts +35 -0
- package/test/tsconfig.json +22 -0
- package/test/wordbreakers/README.md +3 -0
- package/test/wordbreakers/data.ts +1776 -0
- package/test/wordbreakers/default-wordbreaker-esm.ts +383 -0
- package/tools/create-override-script-regexp.ts +145 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import {parseWordListFromContents, parseWordListFromFilename, WordList} from '../src/build-trie.js';
|
|
2
|
+
import {assert} from 'chai';
|
|
3
|
+
import 'mocha';
|
|
4
|
+
import { makePathToFixture, LogHoarder } from './helpers/index.js';
|
|
5
|
+
import { KeymanCompilerError } from '../src/model-compiler-errors.js';
|
|
6
|
+
|
|
7
|
+
const BOM = '\ufeff';
|
|
8
|
+
const SENCOTEN_WORDLIST = {
|
|
9
|
+
'TŦE': 13644,
|
|
10
|
+
'E': 9134,
|
|
11
|
+
'SEN': 4816,
|
|
12
|
+
'Ȼ': 3479,
|
|
13
|
+
'SW̱': 2621,
|
|
14
|
+
'NIȽ': 2314,
|
|
15
|
+
'U¸': 2298,
|
|
16
|
+
'I¸': 1988,
|
|
17
|
+
'ȻSE': 1925,
|
|
18
|
+
'I': 1884
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
describe('parsing a word list', function () {
|
|
22
|
+
beforeEach(function () {
|
|
23
|
+
this.logHoarder = (new LogHoarder).install()
|
|
24
|
+
})
|
|
25
|
+
|
|
26
|
+
afterEach(function () {
|
|
27
|
+
this.logHoarder.uninstall();
|
|
28
|
+
delete this.logHoarder;
|
|
29
|
+
})
|
|
30
|
+
|
|
31
|
+
it('should remove the UTF-8 byte order mark from files', function () {
|
|
32
|
+
let word = 'hello';
|
|
33
|
+
let count = 1;
|
|
34
|
+
let expected: WordList = {};
|
|
35
|
+
expected[word] = count;
|
|
36
|
+
|
|
37
|
+
let file = `# this is a comment\n${word}\t${count}`;
|
|
38
|
+
let withoutBOM: WordList = {};
|
|
39
|
+
parseWordListFromContents(withoutBOM, file);
|
|
40
|
+
assert.deepEqual(withoutBOM, expected, "expected regular file to parse properly");
|
|
41
|
+
assert.isFalse(this.logHoarder.hasSeenWarnings());
|
|
42
|
+
|
|
43
|
+
let withBOM: WordList = {};
|
|
44
|
+
parseWordListFromContents(withBOM, `${BOM}${file}`)
|
|
45
|
+
assert.deepEqual(withBOM, expected, "expected BOM to be ignored");
|
|
46
|
+
assert.isFalse(this.logHoarder.hasSeenWarnings());
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it('should read word lists in UTF-8', function () {
|
|
50
|
+
// N.B.: this is the format exported by Google Drive when selecting "TSV".
|
|
51
|
+
const filename = makePathToFixture('example.qaa.sencoten', 'wordlist.tsv');
|
|
52
|
+
let wordlist: WordList = {};
|
|
53
|
+
parseWordListFromFilename(wordlist, filename);
|
|
54
|
+
|
|
55
|
+
assert.deepEqual(wordlist, SENCOTEN_WORDLIST);
|
|
56
|
+
assert.isFalse(this.logHoarder.hasSeenWarnings());
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it('should read word lists in UTF-16 little-endian (with BOM)', function () {
|
|
60
|
+
// N.B.: this is the format exported by MS Excel when selecting
|
|
61
|
+
// "UTF-16" text (tested on Excel for macOS).
|
|
62
|
+
const filename = makePathToFixture('example.qaa.utf16le', 'wordlist.txt');
|
|
63
|
+
let wordlist: WordList = {};
|
|
64
|
+
parseWordListFromFilename(wordlist, filename);
|
|
65
|
+
|
|
66
|
+
assert.deepEqual(wordlist, SENCOTEN_WORDLIST);
|
|
67
|
+
assert.isFalse(this.logHoarder.hasSeenWarnings());
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it('should NOT read word lists in UTF-16 big-endian (with BOM)', function () {
|
|
71
|
+
// N.B.: Does anything output this format...?
|
|
72
|
+
const filename = makePathToFixture('example.qaa.utf16be', 'wordlist.txt');
|
|
73
|
+
let wordlist: WordList = {};
|
|
74
|
+
assert.throws(() => {
|
|
75
|
+
parseWordListFromFilename(wordlist, filename);
|
|
76
|
+
}, 'UTF-16BE is unsupported');
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
it('should merge duplicate entries in a wordlist', function () {
|
|
80
|
+
// Tests that we merge NFC+NFD entries and identical entries, trimming whitespace
|
|
81
|
+
// Note: the wordlist is built from an array to make clear that we have unnormalised inputs
|
|
82
|
+
const words = [
|
|
83
|
+
'hello', //1
|
|
84
|
+
'hello\u0301', //2, NFD helló
|
|
85
|
+
'hell\u00f3', //3, NFC helló
|
|
86
|
+
' hello ', //4, expect to trim whitespace
|
|
87
|
+
'hello']; //5
|
|
88
|
+
|
|
89
|
+
const expected: WordList = {
|
|
90
|
+
'hello': 10, /* 1+4+5 trimmed and identical */
|
|
91
|
+
'hell\u00f3': 5, /* 2+3 normalised to NFC */
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
// Build a wordlist from the array
|
|
95
|
+
let file = `# this is a comment\n`;
|
|
96
|
+
for(let i = 0; i < words.length; i++) {
|
|
97
|
+
file += `${words[i]}\t${i+1}\n`;
|
|
98
|
+
}
|
|
99
|
+
let repeatedWords: WordList = {};
|
|
100
|
+
parseWordListFromContents(repeatedWords, file);
|
|
101
|
+
|
|
102
|
+
assert.deepEqual(repeatedWords, expected);
|
|
103
|
+
|
|
104
|
+
assert.isTrue(this.logHoarder.hasSeenWarnings());
|
|
105
|
+
// hello has been seen multiple times:
|
|
106
|
+
assert.isTrue(this.logHoarder.hasSeenCode(KeymanCompilerError.CWARN_DuplicateWordInSameFile));
|
|
107
|
+
// helló and hello + U+0301 have both been seen:
|
|
108
|
+
assert.isTrue(this.logHoarder.hasSeenCode(KeymanCompilerError.CWARN_MixedNormalizationForms));
|
|
109
|
+
|
|
110
|
+
// Let's parse another file:
|
|
111
|
+
|
|
112
|
+
this.logHoarder.clear();
|
|
113
|
+
// Now, parse a DIFFERENT file, but with an NFD entry.
|
|
114
|
+
parseWordListFromContents(repeatedWords, "hello\u0301\t5\n");
|
|
115
|
+
assert.isTrue(this.logHoarder.hasSeenWarnings())
|
|
116
|
+
// hello + U+0301 (NFD) has been seen, but...
|
|
117
|
+
assert.isTrue(this.logHoarder.hasSeenCode(KeymanCompilerError.CWARN_MixedNormalizationForms));
|
|
118
|
+
// BUT! We have not seen a duplicate **within the same file**
|
|
119
|
+
assert.isFalse(this.logHoarder.hasSeenCode(KeymanCompilerError.CWARN_DuplicateWordInSameFile));
|
|
120
|
+
|
|
121
|
+
assert.deepEqual(repeatedWords, {
|
|
122
|
+
hello: expected['hello'],
|
|
123
|
+
// should have seen more of this entry:
|
|
124
|
+
"hell\u00f3": expected["hell\u00f3"] + 5,
|
|
125
|
+
});
|
|
126
|
+
});
|
|
127
|
+
});
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import LexicalModelCompiler from '../src/lexical-model-compiler.js';
|
|
2
|
+
import {assert} from 'chai';
|
|
3
|
+
import 'mocha';
|
|
4
|
+
|
|
5
|
+
import { makePathToFixture, compileModelSourceCode } from './helpers/index.js';
|
|
6
|
+
|
|
7
|
+
describe('LexicalModelCompiler', function () {
|
|
8
|
+
describe('specifying punctuation', function () {
|
|
9
|
+
const MODEL_ID = 'example.qaa.trivial';
|
|
10
|
+
const PATH = makePathToFixture(MODEL_ID);
|
|
11
|
+
|
|
12
|
+
it('should compile punctuation into the generated code', function () {
|
|
13
|
+
let compiler = new LexicalModelCompiler;
|
|
14
|
+
let code = compiler.generateLexicalModelCode(MODEL_ID, {
|
|
15
|
+
format: 'trie-1.0',
|
|
16
|
+
sources: ['wordlist.tsv'],
|
|
17
|
+
punctuation: {
|
|
18
|
+
quotesForKeepSuggestion: { open: `«`, close: `»`},
|
|
19
|
+
insertAfterWord: " " , // OGHAM SPACE MARK
|
|
20
|
+
}
|
|
21
|
+
}, PATH) as string;
|
|
22
|
+
|
|
23
|
+
// Check that the punctuation actually made it into the code:
|
|
24
|
+
assert.match(code, /«/);
|
|
25
|
+
assert.match(code, /»/);
|
|
26
|
+
// Ensure we inserted that OGHAM SPACE MARK!
|
|
27
|
+
assert.match(code, /\u1680/);
|
|
28
|
+
|
|
29
|
+
// Make sure it compiles!
|
|
30
|
+
let compilation = compileModelSourceCode(code);
|
|
31
|
+
assert.isFalse(compilation.hasSyntaxError);
|
|
32
|
+
assert.isNotNull(compilation.exportedModel);
|
|
33
|
+
});
|
|
34
|
+
})
|
|
35
|
+
});
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "../../kmc/tsconfig.kmc-base.json",
|
|
3
|
+
|
|
4
|
+
"compilerOptions": {
|
|
5
|
+
"rootDir": ".",
|
|
6
|
+
"rootDirs": ["./", "../src/"],
|
|
7
|
+
"outDir": "../build/test",
|
|
8
|
+
"esModuleInterop": true,
|
|
9
|
+
"moduleResolution": "node16",
|
|
10
|
+
"allowSyntheticDefaultImports": true,
|
|
11
|
+
"baseUrl": ".",
|
|
12
|
+
},
|
|
13
|
+
"include": [
|
|
14
|
+
"**/test-*.ts",
|
|
15
|
+
"*.ts",
|
|
16
|
+
"helpers/*.ts",
|
|
17
|
+
"wordbreakers/*.ts"
|
|
18
|
+
],
|
|
19
|
+
"references": [
|
|
20
|
+
{ "path": "../" },
|
|
21
|
+
]
|
|
22
|
+
}
|