@keymanapp/kmc-model 18.0.16-alpha → 18.0.18-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,66 +1,67 @@
1
- !function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="b781c9e0-1012-5d01-8ce7-3bc015949ab3")}catch(e){}}();
2
- import { CompilerErrorNamespace, CompilerErrorSeverity, CompilerMessageSpec, CompilerMessageDef as def, CompilerMessageSpecWithException } from "@keymanapp/common-types";
3
- const Namespace = CompilerErrorNamespace.ModelCompiler;
4
- // const SevInfo = CompilerErrorSeverity.Info | Namespace;
5
- const SevHint = CompilerErrorSeverity.Hint | Namespace;
6
- // const SevWarn = CompilerErrorSeverity.Warn | Namespace;
7
- const SevError = CompilerErrorSeverity.Error | Namespace;
8
- const SevFatal = CompilerErrorSeverity.Fatal | Namespace;
9
- const m = (code, message) => ({
10
- ...CompilerMessageSpec(code, message),
11
- line: ModelCompilerMessageContext.line,
12
- filename: ModelCompilerMessageContext.filename,
13
- });
14
- const m_e = (code, message, exceptionVar) => ({
15
- ...CompilerMessageSpecWithException(code, message, exceptionVar),
16
- line: ModelCompilerMessageContext.line,
17
- filename: ModelCompilerMessageContext.filename,
18
- });
19
- /**
20
- * @internal
21
- */
22
- export class ModelCompilerMessageContext {
23
- // Context added to all messages
24
- static line;
25
- static filename;
26
- }
27
- /**
28
- * @internal
29
- */
30
- export class ModelCompilerMessages {
31
- static FATAL_UnexpectedException = SevFatal | 0x0001;
32
- static Fatal_UnexpectedException = (o) => m_e(this.FATAL_UnexpectedException, null, o.e ?? 'unknown error');
33
- static HINT_MixedNormalizationForms = SevHint | 0x0002;
34
- static Hint_MixedNormalizationForms = (o) => m(this.HINT_MixedNormalizationForms, `“${def(o.wordform)}” is not in Unicode NFC. Automatically converting to NFC.`);
35
- static HINT_DuplicateWordInSameFile = SevHint | 0x0003;
36
- static Hint_DuplicateWordInSameFile = (o) => m(this.HINT_DuplicateWordInSameFile, `duplicate word “${def(o.wordform)}” found in same file; summing counts`);
37
- static ERROR_UnimplementedModelFormat = SevError | 0x0004;
38
- static Error_UnimplementedModelFormat = (o) => m(this.ERROR_UnimplementedModelFormat, `Unimplemented model format: ${def(o.format)}`);
39
- static ERROR_UnknownModelFormat = SevError | 0x0005;
40
- static Error_UnknownModelFormat = (o) => m(this.ERROR_UnknownModelFormat, `Unimplemented model format: ${def(o.format)}`);
41
- static ERROR_NoDefaultExport = SevError | 0x0006;
42
- static Error_NoDefaultExport = () => m(this.ERROR_NoDefaultExport, `Model source does have a default export. Did you remember to write \`export default source;\`?`);
43
- static ERROR_SearchTermToKeyMustBeExplicitlySpecified = SevError | 0x0007;
44
- static Error_SearchTermToKeyMustBeExplicitlySpecified = () => m(this.ERROR_SearchTermToKeyMustBeExplicitlySpecified, "searchTermToKey must be explicitly specified");
45
- static ERROR_UTF16BEUnsupported = SevError | 0x0008;
46
- static Error_UTF16BEUnsupported = () => m(this.ERROR_UTF16BEUnsupported, 'UTF-16BE is unsupported');
47
- static ERROR_UnknownWordBreaker = SevError | 0x0009;
48
- static Error_UnknownWordBreaker = (o) => m(this.ERROR_UnknownWordBreaker, `Unknown word breaker: ${def(o.spec)}`);
49
- static ERROR_UnsupportedScriptOverride = SevError | 0x000A;
50
- static Error_UnsupportedScriptOverride = (o) => m(this.ERROR_UnsupportedScriptOverride, `Unsupported script override: ${def(o.option)}`);
51
- }
52
- ;
53
- /**
54
- * A ModelCompilerError should be thrown when an unrecoverable error occurs that
55
- * would block further compilation. It will be caught in the top-most compiler
56
- * API endpoint and converted into a callback message.
57
- */
58
- export class ModelCompilerError extends Error {
59
- event;
60
- constructor(event) {
61
- super(event.message);
62
- this.event = event;
63
- }
64
- }
65
- //# debugId=b781c9e0-1012-5d01-8ce7-3bc015949ab3
1
+
2
+ !function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="01ca3db7-cbc6-516d-9c09-2fd9d1e8ca5d")}catch(e){}}(); // Minified snippet: stores this file's debug ID in a _sentryDebugIds map on window/global/self (or a throwaway object), keyed by the current stack trace; any error is deliberately ignored.
3
+ import { CompilerErrorNamespace, CompilerErrorSeverity, CompilerMessageSpec, CompilerMessageDef as def, CompilerMessageSpecWithException } from "@keymanapp/common-types";
4
+ const Namespace = CompilerErrorNamespace.ModelCompiler; // namespace value OR-ed into each severity base below
5
+ // const SevInfo = CompilerErrorSeverity.Info | Namespace; (disabled: no message in the class below uses it)
6
+ const SevHint = CompilerErrorSeverity.Hint | Namespace; // base for the HINT_* message codes
7
+ // const SevWarn = CompilerErrorSeverity.Warn | Namespace; (disabled: no message in the class below uses it)
8
+ const SevError = CompilerErrorSeverity.Error | Namespace; // base for the ERROR_* message codes
9
+ const SevFatal = CompilerErrorSeverity.Fatal | Namespace; // base for the FATAL_* message codes
10
+ const m = (code, message) => ({ // Builds a message object for `code`/`message` and stamps it with the current context.
11
+ ...CompilerMessageSpec(code, message), // start from the fields produced by the shared helper
12
+ line: ModelCompilerMessageContext.line, // read at call time, so it reflects whatever the context holds right now
13
+ filename: ModelCompilerMessageContext.filename, // likewise read at call time
14
+ });
15
+ const m_e = (code, message, exceptionVar) => ({ // Same as m(), but for messages that carry an exception value.
16
+ ...CompilerMessageSpecWithException(code, message, exceptionVar), // start from the fields produced by the shared helper
17
+ line: ModelCompilerMessageContext.line, // read at call time, so it reflects whatever the context holds right now
18
+ filename: ModelCompilerMessageContext.filename, // likewise read at call time
19
+ });
20
+ /**
21
+ * @internal
22
+ */
23
+ export class ModelCompilerMessageContext {
24
+ // Context added to all messages: the m() and m_e() helpers copy these two values into every message they build
25
+ static line; // no initializer here, so undefined until something assigns it
26
+ static filename; // no initializer here, so undefined until something assigns it
27
+ }
28
+ /**
29
+ * @internal
30
+ */
31
+ export class ModelCompilerMessages {
32
+ static FATAL_UnexpectedException = SevFatal | 0x0001;
33
+ static Fatal_UnexpectedException = (o) => m_e(this.FATAL_UnexpectedException, null, o.e ?? 'unknown error');
34
+ static HINT_MixedNormalizationForms = SevHint | 0x0002;
35
+ static Hint_MixedNormalizationForms = (o) => m(this.HINT_MixedNormalizationForms, `“${def(o.wordform)}” is not in Unicode NFC. Automatically converting to NFC.`);
36
+ static HINT_DuplicateWordInSameFile = SevHint | 0x0003;
37
+ static Hint_DuplicateWordInSameFile = (o) => m(this.HINT_DuplicateWordInSameFile, `duplicate word “${def(o.wordform)}” found in same file; summing counts`);
38
+ static ERROR_UnimplementedModelFormat = SevError | 0x0004;
39
+ static Error_UnimplementedModelFormat = (o) => m(this.ERROR_UnimplementedModelFormat, `Unimplemented model format: ${def(o.format)}`);
40
+ static ERROR_UnknownModelFormat = SevError | 0x0005;
41
+ static Error_UnknownModelFormat = (o) => m(this.ERROR_UnknownModelFormat, `Unimplemented model format: ${def(o.format)}`);
42
+ static ERROR_NoDefaultExport = SevError | 0x0006;
43
+ static Error_NoDefaultExport = () => m(this.ERROR_NoDefaultExport, `Model source does have a default export. Did you remember to write \`export default source;\`?`);
44
+ static ERROR_SearchTermToKeyMustBeExplicitlySpecified = SevError | 0x0007;
45
+ static Error_SearchTermToKeyMustBeExplicitlySpecified = () => m(this.ERROR_SearchTermToKeyMustBeExplicitlySpecified, "searchTermToKey must be explicitly specified");
46
+ static ERROR_UTF16BEUnsupported = SevError | 0x0008;
47
+ static Error_UTF16BEUnsupported = () => m(this.ERROR_UTF16BEUnsupported, 'UTF-16BE is unsupported');
48
+ static ERROR_UnknownWordBreaker = SevError | 0x0009;
49
+ static Error_UnknownWordBreaker = (o) => m(this.ERROR_UnknownWordBreaker, `Unknown word breaker: ${def(o.spec)}`);
50
+ static ERROR_UnsupportedScriptOverride = SevError | 0x000A;
51
+ static Error_UnsupportedScriptOverride = (o) => m(this.ERROR_UnsupportedScriptOverride, `Unsupported script override: ${def(o.option)}`);
52
+ }
53
+ ;
54
+ /**
54
+ * A ModelCompilerError should be thrown when an unrecoverable error occurs that
55
+ * would block further compilation. It will be caught in the top-most compiler
56
+ * API endpoint and converted into a callback message.
57
+ */
58
+ export class ModelCompilerError extends Error {
59
+ event; // the message object this error was constructed from
60
+ constructor(event) {
61
+ super(event.message); // the event's message text becomes the Error message
62
+ this.event = event; // keep the whole event so a handler can read it back from the caught error
63
+ }
64
+ }
66
66
  //# sourceMappingURL=model-compiler-messages.js.map
67
+ //# debugId=01ca3db7-cbc6-516d-9c09-2fd9d1e8ca5d
@@ -1 +1 @@
1
- {"debug_id":"b781c9e0-1012-5d01-8ce7-3bc015949ab3","file":"model-compiler-messages.js","mappings":";AAAA,OAAO,EAAE,sBAAsB,EAAE,qBAAqB,EAAiB,mBAAmB,EAAE,kBAAkB,IAAI,GAAG,EAAE,gCAAgC,EAAE,MAAM,yBAAyB,CAAC;AAEzL,MAAM,SAAS,GAAG,sBAAsB,CAAC,aAAa,CAAC;AACvD,0DAA0D;AAC1D,MAAM,OAAO,GAAG,qBAAqB,CAAC,IAAI,GAAG,SAAS,CAAC;AACvD,0DAA0D;AAC1D,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,GAAG,SAAS,CAAC;AACzD,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,GAAG,SAAS,CAAC;AAEzD,MAAM,CAAC,GAAG,CAAC,IAAY,EAAE,OAAe,EAAkB,EAAE,CAAC,CAAC;IAC5D,GAAG,mBAAmB,CAAC,IAAI,EAAE,OAAO,CAAC;IACrC,IAAI,EAAE,2BAA2B,CAAC,IAAI;IACtC,QAAQ,EAAE,2BAA2B,CAAC,QAAQ;CAC/C,CAAC,CAAC;AAEH,MAAM,GAAG,GAAG,CAAC,IAAY,EAAE,OAAe,EAAE,YAAiB,EAAkB,EAAE,CAAC,CAAC;IACjF,GAAG,gCAAgC,CAAC,IAAI,EAAE,OAAO,EAAE,YAAY,CAAC;IAChE,IAAI,EAAE,2BAA2B,CAAC,IAAI;IACtC,QAAQ,EAAE,2BAA2B,CAAC,QAAQ;CAC/C,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,OAAO,2BAA2B;IACtC,gCAAgC;IAChC,MAAM,CAAC,IAAI,CAAS;IACpB,MAAM,CAAC,QAAQ,CAAS;CACzB;AAED;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAEhC,MAAM,CAAC,yBAAyB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrD,MAAM,CAAC,yBAAyB,GAAG,CAAC,CAAU,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,yBAAyB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,eAAe,CAAC,CAAC;IAErH,MAAM,CAAC,4BAA4B,GAAG,OAAO,GAAG,MAAM,CAAC;IACvD,MAAM,CAAC,4BAA4B,GAAG,CAAC,CAAoB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,4BAA4B,EACjG,IAAI,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,2DAA2D,CAAC,CAAC;IAElF,MAAM,CAAC,4BAA4B,GAAG,OAAO,GAAG,MAAM,CAAC;IACvD,MAAM,CAAC,4BAA4B,GAAG,CAAC,CAAoB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,4BAA4B,EACjG,mBAAmB,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,sCAAsC,CAAC,CAAC;IAE5E,MAAM,CAAC,8BAA8B,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC1D,MAAM,CAAC,8BAA8B,GAAG,CAAC,CAAkB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,8BAA8B,EACnG,+BAA+B,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAElD,MAAM,CAAC,wBAAwB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpD,MAAM,CAAC,wBAAwB,GAAG,CAAC,CAAkB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,EACvF,+BAA+B,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAElD,MAAM,CAAC,qBAAqB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACjD,MAAM,CAAC,qBAAqB,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CA
AC,qBAAqB,EAC/D,gGAAgG,CAAC,CAAC;IAEpG,MAAM,CAAC,8CAA8C,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC1E,MAAM,CAAC,8CAA8C,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,8CAA8C,EACjH,8CAA8C,CAAC,CAAC;IAElD,MAAM,CAAC,wBAAwB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpD,MAAM,CAAC,wBAAwB,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,EAAE,yBAAyB,CAAC,CAAC;IAEpG,MAAM,CAAC,wBAAwB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpD,MAAM,CAAC,wBAAwB,GAAG,CAAC,CAAe,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,EACpF,yBAAyB,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAE1C,MAAM,CAAC,+BAA+B,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC3D,MAAM,CAAC,+BAA+B,GAAG,CAAC,CAAiB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,+BAA+B,EACpG,gCAAgC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;;AACpD,CAAC;AAEF;;;;GAIG;AACH,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACxB;IAAnB,YAAmB,KAAoB;QACrC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QADJ,UAAK,GAAL,KAAK,CAAe;IAEvC,CAAC;CACF","names":[],"sourceRoot":"","sources":["../../src/model-compiler-messages.ts"],"version":3}
1
+ {"version":3,"file":"model-compiler-messages.js","sources":["../../src/model-compiler-messages.ts"],"sourceRoot":"","names":[],"mappings":";;AAAA,OAAO,EAAE,sBAAsB,EAAE,qBAAqB,EAAiB,mBAAmB,EAAE,kBAAkB,IAAI,GAAG,EAAE,gCAAgC,EAAE,MAAM,yBAAyB,CAAC;AAEzL,MAAM,SAAS,GAAG,sBAAsB,CAAC,aAAa,CAAC;AACvD,0DAA0D;AAC1D,MAAM,OAAO,GAAG,qBAAqB,CAAC,IAAI,GAAG,SAAS,CAAC;AACvD,0DAA0D;AAC1D,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,GAAG,SAAS,CAAC;AACzD,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,GAAG,SAAS,CAAC;AAEzD,MAAM,CAAC,GAAG,CAAC,IAAY,EAAE,OAAe,EAAkB,EAAE,CAAC,CAAC;IAC5D,GAAG,mBAAmB,CAAC,IAAI,EAAE,OAAO,CAAC;IACrC,IAAI,EAAE,2BAA2B,CAAC,IAAI;IACtC,QAAQ,EAAE,2BAA2B,CAAC,QAAQ;CAC/C,CAAC,CAAC;AAEH,MAAM,GAAG,GAAG,CAAC,IAAY,EAAE,OAAe,EAAE,YAAiB,EAAkB,EAAE,CAAC,CAAC;IACjF,GAAG,gCAAgC,CAAC,IAAI,EAAE,OAAO,EAAE,YAAY,CAAC;IAChE,IAAI,EAAE,2BAA2B,CAAC,IAAI;IACtC,QAAQ,EAAE,2BAA2B,CAAC,QAAQ;CAC/C,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,OAAO,2BAA2B;IACtC,gCAAgC;IAChC,MAAM,CAAC,IAAI,CAAS;IACpB,MAAM,CAAC,QAAQ,CAAS;CACzB;AAED;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAEhC,MAAM,CAAC,yBAAyB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrD,MAAM,CAAC,yBAAyB,GAAG,CAAC,CAAU,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,yBAAyB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,eAAe,CAAC,CAAC;IAErH,MAAM,CAAC,4BAA4B,GAAG,OAAO,GAAG,MAAM,CAAC;IACvD,MAAM,CAAC,4BAA4B,GAAG,CAAC,CAAoB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,4BAA4B,EACjG,IAAI,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,2DAA2D,CAAC,CAAC;IAElF,MAAM,CAAC,4BAA4B,GAAG,OAAO,GAAG,MAAM,CAAC;IACvD,MAAM,CAAC,4BAA4B,GAAG,CAAC,CAAoB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,4BAA4B,EACjG,mBAAmB,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,sCAAsC,CAAC,CAAC;IAE5E,MAAM,CAAC,8BAA8B,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC1D,MAAM,CAAC,8BAA8B,GAAG,CAAC,CAAkB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,8BAA8B,EACnG,+BAA+B,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAElD,MAAM,CAAC,wBAAwB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpD,MAAM,CAAC,wBAAwB,GAAG,CAAC,CAAkB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,EACvF,+BAA+B,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAElD,MAAM,CAAC,qBAAqB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACjD,MAAM,CAAC,qBA
AqB,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,qBAAqB,EAC/D,gGAAgG,CAAC,CAAC;IAEpG,MAAM,CAAC,8CAA8C,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC1E,MAAM,CAAC,8CAA8C,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,8CAA8C,EACjH,8CAA8C,CAAC,CAAC;IAElD,MAAM,CAAC,wBAAwB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpD,MAAM,CAAC,wBAAwB,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,EAAE,yBAAyB,CAAC,CAAC;IAEpG,MAAM,CAAC,wBAAwB,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpD,MAAM,CAAC,wBAAwB,GAAG,CAAC,CAAe,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,EACpF,yBAAyB,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAE1C,MAAM,CAAC,+BAA+B,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC3D,MAAM,CAAC,+BAA+B,GAAG,CAAC,CAAiB,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,+BAA+B,EACpG,gCAAgC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;;AACpD,CAAC;AAEF;;;;GAIG;AACH,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IACxB;IAAnB,YAAmB,KAAoB;QACrC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QADJ,UAAK,GAAL,KAAK,CAAe;IAEvC,CAAC;CACF","debug_id":"01ca3db7-cbc6-516d-9c09-2fd9d1e8ca5d"}
@@ -1,104 +1,105 @@
1
- !function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="2ae29ec9-eed0-568a-afa7-1797b21bd3e2")}catch(e){}}();
2
- /**
3
- * Converts wordforms into an indexable form. It does this by
4
- * normalizing the letter case of characters INDIVIDUALLY (to disregard
5
- * context-sensitive case transformations), normalizing to NFKD form,
6
- * and removing common diacritical marks.
7
- *
8
- * This is a very speculative implementation, that might work with
9
- * your language. We don't guarantee that this will be perfect for your
10
- * language, but it's a start.
11
- *
12
- * This uses String.prototype.normalize() to convert normalize into NFKD.
13
- * NFKD neutralizes some funky distinctions, e.g., ꬲ, e, e should all be the
14
- * same character; plus, it's an easy way to separate a Latin character from
15
- * its diacritics; Even then, orthographies regularly use code points
16
- * that, under NFKD normalization, do NOT decompose appropriately for your
17
- * language (e.g., SENĆOŦEN, Plains Cree in syllabics).
18
- *
19
- * Use this in early iterations of the model. For a production lexical model,
20
- * you will probably write/generate your own key function, tailored to your
21
- * language. There is a chance the default will work properly out of the box.
22
- */
23
- export function defaultSearchTermToKey(wordform) {
24
- return wordform
25
- .normalize('NFKD')
26
- // Remove any combining diacritics (if input is in NFKD)
27
- .replace(/[\u0300-\u036F]/g, '')
28
- // Replace directional quotation marks with plain apostrophes
29
- .replace(/[‘’]/g, "'")
30
- // Also double-quote marks.
31
- .replace(/[“”]/g, '"');
32
- }
33
- /**
34
- * Converts wordforms into an indexable form. It does this by
35
- * normalizing the letter case of characters INDIVIDUALLY (to disregard
36
- * context-sensitive case transformations), normalizing to NFKD form,
37
- * and removing common diacritical marks.
38
- *
39
- * This is a very speculative implementation, that might work with
40
- * your language. We don't guarantee that this will be perfect for your
41
- * language, but it's a start.
42
- *
43
- * This uses String.prototype.normalize() to convert normalize into NFKD.
44
- * NFKD neutralizes some funky distinctions, e.g., ꬲ, e, e should all be the
45
- * same character; plus, it's an easy way to separate a Latin character from
46
- * its diacritics; Even then, orthographies regularly use code points
47
- * that, under NFKD normalization, do NOT decompose appropriately for your
48
- * language (e.g., SENĆOŦEN, Plains Cree in syllabics).
49
- *
50
- * Use this in early iterations of the model. For a production lexical model,
51
- * you will probably write/generate your own key function, tailored to your
52
- * language. There is a chance the default will work properly out of the box.
53
- */
54
- export function defaultCasedSearchTermToKey(wordform, applyCasing) {
55
- // While this is a bit WET, as the basic `defaultSearchTermToKey` exists and performs some of
56
- // the same functions, repetition is the easiest way to allow the function to be safely compiled
57
- // with ease by use of `.toString()`.
58
- return Array.from(wordform
59
- .normalize('NFKD')
60
- // Remove any combining diacritics (if input is in NFKD)
61
- .replace(/[\u0300-\u036F]/g, '')) // end of `Array.from`
62
- .map(function (c) { return applyCasing('lower', c); })
63
- .join('')
64
- // Replace directional quotation marks with plain apostrophes
65
- .replace(/[‘’]/g, "'")
66
- // Also double-quote marks.
67
- .replace(/[“”]/g, '"');
68
- }
69
- /**
70
- * Specifies default casing behavior for lexical models when `languageUsesCasing` is
71
- * set to true.
72
- * @param casing One of 'lower' (lowercased), 'upper' (uppercased), or 'initial'.
73
- *
74
- * 'initial' is designed to cover cases like sentence-initial & proper noun capitalization in English.
75
- * This may be overwritten as appropriate in model-specific implementations.
76
- * @param text The text to be modified.
77
- */
78
- export function defaultApplyCasing(casing, text) {
79
- switch (casing) {
80
- case 'lower':
81
- return text.toLowerCase();
82
- case 'upper':
83
- return text.toUpperCase();
84
- case 'initial':
85
- var headCode = text.charCodeAt(0);
86
- // The length of the first code unit, as measured in code points.
87
- var headUnitLength = 1;
88
- // Is the first character a high surrogate, indicating possible use of UTF-16
89
- // surrogate pairs? Also, is the string long enough for there to BE a pair?
90
- if (text.length > 1 && headCode >= 0xD800 && headCode <= 0xDBFF) {
91
- // It's possible, so now we check for low surrogates.
92
- var lowSurrogateCode = text.charCodeAt(1);
93
- if (lowSurrogateCode >= 0xDC00 && lowSurrogateCode <= 0xDFFF) {
94
- // We have a surrogate pair; this pair is the 'first' character.
95
- headUnitLength++;
96
- }
97
- }
98
- // Capitalizes the first code unit of the string, leaving the rest intact.
99
- return text.substring(0, headUnitLength).toUpperCase() // head - uppercased
100
- .concat(text.substring(headUnitLength)); // tail - lowercased
101
- }
102
- }
103
- //# debugId=2ae29ec9-eed0-568a-afa7-1797b21bd3e2
1
+ /**
2
+ * Converts wordforms into an indexable form. It does this by
3
+ * normalizing to NFKD form, removing common combining diacritical
4
+ * marks, and replacing directional (curly) single and double quotation
5
+ * marks with their plain equivalents. Letter case is left unchanged.
6
+ *
7
+ * This is a very speculative implementation, that might work with
8
+ * your language. We don't guarantee that this will be perfect for your
9
+ * language, but it's a start.
10
+ *
11
+ * This uses String.prototype.normalize() to normalize the input into NFKD.
12
+ * NFKD neutralizes some funky distinctions, e.g., ꬲ, e, e should all be the
13
+ * same character; plus, it's an easy way to separate a Latin character from
14
+ * its diacritics; Even then, orthographies regularly use code points
15
+ * that, under NFKD normalization, do NOT decompose appropriately for your
16
+ * language (e.g., SENĆOŦEN, Plains Cree in syllabics).
17
+ *
18
+ * Use this in early iterations of the model. For a production lexical model,
19
+ * you will probably write/generate your own key function, tailored to your
20
+ * language. There is a chance the default will work properly out of the box.
21
+ */
22
+
23
+ !function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="85495a1e-2cde-5517-addc-9128a8efdae6")}catch(e){}}(); // Minified snippet: stores this file's debug ID in a _sentryDebugIds map on window/global/self (or a throwaway object), keyed by the current stack trace; any error is deliberately ignored. NOTE(review): it sits between the doc comment above and the function it documents.
24
+ export function defaultSearchTermToKey(wordform) { // wordform: the string to index; returns the derived key string
25
+ return wordform
26
+ .normalize('NFKD') // compatibility decomposition, so base letters and combining marks become separate code points
27
+ // Remove combining diacritics in the range U+0300-U+036F (exposed by the NFKD step above)
28
+ .replace(/[\u0300-\u036F]/g, '')
29
+ // Replace directional single quotation marks with plain apostrophes
30
+ .replace(/[‘’]/g, "'")
31
+ // Likewise, replace directional double quotation marks with plain double quotes.
32
+ .replace(/[“”]/g, '"');
33
+ }
34
+ /**
35
+ * Converts wordforms into an indexable form. It does this by
36
+ * normalizing the letter case of characters INDIVIDUALLY (to disregard
37
+ * context-sensitive case transformations), normalizing to NFKD form,
38
+ * and removing common diacritical marks.
39
+ *
40
+ * This is a very speculative implementation, that might work with
41
+ * your language. We don't guarantee that this will be perfect for your
42
+ * language, but it's a start.
43
+ *
44
+ * This uses String.prototype.normalize() to normalize the input into NFKD.
45
+ * NFKD neutralizes some funky distinctions, e.g., ꬲ, e, e should all be the
46
+ * same character; plus, it's an easy way to separate a Latin character from
47
+ * its diacritics; Even then, orthographies regularly use code points
48
+ * that, under NFKD normalization, do NOT decompose appropriately for your
49
+ * language (e.g., SENĆOŦEN, Plains Cree in syllabics).
50
+ *
51
+ * Use this in early iterations of the model. For a production lexical model,
52
+ * you will probably write/generate your own key function, tailored to your
53
+ * language. There is a chance the default will work properly out of the box.
54
+ */
55
+ export function defaultCasedSearchTermToKey(wordform, applyCasing) { // applyCasing is called as applyCasing('lower', c) for each code point c
56
+ // This deliberately repeats (is "WET" with) steps that the basic `defaultSearchTermToKey` already
57
+ // performs instead of calling it: repetition is the easiest way to allow the function to be safely
58
+ // compiled with ease by use of `.toString()`.
59
+ return Array.from(wordform
60
+ .normalize('NFKD')
61
+ // Remove combining diacritics in the range U+0300-U+036F (exposed by the NFKD step above)
62
+ .replace(/[\u0300-\u036F]/g, '')) // end of `Array.from`, which splits the string into code points
63
+ .map(function (c) { return applyCasing('lower', c); }) // lowercase each code point on its own, ignoring its neighbours
64
+ .join('')
65
+ // Replace directional single quotation marks with plain apostrophes
66
+ .replace(/[‘’]/g, "'")
67
+ // Likewise, replace directional double quotation marks with plain double quotes.
68
+ .replace(/[“”]/g, '"');
69
+ }
70
+ /**
70
+ * Specifies default casing behavior for lexical models when `languageUsesCasing` is
71
+ * set to true. Returns the re-cased text; any other `casing` value falls out of the switch and yields undefined.
72
+ * @param casing One of 'lower' (lowercased), 'upper' (uppercased), or 'initial'.
73
+ *
74
+ * 'initial' is designed to cover cases like sentence-initial & proper noun capitalization in English.
75
+ * This may be overwritten as appropriate in model-specific implementations.
76
+ * @param text The text to be modified.
77
+ */
78
+ export function defaultApplyCasing(casing, text) {
79
+ switch (casing) {
80
+ case 'lower':
81
+ return text.toLowerCase();
82
+ case 'upper':
83
+ return text.toUpperCase();
84
+ case 'initial':
85
+ var headCode = text.charCodeAt(0); // NaN for an empty string, which fails the range check below
86
+ // The length of the first code point, as measured in UTF-16 code units.
87
+ var headUnitLength = 1;
88
+ // Is the first code unit a high surrogate, indicating possible use of UTF-16
89
+ // surrogate pairs? Also, is the string long enough for there to BE a pair?
90
+ if (text.length > 1 && headCode >= 0xD800 && headCode <= 0xDBFF) {
91
+ // It's possible, so now we check for low surrogates.
92
+ var lowSurrogateCode = text.charCodeAt(1);
93
+ if (lowSurrogateCode >= 0xDC00 && lowSurrogateCode <= 0xDFFF) {
94
+ // We have a surrogate pair; this pair is the 'first' character.
95
+ headUnitLength++;
96
+ }
97
+ }
98
+ // Capitalizes the first code point of the string, leaving the rest intact.
99
+ return text.substring(0, headUnitLength).toUpperCase() // head - uppercased
100
+ .concat(text.substring(headUnitLength)); // tail - left unchanged (not lowercased)
101
+ }
102
+ }
104
104
  //# sourceMappingURL=model-defaults.js.map
105
+ //# debugId=85495a1e-2cde-5517-addc-9128a8efdae6
@@ -1 +1 @@
1
- {"debug_id":"2ae29ec9-eed0-568a-afa7-1797b21bd3e2","file":"model-defaults.js","mappings":";AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,OAAO,QAAQ;SACV,SAAS,CAAC,MAAM,CAAC;QAClB,wDAAwD;SACvD,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC;QAChC,6DAA6D;SAC5D,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;QACtB,2BAA2B;SAC1B,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,UAAU,2BAA2B,CAAC,QAAgB,EAAE,WAA2B;IACvF,6FAA6F;IAC7F,gGAAgG;IAChG,qCAAqC;IACrC,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ;SACnB,SAAS,CAAC,MAAM,CAAC;QAClB,wDAAwD;SACvD,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CACjC,CAAC,sBAAsB;SACvB,GAAG,CAAC,UAAS,CAAC,IAAI,OAAO,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC,CAAA,CAAA,CAAC,CAAC;SAClD,IAAI,CAAC,EAAE,CAAC;QACT,6DAA6D;SAC5D,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;QACtB,2BAA2B;SAC1B,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,kBAAkB,CAAC,MAAkB,EAAE,IAAY;IACjE,QAAO,MAAM,EAAE;QACb,KAAK,OAAO;YACV,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;QAC5B,KAAK,OAAO;YACV,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;QAC5B,KAAK,SAAS;YACZ,IAAI,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAClC,iEAAiE;YACjE,IAAI,cAAc,GAAG,CAAC,CAAC;YAEvB,6EAA6E;YAC7E,4EAA4E;YAC5E,IAAG,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,IAAI,MAAM,IAAI,QAAQ,IAAI,MAAM,EAAE;gBAC9D,qDAAqD;gBACrD,IAAI,gBAAgB,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;gBAE1C,IAAG,gBAAgB,IAAI,MAAM,IAAI,gBAAgB,IAAI,MAAM,EAAE;oBAC3D,gEAAgE;oBAChE,cAAc,EAAE,CAAC;iBAClB;aACF;YAED,0EAA0E;YAC1E,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC,WAAW,EAAE,CAAC,oBAAoB;iBACnE,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,CAAQ,oBAAoB;KAC9E;AACH,CAAC","names":[],"sourceRoot":"","sources":["../../src/model-defaults.ts"],"version":3}
1
+ {"version":3,"file":"model-defaults.js","sources":["../../src/model-defaults.ts"],"sourceRoot":"","names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;;;AACH,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,OAAO,QAAQ;SACV,SAAS,CAAC,MAAM,CAAC;QAClB,wDAAwD;SACvD,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC;QAChC,6DAA6D;SAC5D,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;QACtB,2BAA2B;SAC1B,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,UAAU,2BAA2B,CAAC,QAAgB,EAAE,WAA2B;IACvF,6FAA6F;IAC7F,gGAAgG;IAChG,qCAAqC;IACrC,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ;SACnB,SAAS,CAAC,MAAM,CAAC;QAClB,wDAAwD;SACvD,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CACjC,CAAC,sBAAsB;SACvB,GAAG,CAAC,UAAS,CAAC,IAAI,OAAO,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC,CAAA,CAAA,CAAC,CAAC;SAClD,IAAI,CAAC,EAAE,CAAC;QACT,6DAA6D;SAC5D,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;QACtB,2BAA2B;SAC1B,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,kBAAkB,CAAC,MAAkB,EAAE,IAAY;IACjE,QAAO,MAAM,EAAE;QACb,KAAK,OAAO;YACV,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;QAC5B,KAAK,OAAO;YACV,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;QAC5B,KAAK,SAAS;YACZ,IAAI,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAClC,iEAAiE;YACjE,IAAI,cAAc,GAAG,CAAC,CAAC;YAEvB,6EAA6E;YAC7E,4EAA4E;YAC5E,IAAG,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,IAAI,MAAM,IAAI,QAAQ,IAAI,MAAM,EAAE;gBAC9D,qDAAqD;gBACrD,IAAI,gBAAgB,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;gBAE1C,IAAG,gBAAgB,IAAI,MAAM,IAAI,gBAAgB,IAAI,MAAM,EAAE;oBAC3D,gEAAgE;oBAChE,cAAc,EAAE,CAAC;iBAClB;aACF;YAED,0EAA0E;YAC1E,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC,WAAW,EAAE,CAAC,oBAAoB;iBACnE,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,CAAQ,oBAAoB;KAC9E;AACH,CAAC","debug_id":"85495a1e-2cde-5517-addc-9128a8efdae6"}