@sarmay/kaz-converter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -0
- package/dist/index.cjs +1512 -0
- package/dist/index.d.cts +82 -0
- package/dist/index.d.ts +82 -0
- package/dist/index.js +1480 -0
- package/package.json +52 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
 * Root-word lists that can be supplied through `CyrillicToArabicOptions.lexicon`.
 * Both properties are optional arrays of strings.
 */
interface LexiconData {
|
|
2
|
+
/** Optional list of root strings. NOTE(review): presumably native-vocabulary roots, going by the name — confirm against the implementation. */
nativeRoots?: string[];
|
|
3
|
+
/** Optional list of root strings. NOTE(review): presumably borrowed-word roots, going by the name — confirm against the implementation. */
loanRoots?: string[];
|
|
4
|
+
}
|
|
5
|
+
/**
 * Either a plain `T` or a `Promise` that resolves to `T`.
 * Used as the return type of `ContextDisambiguator.disambiguate`.
 */
type MaybePromise<T> = Promise<T> | T;
|
|
6
|
+
/**
 * Read-only two-element tuple of strings whose positions are labelled
 * `source` and `converted`.
 */
type RawToken = readonly [
  source: string,
  converted: string,
];
|
|
7
|
+
/**
 * Contract for an object that can be passed as
 * `ArabicToCyrillicOptions.disambiguator`.
 */
interface ContextDisambiguator {
|
|
8
|
+
/**
 * @param rawTokens Read-only array of `RawToken` pairs.
 * @param contextSentence A string argument supplied alongside the tokens.
 * @returns An array of strings, either directly or wrapped in a promise.
 *   NOTE(review): how the returned strings map onto `rawTokens` is not
 *   visible in this declaration — confirm against the implementation.
 */
disambiguate(rawTokens: readonly RawToken[], contextSentence: string): MaybePromise<string[]>;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Options object accepted by the `CyrillicToArabicConverter` constructor
 * and by `syr2arb`.
 */
interface CyrillicToArabicOptions {
|
|
11
|
+
/** Optional `LexiconData` value. */
lexicon?: LexiconData;
|
|
12
|
+
}
|
|
13
|
+
/**
 * Options object accepted by the `ArabicToCyrillicConverter` constructor
 * and by `arb2syr` / `arb2syrAsync`.
 */
interface ArabicToCyrillicOptions {
|
|
14
|
+
/** Optional boolean flag. NOTE(review): "Lm" presumably abbreviates "language model" — confirm; the default is not visible here. */
useLm?: boolean;
|
|
15
|
+
/** Optional `ContextDisambiguator` implementation. */
disambiguator?: ContextDisambiguator;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
 * Exported class that implements `ContextDisambiguator`.
 * NOTE(review): the name suggests a pass-through implementation; the body
 * is not part of this declaration file, so confirm before relying on that.
 */
declare class NoopDisambiguator implements ContextDisambiguator {
|
|
19
|
+
/**
 * Declares only the `rawTokens` parameter and returns `string[]`
 * synchronously (no promise).
 */
disambiguate(rawTokens: readonly RawToken[]): string[];
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
 * Values permitted for `RootMatch.matchType`: one of four string literals,
 * or `null`.
 */
type WordMatchType =
  | "exception"
  | "proper"
  | "loanword"
  | "anonymous"
  | null;
|
|
23
|
+
/**
 * Two-valued string literal type returned by
 * `ArabicToCyrillicConverter.getHarmonyFromArabicRoot`.
 */
type HarmonyState = "hard" | "soft";
|
|
24
|
+
/**
 * Result shape returned by `ArabicToCyrillicConverter.extractRootAndSuffix`.
 */
interface RootMatch {
|
|
25
|
+
/** One of the `WordMatchType` literals, or `null`. */
matchType: WordMatchType;
|
|
26
|
+
/** A string, or `null`. NOTE(review): presumably `null` when no root was matched — confirm. */
base: string | null;
|
|
27
|
+
/** Always a string (never `null` per this declaration). */
suffix: string;
|
|
28
|
+
}
|
|
29
|
+
/**
 * Ambient declaration of the exported `ArabicToCyrillicConverter` class.
 *
 * Only signatures are visible here. Private members are listed by name
 * without types, which is how declaration files render them.
 * NOTE(review): going by the class name this converts Arabic-script text to
 * Cyrillic; the implementation is not part of this file — confirm there.
 */
declare class ArabicToCyrillicConverter {
|
|
30
|
+
/** Public read-only constant typed as the literal "\u0674" (code point U+0674). */
readonly HAMZA = "\u0674";
|
|
31
|
+
// Private fields: names only, their types are not emitted into this declaration.
private readonly disambiguator;
|
|
32
|
+
private readonly loanwordPrefixTrie;
|
|
33
|
+
private readonly reZwnjEtc;
|
|
34
|
+
private readonly reSpaces;
|
|
35
|
+
private readonly reHyphens;
|
|
36
|
+
private readonly reRedundantYye1;
|
|
37
|
+
private readonly reRedundantYye2;
|
|
38
|
+
private readonly reRedundantYye3;
|
|
39
|
+
// NOTE(review): name looks like a truncation of "reRedundantYa" (compare reRedundantYye1..3) — confirm in the source before renaming.
private readonly reUndantYa;
|
|
40
|
+
private readonly reArabicWords;
|
|
41
|
+
private readonly reCapAfterPunct;
|
|
42
|
+
private readonly reCapAfterQuote;
|
|
43
|
+
private readonly frontVowelsCyr;
|
|
44
|
+
private readonly backVowelsCyr;
|
|
45
|
+
private readonly arabicVowels;
|
|
46
|
+
/** @param options Optional `ArabicToCyrillicOptions`; the argument may be omitted entirely. */
constructor(options?: ArabicToCyrillicOptions);
|
|
47
|
+
// Boolean predicates over a single string argument.
isLoanword(word: string): boolean;
|
|
48
|
+
hasConsonantCluster(word: string): boolean;
|
|
49
|
+
isLoanwordWithEPrefix(word: string): boolean;
|
|
50
|
+
// NOTE(review): returns a boolean although the name says "state"; which value stands for which vowel class is not visible here — confirm.
getCyrillicVowelState(cyrillicWord: string): boolean;
|
|
51
|
+
isValidSuffixSequence(suffix: string): boolean;
|
|
52
|
+
/** @returns A `HarmonyState` value ("soft" or "hard"). */
getHarmonyFromArabicRoot(word: string): HarmonyState;
|
|
53
|
+
/** @returns An array of strings derived from `word`. */
segmentCompoundWord(word: string): string[];
|
|
54
|
+
/** @returns A `RootMatch` object (`matchType`, `base`, `suffix`). */
extractRootAndSuffix(word: string): RootMatch;
|
|
55
|
+
/** Takes a suffix string plus a required boolean `isFront` and returns a string. */
convertSuffixOnly(suffix: string, isFront: boolean): string;
|
|
56
|
+
convertWord(word: string): string;
|
|
57
|
+
private convertWordInternal;
|
|
58
|
+
preprocess(text: string): string;
|
|
59
|
+
private postProcessContextFix;
|
|
60
|
+
private postProcessContextFixAsync;
|
|
61
|
+
// The phrase- and text-level entry points each come in a synchronous form
// returning `string` and an `Async` form returning `Promise<string>`.
convertPhrase(phrase: string): string;
|
|
62
|
+
convertPhraseAsync(phrase: string): Promise<string>;
|
|
63
|
+
convert(text: string): string;
|
|
64
|
+
convertAsync(text: string): Promise<string>;
|
|
65
|
+
}
|
|
66
|
+
/**
 * Synchronous function-style entry point that accepts the same options type
 * as the `ArabicToCyrillicConverter` constructor.
 * NOTE(review): presumably a shorthand for constructing that converter and
 * calling `convert` — the body is not visible in this declaration; confirm.
 *
 * @param text Input string.
 * @param options Optional `ArabicToCyrillicOptions`.
 * @returns The resulting string.
 */
declare function arb2syr(
  text: string,
  options?: ArabicToCyrillicOptions,
): string;
|
|
67
|
+
/**
 * Promise-returning counterpart of `arb2syr` with the same parameters.
 * NOTE(review): presumably delegates to
 * `ArabicToCyrillicConverter.convertAsync` — the body is not visible in this
 * declaration; confirm.
 *
 * @param text Input string.
 * @param options Optional `ArabicToCyrillicOptions`.
 * @returns A promise that resolves to the resulting string.
 */
declare function arb2syrAsync(
  text: string,
  options?: ArabicToCyrillicOptions,
): Promise<string>;
|
|
68
|
+
|
|
69
|
+
/**
 * Ambient declaration of the exported `CyrillicToArabicConverter` class.
 *
 * Only signatures are visible here. Private members are listed by name
 * without types, which is how declaration files render them.
 * NOTE(review): going by the class name this converts Cyrillic text to
 * Arabic script; the implementation is not part of this file — confirm there.
 */
declare class CyrillicToArabicConverter {
|
|
70
|
+
/** Public read-only constant typed as the literal "\u0674" (code point U+0674). */
readonly HAMZA = "\u0674";
|
|
71
|
+
// Private fields: names only, their types are not emitted into this declaration.
private readonly trie;
|
|
72
|
+
private readonly splitter;
|
|
73
|
+
/** @param options Optional `CyrillicToArabicOptions`; the argument may be omitted entirely. */
constructor(options?: CyrillicToArabicOptions);
|
|
74
|
+
private getInitialHarmony;
|
|
75
|
+
private applyHamzaRule;
|
|
76
|
+
/**
 * @param word Input string.
 * @param isSuffix Optional boolean; the default used when omitted is not visible here.
 * @returns The resulting string.
 */
convertWord(word: string, isSuffix?: boolean): string;
|
|
77
|
+
convertCompoundWord(word: string): string;
|
|
78
|
+
/** Text-level entry point: takes a string and returns a string synchronously. */
convert(text: string): string;
|
|
79
|
+
}
|
|
80
|
+
/**
 * Synchronous function-style entry point that accepts the same options type
 * as the `CyrillicToArabicConverter` constructor.
 * NOTE(review): presumably a shorthand for constructing that converter and
 * calling `convert` — the body is not visible in this declaration; confirm.
 *
 * @param text Input string.
 * @param options Optional `CyrillicToArabicOptions`.
 * @returns The resulting string.
 */
declare function syr2arb(
  text: string,
  options?: CyrillicToArabicOptions,
): string;
|
|
81
|
+
|
|
82
|
+
export { ArabicToCyrillicConverter, type ArabicToCyrillicOptions, type ContextDisambiguator, CyrillicToArabicConverter, type CyrillicToArabicOptions, type LexiconData, type MaybePromise, NoopDisambiguator, type RawToken, arb2syr, arb2syrAsync, syr2arb };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
 * Root-word lists that can be supplied through `CyrillicToArabicOptions.lexicon`.
 * Both properties are optional arrays of strings.
 */
interface LexiconData {
|
|
2
|
+
/** Optional list of root strings. NOTE(review): presumably native-vocabulary roots, going by the name — confirm against the implementation. */
nativeRoots?: string[];
|
|
3
|
+
/** Optional list of root strings. NOTE(review): presumably borrowed-word roots, going by the name — confirm against the implementation. */
loanRoots?: string[];
|
|
4
|
+
}
|
|
5
|
+
/**
 * Either a plain `T` or a `Promise` that resolves to `T`.
 * Used as the return type of `ContextDisambiguator.disambiguate`.
 */
type MaybePromise<T> = Promise<T> | T;
|
|
6
|
+
/**
 * Read-only two-element tuple of strings whose positions are labelled
 * `source` and `converted`.
 */
type RawToken = readonly [
  source: string,
  converted: string,
];
|
|
7
|
+
/**
 * Contract for an object that can be passed as
 * `ArabicToCyrillicOptions.disambiguator`.
 */
interface ContextDisambiguator {
|
|
8
|
+
/**
 * @param rawTokens Read-only array of `RawToken` pairs.
 * @param contextSentence A string argument supplied alongside the tokens.
 * @returns An array of strings, either directly or wrapped in a promise.
 *   NOTE(review): how the returned strings map onto `rawTokens` is not
 *   visible in this declaration — confirm against the implementation.
 */
disambiguate(rawTokens: readonly RawToken[], contextSentence: string): MaybePromise<string[]>;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Options object accepted by the `CyrillicToArabicConverter` constructor
 * and by `syr2arb`.
 */
interface CyrillicToArabicOptions {
|
|
11
|
+
/** Optional `LexiconData` value. */
lexicon?: LexiconData;
|
|
12
|
+
}
|
|
13
|
+
/**
 * Options object accepted by the `ArabicToCyrillicConverter` constructor
 * and by `arb2syr` / `arb2syrAsync`.
 */
interface ArabicToCyrillicOptions {
|
|
14
|
+
/** Optional boolean flag. NOTE(review): "Lm" presumably abbreviates "language model" — confirm; the default is not visible here. */
useLm?: boolean;
|
|
15
|
+
/** Optional `ContextDisambiguator` implementation. */
disambiguator?: ContextDisambiguator;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
 * Exported class that implements `ContextDisambiguator`.
 * NOTE(review): the name suggests a pass-through implementation; the body
 * is not part of this declaration file, so confirm before relying on that.
 */
declare class NoopDisambiguator implements ContextDisambiguator {
|
|
19
|
+
/**
 * Declares only the `rawTokens` parameter and returns `string[]`
 * synchronously (no promise).
 */
disambiguate(rawTokens: readonly RawToken[]): string[];
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
 * Values permitted for `RootMatch.matchType`: one of four string literals,
 * or `null`.
 */
type WordMatchType =
  | "exception"
  | "proper"
  | "loanword"
  | "anonymous"
  | null;
|
|
23
|
+
/**
 * Two-valued string literal type returned by
 * `ArabicToCyrillicConverter.getHarmonyFromArabicRoot`.
 */
type HarmonyState = "hard" | "soft";
|
|
24
|
+
/**
 * Result shape returned by `ArabicToCyrillicConverter.extractRootAndSuffix`.
 */
interface RootMatch {
|
|
25
|
+
/** One of the `WordMatchType` literals, or `null`. */
matchType: WordMatchType;
|
|
26
|
+
/** A string, or `null`. NOTE(review): presumably `null` when no root was matched — confirm. */
base: string | null;
|
|
27
|
+
/** Always a string (never `null` per this declaration). */
suffix: string;
|
|
28
|
+
}
|
|
29
|
+
/**
 * Ambient declaration of the exported `ArabicToCyrillicConverter` class.
 *
 * Only signatures are visible here. Private members are listed by name
 * without types, which is how declaration files render them.
 * NOTE(review): going by the class name this converts Arabic-script text to
 * Cyrillic; the implementation is not part of this file — confirm there.
 */
declare class ArabicToCyrillicConverter {
|
|
30
|
+
/** Public read-only constant typed as the literal "\u0674" (code point U+0674). */
readonly HAMZA = "\u0674";
|
|
31
|
+
// Private fields: names only, their types are not emitted into this declaration.
private readonly disambiguator;
|
|
32
|
+
private readonly loanwordPrefixTrie;
|
|
33
|
+
private readonly reZwnjEtc;
|
|
34
|
+
private readonly reSpaces;
|
|
35
|
+
private readonly reHyphens;
|
|
36
|
+
private readonly reRedundantYye1;
|
|
37
|
+
private readonly reRedundantYye2;
|
|
38
|
+
private readonly reRedundantYye3;
|
|
39
|
+
// NOTE(review): name looks like a truncation of "reRedundantYa" (compare reRedundantYye1..3) — confirm in the source before renaming.
private readonly reUndantYa;
|
|
40
|
+
private readonly reArabicWords;
|
|
41
|
+
private readonly reCapAfterPunct;
|
|
42
|
+
private readonly reCapAfterQuote;
|
|
43
|
+
private readonly frontVowelsCyr;
|
|
44
|
+
private readonly backVowelsCyr;
|
|
45
|
+
private readonly arabicVowels;
|
|
46
|
+
/** @param options Optional `ArabicToCyrillicOptions`; the argument may be omitted entirely. */
constructor(options?: ArabicToCyrillicOptions);
|
|
47
|
+
// Boolean predicates over a single string argument.
isLoanword(word: string): boolean;
|
|
48
|
+
hasConsonantCluster(word: string): boolean;
|
|
49
|
+
isLoanwordWithEPrefix(word: string): boolean;
|
|
50
|
+
// NOTE(review): returns a boolean although the name says "state"; which value stands for which vowel class is not visible here — confirm.
getCyrillicVowelState(cyrillicWord: string): boolean;
|
|
51
|
+
isValidSuffixSequence(suffix: string): boolean;
|
|
52
|
+
/** @returns A `HarmonyState` value ("soft" or "hard"). */
getHarmonyFromArabicRoot(word: string): HarmonyState;
|
|
53
|
+
/** @returns An array of strings derived from `word`. */
segmentCompoundWord(word: string): string[];
|
|
54
|
+
/** @returns A `RootMatch` object (`matchType`, `base`, `suffix`). */
extractRootAndSuffix(word: string): RootMatch;
|
|
55
|
+
/** Takes a suffix string plus a required boolean `isFront` and returns a string. */
convertSuffixOnly(suffix: string, isFront: boolean): string;
|
|
56
|
+
convertWord(word: string): string;
|
|
57
|
+
private convertWordInternal;
|
|
58
|
+
preprocess(text: string): string;
|
|
59
|
+
private postProcessContextFix;
|
|
60
|
+
private postProcessContextFixAsync;
|
|
61
|
+
// The phrase- and text-level entry points each come in a synchronous form
// returning `string` and an `Async` form returning `Promise<string>`.
convertPhrase(phrase: string): string;
|
|
62
|
+
convertPhraseAsync(phrase: string): Promise<string>;
|
|
63
|
+
convert(text: string): string;
|
|
64
|
+
convertAsync(text: string): Promise<string>;
|
|
65
|
+
}
|
|
66
|
+
/**
 * Synchronous function-style entry point that accepts the same options type
 * as the `ArabicToCyrillicConverter` constructor.
 * NOTE(review): presumably a shorthand for constructing that converter and
 * calling `convert` — the body is not visible in this declaration; confirm.
 *
 * @param text Input string.
 * @param options Optional `ArabicToCyrillicOptions`.
 * @returns The resulting string.
 */
declare function arb2syr(
  text: string,
  options?: ArabicToCyrillicOptions,
): string;
|
|
67
|
+
/**
 * Promise-returning counterpart of `arb2syr` with the same parameters.
 * NOTE(review): presumably delegates to
 * `ArabicToCyrillicConverter.convertAsync` — the body is not visible in this
 * declaration; confirm.
 *
 * @param text Input string.
 * @param options Optional `ArabicToCyrillicOptions`.
 * @returns A promise that resolves to the resulting string.
 */
declare function arb2syrAsync(
  text: string,
  options?: ArabicToCyrillicOptions,
): Promise<string>;
|
|
68
|
+
|
|
69
|
+
/**
 * Ambient declaration of the exported `CyrillicToArabicConverter` class.
 *
 * Only signatures are visible here. Private members are listed by name
 * without types, which is how declaration files render them.
 * NOTE(review): going by the class name this converts Cyrillic text to
 * Arabic script; the implementation is not part of this file — confirm there.
 */
declare class CyrillicToArabicConverter {
|
|
70
|
+
/** Public read-only constant typed as the literal "\u0674" (code point U+0674). */
readonly HAMZA = "\u0674";
|
|
71
|
+
// Private fields: names only, their types are not emitted into this declaration.
private readonly trie;
|
|
72
|
+
private readonly splitter;
|
|
73
|
+
/** @param options Optional `CyrillicToArabicOptions`; the argument may be omitted entirely. */
constructor(options?: CyrillicToArabicOptions);
|
|
74
|
+
private getInitialHarmony;
|
|
75
|
+
private applyHamzaRule;
|
|
76
|
+
/**
 * @param word Input string.
 * @param isSuffix Optional boolean; the default used when omitted is not visible here.
 * @returns The resulting string.
 */
convertWord(word: string, isSuffix?: boolean): string;
|
|
77
|
+
convertCompoundWord(word: string): string;
|
|
78
|
+
/** Text-level entry point: takes a string and returns a string synchronously. */
convert(text: string): string;
|
|
79
|
+
}
|
|
80
|
+
/**
 * Synchronous function-style entry point that accepts the same options type
 * as the `CyrillicToArabicConverter` constructor.
 * NOTE(review): presumably a shorthand for constructing that converter and
 * calling `convert` — the body is not visible in this declaration; confirm.
 *
 * @param text Input string.
 * @param options Optional `CyrillicToArabicOptions`.
 * @returns The resulting string.
 */
declare function syr2arb(
  text: string,
  options?: CyrillicToArabicOptions,
): string;
|
|
81
|
+
|
|
82
|
+
export { ArabicToCyrillicConverter, type ArabicToCyrillicOptions, type ContextDisambiguator, CyrillicToArabicConverter, type CyrillicToArabicOptions, type LexiconData, type MaybePromise, NoopDisambiguator, type RawToken, arb2syr, arb2syrAsync, syr2arb };
|