@sarmay/kaz-converter 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -134,11 +134,38 @@ var EXCEPTIONS = {
134
134
  "\u0627\u0644\u0645\u0627\u062A\u0649": "\u0410\u043B\u043C\u0430\u0442\u044B",
135
135
  "\u0627\u0633\u062A\u0627\u0646\u0627": "\u0410\u0441\u0442\u0430\u043D\u0430",
136
136
  "\u0642\u0627\u0632\u0627\u0642\u0633\u062A\u0627\u0646": "\u049A\u0430\u0437\u0430\u049B\u0441\u0442\u0430\u043D",
137
+ "\u0621\u0627\u0628\u062F\u0649": "\u04D9\u0431\u0434\u0456",
138
+ "\u0627\u0628\u062F\u0649": "\u04D9\u0431\u0434\u0456",
139
+ "\u0627\u0645\u0649\u0631": "\u04D9\u043C\u0456\u0440",
140
+ "\u0628\u06C7\u0644\u0627\u0646": "\u0431\u04B1\u043B\u0430\u043D",
141
+ "\u0628\u0627\u0642\u064A": "\u0431\u0430\u049B\u0438",
142
+ "\u0628\u0627\u0642\u064A\u062A": "\u0431\u0430\u049B\u0438\u0442",
143
+ "\u0628\u0627\u0642\u0649\u062A": "\u0431\u0430\u049B\u044B\u0442",
144
+ "\u0646\u0627\u0633\u0649\u0631": "\u043D\u0430\u0441\u044B\u0440",
145
+ "\u062F\u0627\u06CB\u0644\u06D5\u062A": "\u0434\u04D9\u0443\u043B\u0435\u0442",
146
+ "\u0628\u0648\u0644": "\u0431\u043E\u043B",
147
+ "\u0628\u0648\u0644\u0633\u0649\u0646": "\u0431\u043E\u043B\u0441\u044B\u043D",
148
+ "\u0628\u0648\u0644\u0644\u0627": "\u0431\u043E\u043B\u043B\u0430",
149
+ "\u0642\u0648\u062C\u0627": "\u049B\u043E\u0436\u0430",
150
+ "\u0639\u0648\u062C\u0627": "\u0493\u043E\u0436\u0430",
151
+ "\u0646\u0627\u0631": "\u043D\u0430\u0440",
152
+ "\u0646\u0627\u0632\u0627\u0631": "\u043D\u0430\u0437\u0430\u0440",
153
+ "\u0633\u0627\u0644\u064A": "\u0441\u0430\u043B\u0438",
154
+ "\u062F\u064A\u0627\u0633": "\u0434\u0438\u0430\u0441",
155
+ "\u062C\u0649\u0628\u06D5\u0643": "\u0436\u0456\u0431\u0435\u043A",
156
+ "\u062D\u0627\u0643\u0649\u0645": "\u0445\u0430\u043A\u0456\u043C",
157
+ "\u0643\u064A\u0631\u0627": "\u043A\u0438\u0440\u0430",
158
+ "\u064A\u0643\u0627\u0645\u0627\u0644": "\u0438\u043A\u0430\u043C\u0430\u043B",
137
159
  "\u062C\u06C7\u06AD\u06AF\u0648": "\u0416\u04B1\u04A3\u0433\u043E",
138
160
  "\u0634\u064A": "\u0421\u0438",
139
161
  "\u062C\u064A\u0646\u067E\u064A\u06AD": "\u0426\u0437\u0438\u043D\u044C\u043F\u0438\u043D",
140
162
  "\u0643\u0649\u062A\u0627\u067E": "\u043A\u0456\u0442\u0430\u043F",
141
163
  "\u0631\u0627\u062D\u0645\u06D5\u062A": "\u0440\u0430\u0445\u043C\u0435\u0442",
164
+ "\u0627\u062D\u0645\u06D5\u062A": "\u0430\u0445\u043C\u0435\u0442",
165
+ "\u0645\u0627\u062D\u0645\u06D5\u062A": "\u043C\u0430\u0445\u043C\u0435\u0442",
166
+ "\u064A\u0627\u0631": "\u0438\u044F\u0440",
167
+ "\u064A\u0627\u0644": "\u0438\u044F\u043B",
168
+ "\u064A\u0627\u0632": "\u0438\u044F\u0437",
142
169
  "\u0627\u06CB\u0649\u0644": "\u0430\u0443\u044B\u043B",
143
170
  "\u06AF\u0628": "\u0413\u0411",
144
171
  "\u067E\u0631\u0648\u06AF\u0631\u06D5\u0633": "\u043F\u0440\u043E\u0433\u0440\u0435\u0441\u0441",
@@ -443,6 +470,75 @@ var COMPOUND_PIVOT_ROOTS = [
443
470
  "\u0648\u064A\u0649\u0646",
444
471
  "\u062A\u06C7\u0633\u062A\u0649\u06AF"
445
472
  ];
473
+ var NAME_PREFIX_COMPONENTS = [
474
+ "\u0621\u0627\u0628\u062F\u0649",
475
+ "\u0627\u0628\u062F\u0649",
476
+ "\u062F\u0627\u06CB\u0644\u06D5\u062A",
477
+ "\u06AF\u06C7\u0644",
478
+ "\u0628\u06D5\u0643",
479
+ "\u0646\u06C7\u0631"
480
+ ];
481
+ var NAME_SUFFIX_COMPONENTS = [
482
+ "\u0645\u06C7\u062D\u0627\u0645\u0628\u06D5\u062A",
483
+ "\u0627\u062D\u0645\u06D5\u062A",
484
+ "\u062D\u0627\u0644\u0649\u0642",
485
+ "\u0642\u0627\u0633\u0649\u0645",
486
+ "\u0633\u0627\u062F\u0649\u0642",
487
+ "\u0633\u0627\u0644\u0627\u0645",
488
+ "\u0628\u0648\u0644\u0633\u0649\u0646",
489
+ "\u0628\u0648\u0644\u0644\u0627",
490
+ "\u0646\u0627\u0633\u0649\u0631",
491
+ "\u0646\u0627\u0632\u0627\u0631",
492
+ "\u0628\u064A\u0631\u0627",
493
+ "\u0628\u064A\u0628\u0627",
494
+ "\u064A\u067E\u0627",
495
+ "\u0631\u064A\u067E\u0627",
496
+ "\u0646\u064A\u067E\u0627",
497
+ "\u064A\u0645\u0627",
498
+ "\u0633\u064A\u0645\u0627",
499
+ "\u0645\u064A\u0644\u0627",
500
+ "\u0628\u064A\u0644\u0627",
501
+ "\u062C\u064A\u062F\u0627",
502
+ "\u0632\u064A\u0644\u0627",
503
+ "\u0641\u064A\u0631\u0627",
504
+ "\u0646\u0627\u0631\u0627",
505
+ "\u0644\u064A\u067E\u0627",
506
+ "\u0644\u064A\u0645\u0627",
507
+ "\u062F\u064A\u0628\u0627",
508
+ "\u062F\u064A\u0644\u0627",
509
+ "\u062F\u064A\u0631\u0627",
510
+ "\u0644\u064A\u0646\u0627",
511
+ "\u0643\u064A\u0631\u0627",
512
+ "\u0646\u0646\u0627",
513
+ "\u0627\u062F\u0627",
514
+ "\u0628\u0627\u0631\u0649\u0633",
515
+ "\u0643\u06D5\u0644\u062F\u0649",
516
+ "\u06AF\u06D5\u0644\u062F\u0649",
517
+ "\u0628\u06C7\u0644\u0627\u0646",
518
+ "\u064A\u0643\u0627\u0645\u0627\u0644",
519
+ "\u0628\u0627\u0642\u0649\u062A",
520
+ "\u0628\u0627\u0642\u064A\u062A",
521
+ "\u0628\u0627\u0642\u064A",
522
+ "\u0642\u0648\u062C\u0627",
523
+ "\u0639\u0648\u062C\u0627",
524
+ "\u0633\u0627\u0644\u064A",
525
+ "\u0642\u0627\u0644\u064A",
526
+ "\u062F\u064A\u064A\u0627\u0631",
527
+ "\u064A\u064A\u0627\u0631",
528
+ "\u064A\u064A\u0627\u0632",
529
+ "\u064A\u064A\u0627\u0633",
530
+ "\u064A\u064A\u0627",
531
+ "\u064A\u0627\u0631",
532
+ "\u064A\u0627\u0644",
533
+ "\u064A\u0627\u0632",
534
+ "\u0646\u0627\u0631",
535
+ "\u062C\u0627\u0646",
536
+ "\u0646\u06C7\u0631",
537
+ "\u0628\u06D5\u0643",
538
+ "\u0628\u0627\u064A",
539
+ "\u062D\u0627\u0646",
540
+ "\u06AF\u06C7\u0644"
541
+ ];
446
542
  var IMPLICIT_SOFT_ROOTS = /* @__PURE__ */ new Set([
447
543
  "\u06C7\u0645\u0649\u062A",
448
544
  "\u062A\u06C7\u0628",
@@ -578,6 +674,7 @@ var ArabicToCyrillicConverter = class {
578
674
  );
579
675
  }
580
676
  this.disambiguator = options.disambiguator ?? new NoopDisambiguator();
677
+ this.nameYSequenceStyle = options.nameYSequenceStyle ?? "normalize";
581
678
  for (const prefix of LOANWORD_PREFIXES) {
582
679
  this.loanwordPrefixTrie.insert(prefix);
583
680
  }
@@ -635,6 +732,36 @@ var ArabicToCyrillicConverter = class {
635
732
  }
636
733
  return false;
637
734
  }
735
+ crossesProtectedNameEnding(word, splitIndex) {
736
+ return NAME_SUFFIX_COMPONENTS.some((ending) => {
737
+ if (!word.endsWith(ending)) {
738
+ return false;
739
+ }
740
+ const protectedStart = word.length - ending.length;
741
+ return splitIndex > protectedStart && splitIndex < word.length;
742
+ });
743
+ }
744
+ convertNameYSequence(word) {
745
+ const normalized = {
746
+ "\u064A\u064A\u0627": "\u0438\u044F",
747
+ "\u064A\u064A\u0627\u0631": "\u0438\u044F\u0440",
748
+ "\u064A\u064A\u0627\u0632": "\u0438\u044F\u0437",
749
+ "\u064A\u064A\u0627\u0633": "\u0438\u044F\u0441"
750
+ };
751
+ const preserved = {
752
+ "\u064A\u064A\u0627": "\u0438\u0439\u0430",
753
+ "\u064A\u064A\u0627\u0631": "\u0438\u0439\u0430\u0440",
754
+ "\u064A\u064A\u0627\u0632": "\u0438\u0439\u0430\u0437",
755
+ "\u064A\u064A\u0627\u0633": "\u0438\u0439\u0430\u0441"
756
+ };
757
+ if (!(word in normalized)) {
758
+ return null;
759
+ }
760
+ if (this.nameYSequenceStyle === "preserve") {
761
+ return preserved[word];
762
+ }
763
+ return normalized[word];
764
+ }
638
765
  isValidSuffixSequence(suffix) {
639
766
  if (!suffix) {
640
767
  return true;
@@ -685,6 +812,16 @@ var ArabicToCyrillicConverter = class {
685
812
  if (word.includes("-")) {
686
813
  return word.split("-");
687
814
  }
815
+ for (const prefix of NAME_PREFIX_COMPONENTS) {
816
+ if (word.startsWith(prefix) && word.length > prefix.length) {
817
+ return [word.slice(0, prefix.length), word.slice(prefix.length)];
818
+ }
819
+ }
820
+ for (const suffix of NAME_SUFFIX_COMPONENTS) {
821
+ if (word.endsWith(suffix) && word.length > suffix.length) {
822
+ return [word.slice(0, -suffix.length), suffix];
823
+ }
824
+ }
688
825
  if (word.startsWith(this.HAMZA)) {
689
826
  return [word];
690
827
  }
@@ -731,6 +868,9 @@ var ArabicToCyrillicConverter = class {
731
868
  if (![...prefix].some((char) => this.arabicVowels.has(char))) {
732
869
  continue;
733
870
  }
871
+ if (this.crossesProtectedNameEnding(word, length)) {
872
+ continue;
873
+ }
734
874
  if (this.isValidSuffixSequence(suffix)) {
735
875
  return { matchType: "anonymous", base: prefix, suffix };
736
876
  }
@@ -829,12 +969,20 @@ var ArabicToCyrillicConverter = class {
829
969
  if (!word) {
830
970
  return word;
831
971
  }
972
+ const nameYSequence = this.convertNameYSequence(word);
973
+ if (nameYSequence) {
974
+ return nameYSequence;
975
+ }
832
976
  if (hasKey(EXCEPTIONS, word)) {
833
977
  return EXCEPTIONS[word];
834
978
  }
835
979
  if (hasKey(PROPER_NOUNS, word)) {
836
980
  return PROPER_NOUNS[word];
837
981
  }
982
+ const segments = this.segmentCompoundWord(word);
983
+ if (segments.length > 1) {
984
+ return word.includes("-") ? segments.map((segment) => this.convertWord(segment)).join("-") : segments.map((segment) => this.convertWord(segment)).join("");
985
+ }
838
986
  if (LOANWORD_EXACT.has(word)) {
839
987
  return this.convertWordInternal(word);
840
988
  }
package/dist/index.d.cts CHANGED
@@ -4,6 +4,7 @@ interface LexiconData {
4
4
  }
5
5
  type MaybePromise<T> = T | Promise<T>;
6
6
  type RawToken = readonly [source: string, converted: string];
7
+ type NameYSequenceStyle = "normalize" | "preserve";
7
8
  interface ContextDisambiguator {
8
9
  disambiguate(rawTokens: readonly RawToken[], contextSentence: string): MaybePromise<string[]>;
9
10
  }
@@ -13,6 +14,7 @@ interface CyrillicToArabicOptions {
13
14
  interface ArabicToCyrillicOptions {
14
15
  useLm?: boolean;
15
16
  disambiguator?: ContextDisambiguator;
17
+ nameYSequenceStyle?: NameYSequenceStyle;
16
18
  }
17
19
 
18
20
  declare class NoopDisambiguator implements ContextDisambiguator {
@@ -29,6 +31,7 @@ interface RootMatch {
29
31
  declare class ArabicToCyrillicConverter {
30
32
  readonly HAMZA = "\u0674";
31
33
  private readonly disambiguator;
34
+ private readonly nameYSequenceStyle;
32
35
  private readonly loanwordPrefixTrie;
33
36
  private readonly reZwnjEtc;
34
37
  private readonly reSpaces;
@@ -48,6 +51,8 @@ declare class ArabicToCyrillicConverter {
48
51
  hasConsonantCluster(word: string): boolean;
49
52
  isLoanwordWithEPrefix(word: string): boolean;
50
53
  getCyrillicVowelState(cyrillicWord: string): boolean;
54
+ private crossesProtectedNameEnding;
55
+ private convertNameYSequence;
51
56
  isValidSuffixSequence(suffix: string): boolean;
52
57
  getHarmonyFromArabicRoot(word: string): HarmonyState;
53
58
  segmentCompoundWord(word: string): string[];
@@ -79,4 +84,4 @@ declare class CyrillicToArabicConverter {
79
84
  }
80
85
  declare function syr2arb(text: string, options?: CyrillicToArabicOptions): string;
81
86
 
82
- export { ArabicToCyrillicConverter, type ArabicToCyrillicOptions, type ContextDisambiguator, CyrillicToArabicConverter, type CyrillicToArabicOptions, type LexiconData, type MaybePromise, NoopDisambiguator, type RawToken, arb2syr, arb2syrAsync, syr2arb };
87
+ export { ArabicToCyrillicConverter, type ArabicToCyrillicOptions, type ContextDisambiguator, CyrillicToArabicConverter, type CyrillicToArabicOptions, type LexiconData, type MaybePromise, type NameYSequenceStyle, NoopDisambiguator, type RawToken, arb2syr, arb2syrAsync, syr2arb };
package/dist/index.d.ts CHANGED
@@ -4,6 +4,7 @@ interface LexiconData {
4
4
  }
5
5
  type MaybePromise<T> = T | Promise<T>;
6
6
  type RawToken = readonly [source: string, converted: string];
7
+ type NameYSequenceStyle = "normalize" | "preserve";
7
8
  interface ContextDisambiguator {
8
9
  disambiguate(rawTokens: readonly RawToken[], contextSentence: string): MaybePromise<string[]>;
9
10
  }
@@ -13,6 +14,7 @@ interface CyrillicToArabicOptions {
13
14
  interface ArabicToCyrillicOptions {
14
15
  useLm?: boolean;
15
16
  disambiguator?: ContextDisambiguator;
17
+ nameYSequenceStyle?: NameYSequenceStyle;
16
18
  }
17
19
 
18
20
  declare class NoopDisambiguator implements ContextDisambiguator {
@@ -29,6 +31,7 @@ interface RootMatch {
29
31
  declare class ArabicToCyrillicConverter {
30
32
  readonly HAMZA = "\u0674";
31
33
  private readonly disambiguator;
34
+ private readonly nameYSequenceStyle;
32
35
  private readonly loanwordPrefixTrie;
33
36
  private readonly reZwnjEtc;
34
37
  private readonly reSpaces;
@@ -48,6 +51,8 @@ declare class ArabicToCyrillicConverter {
48
51
  hasConsonantCluster(word: string): boolean;
49
52
  isLoanwordWithEPrefix(word: string): boolean;
50
53
  getCyrillicVowelState(cyrillicWord: string): boolean;
54
+ private crossesProtectedNameEnding;
55
+ private convertNameYSequence;
51
56
  isValidSuffixSequence(suffix: string): boolean;
52
57
  getHarmonyFromArabicRoot(word: string): HarmonyState;
53
58
  segmentCompoundWord(word: string): string[];
@@ -79,4 +84,4 @@ declare class CyrillicToArabicConverter {
79
84
  }
80
85
  declare function syr2arb(text: string, options?: CyrillicToArabicOptions): string;
81
86
 
82
- export { ArabicToCyrillicConverter, type ArabicToCyrillicOptions, type ContextDisambiguator, CyrillicToArabicConverter, type CyrillicToArabicOptions, type LexiconData, type MaybePromise, NoopDisambiguator, type RawToken, arb2syr, arb2syrAsync, syr2arb };
87
+ export { ArabicToCyrillicConverter, type ArabicToCyrillicOptions, type ContextDisambiguator, CyrillicToArabicConverter, type CyrillicToArabicOptions, type LexiconData, type MaybePromise, type NameYSequenceStyle, NoopDisambiguator, type RawToken, arb2syr, arb2syrAsync, syr2arb };
package/dist/index.js CHANGED
@@ -103,11 +103,38 @@ var EXCEPTIONS = {
103
103
  "\u0627\u0644\u0645\u0627\u062A\u0649": "\u0410\u043B\u043C\u0430\u0442\u044B",
104
104
  "\u0627\u0633\u062A\u0627\u0646\u0627": "\u0410\u0441\u0442\u0430\u043D\u0430",
105
105
  "\u0642\u0627\u0632\u0627\u0642\u0633\u062A\u0627\u0646": "\u049A\u0430\u0437\u0430\u049B\u0441\u0442\u0430\u043D",
106
+ "\u0621\u0627\u0628\u062F\u0649": "\u04D9\u0431\u0434\u0456",
107
+ "\u0627\u0628\u062F\u0649": "\u04D9\u0431\u0434\u0456",
108
+ "\u0627\u0645\u0649\u0631": "\u04D9\u043C\u0456\u0440",
109
+ "\u0628\u06C7\u0644\u0627\u0646": "\u0431\u04B1\u043B\u0430\u043D",
110
+ "\u0628\u0627\u0642\u064A": "\u0431\u0430\u049B\u0438",
111
+ "\u0628\u0627\u0642\u064A\u062A": "\u0431\u0430\u049B\u0438\u0442",
112
+ "\u0628\u0627\u0642\u0649\u062A": "\u0431\u0430\u049B\u044B\u0442",
113
+ "\u0646\u0627\u0633\u0649\u0631": "\u043D\u0430\u0441\u044B\u0440",
114
+ "\u062F\u0627\u06CB\u0644\u06D5\u062A": "\u0434\u04D9\u0443\u043B\u0435\u0442",
115
+ "\u0628\u0648\u0644": "\u0431\u043E\u043B",
116
+ "\u0628\u0648\u0644\u0633\u0649\u0646": "\u0431\u043E\u043B\u0441\u044B\u043D",
117
+ "\u0628\u0648\u0644\u0644\u0627": "\u0431\u043E\u043B\u043B\u0430",
118
+ "\u0642\u0648\u062C\u0627": "\u049B\u043E\u0436\u0430",
119
+ "\u0639\u0648\u062C\u0627": "\u0493\u043E\u0436\u0430",
120
+ "\u0646\u0627\u0631": "\u043D\u0430\u0440",
121
+ "\u0646\u0627\u0632\u0627\u0631": "\u043D\u0430\u0437\u0430\u0440",
122
+ "\u0633\u0627\u0644\u064A": "\u0441\u0430\u043B\u0438",
123
+ "\u062F\u064A\u0627\u0633": "\u0434\u0438\u0430\u0441",
124
+ "\u062C\u0649\u0628\u06D5\u0643": "\u0436\u0456\u0431\u0435\u043A",
125
+ "\u062D\u0627\u0643\u0649\u0645": "\u0445\u0430\u043A\u0456\u043C",
126
+ "\u0643\u064A\u0631\u0627": "\u043A\u0438\u0440\u0430",
127
+ "\u064A\u0643\u0627\u0645\u0627\u0644": "\u0438\u043A\u0430\u043C\u0430\u043B",
106
128
  "\u062C\u06C7\u06AD\u06AF\u0648": "\u0416\u04B1\u04A3\u0433\u043E",
107
129
  "\u0634\u064A": "\u0421\u0438",
108
130
  "\u062C\u064A\u0646\u067E\u064A\u06AD": "\u0426\u0437\u0438\u043D\u044C\u043F\u0438\u043D",
109
131
  "\u0643\u0649\u062A\u0627\u067E": "\u043A\u0456\u0442\u0430\u043F",
110
132
  "\u0631\u0627\u062D\u0645\u06D5\u062A": "\u0440\u0430\u0445\u043C\u0435\u0442",
133
+ "\u0627\u062D\u0645\u06D5\u062A": "\u0430\u0445\u043C\u0435\u0442",
134
+ "\u0645\u0627\u062D\u0645\u06D5\u062A": "\u043C\u0430\u0445\u043C\u0435\u0442",
135
+ "\u064A\u0627\u0631": "\u0438\u044F\u0440",
136
+ "\u064A\u0627\u0644": "\u0438\u044F\u043B",
137
+ "\u064A\u0627\u0632": "\u0438\u044F\u0437",
111
138
  "\u0627\u06CB\u0649\u0644": "\u0430\u0443\u044B\u043B",
112
139
  "\u06AF\u0628": "\u0413\u0411",
113
140
  "\u067E\u0631\u0648\u06AF\u0631\u06D5\u0633": "\u043F\u0440\u043E\u0433\u0440\u0435\u0441\u0441",
@@ -412,6 +439,75 @@ var COMPOUND_PIVOT_ROOTS = [
412
439
  "\u0648\u064A\u0649\u0646",
413
440
  "\u062A\u06C7\u0633\u062A\u0649\u06AF"
414
441
  ];
442
+ var NAME_PREFIX_COMPONENTS = [
443
+ "\u0621\u0627\u0628\u062F\u0649",
444
+ "\u0627\u0628\u062F\u0649",
445
+ "\u062F\u0627\u06CB\u0644\u06D5\u062A",
446
+ "\u06AF\u06C7\u0644",
447
+ "\u0628\u06D5\u0643",
448
+ "\u0646\u06C7\u0631"
449
+ ];
450
+ var NAME_SUFFIX_COMPONENTS = [
451
+ "\u0645\u06C7\u062D\u0627\u0645\u0628\u06D5\u062A",
452
+ "\u0627\u062D\u0645\u06D5\u062A",
453
+ "\u062D\u0627\u0644\u0649\u0642",
454
+ "\u0642\u0627\u0633\u0649\u0645",
455
+ "\u0633\u0627\u062F\u0649\u0642",
456
+ "\u0633\u0627\u0644\u0627\u0645",
457
+ "\u0628\u0648\u0644\u0633\u0649\u0646",
458
+ "\u0628\u0648\u0644\u0644\u0627",
459
+ "\u0646\u0627\u0633\u0649\u0631",
460
+ "\u0646\u0627\u0632\u0627\u0631",
461
+ "\u0628\u064A\u0631\u0627",
462
+ "\u0628\u064A\u0628\u0627",
463
+ "\u064A\u067E\u0627",
464
+ "\u0631\u064A\u067E\u0627",
465
+ "\u0646\u064A\u067E\u0627",
466
+ "\u064A\u0645\u0627",
467
+ "\u0633\u064A\u0645\u0627",
468
+ "\u0645\u064A\u0644\u0627",
469
+ "\u0628\u064A\u0644\u0627",
470
+ "\u062C\u064A\u062F\u0627",
471
+ "\u0632\u064A\u0644\u0627",
472
+ "\u0641\u064A\u0631\u0627",
473
+ "\u0646\u0627\u0631\u0627",
474
+ "\u0644\u064A\u067E\u0627",
475
+ "\u0644\u064A\u0645\u0627",
476
+ "\u062F\u064A\u0628\u0627",
477
+ "\u062F\u064A\u0644\u0627",
478
+ "\u062F\u064A\u0631\u0627",
479
+ "\u0644\u064A\u0646\u0627",
480
+ "\u0643\u064A\u0631\u0627",
481
+ "\u0646\u0646\u0627",
482
+ "\u0627\u062F\u0627",
483
+ "\u0628\u0627\u0631\u0649\u0633",
484
+ "\u0643\u06D5\u0644\u062F\u0649",
485
+ "\u06AF\u06D5\u0644\u062F\u0649",
486
+ "\u0628\u06C7\u0644\u0627\u0646",
487
+ "\u064A\u0643\u0627\u0645\u0627\u0644",
488
+ "\u0628\u0627\u0642\u0649\u062A",
489
+ "\u0628\u0627\u0642\u064A\u062A",
490
+ "\u0628\u0627\u0642\u064A",
491
+ "\u0642\u0648\u062C\u0627",
492
+ "\u0639\u0648\u062C\u0627",
493
+ "\u0633\u0627\u0644\u064A",
494
+ "\u0642\u0627\u0644\u064A",
495
+ "\u062F\u064A\u064A\u0627\u0631",
496
+ "\u064A\u064A\u0627\u0631",
497
+ "\u064A\u064A\u0627\u0632",
498
+ "\u064A\u064A\u0627\u0633",
499
+ "\u064A\u064A\u0627",
500
+ "\u064A\u0627\u0631",
501
+ "\u064A\u0627\u0644",
502
+ "\u064A\u0627\u0632",
503
+ "\u0646\u0627\u0631",
504
+ "\u062C\u0627\u0646",
505
+ "\u0646\u06C7\u0631",
506
+ "\u0628\u06D5\u0643",
507
+ "\u0628\u0627\u064A",
508
+ "\u062D\u0627\u0646",
509
+ "\u06AF\u06C7\u0644"
510
+ ];
415
511
  var IMPLICIT_SOFT_ROOTS = /* @__PURE__ */ new Set([
416
512
  "\u06C7\u0645\u0649\u062A",
417
513
  "\u062A\u06C7\u0628",
@@ -547,6 +643,7 @@ var ArabicToCyrillicConverter = class {
547
643
  );
548
644
  }
549
645
  this.disambiguator = options.disambiguator ?? new NoopDisambiguator();
646
+ this.nameYSequenceStyle = options.nameYSequenceStyle ?? "normalize";
550
647
  for (const prefix of LOANWORD_PREFIXES) {
551
648
  this.loanwordPrefixTrie.insert(prefix);
552
649
  }
@@ -604,6 +701,36 @@ var ArabicToCyrillicConverter = class {
604
701
  }
605
702
  return false;
606
703
  }
704
+ crossesProtectedNameEnding(word, splitIndex) {
705
+ return NAME_SUFFIX_COMPONENTS.some((ending) => {
706
+ if (!word.endsWith(ending)) {
707
+ return false;
708
+ }
709
+ const protectedStart = word.length - ending.length;
710
+ return splitIndex > protectedStart && splitIndex < word.length;
711
+ });
712
+ }
713
+ convertNameYSequence(word) {
714
+ const normalized = {
715
+ "\u064A\u064A\u0627": "\u0438\u044F",
716
+ "\u064A\u064A\u0627\u0631": "\u0438\u044F\u0440",
717
+ "\u064A\u064A\u0627\u0632": "\u0438\u044F\u0437",
718
+ "\u064A\u064A\u0627\u0633": "\u0438\u044F\u0441"
719
+ };
720
+ const preserved = {
721
+ "\u064A\u064A\u0627": "\u0438\u0439\u0430",
722
+ "\u064A\u064A\u0627\u0631": "\u0438\u0439\u0430\u0440",
723
+ "\u064A\u064A\u0627\u0632": "\u0438\u0439\u0430\u0437",
724
+ "\u064A\u064A\u0627\u0633": "\u0438\u0439\u0430\u0441"
725
+ };
726
+ if (!(word in normalized)) {
727
+ return null;
728
+ }
729
+ if (this.nameYSequenceStyle === "preserve") {
730
+ return preserved[word];
731
+ }
732
+ return normalized[word];
733
+ }
607
734
  isValidSuffixSequence(suffix) {
608
735
  if (!suffix) {
609
736
  return true;
@@ -654,6 +781,16 @@ var ArabicToCyrillicConverter = class {
654
781
  if (word.includes("-")) {
655
782
  return word.split("-");
656
783
  }
784
+ for (const prefix of NAME_PREFIX_COMPONENTS) {
785
+ if (word.startsWith(prefix) && word.length > prefix.length) {
786
+ return [word.slice(0, prefix.length), word.slice(prefix.length)];
787
+ }
788
+ }
789
+ for (const suffix of NAME_SUFFIX_COMPONENTS) {
790
+ if (word.endsWith(suffix) && word.length > suffix.length) {
791
+ return [word.slice(0, -suffix.length), suffix];
792
+ }
793
+ }
657
794
  if (word.startsWith(this.HAMZA)) {
658
795
  return [word];
659
796
  }
@@ -700,6 +837,9 @@ var ArabicToCyrillicConverter = class {
700
837
  if (![...prefix].some((char) => this.arabicVowels.has(char))) {
701
838
  continue;
702
839
  }
840
+ if (this.crossesProtectedNameEnding(word, length)) {
841
+ continue;
842
+ }
703
843
  if (this.isValidSuffixSequence(suffix)) {
704
844
  return { matchType: "anonymous", base: prefix, suffix };
705
845
  }
@@ -798,12 +938,20 @@ var ArabicToCyrillicConverter = class {
798
938
  if (!word) {
799
939
  return word;
800
940
  }
941
+ const nameYSequence = this.convertNameYSequence(word);
942
+ if (nameYSequence) {
943
+ return nameYSequence;
944
+ }
801
945
  if (hasKey(EXCEPTIONS, word)) {
802
946
  return EXCEPTIONS[word];
803
947
  }
804
948
  if (hasKey(PROPER_NOUNS, word)) {
805
949
  return PROPER_NOUNS[word];
806
950
  }
951
+ const segments = this.segmentCompoundWord(word);
952
+ if (segments.length > 1) {
953
+ return word.includes("-") ? segments.map((segment) => this.convertWord(segment)).join("-") : segments.map((segment) => this.convertWord(segment)).join("");
954
+ }
807
955
  if (LOANWORD_EXACT.has(word)) {
808
956
  return this.convertWordInternal(word);
809
957
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sarmay/kaz-converter",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Lightweight Kazakh Arabic/Cyrillic script converter for web and Node.js.",
5
5
  "author": {
6
6
  "name": "sarmay",