interscript 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,112 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: kat
5
+ source_script: Geok
6
+ destination_script: Latn
7
+ name: ALA-LC Georgian System (1997)
8
+ url: https://www.loc.gov/catdir/cpso/romanization/georgian.pdf
9
+ creation_date: 1997
10
+ confirmation_date: 1997
11
+ description: |
12
+ Values are shown for the Khutsuri alphabet.
13
+
14
+ notes:
15
+
16
+ tests:
17
+
18
+ - source: ႼႨႢႬႨ
19
+ expected: CIGNI
20
+
21
+ - source: ⴜⴈⴂⴌⴈ
22
+ expected: cigni
23
+
24
+ - source: ႱႭႪႭႫႭႬ
25
+ expected: SOLOMON
26
+
27
+ - source: ⴑⴍⴊⴍⴋⴍⴌ
28
+ expected: solomon
29
+
30
+ - source: ႠႡႰႠჀႠႫ
31
+ expected: ABRAHAM
32
+
33
+ map:
34
+ characters:
35
+ '\u10a0' : 'A' # Ⴀ
36
+ '\u10a1' : 'B' # Ⴁ
37
+ '\u10a2' : 'G' # Ⴂ
38
+ '\u10a3' : 'D' # Ⴃ
39
+ '\u10a4' : 'E' # Ⴄ
40
+ '\u10a5' : 'V' # Ⴅ
41
+ '\u10a6' : 'Z' # Ⴆ
42
+ '\u10a7' : 'Tʻ' # Ⴇ
43
+ '\u10a8' : 'I' # Ⴈ
44
+ '\u10a9' : 'K' # Ⴉ
45
+ '\u10aa' : 'L' # Ⴊ
46
+ '\u10ab' : 'M' # Ⴋ
47
+ '\u10ac' : 'N' # Ⴌ
48
+ '\u10ad' : 'O' # Ⴍ
49
+ '\u10ae' : 'P' # Ⴎ
50
+ '\u10af' : 'Ž' # Ⴏ
51
+ '\u10b0' : 'R' # Ⴐ
52
+ '\u10b1' : 'S' # Ⴑ
53
+ '\u10b2' : 'T' # Ⴒ
54
+ '\u10b3' : 'U' # Ⴓ
55
+ '\u10b4' : 'Pʻ' # Ⴔ
56
+ '\u10b5' : 'Kʻ' # Ⴕ
57
+ '\u10b6' : 'Ġ' # Ⴖ
58
+ '\u10b7' : 'Q' # Ⴗ
59
+ '\u10b8' : 'Š' # Ⴘ
60
+ '\u10b9' : 'Čʻ' # Ⴙ
61
+ '\u10ba' : 'Cʻ' # Ⴚ
62
+ '\u10bb' : 'Ż' # Ⴛ
63
+ '\u10bc' : 'C' # Ⴜ
64
+ '\u10bd' : 'Č' # Ⴝ
65
+ '\u10be' : 'X' # Ⴞ
66
+ '\u10bf' : 'J' # Ⴟ
67
+ '\u10c0' : 'H' # Ⴠ
68
+ '\u10c1' : 'Ē' # Ⴡ
69
+ '\u10c2' : 'Y' # Ⴢ
70
+ '\u10c3' : 'W' # Ⴣ
71
+ '\u10c4' : 'X̣' # Ⴤ
72
+ '\u10c5' : 'Ō' # Ⴥ
73
+
74
+ '\u2d00' : 'a' # ⴀ
75
+ '\u2d01' : 'b' # ⴁ
76
+ '\u2d02' : 'g' # ⴂ
77
+ '\u2d03' : 'd' # ⴃ
78
+ '\u2d04' : 'e' # ⴄ
79
+ '\u2d05' : 'v' # ⴅ
80
+ '\u2d06' : 'z' # ⴆ
81
+ '\u2d07' : 'tʻ' # ⴇ
82
+ '\u2d08' : 'i' # ⴈ
83
+ '\u2d09' : 'k' # ⴉ
84
+ '\u2d0a' : 'l' # ⴊ
85
+ '\u2d0b' : 'm' # ⴋ
86
+ '\u2d0c' : 'n' # ⴌ
87
+ '\u2d0d' : 'o' # ⴍ
88
+ '\u2d0e' : 'p' # ⴎ
89
+ '\u2d0f' : 'ž' # ⴏ
90
+ '\u2d10' : 'r' # ⴐ
91
+ '\u2d11' : 's' # ⴑ
92
+ '\u2d12' : 't' # ⴒ
93
+ '\u2d13' : 'u' # ⴓ
94
+ '\u2d14' : 'pʻ' # ⴔ
95
+ '\u2d15' : 'kʻ' # ⴕ
96
+ '\u2d16' : 'ġ' # ⴖ
97
+ '\u2d17' : 'q' # ⴗ
98
+ '\u2d18' : 'š' # ⴘ
99
+ '\u2d19' : 'čʻ' # ⴙ
100
+ '\u2d1a' : 'cʻ' # ⴚ
101
+ '\u2d1b' : 'ż' # ⴛ
102
+ '\u2d1c' : 'c' # ⴜ
103
+ '\u2d1d' : 'č' # ⴝ
104
+ '\u2d1e' : 'x' # ⴞ
105
+ '\u2d1f' : 'j' # ⴟ
106
+ '\u2d20' : 'h' # ⴠ
107
+ '\u2d21' : 'ē' # ⴡ
108
+ '\u2d22' : 'y' # ⴢ
109
+ '\u2d23' : 'w' # ⴣ
110
+ '\u2d24' : 'x̣' # ⴤ
111
+ '\u2d25' : 'ō' # ⴥ
112
+
@@ -0,0 +1,146 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: kat
5
+ source_script: Geor
6
+ destination_script: Latn
7
+ name: ALA-LC Georgian System (1997)
8
+ url: https://www.loc.gov/catdir/cpso/romanization/georgian.pdf
9
+ creation_date: 1997
10
+ confirmation_date: 1997
11
+ description: |
12
+ Values are shown for the older Khutsuri and the modern Mkhedruli alphabets.
13
+ There are no upper case letters in Mkhedruli.
14
+
15
+ notes:
16
+
17
+ tests:
18
+
19
+ - source: ხაოფსე
20
+ expected: xaopʻse
21
+
22
+ - source: ჭლოუ
23
+ expected: člou
24
+
25
+ - source: ჩოხულდი
26
+ expected: čʻoxuldi
27
+
28
+ - source: ქვემო ლინდა
29
+ expected: kʻvemo linda
30
+
31
+ - source: ტამკვაჩ იგვავერა
32
+ expected: tamkvačʻ igvavera
33
+
34
+ - source: სვანეთი
35
+ expected: svanetʻi
36
+
37
+ - source: საცხვარისი
38
+ expected: sacʻxvarisi
39
+
40
+ - source: მუხრან-თელეთი
41
+ expected: muxran-tʻeletʻi
42
+
43
+ - source: მუცდი
44
+ expected: mucʻdi
45
+
46
+ - source: ლეჩხუმი
47
+ expected: lečʻxumi
48
+
49
+ - source: ვერხნაია მწარა
50
+ expected: verxnaia mcara
51
+
52
+ - source: ეგრისის ქედი
53
+ expected: egrisis kʻedi
54
+
55
+ - source: დოჩარიფშა
56
+ expected: dočʻaripʻša
57
+
58
+ - source: ბოლოკო
59
+ expected: boloko
60
+
61
+ - source: აჭანდარა
62
+ expected: ačandara
63
+
64
+ - source: აუალიცა
65
+ expected: aualicʻa
66
+
67
+ - source: აკალამრა
68
+ expected: akalamra
69
+
70
+ - source: ლასილი
71
+ expected: lasili
72
+
73
+ - source: გუბაზეული
74
+ expected: gubazeuli
75
+
76
+ - source: ბაყაყი
77
+ expected: baqaqi
78
+
79
+ - source: ძროხა
80
+ expected: żroxa
81
+
82
+ - source: ჰაერი
83
+ expected: haeri
84
+
85
+ - source: ჟოლო
86
+ expected: žolo
87
+
88
+ - source: ჯართი
89
+ expected: jartʻi
90
+
91
+ - source: ღრმაღელე
92
+ expected: ġrmaġele
93
+
94
+ map:
95
+ characters:
96
+ '\u10d0' : 'a' # ა
97
+ '\u10d1' : 'b' # ბ
98
+ '\u10d2' : 'g' # გ
99
+ '\u10d3' : 'd' # დ
100
+ '\u10d4' : 'e' # ე
101
+ '\u10d5' : 'v' # ვ
102
+ '\u10d6' : 'z' # ზ
103
+
104
+ '\u10f1' : 'ē' # ჱ
105
+
106
+ '\u10d7' : 'tʻ' # თ
107
+ '\u10d8' : 'i' # ი
108
+ '\u10d9' : 'k' # კ
109
+ '\u10da' : 'l' # ლ
110
+ '\u10db' : 'm' # მ
111
+ '\u10dc' : 'n' # ნ
112
+
113
+ '\u10f2' : 'y' # ჲ
114
+
115
+ '\u10dd' : 'o' # ო
116
+ '\u10de' : 'p' # პ
117
+ '\u10df' : 'ž' # ჟ
118
+ '\u10e0' : 'r' # რ
119
+ '\u10e1' : 's' # ს
120
+ '\u10e2' : 't' # ტ
121
+
122
+ '\u10f3' : 'w' # ჳ
123
+
124
+ '\u10e3' : 'u' # უ
125
+ '\u10e4' : 'pʻ' # ფ
126
+ '\u10e5' : 'kʻ' # ქ
127
+ '\u10e6' : 'ġ' # ღ
128
+ '\u10e7' : 'q' # ყ
129
+ '\u10e8' : 'š' # შ
130
+ '\u10e9' : 'čʻ' # ჩ
131
+ '\u10ea' : 'cʻ' # ც
132
+ '\u10eb' : 'ż' # ძ
133
+ '\u10ec' : 'c' # წ
134
+ '\u10ed' : 'č' # ჭ
135
+ '\u10ee' : 'x' # ხ
136
+
137
+ '\u10f4' : 'x̣' # ჴ
138
+
139
+ '\u10ef' : 'j' # ჯ
140
+ '\u10f0' : 'h' # ჰ
141
+
142
+ '\u10f5' : 'ō' # ჵ
143
+
144
+ '\u10f6' : 'f' # ჶ
145
+ '\u10f7' : 'ĕ' # ჷ
146
+ '\u10f8' : 'ʻ' # ჸ
@@ -0,0 +1,94 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: kor
5
+ source_script: Hang
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization Table -- Korean (1997)
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/korean.pdf
9
+ creation_date: 1997
10
+ adoption_date:
11
+ description:
12
+ "1. General Practice
13
+ The Library of Congress will continue to follow the McCune-Reischauer system
14
+ to romanize Korean with the exceptions noted in this document. See:
15
+ Romanization of the Korean Language: Based upon its Phonetic Structure by
16
+ G.M. McCune and E.O. Reischauer ([S.l.: s.n., 1939?]), reprinted from the
17
+ Transactions of the Korea Branch of the Royal Asiatic Society. Full text of the
18
+ original document is available online from the National Library of Australia Web
19
+ site: http://www.nla.gov.au/librariesaustralia/cjk/download/ras_1939.pdf"
20
+
21
+ notes:
22
+
23
+
24
+ tests:
25
+ - source: 은하-리
26
+ expected: "Ŭnha-ri"
27
+ - source: 은중-리
28
+ expected: "Ŭnjung-ni"
29
+ - source: 은장-령
30
+ expected: "Ŭnjang-nyŏng"
31
+ - source: 은혜-동
32
+ expected: "Ŭnhye-dong"
33
+ - source: 은호-리
34
+ expected: "Ŭnho-ri"
35
+ - source: 은행정
36
+ expected: "Ŭnhaengjŏng"
37
+ - source: 은행-동
38
+ expected: "Ŭnhaeng-dong"
39
+ - source: 은행-촌
40
+ expected: "Ŭnhaeng-ch’on"
41
+ - source: 원수
42
+ expected: "Wŏnsu"
43
+ - source: 원소리-고개
44
+ expected: "Wŏnsori-gogae"
45
+ - source: 원소참
46
+ expected: "Wŏnsoch’am"
47
+ - source: 원소-리
48
+ expected: "Wŏnso-ri"
49
+ - source: 원신-리
50
+ expected: "Wŏnsil-li"
51
+ - source: 난곡
52
+ expected: "Nan’gok"
53
+ - source: 난산-리
54
+ expected: "Nansal-li"
55
+ - source: 난직
56
+ expected: "Nanjik"
57
+ - source: 영곡
58
+ expected: "Yŏnggok"
59
+ - source: 윗두밀
60
+ expected: "Wittumil"
61
+ - source: 윗도심이
62
+ expected: "Wittosimi"
63
+ - source: 둔지
64
+ expected: "Tunji"
65
+ - source: 서승
66
+ expected: "Sŏsŭng"
67
+ # - source: 신촌
68
+ # expected: "Sinch’on"
69
+ - source: 비암덕
70
+ expected: "Piamdŏk"
71
+ - source: 바위안
72
+ expected: "Pawian"
73
+ - source: 오송평
74
+ expected: "Osongp’yŏng"
75
+ - source: 그물목
76
+ expected: "Kŭmulmok"
77
+ - source: 구원정
78
+ expected: "Kuwŏnjŏng"
79
+ - source: 일하
80
+ expected: "Irha"
81
+ - source: 황우
82
+ expected: "Hwangu"
83
+ - source: 자작보
84
+ expected: "Chajakpo"
85
+ # - source: 비파1-동
86
+ # expected: "Pip’a Il-tong"
87
+ - source: 문암 오-동
88
+ expected: "Munam O-dong"
89
+
90
+ map:
91
+ character_separator: ""
92
+ word_separator: " "
93
+ title_case: True
94
+ inherit: "var-kor-Hang-Latn-mr-1939"
@@ -0,0 +1,103 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2013
4
+ language: mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Macedonian Romanization, ALA-LC 2013 System
8
+ url: https://www.loc.gov/catdir/cpso/romanization/macedonian.pdf
9
+ creation_date: 2013
10
+ description: ALA-LC Romanization table for Macedonian.
11
+
12
+ tests:
13
+ - source: Општина Ердут
14
+ expected: Opština Erdut
15
+ - source: Општина Двор
16
+ expected: Opština Dvor
17
+ - source: ЛУЃЕ луѓе
18
+ expected: LUǴE luǵe
19
+ - source: ЅВЕЗДА ѕвезда Ѕвезда
20
+ expected: DZVEZDA dzvezda Dzvezda
21
+ - source: ЌАРУВАЊЕ ќарување
22
+ expected: ḰARUVANJE ḱaruvanje
23
+
24
+ map:
25
+ postrules:
26
+ # DZ
27
+ - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
28
+ result: "DZ"
29
+ #LJ
30
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
31
+ result: "LJ"
32
+ #NJ
33
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
34
+ result: "NJ"
35
+ #DŽ
36
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
37
+ result: "DŽ"
38
+
39
+ characters:
40
+ "\u0410": "A"
41
+ "\u0411": "B"
42
+ "\u0412": "V"
43
+ "\u0413": "G"
44
+ "\u0403": "\u01F4" # Ǵ
45
+ "\u0414": "D"
46
+ "\u0402": "\u0110" # Đ
47
+ "\u0415": "E"
48
+ "\u0416": "\u005a\u030c" # Ž
49
+ "\u0417": "Z"
50
+ "\u0405": "Dz"
51
+ "\u0418": "I"
52
+ "\u0408": "J"
53
+ "\u041A": "K"
54
+ "\u040C": "\u1E30" # Ḱ
55
+ "\u041B": "L"
56
+ "\u0409": "Lj"
57
+ "\u041C": "M"
58
+ "\u041D": "N"
59
+ "\u040A": "Nj"
60
+ "\u041E": "O"
61
+ "\u041F": "P"
62
+ "\u0420": "R"
63
+ "\u0421": "S"
64
+ "\u0422": "T"
65
+ "\u0423": "U"
66
+ "\u0424": "F"
67
+ "\u0425": "H"
68
+ "\u0426": "C"
69
+ "\u0427": "\u0043\u030c" # Č
70
+ "\u040F": "D\u007a\u030c" # Dž
71
+ "\u0428": "\u0053\u030c" # Š
72
+ "\u0430": "a"
73
+ "\u0431": "b"
74
+ "\u0432": "v"
75
+ "\u0433": "g"
76
+ "\u0453": "\u01F5" # ǵ
77
+ "\u0434": "d"
78
+ "\u0452": "\u0111" # đ
79
+ "\u0435": "e"
80
+ "\u0436": "\u007a\u030c" # ž
81
+ "\u0437": "z"
82
+ "\u0455": "dz"
83
+ "\u0438": "i"
84
+ "\u0458": "j"
85
+ "\u043A": "k"
86
+ "\u045C": "\u1E31" # ḱ
87
+ "\u043B": "l"
88
+ "\u0459": "lj"
89
+ "\u043C": "m"
90
+ "\u043D": "n"
91
+ "\u045A": "nj"
92
+ "\u043E": "o"
93
+ "\u043F": "p"
94
+ "\u0440": "r"
95
+ "\u0441": "s"
96
+ "\u0442": "t"
97
+ "\u0443": "u"
98
+ "\u0444": "f"
99
+ "\u0445": "h"
100
+ "\u0446": "c"
101
+ "\u0447": "\u0063\u030c" # č
102
+ "\u045F": "d\u007a\u030c" # dž
103
+ "\u0448": "\u0073\u030c" # š