interscript 0.1.1 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  6. data/lib/g2pwrapper.py +34 -0
  7. data/lib/interscript-opal.rb +2 -0
  8. data/lib/interscript.rb +138 -20
  9. data/lib/interscript/command.rb +28 -0
  10. data/lib/interscript/fs.rb +71 -0
  11. data/lib/interscript/mapping.rb +142 -0
  12. data/lib/interscript/opal.rb +27 -0
  13. data/lib/interscript/opal/maps.js.erb +10 -0
  14. data/lib/interscript/opal_map_translate.rb +12 -0
  15. data/lib/interscript/version.rb +1 -1
  16. data/lib/model-7 +0 -0
  17. data/lib/tha-pt-b-7 +0 -0
  18. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  19. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  20. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  21. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  22. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  23. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  24. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +125 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  27. data/maps/alalc-ell-Grek-Latn-1997.yaml +624 -0
  28. data/maps/alalc-ell-Grek-Latn-2010.yaml +627 -0
  29. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  30. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  31. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  32. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  33. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  34. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  35. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  36. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  37. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +221 -0
  38. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  39. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  40. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  41. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  42. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  43. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  44. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +174 -0
  45. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  46. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  47. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  48. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  49. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  50. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  51. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  52. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  53. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  54. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  55. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  56. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +285 -0
  57. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  58. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  59. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +701 -0
  60. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +19 -0
  61. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  62. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  63. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  64. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  65. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  66. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  67. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  68. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  69. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  70. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +92 -0
  71. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  72. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  73. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +162 -0
  74. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  75. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  76. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  77. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  78. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  79. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  80. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  81. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  82. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  83. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  84. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  85. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  86. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  87. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  88. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  89. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  90. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  91. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  92. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  93. data/maps/ggg-kat-Geor-Latn-2002.yaml +88 -0
  94. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  95. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  96. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +186 -0
  97. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  98. data/maps/icao-bel-Cyrl-Latn-9303.yaml +136 -0
  99. data/maps/icao-bul-Cyrl-Latn-9303.yaml +118 -0
  100. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  101. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  102. data/maps/icao-per-Arab-Latn-9303.yaml +103 -0
  103. data/maps/icao-rus-Cyrl-Latn-9303.yaml +117 -0
  104. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  105. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +119 -0
  106. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  107. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +609 -0
  108. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +40 -0
  109. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  110. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +271 -0
  111. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  112. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  113. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  114. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  115. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  116. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  117. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  118. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  119. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  120. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  121. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  122. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  123. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  124. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  125. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  126. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  127. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  128. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  129. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  130. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  131. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  132. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  133. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  134. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  135. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  136. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  137. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  138. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  139. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  140. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  141. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  142. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  143. data/maps/ses-ara-Arab-Latn-1930.yaml +279 -0
  144. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  145. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  146. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  147. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  148. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  149. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  150. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  151. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  152. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  153. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  154. data/maps/un-mon-Mong-Latn-2013.yaml +99 -0
  155. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  156. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  157. data/maps/un-ukr-Cyrl-Latn-1998.yaml +30 -0
  158. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  159. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  160. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  161. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  162. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  163. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  164. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  165. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  166. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  167. data/spec/interscript/mapping_spec.rb +42 -0
  168. data/spec/interscript_spec.rb +26 -0
  169. data/spec/spec_helper.rb +3 -0
  170. metadata +298 -18
@@ -0,0 +1,48 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2011
4
+ language: kor
5
+ source_script: Hang
6
+ destination_script: Latn
7
+ name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
+ url:
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+ BGN/PCGN 2011 Agreement
15
+
16
+ tests:
17
+ - source: 불국사
18
+ expected: "Bulguksa"
19
+ - source: 묵호
20
+ expected: "Mukho"
21
+ - source: 울산
22
+ expected: "Ulsan"
23
+ - source: 독립문
24
+ expected: "Dongnimmun"
25
+ - source: 강남역
26
+ expected: "Gangnamyeok"
27
+ - source: 남산리
28
+ expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
+ - source: 내월리
30
+ expected: "Naewol-ri"
31
+ - source: 울릉군
32
+ expected: "Ulleung-gun"
33
+ - source: 설악산
34
+ expected: "Seoraksan"
35
+ - source: 삼죽면
36
+ expected: "Samjuk-myeon"
37
+ - source: 평리1동
38
+ expected: "Pyeongni Il-dong"
39
+ - source: 평리2동
40
+ expected: "Pyeongni I-dong"
41
+ - source: 탑안이
42
+ expected: "Tabani"
43
+
44
+ map:
45
+ character_separator: ""
46
+ word_separator: " "
47
+ title_case: True
48
+ inherit: moct-kor-Hang-Latn-2000
@@ -0,0 +1,48 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2011
4
+ language: kor
5
+ source_script: Kore
6
+ destination_script: Latn
7
+ name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
+ url:
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+ BGN/PCGN 2011 Agreement
15
+
16
+ tests:
17
+ - source: 佛國寺
18
+ expected: "Bulguksa"
19
+ - source: 묵호
20
+ expected: "Mukho"
21
+ - source: 蔚山
22
+ expected: "Ulsan"
23
+ - source: 獨立門
24
+ expected: "Dongnimmun"
25
+ - source: 江南驛
26
+ expected: "Gangnamyeok"
27
+ - source: 南山里
28
+ expected: "Namsan-ri" #Note: no assimilation for -ri even after nasals
29
+ - source: 내월里
30
+ expected: "Naewol-ri"
31
+ - source: 鬱陵郡
32
+ expected: "Ulleung-gun"
33
+ - source: 雪嶽山
34
+ expected: "Seoraksan"
35
+ - source: 三竹面
36
+ expected: "Samjuk-myeon"
37
+ - source: 坪里1洞
38
+ expected: "Pyeongni Il-dong"
39
+ - source: 坪里2洞
40
+ expected: "Pyeongni I-dong"
41
+ - source: 탑안이
42
+ expected: "Tabani"
43
+
44
+ map:
45
+ character_separator: ""
46
+ word_separator: " "
47
+ title_case: True
48
+ inherit: [var-kor-Kore-Hang-2013, moct-kor-Hang-Latn-2000]
@@ -0,0 +1,159 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1981
4
+ language: mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Macedonian Romanization, BGN/PCGN 1981 System
8
+ url: https://github.com/riboseinc/interscript/files/4247920/USBGN_romanization_Macedonian_1981.pdf
9
+ creation_date: 1981
10
+ description: BGN/PCGN Romanization table for Macedonian.
11
+
12
+ tests:
13
+ - source: Ѓол
14
+ expected: Đol
15
+ - source: Јусек Тепеси
16
+ expected: Jusek Tepesi
17
+ - source: Љуги Ќарит
18
+ expected: Ljugi Ćarit
19
+ - source: Ќафа Сан
20
+ expected: Ćafa San
21
+ - source: Агроплод Ресен
22
+ expected: Agroplod Resen
23
+ - source: Алта Чука
24
+ expected: Alta Čuka
25
+ - source: Баш Тепе
26
+ expected: Baš Tepe
27
+ - source: Браќам
28
+ expected: Braćam
29
+ - source: Винарска Визба Агропин
30
+ expected: Vinarska Vizba Agropin
31
+ - source: Галичица
32
+ expected: Galičica
33
+ - source: Дрењево
34
+ expected: Drenjevo
35
+ - source: Енешево
36
+ expected: Eneševo
37
+ - source: Иберлија
38
+ expected: Iberlija
39
+ - source: Крмзи Су
40
+ expected: Krmzi Su
41
+ - source: Лесноски Рид
42
+ expected: Lesnoski Rid
43
+ - source: Мала Корабска Врата
44
+ expected: Mala Korabska Vrata
45
+ - source: Низок Врв
46
+ expected: Nizok Vrv
47
+ - source: Охридско Езеро
48
+ expected: Ohridsko Ezero
49
+ - source: Прлиќ
50
+ expected: Prlić
51
+ - source: Равна Гора
52
+ expected: Ravna Gora
53
+ - source: Сеѓавечкиот Рид
54
+ expected: Seđavečkiot Rid
55
+ - source: Трновите Њиве
56
+ expected: Trnovite Njive
57
+ - source: Фасов Рид
58
+ expected: Fasov Rid
59
+ - source: Црни Камен
60
+ expected: Crni Kamen
61
+ - source: Чатал Чешми
62
+ expected: Čatal Češmi
63
+ - source: Шехово
64
+ expected: Šehovo
65
+
66
+ notes:
67
+ - The character ѓ should be romanized g when it occurs before е and и. In other
68
+ instances, it should be romanized đ (Đ).
69
+ - The character ќ should be romanized k when it occurs before е and и. In other
70
+ instances, it should be romanized ć.
71
+
72
+ map:
73
+ rules:
74
+ - pattern: "Ѓ(?=[еЕиИ])"
75
+ result: "G"
76
+ - pattern: "ѓ(?=[еЕиИ])"
77
+ result: "g"
78
+ - pattern: "Ќ(?=[еЕиИ])"
79
+ result: "K"
80
+ - pattern: "ќ(?=[еЕиИ])"
81
+ result: "k"
82
+
83
+ postrules:
84
+ # DZ
85
+ - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
86
+ result: "DZ"
87
+ #LJ
88
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
89
+ result: "LJ"
90
+ #NJ
91
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
92
+ result: "NJ"
93
+ #DŽ
94
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
95
+ result: "DŽ"
96
+
97
+ characters:
98
+ "\u0410": "A"
99
+ "\u0411": "B"
100
+ "\u0412": "V"
101
+ "\u0413": "G"
102
+ "\u0414": "D"
103
+ "\u0403": "\u0110" # Đ
104
+ "\u0415": "E"
105
+ "\u0416": "\u005a\u030c" # Ž
106
+ "\u0417": "Z"
107
+ "\u0405": "Dz"
108
+ "\u0418": "I"
109
+ "\u0408": "J"
110
+ "\u041A": "K"
111
+ "\u041B": "L"
112
+ "\u0409": "Lj"
113
+ "\u041C": "M"
114
+ "\u041D": "N"
115
+ "\u040A": "Nj"
116
+ "\u041E": "O"
117
+ "\u041F": "P"
118
+ "\u0420": "R"
119
+ "\u0421": "S"
120
+ "\u0422": "T"
121
+ "\u040c": "\u0106" # Ć
122
+ "\u0423": "U"
123
+ "\u0424": "F"
124
+ "\u0425": "H"
125
+ "\u0426": "C"
126
+ "\u0427": "\u0043\u030c" # Č
127
+ "\u040F": "D\u007a\u030c" # Dž
128
+ "\u0428": "\u0053\u030c" # Š
129
+ "\u0430": "a"
130
+ "\u0431": "b"
131
+ "\u0432": "v"
132
+ "\u0433": "g"
133
+ "\u0434": "d"
134
+ "\u0453": "\u0111" # đ
135
+ "\u0435": "e"
136
+ "\u0436": "\u007a\u030c" # ž
137
+ "\u0437": "z"
138
+ "\u0455": "dz"
139
+ "\u0438": "i"
140
+ "\u0458": "j"
141
+ "\u043A": "k"
142
+ "\u043B": "l"
143
+ "\u0459": "lj"
144
+ "\u043C": "m"
145
+ "\u043D": "n"
146
+ "\u045A": "nj"
147
+ "\u043E": "o"
148
+ "\u043F": "p"
149
+ "\u0440": "r"
150
+ "\u0441": "s"
151
+ "\u0442": "t"
152
+ "\u045c": "\u0107" # ć
153
+ "\u0443": "u"
154
+ "\u0444": "f"
155
+ "\u0445": "h"
156
+ "\u0446": "c"
157
+ "\u0447": "\u0063\u030c" # č
158
+ "\u045F": "d\u007a\u030c" # dž
159
+ "\u0448": "\u0073\u030c" # š
@@ -0,0 +1,190 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2013
4
+ language: mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Macedonian Romanization, BGN/PCGN 2013 System
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811514/ROMANIZATION_OF_MACEDONIAN.pdf
9
+ creation_date: 2013
10
+ description: |
11
+ Macedonian was officially established as a written language in Yugoslavia during
12
+ World War II and is now the official language of North Macedonia.
13
+ This romanization system replaces the BGN/PCGN 1981 agreement and adheres
14
+ to the most widely-accepted standardization of Macedonian orthography.
15
+
16
+ notes:
17
+ - The Macedonian Cyrillic lowercase italic Д may sometimes be seen as g.
18
+ There is no specific Unicode encoding for this variant form so a comparable character
19
+ has been used here for illustrative purposes.
20
+ - The Macedonian Cyrillic lowercase italic Ѓ may sometimes be seen as ī.
21
+ There is no specific Unicode encoding for this variant form so a comparable character
22
+ has been used here for illustrative purposes.
23
+ - The Macedonian Cyrillic lowercase italic П may sometimes be seen as ū.
24
+ There is no specific Unicode encoding for this variant form so a comparable character
25
+ has been used here for illustrative purposes.
26
+ - The Macedonian Cyrillic lowercase italic Т may sometimes be seen as w̄.
27
+ There is no specific Unicode encoding for this variant form so a comparable character
28
+ has been used here for illustrative purposes.
29
+ - |
30
+ An inventory of letter-diacritic combinations, with their Unicode encoding,
31
+ in addition to the unmodified letters of the basic Roman script is:
32
+ | Ǵ (U+01F4) | ǵ (U+01F5) |
33
+ | Ž (U+017D) | ž (U+017E) |
34
+ | Dz (U+01F2)* | dz (U+01F3)* |
35
+ | Lj (U+01C8)* | lj (U+01C9)* |
36
+ | Nj (U+01CB)* | nj (U+01CC)* |
37
+ | Ḱ (U+1E30) | ḱ (U+1E31) |
38
+ | Č (U+010C) | č (U+010D) |
39
+ | Dž (U+01C5)* | dž (U+01C6)* |
40
+ | Š (U+0160) | š (U+0161) |
41
+ * Note that these characters can also be reproduced with individual letters (e.g. l+j).
42
+ - The Romanization column shows only lowercase forms but, when romanizing,
43
+ uppercase and lowercase Roman letters as appropriate should be used.
44
+
45
+ tests:
46
+ - source: Ѓенови Ливаѓе
47
+ expected: Ǵenovi Livaǵe
48
+ - source: ЛУЃЕ луѓе
49
+ expected: LUǴE luǵe
50
+ - source: ЅВЕЗДА ѕвезда Ѕвезда
51
+ expected: DZVEZDA dzvezda Dzvezda
52
+ - source: Јабежица
53
+ expected: Jabežica
54
+ - source: Љиќен и Бард
55
+ expected: Ljiḱen i Bard
56
+ - source: Ќамилов Чукар
57
+ expected: Ḱamilov Čukar
58
+ - source: Џавидин Кајнак
59
+ expected: Džavidin Kajnak
60
+ - source: Џамалџи
61
+ expected: Džamaldži
62
+ - source: Џибра Гури и Зи
63
+ expected: Džibra Guri i Zi
64
+ - source: Абазова Куќарица
65
+ expected: Abazova Kuḱarica
66
+ - source: Баба Анѓина Маала
67
+ expected: Baba Anǵina Maala
68
+ - source: Ваљановец
69
+ expected: Valjanovec
70
+ - source: Галал Једи Дереш
71
+ expected: Galal Jedi Dereš
72
+ - source: Дванаесет Клајнци
73
+ expected: Dvanaeset Klajnci
74
+ - source: Електродистрибуција Струга
75
+ expected: Elektrodistribucija Struga
76
+ - source: Железничка Станица Рајко Жинзифов
77
+ expected: Železnička Stanica Rajko Žinzifov
78
+ - source: Заедничко Речиште
79
+ expected: Zaedničko Rečište
80
+ - source: Испраена Плоча
81
+ expected: Ispraena Ploča
82
+ - source: Казнено-Поправна Установа Идризово
83
+ expected: Kazneno-Popravna Ustanova Idrizovo
84
+ - source: Лази и Зејнелит
85
+ expected: Lazi i Zejnelit
86
+ - source: Мавровско Езеро
87
+ expected: Mavrovsko Ezero
88
+ - source: Национален Парк Галичица
89
+ expected: Nacionalen Park Galičica
90
+ - source: Одморалиште Свети Стефан
91
+ expected: Odmoralište Sveti Stefan
92
+ - source: Планинарски Дом Караџица
93
+ expected: Planinarski Dom Karadžica
94
+ - source: Раса е Лисењит
95
+ expected: Rasa e Lisenjit
96
+ - source: Скочивирска Клисура
97
+ expected: Skočivirska Klisura
98
+ - source: Термо-електроцентрала Неготино
99
+ expected: Termo-elektrocentrala Negotino
100
+ - source: Узуновско Бресје
101
+ expected: Uzunovsko Bresje
102
+ - source: Фабрика Југохром
103
+ expected: Fabrika Jugohrom
104
+ - source: Хидроелектрана Сапунџица
105
+ expected: Hidroelektrana Sapundžica
106
+ - source: Цветковско Рамниште
107
+ expected: Cvetkovsko Ramnište
108
+ - source: Чалтанова Пештера
109
+ expected: Čaltanova Peštera
110
+ - source: Шкемби Вишнејц
111
+ expected: Škembi Višnejc
112
+
113
+ map:
114
+ postrules:
115
+ # DZ
116
+ - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
117
+ result: "DZ"
118
+ #LJ
119
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
120
+ result: "LJ"
121
+ #NJ
122
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
123
+ result: "NJ"
124
+ #DŽ
125
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
126
+ result: "DŽ"
127
+
128
+ characters:
129
+ "\u0410": "A"
130
+ "\u0411": "B"
131
+ "\u0412": "V"
132
+ "\u0413": "G"
133
+ "\u0414": "D"
134
+ "\u0403": "\u01F4" # Ǵ
135
+ "\u0415": "E"
136
+ "\u0416": "\u005a\u030c" # Ž
137
+ "\u0417": "Z"
138
+ "\u0405": "Dz"
139
+ "\u0418": "I"
140
+ "\u0408": "J"
141
+ "\u041A": "K"
142
+ "\u041B": "L"
143
+ "\u0409": "Lj"
144
+ "\u041C": "M"
145
+ "\u041D": "N"
146
+ "\u040A": "Nj"
147
+ "\u041E": "O"
148
+ "\u041F": "P"
149
+ "\u0420": "R"
150
+ "\u0421": "S"
151
+ "\u0422": "T"
152
+ "\u040C": "\u004b\u0301" # Ḱ
153
+ "\u0423": "U"
154
+ "\u0424": "F"
155
+ "\u0425": "H"
156
+ "\u0426": "C"
157
+ "\u0427": "\u0043\u030c" # Č
158
+ "\u040F": "D\u007a\u030c" # Dž
159
+ "\u0428": "\u0053\u030c" # Š
160
+ "\u0430": "a"
161
+ "\u0431": "b"
162
+ "\u0432": "v"
163
+ "\u0433": "g"
164
+ "\u0434": "d"
165
+ "\u0453": "\u01F5" # ǵ
166
+ "\u0435": "e"
167
+ "\u0436": "\u007a\u030c" # ž
168
+ "\u0437": "z"
169
+ "\u0455": "dz"
170
+ "\u0438": "i"
171
+ "\u0458": "j"
172
+ "\u043A": "k"
173
+ "\u043B": "l"
174
+ "\u0459": "lj"
175
+ "\u043C": "m"
176
+ "\u043D": "n"
177
+ "\u045A": "nj"
178
+ "\u043E": "o"
179
+ "\u043F": "p"
180
+ "\u0440": "r"
181
+ "\u0441": "s"
182
+ "\u0442": "t"
183
+ "\u045C": "\u1E31" # ḱ
184
+ "\u0443": "u"
185
+ "\u0444": "f"
186
+ "\u0445": "h"
187
+ "\u0446": "c"
188
+ "\u0447": "\u0063\u030c" # č
189
+ "\u045F": "d\u007a\u030c" # dž
190
+ "\u0448": "\u0073\u030c" # š
@@ -0,0 +1,200 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2020
4
+ language: nep
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Nepali Romanization, 2020
8
+ url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20NEPALI.pdf
9
+ creation_date: 1964
10
+ description: |
11
+ BGN/PCGN 2011 Agreement Romanization of Nepali
12
+ The BGN and the PCGN have adopted the Nepal Survey Department (NSD) system for the
13
+ romanization of Nepali names. This system, below, should be applied to Nepali names for which Roman‐
14
+ script spellings in materials produced by the government of Nepal are not available.
15
+
16
+ notes:
17
+
18
+ - Only the isolated forms of the characters are given in the consonant table. See any grammar of Nepali
19
+ (or other language using the Devanagari alphabet) for variant forms used in conjunct characters.
20
+ - The two consonant characters ड and ढ sometimes appear to represent ṛ (cerebral r), e.g., पहाड → pahāṛ
21
+ instead of pahāḍ. At one time they were written with dots below, i.e., as ड़ and ढ़, though this is no
22
+ longer normal practice in Nepali. The romanizations ṛ and ṛh, respectively, are optional for
23
+ documentary purposes if such dots appear in Nepali writing.
24
+ - व can be romanized as either v or w. This character is primarily
25
+ romanized as v in consonant initial, medial, and final position; however, initial, medial, and final w
26
+ romanizations can occur. The w romanization is a special case which is believed to be dependent on
27
+ dialect, pronunciation, or stress.
28
+ - |
29
+ An inventory of letter‐diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
30
+ Ṅ (U+1E44) ṅ (U+1E45)
31
+ Ñ (U+00D1) ñ (U+00F1)
32
+ Ṭ (U+1E6C) ṭ (U+1E6D)
33
+ Ḍ (U+1E0C) ḍ (U+1E0D)
34
+ Ṇ (U+1E46) ṇ (U+1E47)
35
+ Ṣ (U+1E62) ṣ (U+1E63)
36
+ Ā (U+0100) ā (U+0101)
37
+ Ī (U+012A) ī (U+012B)
38
+ Ū (U+016A) ū (U+016B)
39
+ Ṛ (U+1E5A) ṛ (U+1E5B)
40
+
41
+ - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase
42
+ Roman letters as appropriate should be used.
43
+
44
+ - |
45
+ ं (anusvara) is rendered by
46
+ ṅ before क, ख, ग, and घ
47
+ ñ before च, छ, ज, and झ
48
+ ṇ before ट, ठ, ड, and ढ
49
+ n before त, थ, द, and ध
50
+ ṁ before य, र, ल, व, श, ष, स and ह
51
+
52
+ tests:
53
+ - source: "लेखन"
54
+ expected: "lekhn"
55
+ - source: "मुद्रा"
56
+ expected: "mudarā"
57
+ - source: "प्रशंसा"
58
+ expected: "parshṃsā" # note 5 rule checking
59
+ - source: "अंक"
60
+ expected: "aṅk" # note 5 rule checking
61
+ - source: "नेकपाले स्थगित स्थायी कमिटीको बैठक भदौ गते बोलाउने भएको"
62
+ expected: "nekpāle sathgit sathāyī kmiṭīko baiṭhk bhdau gte bolāune bheko"
63
+ - source: "न घर रह्यो, न परिवार"
64
+ expected: "n ghr rhayo, n privār"
65
+ - source: "ढोरपाटनमा भुजीखोला बाढीपहिरोले अभिभावक गुमाएका बालबालिकाको बिचल्ली"
66
+ expected: "ḍhorpāṭnmā bhujīkholā bāḍhīphirole abhibhāvk gumāekā bālbālikāko bichlalī"
67
+ - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
68
+ expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
69
+ - source: "संविधान जारी भएसँगै सार्वजनिक प्रशासनमा नयाँ उत्साह आउने अपेक्षा थियो"
70
+ expected: "sṃvidhān jārī bhes~gai sāravjnik parshāsnmā nyā~ utasāh āune apekṣā thiyo"
71
+ - source: "देशमा कोरोना संक्रमित र मृतकको संख्या हरेक दिन बढ्दो छ"
72
+ expected: "deshmā koronā sṅkarmit r mṛitkko sṅkhayā hrek din bḍhado chh"
73
+ - source: "गाउँपालिकाका अध्यक्ष टिका गुरुङका अनुसार विष्णुदासलाई राजुले सुत्नका लागि बेलुका साथी लगेका थिए"
74
+ expected: "gāu~pālikākā adhaykṣ ṭikā guruṅkā anusār viṣaṇudāslāī rājule sutankā lāgi belukā sāthī lgekā thie"
75
+ - source: "यो आयोजना गाउँपालिकाको केन्द्र तेल्लोकमा पर्छ"
76
+ expected: "yo āyojnā gāu~pālikāko kenadar telalokmā prachh"
77
+ - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
78
+ expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
79
+ - source: "चैत पहिलो साता घर आएका उनी लकडाउन भएपछि यतै रोकिए"
80
+ expected: "chait philo sātā ghr āekā unī lkḍāun bhepchhi ytai rokie"
81
+ - source: "काम गर्न जानेको हकमा रोजगारदाता कम्पनीको पत्रसँगै वडा र जिल्ला प्रशासनको सिफारिस अनिवार्य गरिएको छ"
82
+ expected: "kām gran jāneko hkmā rojgārdātā kmapnīko ptrs~gai vḍā r jilalā parshāsnko siphāris anivāray grieko chh"
83
+ - source: "दुःख"
84
+ expected: "duḥkh"
85
+
86
+ map:
87
+
88
+ rules:
89
+ # note[5]
90
+ - pattern: \u0902(?=[कखगघ]) # ं before क, ख, ग, and घ
91
+ result: ṅ
92
+ - pattern: \u0902(?=[चछजझ]) # ं before च, छ, ज, and झ
93
+ result: ñ
94
+ - pattern: \u0902(?=[टठडढ]) # ं before ट, ठ, ड, and ढ
95
+ result: ṇ
96
+ - pattern: \u0902(?=[तथदध]) # ं before त, थ, द, and ध
97
+ result: n
98
+
99
+ characters:
100
+
101
+ # Vowels and Diphthongs
102
+
103
+ 'अ': 'a'
104
+ 'आ': 'ā'
105
+ 'इ': 'i'
106
+ 'ई': 'ī'
107
+ 'उ': 'u'
108
+ 'ऊ': 'ū'
109
+ 'ऋ': 'ṛi'
110
+ 'ॠ': 'rī'
111
+ 'ए': 'e'
112
+ 'ऐ': 'ai'
113
+ 'ओ': 'o'
114
+ 'औ': 'au'
115
+
116
+ # Medials # Needed for connecting consonants
117
+
118
+ 'ा': "ā"
119
+ 'ि': "i"
120
+ 'ी': "ī"
121
+ 'ु': "u"
122
+ 'ू': "ū"
123
+ 'ृ': "ṛi"
124
+ 'ॄ': "rī"
125
+ 'े': "e"
126
+ 'ै': "ai"
127
+ 'ो': "o"
128
+ 'ौ': "au"
129
+
130
+
131
+ # Consonants (see Note 1)
132
+
133
+ # Gutturals
134
+ 'क': 'k'
135
+ 'ख': 'kh'
136
+ 'ग': 'g'
137
+ 'घ': 'gh'
138
+ 'ङ': 'ṅ'
139
+
140
+ # Palatals
141
+ 'च': 'ch'
142
+ 'छ': 'chh'
143
+ 'ज': 'j'
144
+ 'झ': 'jh'
145
+ 'ञ': 'ñ'
146
+
147
+ # Cerebrals
148
+ 'ट': 'ṭ'
149
+ 'ठ': 'ṭh'
150
+ 'ड': 'ḍ'
151
+ 'ढ': 'ḍh'
152
+ 'ण': 'ṇ'
153
+
154
+ # Dentals
155
+ 'त': 't'
156
+ 'थ': 'th'
157
+ 'द': 'd'
158
+ 'ध': 'dh'
159
+ 'न': 'n'
160
+
161
+ # Labials
162
+ 'प': 'p'
163
+ 'फ': 'ph'
164
+ 'ब': 'b'
165
+ 'भ': 'bh'
166
+ 'म': 'm'
167
+
168
+ # Semivowels
169
+ 'य': 'y'
170
+ 'र': 'r'
171
+ 'ल': 'l'
172
+ 'व': 'v' # or w [Note#3]
173
+
174
+ # Sibilants
175
+ 'श': 'sh'
176
+ 'ष': 'ṣ'
177
+ 'स': 's'
178
+ 'क्ष': 'kṣ'
179
+ 'त्र': 'tr'
180
+ 'ज्ञ' : 'jñ'
181
+
182
+ # Aspirate
183
+ 'ह': 'h'
184
+
185
+ # Anusvāra
186
+ 'ं': 'ṃ'
187
+
188
+ # Bisarga
189
+ 'ः': 'ḥ'
190
+
191
+ # Anunāsika
192
+ 'ँ': '~'
193
+
194
+ 'ॅ': 'r'
195
+
196
+ # halanta
197
+ '्': 'a'
198
+
199
+ # Abagraha
200
+ 'ऽ': '’' # (apostrophe)