interscript 0.1.5 → 2.1.0a8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (200) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -123
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -311
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -69
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -23
  71. data/lib/interscript/opal/maps.js.erb +0 -7
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  77. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  78. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  79. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  80. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  81. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  82. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  83. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  84. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  85. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  86. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  87. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  88. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  89. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  90. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  91. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  92. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  93. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  94. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  95. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  96. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  97. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  98. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  99. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  100. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  101. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  102. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  103. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  104. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  105. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  106. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  107. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  108. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  109. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  110. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  111. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  112. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  113. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  114. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  115. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  116. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  117. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  118. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  119. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  120. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  122. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  123. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  124. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  125. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  126. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  127. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  128. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  129. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  130. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  131. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  132. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  133. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  134. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  135. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  136. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  137. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  138. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  139. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  140. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  141. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  142. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  143. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  144. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  145. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  146. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  147. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  148. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  149. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  150. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  151. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  152. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  153. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  154. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  155. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  156. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  157. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  158. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  159. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  160. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  161. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  162. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  163. data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
  164. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  165. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  166. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  167. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  168. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  169. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  170. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  171. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  172. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  173. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  174. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  175. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  176. data/maps/ses-ara-arab-latn-1930.yaml +0 -275
  177. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  178. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  179. data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
  180. data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
  181. data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
  182. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  183. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  184. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  185. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  186. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  187. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  188. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  189. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  190. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  191. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  192. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  193. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  194. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  195. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  196. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  197. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  198. data/spec/interscript/mapping_spec.rb +0 -42
  199. data/spec/interscript_spec.rb +0 -26
  200. data/spec/spec_helper.rb +0 -3
@@ -1,193 +0,0 @@
1
- ---
2
- authority_id: ua
3
- id: 1996
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Government of Ukraine Ukrainian System (1996)
8
- url: http://transliteration.eki.ee/pdf/Ukrainian.pdf
9
- creation_date: 1996
10
- description: Romanization table for Ukrainian. The current national system of romanization.
11
-
12
- notes:
13
- - gh is used in the romanization of зг zgh.
14
- - In initial position є -> ye, ї -> yi, й -> y, ю -> yu, я -> ya.
15
-
16
- tests:
17
- - source: Алушта
18
- expected: Alushta
19
- - source: Борщагівка
20
- expected: Borschahivka
21
- - source: Вишгород
22
- expected: Vyshhorod
23
- - source: Гадяч
24
- expected: Hadiach
25
- - source: Згорани
26
- expected: Zghorany
27
- - source: Ґалаґан
28
- expected: Galagan
29
- - source: Дон
30
- expected: Don
31
- - source: Рівне
32
- expected: Rivne
33
- - source: Єнакієве
34
- expected: Yenakiieve
35
- - source: Наєнко
36
- expected: Naienko
37
- - source: Житомир
38
- expected: Zhytomyr
39
- - source: Запоріжжя
40
- expected: Zaporizhzhia
41
- - source: Закарпаття
42
- expected: Zakarpattia
43
- - source: Медвин
44
- expected: Medvyn
45
- - source: Іршава
46
- expected: Irshava
47
- - source: Їжакевич
48
- expected: Yizhakevych
49
- - source: Кадіївка
50
- expected: Kadiivka
51
- - source: Йосипівка
52
- expected: Yosypivka
53
- - source: Київ
54
- expected: Kyiv
55
- - source: Лебедин
56
- expected: Lebedyn
57
- - source: Миколаїв
58
- expected: Mykolaiv
59
- - source: Ніжин
60
- expected: Nizhyn
61
- - source: Одеса
62
- expected: Odesa
63
- - source: Полтава
64
- expected: Poltava
65
- - source: Ромни
66
- expected: Romny
67
- - source: Суми
68
- expected: Sumy
69
- - source: Тетерів
70
- expected: Teteriv
71
- - source: Ужгород
72
- expected: Uzhhorod
73
- - source: Фастів
74
- expected: Fastiv
75
- - source: Харків
76
- expected: Kharkiv
77
- - source: Біла Церква
78
- expected: Bila Tserkva
79
- - source: Чернівці
80
- expected: Chernivtsi
81
- - source: Шостка
82
- expected: Shostka
83
- - source: Гоща
84
- expected: Hoscha
85
- - source: Русь
86
- expected: Rus’
87
- - source: Юрій
88
- expected: Yurii
89
- - source: Крюківка
90
- expected: Kriukivka
91
- - source: Яготин
92
- expected: Yahotyn
93
- - source: Ічня
94
- expected: Ichnia
95
- - source: Знам’янка
96
- expected: Znam”ianka
97
-
98
- map:
99
- rules:
100
- - pattern: (?<=[Зз])\u0413 # Г after З or з
101
- result: Gh
102
- - pattern: (?<=[Зз])\u0433 # г after З or з
103
- result: gh
104
- - pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
105
- result: Ye
106
- - pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
107
- result: ye
108
- - pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
109
- result: Yi
110
- - pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
111
- result: yi
112
- - pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
113
- result: "Y"
114
- - pattern: (?<!\b\u2019)\b\u0419 # й in initial position -> y
115
- result: "y"
116
- - pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
117
- result: Yu
118
- - pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
119
- result: yu
120
- - pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
121
- result: Ya
122
- - pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
123
- result: ya
124
- - pattern: \b\u2019\b # ’ in the middle of a word -> ”
125
- result: "\u201d"
126
-
127
- characters:
128
- "\u0410": "A" # А
129
- "\u0411": "B" # Б
130
- "\u0412": "V" # В
131
- "\u0413": "H" # Г
132
- "\u0490": "G" # Ґ
133
- "\u0414": "D" # Д
134
- "\u0415": "E" # Е
135
- "\u0404": "Ie" # Є
136
- "\u0416": "Zh" # Ж
137
- "\u0417": "Z" # З
138
- "\u0418": "Y" # И
139
- "\u0406": "I" # І
140
- "\u0407": "I" # Ї
141
- "\u0419": "I" # Й
142
- "\u041a": "K" # К
143
- "\u041b": "L" # Л
144
- "\u041c": "M" # М
145
- "\u041d": "N" # Н
146
- "\u041e": "O" # О
147
- "\u041f": "P" # П
148
- "\u0420": "R" # Р
149
- "\u0421": "S" # С
150
- "\u0422": "T" # Т
151
- "\u0423": "U" # У
152
- "\u0424": "F" # Ф
153
- "\u0425": "Kh" # Х
154
- "\u0426": "Ts" # Ц
155
- "\u0427": "Ch" # Ч
156
- "\u0428": "Sh" # Ш
157
- "\u0429": "Sch" # Щ
158
- "\u042e": "Iu" # Ю
159
- "\u042f": "Ia" # Я
160
- "\u042c": "\u2019" # Ь -> ’
161
- "\u0430": "a" # а
162
- "\u0431": "b" # б
163
- "\u0432": "v" # в
164
- "\u0433": "h" # г
165
- "\u0491": "g" # ґ
166
- "\u0434": "d" # д
167
- "\u0435": "e" # е
168
- "\u0454": "ie" # є
169
- "\u0436": "zh" # ж
170
- "\u0437": "z" # з
171
- "\u0438": "y" # и
172
- "\u0456": "i" # і
173
- "\u0457": "i" # ї
174
- "\u0439": "i" # й
175
- "\u043a": "k" # к
176
- "\u043b": "l" # л
177
- "\u043c": "m" # м
178
- "\u043d": "n" # н
179
- "\u043e": "o" # о
180
- "\u043f": "p" # п
181
- "\u0440": "r" # р
182
- "\u0441": "s" # с
183
- "\u0442": "t" # т
184
- "\u0443": "u" # у
185
- "\u0444": "f" # ф
186
- "\u0445": "kh" # х
187
- "\u0446": "ts" # ц
188
- "\u0447": "ch" # ч
189
- "\u0448": "sh" # ш
190
- "\u0449": "sch" # щ
191
- "\u044e": "iu" # ю
192
- "\u044f": "ia" # я
193
- "\u044c": "\u2019" # ь -> ’
@@ -1,127 +0,0 @@
1
- ---
2
- authority_id: ungegn
3
- id: 1971
4
- language: ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: 1971 "Beirut system"
8
- url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/2nd-uncsgn-docs/E_Conf61_4_Add1_e.pdf
9
- creation_date: 1971
10
- confirmation date: 2018-06
11
- description: |
12
- The current United Nations recommended romanization
13
- system was approved in 2017 (resolution XI/3), based on
14
- the system adopted by Arabic experts at the conference
15
- held in Beirut in 2007, the Unified Arabic
16
- Transliteration System, taking into account the
17
- practical amendments and corrections carried out and
18
- agreed upon by the representatives of the Arabic-
19
- speaking countries at the Fourth Arab Conference on
20
- Geographical Names, held in Beirut in 2008, and some
21
- clarifications and amendments agreed in Riyadh in 2017.
22
- Previously, the United Nations had approved a
23
- romanization system in 1972 (resolution II/8), based on the
24
- system adopted by Arabic experts at the conference
25
- held at Beirut in 1971 with the practical amendments carried out
26
- and agreed upon by the representatives of the Arabic-speaking
27
- countries at their conference. The table was published in volume
28
- II of the conference report.
29
- In UN resolution XI/3 it is specifically stated that the
30
- system was recommended for the “romanization of the
31
- geographical names within those Arabic-speaking countries
32
- where this system is officially adopted”. There is
33
- evidence of its partial implementation in Jordan, Oman and
34
- Saudi Arabia. The UNGEGN Working Group on Romanization
35
- Systems intends to continue monitoring the UN system’s
36
- implementation across Arabic-speaking countries.
37
- In some countries there exist local romanization schemes
38
- or practices. The geographical names of Algeria, Djibouti,
39
- Mauritania, Morocco and Tunisia are generally rendered in
40
- the traditional manner which conforms to the principles of
41
- the French orthography.
42
- The previous UN-approved system is still found in
43
- considerable international usage.
44
- Arabic is written from right to left. The Arabic script
45
- usually omits vowel points and diacritical marks from
46
- writing which makes it difficult to obtain uniform results
47
- in the romanization of Arabic. It is essential to identify
48
- correctly the words which appear in any particular name
49
- and to know the standard Arabic-script spelling including
50
- the relevant vowels. One must also take into account
51
- dialectal and idiosyncratic deviations. The romanization
52
- is generally reversible though there may be some ambiguous
53
- letter sequences (dh, kh, sh, th) which may also point to
54
- combinations of Arabic characters in addition to the
55
- respective single characters.
56
- notes:
57
- - |
58
- ث is t͟h (th with sub-macron)
59
- خ is k͟h (kh with sub-macron)
60
- ذ is d͟h (dh with sub-macron)
61
- ش is s͟h (sh with sub-macron)
62
- ظ is z͟h (zh with sub-macron)
63
- غ is g͟h (gh with sub-macron)
64
- The previous UN 1972 System had the following differences:
65
- the character (ظ) was romanized as z̧ instead of d͟h;
66
- the cedilla (¸) was used instead of sub-macron (_) in all characters with sub-macrons.
67
-
68
- tests:
69
-
70
- # Examples taken from:
71
- # https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/2nd-uncsgn-docs/E_Conf61_4_Add1_e.pdf
72
- # page 31 (38 digital)
73
-
74
- - source: خَيبَر
75
- expected: k͟haybar
76
-
77
- - source: ظَهران
78
- expected: z͟hahrān
79
-
80
- - source: القُدس
81
- expected: al quds
82
-
83
- map:
84
- inherit: "un-ara-Arab-Latn-2017"
85
- characters:
86
-
87
- # sun letters
88
- '\b\u0627\u0644\u062b' : 'at͟h t͟h' # الث
89
- '\b\u0627\u0644\u0630' : 'ad͟h d͟h' # الذ
90
- '\b\u0627\u0644\u0634' : 'as͟h s͟h' # الش
91
- '\b\u0627\u0644\u0638' : 'az͟h z͟h' # الظ
92
-
93
- # shadda
94
- '\u062e\u0651' : 'k͟hk͟h' # خ
95
- '\u0630\u0651' : 'd͟hd͟h' # ذ
96
- '\u0634\u0651' : 's͟h' # ش
97
- '\u0638\u0651' : 'z͟hz͟h' # ظ
98
- '\u063a\u0651' : 'g͟hg͟h' # غ
99
-
100
- '\u062b' : 't͟h' # ث
101
- '\ufe9b' : 't͟h' # ﺛ
102
- '\ufe9c' : 't͟h' # ﺜ
103
- '\ufe9a' : 't͟h' # ﺚ
104
-
105
- '\u062e' : 'k͟h' # خ
106
- '\ufea7' : 'k͟h' # ﺧ
107
- '\ufea8' : 'k͟h' # ﺨ
108
- '\ufea6' : 'k͟h' # ﺦ
109
-
110
- '\u063a' : 'g͟h' # غ
111
- '\ufecf' : 'g͟h' # ﻏ
112
- '\ufed0' : 'g͟h' # ﻐ
113
- '\ufece' : 'g͟h' # ﻎ
114
-
115
- '\u0630' : 'd͟h' # ذ
116
- '\ufeac' : 'd͟h' # ﺬ
117
-
118
- '\u0634' : 's͟h' # ش
119
- '\ufeb7' : 's͟h' # ﺷ
120
- '\ufeb8' : 's͟h' # ﺸ
121
- '\ufeb6' : 's͟h' # ﺶ
122
-
123
- '\u0638' : 'z͟h' # ظ
124
- '\ufec7' : 'z͟h' # ﻇ
125
- '\ufec8' : 'z͟h' # ﻈ
126
- '\ufec6' : 'z͟h' # ﻆ
127
-
@@ -1,152 +0,0 @@
1
- ---
2
- authority_id: ungegn
3
- id: 1972
4
- language: ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ROMANIZATION OF ARABIC -- UNGEGN 1972 System
8
- url: http://www.eki.ee/wgrs/obs_rom_vers/rom1_ar_v4_0.pdf
9
- creation_date: 1972
10
- confirmation date: 2018-06
11
- description: |
12
- The United Nations recommended romanization
13
- system was approved in 1972 (resolution II/8),
14
- based on the system adopted by Arabic experts at
15
- the conference held at Beirut in 1971 with the
16
- practical amendments carried out and agreed upon
17
- by the representatives of the Arabic-speaking
18
- countries at their conference. The table was
19
- published in volume II of the conference report.
20
- In the UN resolution it was specifically
21
- pointed out that the system was recommended "for
22
- the romanization of the geographical names within
23
- those Arabic-speaking countries where this system
24
- is officially acknowledged". It cannot be
25
- definitely ascertained which of the
26
- Arabic-speaking countries have adopted this system
27
- officially, especially since 2007 when there are
28
- efforts by the Arabic Division to promote a
29
- modification of the UN system (ADEGN
30
- romanization, see the section on other
31
- romanization systems below), with varying
32
- success. Judging by the use of names in
33
- international cartographic products which rely
34
- mostly on national sources it appears that the UN
35
- system or its modification is more or less
36
- current in Iraq, Kuwait, Libya, Saudi Arabia,
37
- United Arab Emirates and Yemen, there and in some
38
- other countries the system is often used without
39
- diacritical marks. For the geographical names of
40
- the Syrian Arab Republic international maps
41
- favour the UN system while the local usage seems
42
- to prefer a French-oriented romanization. Also in
43
- Egypt and Sudan there exist local romanization
44
- schemes or practices side by side with the UN
45
- system. The geographical names of Algeria,
46
- Djibouti, Mauritania, Morocco and Tunisia are
47
- generally rendered in the traditional manner
48
- which conforms to the principles of the French
49
- orthography. Resolution 7 of the Seventh UN
50
- Conference on the Standardization of Geographical
51
- Names (1998) recommended that "the League of Arab
52
- States should, through its specialized
53
- structures, continue its efforts to organize a
54
- conference with a view to considering the
55
- difficulties encountered in applying the amended
56
- Beirut system of 1972 for the romanization of
57
- Arabic script, and submit, as soon as possible, a
58
- solution to the United Nations Group of Experts
59
- on Geographical Names". At the Eighth UN
60
- Conference on the Standardization of Geographical
61
- Names (2002), the Arabic Division of the UN Group
62
- of Experts announced that it had finalised
63
- proposed modifications to the UN recommended
64
- romanization system. These proposals would be
65
- submitted to the League of Arab States for
66
- approval. Arabic is written from right to left.
67
- The Arabic script usually omits vowel points and
68
- diacritical marks from writing which makes it
69
- difficult to obtain uniform results in the
70
- romanization of Arabic. It is essential to
71
- identify correctly the words which appear in any
72
- particular name and to know the standard Arabic-
73
- script spelling including proper pointing. One
74
- must also take into account dialectal and
75
- idiosyncratic deviations. The romanization is
76
- generally reversible though there are some
77
- ambiguous letter sequences (dh, kh, sh, th) which
78
- may also point to combinations of Arabic
79
- characters in addition to the respective single
80
- characters.
81
- notes:
82
- - |
83
- The previous UN 1972 System had the following differences:
84
- the character (ظ) was romanized as z̧ instead of d͟h;
85
- ح, ص, ض the cedilla (¸) was used instead of sub-macron (_) in all characters with sub-macrons. - |
86
- When the definite article al precedes a word beginning with one of the "sun letters" (t,
87
- th, d, dh, r, z, s, sh, ş, ḑ, ţ, z̧, l, n) the l of the definite article is assimilated with the first
88
- consonant of the word: الشارقة ash-Shāriqah.
89
-
90
-
91
- tests:
92
-
93
- # Examples taken from:
94
- # https://unstats.un.org/unsd/geoinfo/geonames/
95
-
96
- - source: مِصر
97
- expected: mişr
98
-
99
- - source: قَطَر
100
- expected: qaţar
101
-
102
- - source: الجُمهُورِيَّة العِراقِيَّة
103
- expected: al jumhūrīyah al ‘irāqīyah
104
-
105
- - source: جُمهُورِيَّة مِصر العَرَبِيَّة
106
- expected: jumhūrīyat mişr al ‘arabīyah
107
-
108
- - source: الرِيَاض
109
- expected: ar riyāḑ
110
-
111
- - source: الشارِقة
112
- expected: ash shāriqah
113
-
114
- map:
115
- inherit: "un-ara-Arab-Latn-2017"
116
- characters:
117
-
118
- '\b\u0627\u0644\u0635' : 'aş ş' # الص
119
- '\b\u0627\u0644\u0636' : 'aḑ ḑ' # الض
120
- '\b\u0627\u0644\u0637' : 'aţ ţ' # الط
121
-
122
- '\u062d\u0651' : 'ḩḩ' # ح
123
- '\u0635\u0651' : 'şş' # ص
124
- '\u0636\u0651' : 'ḑḑ' # ض
125
- '\u0637\u0651' : 'ţţ' # ط
126
- '\u0638\u0651' : 'z̧z̧' # ظ
127
-
128
- '\u062d' : 'ḩ' # ح
129
- '\ufea3' : 'ḩ' # ﺣ
130
- '\ufea4' : 'ḩ' # ﺤ
131
- '\ufea2' : 'ḩ' # ﺢ
132
-
133
- '\u0635' : 'ş' # ص
134
- '\ufebb' : 'ş' # ﺻ
135
- '\ufebc' : 'ş' # ﺼ
136
- '\ufeba' : 'ş' # ﺺ
137
-
138
- '\u0636' : 'ḑ' # ض
139
- '\ufebf' : 'ḑ' # ﺿ
140
- '\ufec0' : 'ḑ' # ﻀ
141
- '\ufebe' : 'ḑ' # ﺾ
142
-
143
- '\u0637' : 'ţ' # ط
144
- '\ufec3' : 'ţ' # ﻃ
145
- '\ufec4' : 'ţ' # ﻄ
146
- '\ufec2' : 'ţ' # ﻂ
147
-
148
- '\u0638' : 'z̧' # ظ
149
- '\ufec7' : 'z̧' # ﻇ
150
- '\ufec8' : 'z̧' # ﻈ
151
- '\ufec6' : 'z̧' # ﻆ
152
-