interscript 0.1.5 → 2.1.0a8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -123
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -311
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -69
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -23
  71. data/lib/interscript/opal/maps.js.erb +0 -7
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  77. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  78. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  79. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  80. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  81. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  82. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  83. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  84. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  85. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  86. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  87. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  88. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  89. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  90. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  91. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  92. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  93. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  94. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  95. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  96. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  97. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  98. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  99. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  100. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  101. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  102. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  103. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  104. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  105. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  106. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  107. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  108. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  109. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  110. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  111. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  112. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  113. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  114. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  115. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  116. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  117. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  118. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  119. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  120. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  122. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  123. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  124. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  125. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  126. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  127. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  128. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  129. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  130. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  131. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  132. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  133. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  134. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  135. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  136. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  137. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  138. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  139. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  140. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  141. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  142. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  143. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  144. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  145. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  146. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  147. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  148. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  149. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  150. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  151. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  152. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  153. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  154. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  155. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  156. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  157. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  158. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  159. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  160. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  161. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  162. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  163. data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
  164. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  165. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  166. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  167. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  168. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  169. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  170. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  171. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  172. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  173. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  174. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  175. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  176. data/maps/ses-ara-arab-latn-1930.yaml +0 -275
  177. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  178. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  179. data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
  180. data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
  181. data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
  182. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  183. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  184. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  185. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  186. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  187. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  188. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  189. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  190. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  191. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  192. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  193. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  194. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  195. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  196. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  197. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  198. data/spec/interscript/mapping_spec.rb +0 -42
  199. data/spec/interscript_spec.rb +0 -26
  200. data/spec/spec_helper.rb +0 -3
@@ -1,193 +0,0 @@
1
- ---
2
- authority_id: ua
3
- id: 1996
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Government of Ukraine Ukrainian System (1996)
8
- url: http://transliteration.eki.ee/pdf/Ukrainian.pdf
9
- creation_date: 1996
10
- description: Romanization table for Ukrainian. The current national system of romanization.
11
-
12
- notes:
13
- - gh is used in the romanization of зг zgh.
14
- - In initial position є -> ye, ї -> yi, й -> y, ю -> yu, я -> ya.
15
-
16
- tests:
17
- - source: Алушта
18
- expected: Alushta
19
- - source: Борщагівка
20
- expected: Borschahivka
21
- - source: Вишгород
22
- expected: Vyshhorod
23
- - source: Гадяч
24
- expected: Hadiach
25
- - source: Згорани
26
- expected: Zghorany
27
- - source: Ґалаґан
28
- expected: Galagan
29
- - source: Дон
30
- expected: Don
31
- - source: Рівне
32
- expected: Rivne
33
- - source: Єнакієве
34
- expected: Yenakiieve
35
- - source: Наєнко
36
- expected: Naienko
37
- - source: Житомир
38
- expected: Zhytomyr
39
- - source: Запоріжжя
40
- expected: Zaporizhzhia
41
- - source: Закарпаття
42
- expected: Zakarpattia
43
- - source: Медвин
44
- expected: Medvyn
45
- - source: Іршава
46
- expected: Irshava
47
- - source: Їжакевич
48
- expected: Yizhakevych
49
- - source: Кадіївка
50
- expected: Kadiivka
51
- - source: Йосипівка
52
- expected: Yosypivka
53
- - source: Київ
54
- expected: Kyiv
55
- - source: Лебедин
56
- expected: Lebedyn
57
- - source: Миколаїв
58
- expected: Mykolaiv
59
- - source: Ніжин
60
- expected: Nizhyn
61
- - source: Одеса
62
- expected: Odesa
63
- - source: Полтава
64
- expected: Poltava
65
- - source: Ромни
66
- expected: Romny
67
- - source: Суми
68
- expected: Sumy
69
- - source: Тетерів
70
- expected: Teteriv
71
- - source: Ужгород
72
- expected: Uzhhorod
73
- - source: Фастів
74
- expected: Fastiv
75
- - source: Харків
76
- expected: Kharkiv
77
- - source: Біла Церква
78
- expected: Bila Tserkva
79
- - source: Чернівці
80
- expected: Chernivtsi
81
- - source: Шостка
82
- expected: Shostka
83
- - source: Гоща
84
- expected: Hoscha
85
- - source: Русь
86
- expected: Rus’
87
- - source: Юрій
88
- expected: Yurii
89
- - source: Крюківка
90
- expected: Kriukivka
91
- - source: Яготин
92
- expected: Yahotyn
93
- - source: Ічня
94
- expected: Ichnia
95
- - source: Знам’янка
96
- expected: Znam”ianka
97
-
98
- map:
99
- rules:
100
- - pattern: (?<=[Зз])\u0413 # Г after З or з
101
- result: Gh
102
- - pattern: (?<=[Зз])\u0433 # г after З or з
103
- result: gh
104
- - pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
105
- result: Ye
106
- - pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
107
- result: ye
108
- - pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
109
- result: Yi
110
- - pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
111
- result: yi
112
- - pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
113
- result: "Y"
114
- - pattern: (?<!\b\u2019)\b\u0419 # й in initial position -> y
115
- result: "y"
116
- - pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
117
- result: Yu
118
- - pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
119
- result: yu
120
- - pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
121
- result: Ya
122
- - pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
123
- result: ya
124
- - pattern: \b\u2019\b # ’ in the middle of a word -> ”
125
- result: "\u201d"
126
-
127
- characters:
128
- "\u0410": "A" # А
129
- "\u0411": "B" # Б
130
- "\u0412": "V" # В
131
- "\u0413": "H" # Г
132
- "\u0490": "G" # Ґ
133
- "\u0414": "D" # Д
134
- "\u0415": "E" # Е
135
- "\u0404": "Ie" # Є
136
- "\u0416": "Zh" # Ж
137
- "\u0417": "Z" # З
138
- "\u0418": "Y" # И
139
- "\u0406": "I" # І
140
- "\u0407": "I" # Ї
141
- "\u0419": "I" # Й
142
- "\u041a": "K" # К
143
- "\u041b": "L" # Л
144
- "\u041c": "M" # М
145
- "\u041d": "N" # Н
146
- "\u041e": "O" # О
147
- "\u041f": "P" # П
148
- "\u0420": "R" # Р
149
- "\u0421": "S" # С
150
- "\u0422": "T" # Т
151
- "\u0423": "U" # У
152
- "\u0424": "F" # Ф
153
- "\u0425": "Kh" # Х
154
- "\u0426": "Ts" # Ц
155
- "\u0427": "Ch" # Ч
156
- "\u0428": "Sh" # Ш
157
- "\u0429": "Sch" # Щ
158
- "\u042e": "Iu" # Ю
159
- "\u042f": "Ia" # Я
160
- "\u042c": "\u2019" # Ь -> ’
161
- "\u0430": "a" # а
162
- "\u0431": "b" # б
163
- "\u0432": "v" # в
164
- "\u0433": "h" # г
165
- "\u0491": "g" # ґ
166
- "\u0434": "d" # д
167
- "\u0435": "e" # е
168
- "\u0454": "ie" # є
169
- "\u0436": "zh" # ж
170
- "\u0437": "z" # з
171
- "\u0438": "y" # и
172
- "\u0456": "i" # і
173
- "\u0457": "i" # ї
174
- "\u0439": "i" # й
175
- "\u043a": "k" # к
176
- "\u043b": "l" # л
177
- "\u043c": "m" # м
178
- "\u043d": "n" # н
179
- "\u043e": "o" # о
180
- "\u043f": "p" # п
181
- "\u0440": "r" # р
182
- "\u0441": "s" # с
183
- "\u0442": "t" # т
184
- "\u0443": "u" # у
185
- "\u0444": "f" # ф
186
- "\u0445": "kh" # х
187
- "\u0446": "ts" # ц
188
- "\u0447": "ch" # ч
189
- "\u0448": "sh" # ш
190
- "\u0449": "sch" # щ
191
- "\u044e": "iu" # ю
192
- "\u044f": "ia" # я
193
- "\u044c": "\u2019" # Ь -> ’
@@ -1,127 +0,0 @@
1
- ---
2
- authority_id: ungegn
3
- id: 1971
4
- language: ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: 1971 "Beirut system"
8
- url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/2nd-uncsgn-docs/E_Conf61_4_Add1_e.pdf
9
- creation_date: 1971
10
- confirmation date: 2018-06
11
- description: |
12
- The current United Nations recommended romanization
13
- system was approved in 2017 (resolution XI/3), based on
14
- the system adopted by Arabic experts at the conference
15
- held in Beirut in 2007, the Unified Arabic
16
- Transliteration System, taking into account the
17
- practical amendments and corrections carried out and
18
- agreed upon by the representatives of the Arabic-
19
- speaking countries at the Fourth Arab Conference on
20
- Geographical Names, held in Beirut in 2008, and some
21
- clarifications and amendments agreed in Riyadh in 2017.
22
- Previously, the United Nations had approved a
23
- romanization system in 1972 (resolution II/8), based on the
24
- system adopted by Arabic experts at the conference
25
- held at Beirut in 1971 with the practical amendments carried out
26
- and agreed upon by the representatives of the Arabic-speaking
27
- countries at their conference. The table was published in volume
28
- II of the conference report.
29
- In UN resolution XI/3 it is specifically stated that the
30
- system was recommended for the “romanization of the
31
- geographical names within those Arabic-speaking countries
32
- where this system is officially adopted”. There is
33
- evidence of its partial implementation in Jordan, Oman and
34
- Saudi Arabia. The UNGEGN Working Group on Romanization
35
- Systems intends to continue monitoring the UN system’s
36
- implementation across Arabic-speaking countries.
37
- In some countries there exist local romanization schemes
38
- or practices. The geographical names of Algeria, Djibouti,
39
- Mauritania, Morocco and Tunisia are generally rendered in
40
- the traditional manner which conforms to the principles of
41
- the French orthography.
42
- The previous UN-approved system is still found in
43
- considerable international usage.
44
- Arabic is written from right to left. The Arabic script
45
- usually omits vowel points and diacritical marks from
46
- writing which makes it difficult to obtain uniform results
47
- in the romanization of Arabic. It is essential to identify
48
- correctly the words which appear in any particular name
49
- and to know the standard Arabic-script spelling including
50
- the relevant vowels. One must also take into account
51
- dialectal and idiosyncratic deviations. The romanization
52
- is generally reversible though there may be some ambiguous
53
- letter sequences (dh, kh, sh, th) which may also point to
54
- combinations of Arabic characters in addition to the
55
- respective single characters.
56
- notes:
57
- - |
58
- ث is t͟h (th with sub macron)
59
- خ is k͟h (kh with sub macron)
60
- ذ is d͟h (dh with sub macron)
61
- ش is s͟h (sh with sub macron)
62
- ظ is z͟h (zh with sub macron)
63
- غ is g͟h (gh with sub macron)
64
- The previous UN 1972 System had the following differences:
65
- the character (ظ) was romanized as z̧ instead of d͟h;
66
- the cedilla (¸) was used instead of sub-macron (_) in all characters with sub-macrons.
67
-
68
- tests:
69
-
70
- # Examples taken from:
71
- # https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/2nd-uncsgn-docs/E_Conf61_4_Add1_e.pdf
72
- # page 31 (38 digital)
73
-
74
- - source: خَيبَر
75
- expected: k͟haybar
76
-
77
- - source: ظَهران
78
- expected: z͟hahrān
79
-
80
- - source: القُدس
81
- expected: al quds
82
-
83
- map:
84
- inherit: "un-ara-Arab-Latn-2017"
85
- characters:
86
-
87
- # sun letters
88
- '\b\u0627\u0644\u062b' : 'at͟h t͟h' # الث
89
- '\b\u0627\u0644\u0630' : 'ad͟h d͟h' # الذ
90
- '\b\u0627\u0644\u0634' : 'as͟h s͟h' # الش
91
- '\b\u0627\u0644\u0638' : 'az͟h z͟h' # الظ
92
-
93
- # shadda
94
- '\u062e\u0651' : 'k͟hk͟h' # خ
95
- '\u0630\u0651' : 'd͟hd͟h' # ذ
96
- '\u0634\u0651' : 's͟h' # ش
97
- '\u0638\u0651' : 'z͟hz͟h' # ظ
98
- '\u063a\u0651' : 'g͟hg͟h' # غ
99
-
100
- '\u062b' : 't͟h' # ث
101
- '\ufe9b' : 't͟h' # ﺛ
102
- '\ufe9c' : 't͟h' # ﺜ
103
- '\ufe9a' : 't͟h' # ﺚ
104
-
105
- '\u062e' : 'k͟h' # خ
106
- '\ufea7' : 'k͟h' # ﺧ
107
- '\ufea8' : 'k͟h' # ﺨ
108
- '\ufea6' : 'k͟h' # ﺦ
109
-
110
- '\u063a' : 'g͟h' # غ
111
- '\ufecf' : 'g͟h' # ﻏ
112
- '\ufed0' : 'g͟h' # ﻐ
113
- '\ufece' : 'g͟h' # ﻎ
114
-
115
- '\u0630' : 'd͟h' # ذ
116
- '\ufeac' : 'd͟h' # ﺬ
117
-
118
- '\u0634' : 's͟h' # ش
119
- '\ufeb7' : 's͟h' # ﺷ
120
- '\ufeb8' : 's͟h' # ﺸ
121
- '\ufeb6' : 's͟h' # ﺶ
122
-
123
- '\u0638' : 'z͟h' # ظ
124
- '\ufec7' : 'z͟h' # ﻇ
125
- '\ufec8' : 'z͟h' # ﻈ
126
- '\ufec6' : 'z͟h' # ﻆ
127
-
@@ -1,152 +0,0 @@
1
- ---
2
- authority_id: ungegn
3
- id: 1972
4
- language: ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ROMANIZATION OF ARABIC -- UNGEGN 1972 System
8
- url: http://www.eki.ee/wgrs/obs_rom_vers/rom1_ar_v4_0.pdf
9
- creation_date: 1972
10
- confirmation date: 2018-06
11
- description: |
12
- The United Nations recommended romanization
13
- system was approved in 1972 (resolution II/8),
14
- based on the system adopted by Arabic experts at
15
- the conference held at Beirut in 1971 with the
16
- practical amendments carried out and agreed upon
17
- by the representatives of the Arabic-speaking
18
- countries at their conference. The table was
19
- published in volume II of the conference report.
20
- In the UN resolution it was specifically
21
- pointed out that the system was recommended "for
22
- the romanization of the geographical names within
23
- those Arabic-speaking countries where this system
24
- is officially acknowledged". It cannot be
25
- definitely ascertained which of the
26
- Arabic-speaking countries have adopted this system
27
- officially, especially since 2007 when there are
28
- efforts by the Arabic Division to promote a
29
- modification of the UN system (ADEGN
30
- romanization, see the section on other
31
- romanization systems below), with varying
32
- success. Judging by the use of names in
33
- international cartographic products which rely
34
- mostly on national sources it appears that the UN
35
- system or its modification is more or less
36
- current in Iraq, Kuwait, Libya, Saudi Arabia,
37
- United Arab Emirates and Yemen, there and in some
38
- other countries the system is often used without
39
- diacritical marks. For the geographical names of
40
- the Syrian Arab Republic international maps
41
- favour the UN system while the local usage seems
42
- to prefer a French-oriented romanization. Also in
43
- Egypt and Sudan there exist local romanization
44
- schemes or practices side by side with the UN
45
- system. The geographical names of Algeria,
46
- Djibouti, Mauritania, Morocco and Tunisia are
47
- generally rendered in the traditional manner
48
- which conforms to the principles of the French
49
- orthography. Resolution 7 of the Seventh UN
50
- Conference on the Standardization of Geographical
51
- Names (1998) recommended that "the League of Arab
52
- States should, through its specialized
53
- structures, continue its efforts to organize a
54
- conference with a view to considering the
55
- difficulties encountered in applying the amended
56
- Beirut system of 1972 for the romanization of
57
- Arabic script, and submit, as soon as possible, a
58
- solution to the United Nations Group of Experts
59
- on Geographical Names". At the Eighth UN
60
- Conference on the Standardization of Geographical
61
- Names (2002), the Arabic Division of the UN Group
62
- of Experts announced that it had finalised
63
- proposed modifications to the UN recommended
64
- romanization system. These proposals would be
65
- submitted to the League of Arab States for
66
- approval. Arabic is written from right to left.
67
- The Arabic script usually omits vowel points and
68
- diacritical marks from writing which makes it
69
- difficult to obtain uniform results in the
70
- romanization of Arabic. It is essential to
71
- identify correctly the words which appear in any
72
- particular name and to know the standard Arabic-
73
- script spelling including proper pointing. One
74
- must also take into account dialectal and
75
- idiosyncratic deviations. The romanization is
76
- generally reversible though there are some
77
- ambiguous letter sequences (dh, kh, sh, th) which
78
- may also point to combinations of Arabic
79
- characters in addition to the respective single
80
- characters.
81
- notes:
82
- - |
83
- The previous UN 1972 System had the following differences:
84
- the character (ظ) was romanized as z̧ instead of d͟h;
85
- ح, ص, ض the cedilla (¸) was used instead of sub-macron (_) in all characters with sub-macrons. - |
86
- When the definite article al precedes a word beginning with one of the "sun letters" (t,
87
- th, d, dh, r, z, s, sh, ş, ḑ, ţ, z̧, l, n) the l of the definite article is assimilated with the first
88
- consonant of the word: الشارقة ash-Shāriqah.
89
-
90
-
91
- tests:
92
-
93
- # Examples taken from:
94
- # https://unstats.un.org/unsd/geoinfo/geonames/
95
-
96
- - source: مِصر
97
- expected: mişr
98
-
99
- - source: قَطَر
100
- expected: qaţar
101
-
102
- - source: الجُمهُورِيَّة العِراقِيَّة
103
- expected: al jumhūrīyah al ‘irāqīyah
104
-
105
- - source: جُمهُورِيَّة مِصر العَرَبِيَّة
106
- expected: jumhūrīyat mişr al ‘arabīyah
107
-
108
- - source: الرِيَاض
109
- expected: ar riyāḑ
110
-
111
- - source: الشارِقة
112
- expected: ash shāriqah
113
-
114
- map:
115
- inherit: "un-ara-Arab-Latn-2017"
116
- characters:
117
-
118
- '\b\u0627\u0644\u0635' : 'aş ş' # الص
119
- '\b\u0627\u0644\u0636' : 'aḑ ḑ' # الض
120
- '\b\u0627\u0644\u0637' : 'aţ ţ' # الط
121
-
122
- '\u062d\u0651' : 'ḩḩ' # ح
123
- '\u0635\u0651' : 'şş' # ص
124
- '\u0636\u0651' : 'ḑḑ' # ض
125
- '\u0637\u0651' : 'ţţ' # ط
126
- '\u0638\u0651' : 'z̧z̧' # ظ
127
-
128
- '\u062d' : 'ḩ' # ح
129
- '\ufea3' : 'ḩ' # ﺣ
130
- '\ufea4' : 'ḩ' # ﺤ
131
- '\ufea2' : 'ḩ' # ﺢ
132
-
133
- '\u0635' : 'ş' # ص
134
- '\ufebb' : 'ş' # ﺻ
135
- '\ufebc' : 'ş' # ﺼ
136
- '\ufeba' : 'ş' # ﺺ
137
-
138
- '\u0636' : 'ḑ' # ض
139
- '\ufebf' : 'ḑ' # ﺿ
140
- '\ufec0' : 'ḑ' # ﻀ
141
- '\ufebe' : 'ḑ' # ﺾ
142
-
143
- '\u0637' : 'ţ' # ط
144
- '\ufec3' : 'ţ' # ﻃ
145
- '\ufec4' : 'ţ' # ﻄ
146
- '\ufec2' : 'ţ' # ﻂ
147
-
148
- '\u0638' : 'z̧' # ظ
149
- '\ufec7' : 'z̧' # ﻇ
150
- '\ufec8' : 'z̧' # ﻈ
151
- '\ufec6' : 'z̧' # ﻆ
152
-