interscript 0.1.6 → 2.1.0a9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -127
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +75 -339
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -71
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -27
  71. data/lib/interscript/opal/maps.js.erb +0 -10
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
  80. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  81. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
  82. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  83. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  84. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
  85. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
  86. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  87. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  88. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  89. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  90. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
  91. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  92. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  93. data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
  94. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
  95. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  96. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  97. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  98. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  99. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  100. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  101. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  102. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  103. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  104. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  105. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  106. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  107. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  108. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
  109. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
  110. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  111. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  112. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  113. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
  114. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  115. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  116. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
  117. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
  118. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  119. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  120. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  121. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  122. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  123. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  124. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  125. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  126. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
  127. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
  128. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  129. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  130. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
  131. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  132. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  133. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  134. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  135. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  136. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
  137. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  138. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  139. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  140. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  141. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  142. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  143. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  144. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  145. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
  146. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  147. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  148. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  149. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  150. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
  151. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  152. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  153. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
  154. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  155. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  156. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  157. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  158. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  159. data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
  160. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  161. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  162. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  163. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  164. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
  165. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
  166. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  167. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  172. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  173. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  174. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  175. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  176. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  177. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  178. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  179. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  180. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  181. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  182. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  183. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  184. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  185. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  186. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  200. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
  201. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  202. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  203. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  204. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  205. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  206. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  207. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  208. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  209. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  210. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  211. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
  212. data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
  213. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  214. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
  215. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
  216. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  217. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  218. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  219. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  220. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  221. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  222. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  223. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  224. data/spec/interscript/mapping_spec.rb +0 -42
  225. data/spec/interscript_spec.rb +0 -26
  226. data/spec/spec_helper.rb +0 -3
@@ -1,190 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2013
4
- language: mkd
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: Macedonian Romanization, BGN/PCGN 2013 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811514/ROMANIZATION_OF_MACEDONIAN.pdf
9
- creation_date: 2013
10
- description: |
11
- Macedonian was officially established as a written language in Yugoslavia during
12
- World War II and is now the official language of North Macedonia.
13
- This romanization system replaces the BGN/PCGN 1981 agreement and adheres
14
- to the most widely-accepted standardization of Macedonian orthography.
15
-
16
- notes:
17
- - The Macedonian Cyrillic lowercase italic Д may sometimes be seen as g.
18
- There is no specific Unicode encoding for this variant form so a comparable character
19
- has been used here for illustrative purposes.
20
- - The Macedonian Cyrillic lowercase italic Ѓ may sometimes be seen as ī.
21
- There is no specific Unicode encoding for this variant form so a comparable character
22
- has been used here for illustrative purposes.
23
- - The Macedonian Cyrillic lowercase italic П may sometimes be seen as ū.
24
- There is no specific Unicode encoding for this variant form so a comparable character
25
- has been used here for illustrative purposes.
26
- - The Macedonian Cyrillic lowercase italic Т may sometimes be seen as w̄.
27
- There is no specific Unicode encoding for this variant form so a comparable character
28
- has been used here for illustrative purposes.
29
- - |
30
- An inventory of letter-diacritic combinations, with their Unicode encoding,
31
- in addition to the unmodified letters of the basic Roman script is:
32
- | Ǵ (U+01F4) | ǵ (U+01F5) |
33
- | Ž (U+017D) | ž (U+017E) |
34
- | Dz (U+01F2)* | dz (U+01F3)* |
35
- | Lj (U+01C8)* | lj (U+01C9)* |
36
- | Nj (U+01CB)* | nj (U+01CC)* |
37
- | Ḱ (U+1E30) | ḱ (U+1E31) |
38
- | Č (U+010C) | č (U+010D) |
39
- | Dž (U+01C5)* | dž (U+01C6)* |
40
- | Š (U+0160) | š (U+0161) |
41
- * Note that these characters can also be reproduced with individual letters (e.g. l+j).
42
- - The Romanization column shows only lowercase forms but, when romanizing,
43
- uppercase and lowercase Roman letters as appropriate should be used.
44
-
45
- tests:
46
- - source: Ѓенови Ливаѓе
47
- expected: Ǵenovi Livaǵe
48
- - source: ЛУЃЕ луѓе
49
- expected: LUǴE luǵe
50
- - source: ЅВЕЗДА ѕвезда Ѕвезда
51
- expected: DZVEZDA dzvezda Dzvezda
52
- - source: Јабежица
53
- expected: Jabežica
54
- - source: Љиќен и Бард
55
- expected: Ljiḱen i Bard
56
- - source: Ќамилов Чукар
57
- expected: Ḱamilov Čukar
58
- - source: Џавидин Кајнак
59
- expected: Džavidin Kajnak
60
- - source: Џамалџи
61
- expected: Džamaldži
62
- - source: Џибра Гури и Зи
63
- expected: Džibra Guri i Zi
64
- - source: Абазова Куќарица
65
- expected: Abazova Kuḱarica
66
- - source: Баба Анѓина Маала
67
- expected: Baba Anǵina Maala
68
- - source: Ваљановец
69
- expected: Valjanovec
70
- - source: Галал Једи Дереш
71
- expected: Galal Jedi Dereš
72
- - source: Дванаесет Клајнци
73
- expected: Dvanaeset Klajnci
74
- - source: Електродистрибуција Струга
75
- expected: Elektrodistribucija Struga
76
- - source: Железничка Станица Рајко Жинзифов
77
- expected: Železnička Stanica Rajko Žinzifov
78
- - source: Заедничко Речиште
79
- expected: Zaedničko Rečište
80
- - source: Испраена Плоча
81
- expected: Ispraena Ploča
82
- - source: Казнено-Поправна Установа Идризово
83
- expected: Kazneno-Popravna Ustanova Idrizovo
84
- - source: Лази и Зејнелит
85
- expected: Lazi i Zejnelit
86
- - source: Мавровско Езеро
87
- expected: Mavrovsko Ezero
88
- - source: Национален Парк Галичица
89
- expected: Nacionalen Park Galičica
90
- - source: Одморалиште Свети Стефан
91
- expected: Odmoralište Sveti Stefan
92
- - source: Планинарски Дом Караџица
93
- expected: Planinarski Dom Karadžica
94
- - source: Раса е Лисењит
95
- expected: Rasa e Lisenjit
96
- - source: Скочивирска Клисура
97
- expected: Skočivirska Klisura
98
- - source: Термо-електроцентрала Неготино
99
- expected: Termo-elektrocentrala Negotino
100
- - source: Узуновско Бресје
101
- expected: Uzunovsko Bresje
102
- - source: Фабрика Југохром
103
- expected: Fabrika Jugohrom
104
- - source: Хидроелектрана Сапунџица
105
- expected: Hidroelektrana Sapundžica
106
- - source: Цветковско Рамниште
107
- expected: Cvetkovsko Ramnište
108
- - source: Чалтанова Пештера
109
- expected: Čaltanova Peštera
110
- - source: Шкемби Вишнејц
111
- expected: Škembi Višnejc
112
-
113
- map:
114
- postrules:
115
- # DZ
116
- - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
117
- result: "DZ"
118
- #LJ
119
- - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
120
- result: "LJ"
121
- #NJ
122
- - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
123
- result: "NJ"
124
- #DŽ
125
- - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
126
- result: "DŽ"
127
-
128
- characters:
129
- "\u0410": "A"
130
- "\u0411": "B"
131
- "\u0412": "V"
132
- "\u0413": "G"
133
- "\u0414": "D"
134
- "\u0403": "\u01F4" # Ǵ
135
- "\u0415": "E"
136
- "\u0416": "\u005a\u030c" # Ž
137
- "\u0417": "Z"
138
- "\u0405": "Dz"
139
- "\u0418": "I"
140
- "\u0408": "J"
141
- "\u041A": "K"
142
- "\u041B": "L"
143
- "\u0409": "Lj"
144
- "\u041C": "M"
145
- "\u041D": "N"
146
- "\u040A": "Nj"
147
- "\u041E": "O"
148
- "\u041F": "P"
149
- "\u0420": "R"
150
- "\u0421": "S"
151
- "\u0422": "T"
152
- "\u040C": "\u004b\u0301" # Ḱ
153
- "\u0423": "U"
154
- "\u0424": "F"
155
- "\u0425": "H"
156
- "\u0426": "C"
157
- "\u0427": "\u0043\u030c" # Č
158
- "\u040F": "D\u007a\u030c" # Dž
159
- "\u0428": "\u0053\u030c" # Š
160
- "\u0430": "a"
161
- "\u0431": "b"
162
- "\u0432": "v"
163
- "\u0433": "g"
164
- "\u0434": "d"
165
- "\u0453": "\u01F5" # ǵ
166
- "\u0435": "e"
167
- "\u0436": "\u007a\u030c" # ž
168
- "\u0437": "z"
169
- "\u0455": "dz"
170
- "\u0438": "i"
171
- "\u0458": "j"
172
- "\u043A": "k"
173
- "\u043B": "l"
174
- "\u0459": "lj"
175
- "\u043C": "m"
176
- "\u043D": "n"
177
- "\u045A": "nj"
178
- "\u043E": "o"
179
- "\u043F": "p"
180
- "\u0440": "r"
181
- "\u0441": "s"
182
- "\u0442": "t"
183
- "\u045C": "\u1E31" # ḱ
184
- "\u0443": "u"
185
- "\u0444": "f"
186
- "\u0445": "h"
187
- "\u0446": "c"
188
- "\u0447": "\u0063\u030c" # č
189
- "\u045F": "d\u007a\u030c" # dž
190
- "\u0448": "\u0073\u030c" # š
@@ -1,200 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2020
4
- language: nep
5
- source_script: Deva
6
- destination_script: Latn
7
- name: Nepali Romanization, 2020
8
- url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20NEPALI.pdf
9
- creation_date: 1964
10
- description: |
11
- BGN/PCGN 2011 Agreement Romanization of Nepali
12
- The BGN and the PCGN have adopted the Nepal Survey Department (NSD) system for the
13
- romanization of Nepali names. This system, below, should be applied to Nepali names for which Roman‐
14
- script spellings in materials produced by the government of Nepal are not available.
15
-
16
- notes:
17
-
18
- - Only the isolated forms of the characters are given in the consonant table. See any grammar of Nepali
19
- (or other language using the Devanagari alphabet) for variant forms used in conjunct characters.
20
- - These two consonant characters appear sometimes to represent ṛ (cerebral r), e.g., पहाड → pahāṛ
21
- instead of pahāḍ. At one time they were written with dots below, i.e., as ड़ and ढ़, though this is no
22
- longer normal practice in Nepali. The romanizations ṛ and ṛh, respectively, are optional for
23
- documentary purposes if such dots appear in Nepali writing.
24
- - व , can be romanized as either v or w. This character is primarily
25
- romanized as v in consonant initial, medial, and final position; however, initial, medial, and final w
26
- romanizations can occur. The w romanization is a special case which is believed to be dependent on
27
- dialect, pronunciation, or stress.
28
- - |
29
- An inventory of letter‐diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
30
- Ṅ (U+1E44) ṅ (U+1E45)
31
- Ñ (U+00D1) ñ (U+00F1)
32
- Ṭ (1E6C) ṭ (1E6D)
33
- Ḍ (1E0C) ḍ (1E0D)
34
- Ṇ (1E46) ṇ (1E47)
35
- Ṣ (1E62) ṣ (1E63)
36
- Ā (U+0100) ā (U+0101)
37
- Ī (U+012A) ī (U+012B)
38
- Ū (U+016A) ū (U+016B)
39
- Ṛ (1E5A) ṛ (1E5B)
40
-
41
- - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase
42
- Roman letters as appropriate should be used.
43
-
44
- - |
45
- ं (anusvara) is rendered by
46
- ṅ before क, ख, ग, and घ
47
- ñ before च, छ, ज, and झ
48
- ṇ before ट, ठ, ड, and ढ
49
- n before त, थ, द, and ध
50
- ṁ before य, र, ल, व, श, ष, स and ह
51
-
52
- tests:
53
- - source: "लेखन"
54
- expected: "lekhn"
55
- - source: "मुद्रा"
56
- expected: "mudarā"
57
- - source: "प्रशंसा"
58
- expected: "parshṃsā" # note 5 rule checking
59
- - source: "अंक"
60
- expected: "aṅk" # note 5 rule checking
61
- - source: "नेकपाले स्थगित स्थायी कमिटीको बैठक भदौ गते बोलाउने भएको"
62
- expected: "nekpāle sathgit sathāyī kmiṭīko baiṭhk bhdau gte bolāune bheko"
63
- - source: "न घर रह्यो, न परिवार"
64
- expected: "n ghr rhayo, n privār"
65
- - source: "ढोरपाटनमा भुजीखोला बाढीपहिरोले अभिभावक गुमाएका बालबालिकाको बिचल्ली"
66
- expected: "ḍhorpāṭnmā bhujīkholā bāḍhīphirole abhibhāvk gumāekā bālbālikāko bichlalī"
67
- - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
68
- expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
69
- - source: "संविधान जारी भएसँगै सार्वजनिक प्रशासनमा नयाँ उत्साह आउने अपेक्षा थियो"
70
- expected: "sṃvidhān jārī bhes~gai sāravjnik parshāsnmā nyā~ utasāh āune apekṣā thiyo"
71
- - source: "देशमा कोरोना संक्रमित र मृतकको संख्या हरेक दिन बढ्दो छ"
72
- expected: "deshmā koronā sṅkarmit r mṛitkko sṅkhayā hrek din bḍhado chh"
73
- - source: "गाउँपालिकाका अध्यक्ष टिका गुरुङका अनुसार विष्णुदासलाई राजुले सुत्नका लागि बेलुका साथी लगेका थिए"
74
- expected: "gāu~pālikākā adhaykṣ ṭikā guruṅkā anusār viṣaṇudāslāī rājule sutankā lāgi belukā sāthī lgekā thie"
75
- - source: "यो आयोजना गाउँपालिकाको केन्द्र तेल्लोकमा पर्छ"
76
- expected: "yo āyojnā gāu~pālikāko kenadar telalokmā prachh"
77
- - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
78
- expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
79
- - source: "चैत पहिलो साता घर आएका उनी लकडाउन भएपछि यतै रोकिए"
80
- expected: "chait philo sātā ghr āekā unī lkḍāun bhepchhi ytai rokie"
81
- - source: "काम गर्न जानेको हकमा रोजगारदाता कम्पनीको पत्रसँगै वडा र जिल्ला प्रशासनको सिफारिस अनिवार्य गरिएको छ"
82
- expected: "kām gran jāneko hkmā rojgārdātā kmapnīko ptrs~gai vḍā r jilalā parshāsnko siphāris anivāray grieko chh"
83
- - source: "दुःख"
84
- expected: "duḥkh"
85
-
86
- map:
87
-
88
- rules:
89
- # note[5]
90
- - pattern: \u0902(?=[कखगघ]) # ं before क, ख, ग, and घ
91
- result: ṅ
92
- - pattern: \u0902(?=[चछजझ]) # ं before च, छ, ज, and झ
93
- result: ñ
94
- - pattern: \u0902(?=[टठडढ]) # ं before ट, ठ, ड, and ढ
95
- result: ṇ
96
- - pattern: \u0902(?=[तथदध]) # ं before त, थ, द, and ध
97
- result: n
98
-
99
- characters:
100
-
101
- # Vowels and Diphthongs
102
-
103
- 'अ': 'a'
104
- 'आ': 'ā'
105
- 'इ': 'i'
106
- 'ई': 'ī'
107
- 'उ': 'u'
108
- 'ऊ': 'ū'
109
- 'ऋ': 'ṛi'
110
- 'ॠ': 'rī'
111
- 'ए': 'e'
112
- 'ऐ': 'ai'
113
- 'ओ': 'o'
114
- 'औ': 'au'
115
-
116
- # Medials # Needed for connecting consonants
117
-
118
- 'ा': "ā"
119
- 'ि': "i"
120
- 'ी': "ī"
121
- 'ु': "u"
122
- 'ू': "ū"
123
- 'ृ': "ṛi"
124
- 'ॄ': "rī"
125
- 'े': "e"
126
- 'ै': "ai"
127
- 'ो': "o"
128
- 'ौ': "au"
129
-
130
-
131
- # Consonants (see Note 1)
132
-
133
- # Gutturals
134
- 'क': 'k'
135
- 'ख': 'kh'
136
- 'ग': 'g'
137
- 'घ': 'gh'
138
- 'ङ': 'ṅ'
139
-
140
- # Palatals
141
- 'च': 'ch'
142
- 'छ': 'chh'
143
- 'ज': 'j'
144
- 'झ': 'jh'
145
- 'ञ': 'ñ'
146
-
147
- # Cerebrals
148
- 'ट': 'ṭ'
149
- 'ठ': 'ṭh'
150
- 'ड': 'ḍ'
151
- 'ढ': 'ḍh'
152
- 'ण': 'ṇ'
153
-
154
- # Dentals
155
- 'त': 't'
156
- 'थ': 'th'
157
- 'द': 'd'
158
- 'ध': 'dh'
159
- 'न': 'n'
160
-
161
- # Labials
162
- 'प': 'p'
163
- 'फ': 'ph'
164
- 'ब': 'b'
165
- 'भ': 'bh'
166
- 'म': 'm'
167
-
168
- # Semivowels
169
- 'य': 'y'
170
- 'र': 'r'
171
- 'ल': 'l'
172
- 'व': 'v' # or wa [Note#3]
173
-
174
- # Sibilants
175
- 'श': 'sh'
176
- 'ष': 'ṣ'
177
- 'स': 's'
178
- 'क्ष': 'kṣ'
179
- 'त्र': 'tr'
180
- 'ज्ञ' : 'jñ'
181
-
182
- # Aspirate
183
- 'ह': 'h'
184
-
185
- # Anusvāra
186
- 'ं': 'ṃ'
187
-
188
- # Bisarga
189
- 'ः': 'ḥ'
190
-
191
- # Anunāsika
192
- 'ँ': '~'
193
-
194
- 'ॅ': 'r'
195
-
196
- # halanta
197
- '्': 'a'
198
-
199
- # Abagraha
200
- 'ऽ': '’' # (apostrophe)
@@ -1,92 +0,0 @@
1
- ---
2
- # TODO: This system is not complete/usable yet!
3
- # TODO: Add tests from PDF
4
- authority_id: bgnpcgn
5
- id: 1956
6
- language: per
7
- source_script: Arab
8
- destination_script: Latn
9
- name: BGN/PCGN 1956 System
10
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/320079/Arabic_Romanization.pdf
11
- creation_date: 1947
12
- confirmation_date: 2019-06
13
- description: |
14
- This System was adopted by the BGN in 1946 and by the PCGN in 1956
15
- and is applied in the systematic romanization of geographic names in
16
- Bahrain, Egypt, Iraq, Jordan, Kuwait, Libya, Oman, Qatar, Saudi Arabia,
17
- Syria, the United Arab Emirates, and Yemen.
18
-
19
- Uniform results in the romanization of Arabic are difficult to
20
- obtain, since vowel points and diacritical marks are generally omitted
21
- from both manual and machine writing. It follows that for correct
22
- identification of the words which appear in any particular name,
23
- knowledge of its standard Arabic-script spelling including proper
24
- pointing, and recognition of dialectal and idiosyncratic deviations are
25
- essential.
26
-
27
- In order to bring about uniformity in the Roman-script spelling of
28
- geographic names in Arabic-language areas, the system is based insofar
29
- as possible on fully pointed modern standard Arabic. In the interest of
30
- clarity, vowel pointing has been applied to the examples below. Arabic
31
- is written from right to left, and does not make a distinction between
32
- upper and lower case.
33
-
34
- notes:
35
- - The symbol ◌ represents any Arabic consonant character.
36
- - "Hamzah (ء) is written in Arabic in association with most instances of initial alif, except those which belong to the definite article al or which bear a maddah (see note 11). Hamzah is written above the alif if the accompanying short vowel is a fatḩah (َأ) or ḑammah (ُأ) and below the alif if the accompanying short vowel is a kasrah (ِإ). When the purpose is to indicate the presence of a glottal stop, hamzah is written over medial and final alif (أ), wāw (ؤ) and yā’ without dots (ئ). Hamzah following kasrah (◌ِ ) is written (ئ). Almost always the yā’ is in the initial or medial form and the dots are omitted: example: (بئر). Hamzah following ḑammah (◌ُ )is written (ؤ). Hamzah following a long vowel is written without a bearer and is positioned on the line of print like a regular character. The romanization of hamzah (’) should always be carefully distinguished from that of ‘ayn (‘)."
37
- - Alif as such is not romanized when it is a bearer of hamzah, but see fatḩah alif (ا◌َ ) and alif maddah (آ) in the vowel table. See also notes 2 and 11.
38
- - "In certain endings, an original tā’ (ت) is written (ة), i.e., like hā’ with two dots, and is known as tā marbūţah. It is romanized h, except in the construct form, where it is romanized t instead. Example: hamzah, hamzat al qaţ‘. The ending fatḩah hā’ (ه◌َ) may be romanized a·h when the character hā’ (ه) is not silent. Example: Muntaza·h. See also note 5."
39
- - Occasionally, the character sequences ـدهـ ,ـتـهـ, ـكـهـ and سهـ occur. They may be romanized k·h, t·h, d·h, and s·h in order to differentiate those romanizations from the digraphs kh, th, dh, and sh. See also note 4.
40
- - Where special considerations are paramount, the sub-dot ( ִ ) may be used in place of the cedilla.
41
- - The character yā’ (in final form but without dots) preceded by the vowel point fatḩah is a combination known as alif maqşūrah. See character 7 in the vowel table.
42
- - "The classical Arabic grammatical endings written with the nunation symbols (tanwīn) may be romanized, when necessary, by an, in, un. In modern Arabic, these endings have become silent and should not be romanized: classical alifun modern alif."
43
- - Doubled consonant sounds are represented in Arabic script by placing a shaddah (◌ّ ) over a consonant character. In romanization the letter should be doubled. However, the combination of the consonant character yā’ with a shaddah preceded by a kasrah (ــــِّيــ) is romanized īy rather than iyy. e.g., (ـــِّيـة) is romanized (īyah) and not (iyyah). When the definite article (al) precedes a word beginning with one of the “sun letters” t, th, d, dh, r, z, s, sh, ş, ḑ, ţ, z̧ , l, or n – the l is assimilated in pronunciation and romanization, thus yielding tt, thth, etc., in romanization. Example, An Nīl, not Al Nīl.
44
- - Hamzat al waşl (ٱ), which is utilized only in the pointing of classical Arabic, is romanized ’ as illustrated in the classical form of its name hamzatu’l waşli.
45
- - Since maddah (آ), which is placed over alif (ا), nearly always occurs in word-initial position, no confusion results from the use of ā for alif maddah (آ) as well as for fatḩah alif (◌َا).
46
- - The ligatures لا and لـا represent lām-alif, and should be romanized lā.
47
-
48
- special_rules:
49
- # TODO: These are not used
50
- - Initial definite articles and prepositions should be capitalized and hyphens should not be used to connect parts of names, e.g., Ash Shāriqah and Tall al Laḩm.
51
- - If any evidence is found for the use of the definite article in a name, the article should be used in the name chosen.
52
- - The Arabic word for God should be written Allāh (الله).
53
- - Names which consist of noun phrases should be written as separate words. The definite article within such names should be romanized al, not ul, e.g., ‘Abd Allāh, ‘Abd ar Raḩmān, Dhū al Faqār.
54
- - The Arabic word بِن should be romanized Bin rather than Ibn whenever written without alif, that is between two proper nouns, e.g., ‘Umar Bin al Khaţţāb.
55
- - The Turkish word Paşa should be romanized from Arabic script as Bāshā. The Turkish word Bey should be romanized as Bey in Egyptian names, no matter how it is written in Arabic-language sources, but in other Arabic areas it should be romanized as Bak where written بك and as Bayk when written بيك.
56
- - The modern colloquial word Sīdī should be given precedence over the classical form Sayyidī. This does not preclude the spelling Sayyidī if the latter is indicated by the Arabic script or other evidence – for instance, if the yā’ is written with a shaddah (◌ّ ).
57
- - The colloquial word Bū should not be changed to the standard form Abū.
58
- - The colloquial word for water, written مية on Arabic maps, should be romanized Mayyat.
59
- - Place names of Aramaic origin in Syria often contain initial consonant clusters consisting of b plus another consonant such as l or h. In romanization, the clusters bl, bh, etc., should be so represented.
60
- - In names containing the Arabic word for back, ridge, or hill, appearing as either ظهر or ضهر in Arabic sources, the word should be romanized to reflect the particular Arabic spelling shown.
61
- map:
62
- characters:
63
- '\u0627' : 'a'
64
- '\u0628' : 'b'
65
- '\u067e' : 'p'
66
- '\u062a' : 't'
67
- '\u062b' : 's'
68
- '\u062c' : 'j'
69
- '\u062d' : 'h'
70
- '\u0686' : 'ch'
71
- '\u062e' : 'kh'
72
- '\u062f' : 'd'
73
- '\u0630' : 'z'
74
- '\u0631' : 'r'
75
- '\u0632' : 'z'
76
- '\u0633' : 's'
77
- '\u0634' : 'sh'
78
- '\u0635' : 's'
79
- '\u0636' : 'z'
80
- '\u0637' : 't'
81
- '\u0638' : 'z'
82
- '\u0639' : '\u2018'
83
- '\u063a' : 'gh'
84
- '\u0641' : 'f'
85
- '\u0642' : 'q'
86
- '\u0643' : 'k'
87
- '\u0644' : 'l'
88
- '\u0645' : 'm'
89
- '\u0646' : 'n'
90
- '\u0647' : 'h'
91
- '\u0648' : 'v'
92
- '\u0649' : 'y'