interscript 0.1.9 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/Gemfile +29 -0
- data/LICENSE.adoc +31 -0
- data/README.md +3 -0
- data/Rakefile +53 -0
- data/bin/console +14 -0
- data/bin/interscript +5 -0
- data/bin/maps_analyze_staging +168 -0
- data/bin/maps_debug_compilers +58 -0
- data/bin/maps_debug_ordering +88 -0
- data/bin/maps_debug_ruby_compile +24 -0
- data/bin/maps_debug_step_by_step +44 -0
- data/bin/maps_optimize_order +112 -0
- data/bin/maps_v1_analyze_regexps +45 -0
- data/bin/maps_v1_to_v2 +426 -0
- data/bin/setup +8 -0
- data/exe/interscript +6 -0
- data/interscript.gemspec +31 -0
- data/lib/interscript.rb +80 -135
- data/lib/interscript/command.rb +5 -5
- data/lib/interscript/compiler.rb +22 -0
- data/lib/interscript/compiler/javascript.rb +292 -0
- data/lib/interscript/compiler/ruby.rb +262 -0
- data/lib/interscript/dsl.rb +67 -0
- data/lib/interscript/dsl/aliases.rb +23 -0
- data/lib/interscript/dsl/document.rb +46 -0
- data/lib/interscript/dsl/group.rb +45 -0
- data/lib/interscript/dsl/group/parallel.rb +6 -0
- data/lib/interscript/dsl/items.rb +89 -0
- data/lib/interscript/dsl/metadata.rb +26 -0
- data/lib/interscript/dsl/stage.rb +6 -0
- data/lib/interscript/dsl/symbol_mm.rb +11 -0
- data/lib/interscript/dsl/tests.rb +12 -0
- data/lib/interscript/interpreter.rb +251 -0
- data/lib/interscript/node.rb +25 -0
- data/lib/interscript/node/alias_def.rb +15 -0
- data/lib/interscript/node/dependency.rb +13 -0
- data/lib/interscript/node/document.rb +45 -0
- data/lib/interscript/node/group.rb +34 -0
- data/lib/interscript/node/group/parallel.rb +9 -0
- data/lib/interscript/node/group/sequential.rb +2 -0
- data/lib/interscript/node/item.rb +52 -0
- data/lib/interscript/node/item/alias.rb +42 -0
- data/lib/interscript/node/item/any.rb +61 -0
- data/lib/interscript/node/item/capture.rb +50 -0
- data/lib/interscript/node/item/group.rb +51 -0
- data/lib/interscript/node/item/repeat.rb +40 -0
- data/lib/interscript/node/item/stage.rb +23 -0
- data/lib/interscript/node/item/string.rb +51 -0
- data/lib/interscript/node/metadata.rb +18 -0
- data/lib/interscript/node/rule.rb +6 -0
- data/lib/interscript/node/rule/funcall.rb +18 -0
- data/lib/interscript/node/rule/run.rb +15 -0
- data/lib/interscript/node/rule/sub.rb +65 -0
- data/lib/interscript/node/stage.rb +19 -0
- data/lib/interscript/node/tests.rb +15 -0
- data/lib/interscript/stdlib.rb +211 -0
- data/lib/interscript/utils/regexp_converter.rb +283 -0
- data/lib/interscript/version.rb +1 -1
- data/requirements.txt +1 -0
- metadata +73 -458
- data/README.adoc +0 -296
- data/aliases.json +0 -1
- data/lib/g2pwrapper.py +0 -34
- data/lib/interscript/fs.rb +0 -96
- data/lib/interscript/mapping.rb +0 -144
- data/lib/interscript/opal.rb +0 -196
- data/lib/interscript/opal/entrypoint.rb +0 -20
- data/lib/interscript/opal/exports.rb +0 -11
- data/lib/interscript/opal/maps.js.erb +0 -8
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
- data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
- data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
- data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
- data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
- data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
- data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
- data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
- data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
- data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
- data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
- data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
- data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
- data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
- data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
- data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
- data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
- data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
- data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
- data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
- data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
- data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
- data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
- data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
- data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
- data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
- data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
- data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
- data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
- data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
- data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
- data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
- data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
- data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
- data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
- data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
- data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
- data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
- data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
- data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
- data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
- data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
- data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
- data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
- data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
- data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
- data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
- data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
- data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
- data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
- data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
- data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
- data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
- data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
- data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
- data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
- data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
- data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
- data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
- data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
- data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
- data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
- data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
- data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
- data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
- data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
- data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
- data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
- data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
- data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
- data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
- data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
- data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
- data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
- data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
- data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
- data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
- data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
- data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
- data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
- data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
- data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
- data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
- data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
- data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
- data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
- data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
- data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
- data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
- data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
- data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
- data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
- data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
- data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
- data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
- data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
- data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
- data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
- data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
- data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
- data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
- data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
- data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
- data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
- data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
- data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
- data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
- data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
- data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
- data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
- data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
- data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
- data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
- data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
- data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
- data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
- data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
- data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
- data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
- data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
- data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
- data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
- data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
- data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
- data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
- data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
- data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
- data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
- data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
- data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
- data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
- data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
- data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
- data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
- data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
- data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
- data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
- data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
- data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
- data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
- data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
- data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
- data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
- data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
- data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
- data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
- data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
- data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
- data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
- data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
- data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
- data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
- data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
- data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
- data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
- data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
- data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
- data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
- data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
- data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
- data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
- data/spec/interscript/filenames_spec.rb +0 -21
- data/spec/interscript/mapping_spec.rb +0 -42
- data/spec/interscript_spec.rb +0 -37
- data/spec/spec_helper.rb +0 -3
@@ -1,402 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: un
|
3
|
-
id: 1972
|
4
|
-
language: iso-639-2:pan
|
5
|
-
source_script: Guru
|
6
|
-
destination_script: Latn
|
7
|
-
name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES --Panjabi Romanization Version 4.0
|
8
|
-
url: https://www.eki.ee/wgrs/rom1_pa.htm
|
9
|
-
creation_date: 1972
|
10
|
-
confirmation_date: 2016
|
11
|
-
description: |
|
12
|
-
The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
|
13
|
-
based on a report prepared by D. N. Sharma. The tables and their corrections were published in volume
|
14
|
-
II of the conference reports1,2.
|
15
|
-
|
16
|
-
There is no evidence of the use of the system either in India or in international cartographic products.
|
17
|
-
|
18
|
-
Punjabi (Panjābī) in India uses an alphasyllabic script (Gurmukhi) whereby each character represents a syllable
|
19
|
-
rather than one sound. Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially)
|
20
|
-
and in an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous. The system is mostly
|
21
|
-
reversible but there exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters) and
|
22
|
-
consonants (combinations with subscript consonants vs. character sequences).
|
23
|
-
|
24
|
-
References
|
25
|
-
|
26
|
-
Second United Nations Conference on the Standardization of Geographical Names. London, 10–31 May 1972. Vol. II. Technical papers.
|
27
|
-
United Nations. New York 1974, pp. 136–138.
|
28
|
-
|
29
|
-
Third United Nations Conference on the Standardization of Geographical Names. Athens, 17 August – 7 September 1977. Vol. II,
|
30
|
-
Technical papers, pp. 393 etc.
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
notes:
|
35
|
-
- |
|
36
|
-
These characters are used in combination with abbreviated vowel characters: ਉ u, ਊ ū, ਅ a, ਆ ā, ਐ ai, ਔ au, ਇ i, ਈ ī, ਏ e; exceptional variation: ਓ o.
|
37
|
-
- |
|
38
|
-
Dotted variants of the characters: ਸ਼ sha, ਖ਼ ḳha, ਗ਼ g̣a, ਜ਼ za, ਫ਼ fa.
|
39
|
-
- |
|
40
|
-
(ੰ) Used if it is preceded by short vowels (a, i, u) and ū, excluding the independent vowel character ਉ (u, ū), e.g. ਸੰਗ saṁg, ਸਿੰਗ siṁg, ਬੁੰਦਾ buṁdā, ਬੂੰਦ būṁd, ਇੰਜਨ iṁjan.
|
41
|
-
- |
|
42
|
-
(ਂ) Used in all other occasions, e.g. ਝੋਂਕਾ jhoṁkā.
|
43
|
-
- |
|
44
|
-
(ੱ) Marks doubling of the following consonant: ਨਿੱਕਾ nikkā, ਲੱਭਣਾ labhbhṇā, ਕੁੱਤਾ kuttā, ਹਿੱਸਾ hissā, ਲੱਮਾ lammā.
|
45
|
-
- |
|
46
|
-
Absence of the inherent vowel (-a) is not marked in the spelling in any way except for the combinations
|
47
|
-
with subscript characters and those which are doubled by ੱ (adhaka).
|
48
|
-
|
49
|
-
tests:
|
50
|
-
- source: "ਪੰਜਾਬ 'ਚ ਵਧ ਰਿਹਾ ਖ਼ੁਦਕੁਸ਼ੀਆਂ ਦਾ ਰੁਝਾਨ"
|
51
|
-
expected: "paṁzāba 'cha vadha rihā khaḳhudakusḳhīāṁ dā rujhāna"
|
52
|
-
- source: "ਲੱਖ ਤੋਂ ਪਾਰ ਪੁੱਜਾ ਸਰਗਰਮ ਕੇਸਾਂ ਦਾ ਅੰਕੜਾ, ਦਿੱਲੀ 'ਚ ਦੋ ਲੱਖ ਤੋਂ ਪਾਰ ਇਨਫੈਕਟਿਡ"
|
53
|
-
expected: "lakkha toṁ pāra puzzā sragarama kesāṁ dā aṁkaṙā, dillī 'cha do lakkha toṁ pāra inaphaikaṭiḍa"
|
54
|
-
- source: "ਪਰਿਵਾਰਕ ਸਮੱਸਿਆਵਾਂ ਅਤੇ ਵਿਆਹ ਵੀ ਹੈ ਹੋਰ ਅਹਿਮ ਕਾਰਨ"
|
55
|
-
expected: "parivāraka smassiāvāṁ ate viāh vī hai hora ahima kārana"
|
56
|
-
- source: "ਮਰਦਾਂ 'ਚ ਔਰਤਾਂ ਨਾਲੋਂ ਵੱਧ ਹੈ ਖ਼ੁਦਕੁਸ਼ੀ ਦਾ ਰੁਝਾਨ"
|
57
|
-
expected: "maradāṁ 'cha auratāṁ nāloṁ vaddha hai khaḳhudakusḳhī dā rujhāna"
|
58
|
-
- source: "ਰਾਸ਼ਟਰੀ ਪੱਧਰ 'ਤੇ ਪੰਜਾਬ ਦੀ ਸਥਿਤੀ ਕਾਫ਼ੀ ਸੂਬਿਆਂ ਤੋਂ ਬਿਹਤਰ"
|
59
|
-
expected: "rāsṭarī paddhara 'te paṁzāba dī sthitī kāphaḳhī sūbiāṁ toṁ bihtara"
|
60
|
-
- source: "ਚੀਨੀ ਸੈਨਾ ਨੇ ਲਾਪਤਾ ਅਰੁਣਾਚਲ ਦੇ 5 ਨੌਜਵਾਨਾਂ ਬਾਰੇ ਦੱਸਿਆ"
|
61
|
-
expected: "chīnī sainā ne lāpatā aruṇāchala de 5 naujavānāṁ bāre dassiā"
|
62
|
-
- source: "ਸਾਖਰਤਾ ਦੇ ਮਾਮਲੇ 'ਚ ਦੇਸ਼ 'ਚ 7ਵੇਂ ਨੰਬਰ 'ਤੇ ਪੰਜਾਬ"
|
63
|
-
expected: "sākharatā de māmale 'cha des 'cha 7veṁ naṁbara 'te paṁzāba"
|
64
|
-
- source: "ਦਿੱਲੀ ਕਮੇਟੀ ਦੇ ਮੈਂਬਰ ਸ਼ੰਟੀ ਨੇ ਅਕਾਲੀ ਦਲ ਤੋਂ ਦਿੱਤਾ ਅਸਤੀਫ਼ਾ"
|
65
|
-
expected: "dillī kameṭī de maiṁbara sṁṭī ne akālī dala toṁ dittā astīphaḳhā"
|
66
|
-
- source: "੧੦੨ ਹੋਰ ਕੋਰੋਨਾ ਪਾਜ਼ੀਟਿਵ ਮਰੀਜ਼ਾਂ ਦੀ ਪੁਸ਼ਟੀ, ਇਕ ਦੀ ਮੌਤ"
|
67
|
-
expected: "102 hora koronā pājaḳhīṭiva marījaḳhāṁ dī pusṭī, ika dī mauta"
|
68
|
-
- source: "ਸੜਕ ਹਾਦਸੇ ਦੌਰਾਨ ਇਕ ਦੀ ਮੌਤ"
|
69
|
-
expected: "sṙaka hādase daurāna ika dī mauta"
|
70
|
-
|
71
|
-
map:
|
72
|
-
|
73
|
-
rules:
|
74
|
-
- pattern: (?<!ੱ)([ਕ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
75
|
-
result: 'k'
|
76
|
-
- pattern: (?<!ੱ)([ਖ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
77
|
-
result: 'kh'
|
78
|
-
- pattern: (?<!ੱ)([ਖ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
79
|
-
result: 'ḳh'
|
80
|
-
- pattern: (?<!ੱ)([ਗ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
81
|
-
result: 'g'
|
82
|
-
- pattern: (?<!ੱ)([ਗ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
83
|
-
result: 'g̣'
|
84
|
-
- pattern: (?<!ੱ)([ਘ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
85
|
-
result: 'gh'
|
86
|
-
- pattern: (?<!ੱ)([ਙ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
87
|
-
result: 'ṅ'
|
88
|
-
- pattern: (?<!ੱ)([ਚ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
89
|
-
result: 'ch'
|
90
|
-
- pattern: (?<!ੱ)([ਛ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
91
|
-
result: 'chh'
|
92
|
-
- pattern: (?<!ੱ)([ਜ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
93
|
-
result: 'z'
|
94
|
-
- pattern: (?<!ੱ)([ਜ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
95
|
-
result: 'j'
|
96
|
-
- pattern: (?<!ੱ)([ਝ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
97
|
-
result: 'jh'
|
98
|
-
- pattern: (?<!ੱ)([ਞ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
99
|
-
result: 'ñ'
|
100
|
-
- pattern: (?<!ੱ)([ਟ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
101
|
-
result: 'ṭ'
|
102
|
-
- pattern: (?<!ੱ)([ਠ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
103
|
-
result: 'ṭh'
|
104
|
-
- pattern: (?<!ੱ)([ਡ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
105
|
-
result: 'ḍ'
|
106
|
-
- pattern: (?<!ੱ)([ਢ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
107
|
-
result: 'ḍh'
|
108
|
-
- pattern: (?<!ੱ)([ਣ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
109
|
-
result: 'ṇ'
|
110
|
-
- pattern: (?<!ੱ)([ਤ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
111
|
-
result: 't'
|
112
|
-
- pattern: (?<!ੱ)([ਥ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
113
|
-
result: 'th'
|
114
|
-
- pattern: (?<!ੱ)([ਦ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
115
|
-
result: 'd'
|
116
|
-
- pattern: (?<!ੱ)([ਧ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
117
|
-
result: 'dh'
|
118
|
-
- pattern: (?<!ੱ)([ਨ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
119
|
-
result: 'n'
|
120
|
-
- pattern: (?<!ੱ)([ਪ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
121
|
-
result: 'p'
|
122
|
-
- pattern: (?<!ੱ)([ਫ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
123
|
-
result: 'ph'
|
124
|
-
- pattern: (?<!ੱ)([ਫ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
125
|
-
result: 'f'
|
126
|
-
- pattern: (?<!ੱ)([ਬ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
127
|
-
result: 'b'
|
128
|
-
- pattern: (?<!ੱ)([ਭ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
129
|
-
result: 'bh'
|
130
|
-
- pattern: (?<!ੱ)([ਮ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
131
|
-
result: 'm'
|
132
|
-
- pattern: (?<!ੱ)([ਯ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
133
|
-
result: 'y'
|
134
|
-
- pattern: (?<!ੱ)([ਰ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
135
|
-
result: 'r'
|
136
|
-
- pattern: (?<!ੱ)([ਲ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
137
|
-
result: 'l'
|
138
|
-
- pattern: (?<!ੱ)([ਲੵ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
139
|
-
result: 'l'
|
140
|
-
- pattern: (?<!ੱ)([ਲ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
141
|
-
result: 'l'
|
142
|
-
- pattern: (?<!ੱ)([ਵ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
143
|
-
result: 'v'
|
144
|
-
- pattern: (?<!ੱ)([ੜ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
145
|
-
result: 'ṙ'
|
146
|
-
- pattern: (?<!ੱ)([ਸ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
147
|
-
result: 's'
|
148
|
-
- pattern: (?<!ੱ)([ਸ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
149
|
-
result: 'sh'
|
150
|
-
- pattern: (?<!ੱ)([ਹ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
151
|
-
result: 'h'
|
152
|
-
|
153
|
-
- pattern: (?<=ੱ)([ਕ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
154
|
-
result: 'kk'
|
155
|
-
- pattern: (?<=ੱ)([ਖ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
156
|
-
result: 'kkh'
|
157
|
-
- pattern: (?<=ੱ)([ਖ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
158
|
-
result: 'ḳḳh'
|
159
|
-
- pattern: (?<=ੱ)([ਗ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
160
|
-
result: 'gg'
|
161
|
-
- pattern: (?<=ੱ)([ਗ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
162
|
-
result: 'gg̣'
|
163
|
-
- pattern: (?<=ੱ)([ਘ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
164
|
-
result: 'ggh'
|
165
|
-
- pattern: (?<=ੱ)([ਙ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
166
|
-
result: 'ṅṅ'
|
167
|
-
- pattern: (?<=ੱ)([ਚ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
168
|
-
result: 'cch'
|
169
|
-
- pattern: (?<=ੱ)([ਛ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
170
|
-
result: 'cchh'
|
171
|
-
- pattern: (?<=ੱ)([ਜ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
172
|
-
result: 'zz'
|
173
|
-
- pattern: (?<=ੱ)([ਜ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
174
|
-
result: 'jj'
|
175
|
-
- pattern: (?<=ੱ)([ਝ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
176
|
-
result: 'jjh'
|
177
|
-
- pattern: (?<=ੱ)([ਞ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
178
|
-
result: 'ññ'
|
179
|
-
- pattern: (?<=ੱ)([ਟ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
180
|
-
result: 'ṭṭ'
|
181
|
-
- pattern: (?<=ੱ)([ਠ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
182
|
-
result: 'ṭṭh'
|
183
|
-
- pattern: (?<=ੱ)([ਡ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
184
|
-
result: 'ḍḍ'
|
185
|
-
- pattern: (?<=ੱ)([ਢ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
186
|
-
result: 'ḍḍh'
|
187
|
-
- pattern: (?<=ੱ)([ਣ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
188
|
-
result: 'ṇṇ'
|
189
|
-
- pattern: (?<=ੱ)([ਤ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
190
|
-
result: 'tt'
|
191
|
-
- pattern: (?<=ੱ)([ਥ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
192
|
-
result: 'tth'
|
193
|
-
- pattern: (?<=ੱ)([ਦ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
194
|
-
result: 'dd'
|
195
|
-
- pattern: (?<=ੱ)([ਧ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
196
|
-
result: 'ddh'
|
197
|
-
- pattern: (?<=ੱ)([ਨ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
198
|
-
result: 'nn'
|
199
|
-
- pattern: (?<=ੱ)([ਪ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
200
|
-
result: 'pp'
|
201
|
-
- pattern: (?<=ੱ)([ਫ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
202
|
-
result: 'pph'
|
203
|
-
- pattern: (?<=ੱ)([ਫ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
204
|
-
result: 'ff'
|
205
|
-
- pattern: (?<=ੱ)([ਬ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
206
|
-
result: 'bb'
|
207
|
-
- pattern: (?<=ੱ)([ਭ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
208
|
-
result: 'bbh'
|
209
|
-
- pattern: (?<=ੱ)([ਮ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
210
|
-
result: 'mm'
|
211
|
-
- pattern: (?<=ੱ)([ਯ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
212
|
-
result: 'yy'
|
213
|
-
- pattern: (?<=ੱ)([ਰ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
214
|
-
result: 'rr'
|
215
|
-
- pattern: (?<=ੱ)([ਲ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
216
|
-
result: 'll'
|
217
|
-
- pattern: (?<=ੱ)([ਲੵ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
218
|
-
result: 'll'
|
219
|
-
- pattern: (?<=ੱ)([ਲ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
220
|
-
result: 'll'
|
221
|
-
- pattern: (?<=ੱ)([ਵ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
222
|
-
result: 'vv'
|
223
|
-
- pattern: (?<=ੱ)([ੜ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
224
|
-
result: 'ṙṙ'
|
225
|
-
- pattern: (?<=ੱ)([ਸ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
226
|
-
result: 'ss'
|
227
|
-
- pattern: (?<=ੱ)([ਸ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
228
|
-
result: 'ssh'
|
229
|
-
- pattern: (?<=ੱ)([ਹ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
|
230
|
-
result: 'hh'
|
231
|
-
|
232
|
-
characters:
|
233
|
-
|
234
|
-
# I. Vowels and Diphthongs (see Note 1)
|
235
|
-
'ਅ': 'a'
|
236
|
-
'ਆ': 'ā'
|
237
|
-
'ਇ': 'i'
|
238
|
-
'ਈ': 'ī'
|
239
|
-
'ਉ': 'u'
|
240
|
-
'ਊ': 'ū'
|
241
|
-
'ਏ': 'e'
|
242
|
-
'ਐ': 'ai'
|
243
|
-
'ਓ': 'o'
|
244
|
-
'ਔ': 'au'
|
245
|
-
|
246
|
-
'ਾ': "ā"
|
247
|
-
'ਿ': "i"
|
248
|
-
'ੀ': "ī"
|
249
|
-
'ੁ': "u"
|
250
|
-
'ੂ': "ū"
|
251
|
-
'ੇ': "e"
|
252
|
-
'ੈ': "ai"
|
253
|
-
'ੋ': "o"
|
254
|
-
'ੌ': "au"
|
255
|
-
|
256
|
-
# II. Consonants
|
257
|
-
'ਕ': 'ka'
|
258
|
-
'ਖ': 'kha'
|
259
|
-
'ਖ਼': 'ḳha'
|
260
|
-
'ਗ': 'ga'
|
261
|
-
'ਗ਼': 'g̣a'
|
262
|
-
'ਘ': 'gha'
|
263
|
-
'ਙ': 'ṅa'
|
264
|
-
'ਚ': 'cha'
|
265
|
-
'ਛ': 'chha'
|
266
|
-
'ਜ਼': 'za'
|
267
|
-
'ਜ': 'ja'
|
268
|
-
'ਝ': 'jha'
|
269
|
-
'ਞ': 'ña'
|
270
|
-
'ਟ': 'ṭa'
|
271
|
-
'ਠ': 'ṭha'
|
272
|
-
'ਡ': 'ḍa'
|
273
|
-
'ਢ': 'ḍha'
|
274
|
-
'ਣ': 'ṇa'
|
275
|
-
'ਤ': 'ta'
|
276
|
-
'ਥ': 'tha'
|
277
|
-
'ਦ': 'da'
|
278
|
-
'ਧ': 'dha'
|
279
|
-
'ਨ': 'na'
|
280
|
-
'ਪ': 'pa'
|
281
|
-
'ਫ': 'pha'
|
282
|
-
'ਫ਼': 'fa'
|
283
|
-
'ਬ': 'ba'
|
284
|
-
'ਭ': 'bha'
|
285
|
-
'ਮ': 'ma'
|
286
|
-
'ਯ': 'ya'
|
287
|
-
'ਰ': 'ra'
|
288
|
-
'ਲ': 'la'
|
289
|
-
'ਲੵ': 'la'
|
290
|
-
'ਲ਼': 'la'
|
291
|
-
'ਵ': 'va'
|
292
|
-
'ੜ': 'ṙa'
|
293
|
-
'ਸ': 's'
|
294
|
-
'ਸ਼': 'sha'
|
295
|
-
'ਹ': 'h'
|
296
|
-
'ਂ': 'ṁ'
|
297
|
-
'ੰ': 'ṁ'
|
298
|
-
|
299
|
-
# Adhik character doubling of the following consonant[Note 5]
|
300
|
-
'ੱਕ': 'kka'
|
301
|
-
'ੱਖ': 'kkha'
|
302
|
-
'ੱਖ਼': 'ḳḳha'
|
303
|
-
'ੱਗ': 'gga'
|
304
|
-
'ੱਗ਼': 'gg̣a'
|
305
|
-
'ੱਘ': 'ggha'
|
306
|
-
'ੱਙ': 'ṅṅa'
|
307
|
-
'ੱਚ': 'ccha'
|
308
|
-
'ੱਛ': 'cchha'
|
309
|
-
'ੱਜ਼': 'zza'
|
310
|
-
'ੱਜ': 'jja'
|
311
|
-
'ੱਝ': 'jjha'
|
312
|
-
'ੱਞ': 'ñña'
|
313
|
-
'ੱਟ': 'ṭṭa'
|
314
|
-
'ੱਠ': 'ṭṭha'
|
315
|
-
'ੱਡ': 'ḍḍa'
|
316
|
-
'ੱਢ': 'ḍḍha'
|
317
|
-
'ੱਣ': 'ṇṇa'
|
318
|
-
'ੱਤ': 'tta'
|
319
|
-
'ੱਥ': 'ttha'
|
320
|
-
'ੱਦ': 'dda'
|
321
|
-
'ੱਧ': 'ddha'
|
322
|
-
'ੱਨ': 'nna'
|
323
|
-
'ੱਪ': 'ppa'
|
324
|
-
'ੱਫ': 'ppha'
|
325
|
-
'ੱਫ਼': 'ffa'
|
326
|
-
'ੱਬ': 'bba'
|
327
|
-
'ੱਭ': 'bbha'
|
328
|
-
'ੱਮ': 'mma'
|
329
|
-
'ੱਯ': 'yya'
|
330
|
-
'ੱਰ': 'rra'
|
331
|
-
'ੱਲ': 'lla'
|
332
|
-
'ੱਲੵ': 'lla'
|
333
|
-
'ੱਲ਼': 'lla'
|
334
|
-
'ੱਵ': 'vva'
|
335
|
-
'ੱੜ': 'ṙṙa'
|
336
|
-
'ੱਸ': 'ss'
|
337
|
-
'ੱਸ਼': 'ssha'
|
338
|
-
'ੱਹ': 'hh'
|
339
|
-
|
340
|
-
# Adhik character doubling of the following consonant and ends with ੍ [Note 5]
|
341
|
-
'ੱਕ੍': 'kk'
|
342
|
-
'ੱਖ੍': 'kkh'
|
343
|
-
'ੱਖ਼੍': 'ḳḳh'
|
344
|
-
'ੱਗ੍': 'gg'
|
345
|
-
'ੱਗ਼੍': 'gg̣'
|
346
|
-
'ੱਘ੍': 'ggh'
|
347
|
-
'ੱਙ੍': 'ṅṅ'
|
348
|
-
'ੱਚ੍': 'cch'
|
349
|
-
'ੱਛ੍': 'cchh'
|
350
|
-
'ੱਜ਼੍': 'zz'
|
351
|
-
'ੱਜ੍': 'jj'
|
352
|
-
'ੱਝ੍': 'jjh'
|
353
|
-
'ੱਞ੍': 'ññ'
|
354
|
-
'ੱਟ੍': 'ṭṭ'
|
355
|
-
'ੱਠ੍': 'ṭṭh'
|
356
|
-
'ੱਡ੍': 'ḍḍ'
|
357
|
-
'ੱਢ੍': 'ḍḍh'
|
358
|
-
'ੱਣ੍': 'ṇṇ'
|
359
|
-
'ੱਤ੍': 'tt'
|
360
|
-
'ੱਥ੍': 'tth'
|
361
|
-
'ੱਦ੍': 'dd'
|
362
|
-
'ੱਧ੍': 'ddh'
|
363
|
-
'ੱਨ੍': 'nn'
|
364
|
-
'ੱਪ੍': 'pp'
|
365
|
-
'ੱਫ੍': 'pph'
|
366
|
-
'ੱਫ਼੍': 'ff'
|
367
|
-
'ੱਬ੍': 'bb'
|
368
|
-
'ੱਭ੍': 'bbh'
|
369
|
-
'ੱਮ੍': 'mm'
|
370
|
-
'ੱਯ੍': 'yy'
|
371
|
-
'ੱਰ੍': 'rr'
|
372
|
-
'ੱਲ੍': 'll'
|
373
|
-
'ੱਲੵ੍': 'll'
|
374
|
-
'ੱਲ਼੍': 'll'
|
375
|
-
'ੱਵ੍': 'vv'
|
376
|
-
'ੱੜ੍': 'ṙṙ'
|
377
|
-
'ੱਸ੍': 'ss'
|
378
|
-
'ੱਸ਼੍': 'ssh'
|
379
|
-
'ੱਹ੍': 'hh'
|
380
|
-
|
381
|
-
|
382
|
-
# III. Subscript consonant characters
|
383
|
-
"੍ਹ": "-h"
|
384
|
-
"੍ਵ": "-v"
|
385
|
-
"੍ਰ": "-r"
|
386
|
-
"੍ਯ": "-y"
|
387
|
-
|
388
|
-
"੍": ""
|
389
|
-
"ੱ": ""
|
390
|
-
"਼": ""
|
391
|
-
|
392
|
-
# digits
|
393
|
-
'੦': '0'
|
394
|
-
'੧': '1'
|
395
|
-
'੨': '2'
|
396
|
-
'੩': '3'
|
397
|
-
'੪': '4'
|
398
|
-
'੫': '5'
|
399
|
-
'੬': '6'
|
400
|
-
'੭': '7'
|
401
|
-
'੮': '8'
|
402
|
-
'੯': '9'
|
@@ -1,236 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: ungen
|
3
|
-
id: 2017
|
4
|
-
language: iso-639-3:prs # prs stands for Dari (https://iso639-3.sil.org/code/prs&_ga=GA1.2.2054538372.1574092823)
|
5
|
-
source_script: Arab
|
6
|
-
destination_script: Latn
|
7
|
-
name: Persian UN 1967
|
8
|
-
url: http://www.eki.ee/wgrs/v2_2/rom1_fa.htm
|
9
|
-
creation_date: 1967
|
10
|
-
confirmation_date: 01-2003
|
11
|
-
description: |
|
12
|
-
The United Nations recommended system was approved in 1967 (
|
13
|
-
I/13), based on the official system adopted by Iran and
|
14
|
-
published in its English version as Transliteration of
|
15
|
-
Farsi Geographic Names to Latin Alphabet (September 1966).
|
16
|
-
The romanization table was also published as an annex to
|
17
|
-
the Toponymic Guidelines for the Islamic Republic of Iran
|
18
|
-
in 2000 (Toponymic Guidelines for map and other editors –
|
19
|
-
Revised edition 1998. Submitted by the Islamic Republic of
|
20
|
-
Iran. UNGEGN, 20th session. New York, 17-28 January 2000,
|
21
|
-
Working Paper No. 41.).
|
22
|
-
|
23
|
-
The system is used in the Islamic Republic of Iran and in
|
24
|
-
international cartographic products.
|
25
|
-
|
26
|
-
Persian (Farsi) uses the Perso-Arabic script that is
|
27
|
-
written from right to left. The Persian script usually
|
28
|
-
omits vowel points and diacritical marks from writing which
|
29
|
-
makes it difficult to obtain uniform results in the
|
30
|
-
romanization of Persian. The romanization is generally
|
31
|
-
reversible though there are some ambiguous letter
|
32
|
-
sequences (kh, sh, th, zh) which also may represent the
|
33
|
-
romanized values of two Persian characters in addition to
|
34
|
-
the respective single ones.
|
35
|
-
|
36
|
-
|
37
|
-
notes:
|
38
|
-
- A Word-initially.
|
39
|
-
- B Not romanized; marks absence of the vowel.
|
40
|
-
- C Doubling of the consonant letter.
|
41
|
-
- D After a consonant (excl. -ah).
|
42
|
-
- E After a vowel (see also note 2).
|
43
|
-
- 1-The adjectival ending of Arabic origin -يه in Persian is
|
44
|
-
romanized -īyeh. In romanizing the definite article the
|
45
|
-
same rules of assimilation of consonants are applied as in
|
46
|
-
Arabic, e.g. زين الدين Zeyn od Dīn.
|
47
|
-
|
48
|
-
- 2-The relational suffix (eẕāfeh) -e is usually not
|
49
|
-
expressed in Persian writing after a consonant. After final
|
50
|
-
ا or و it is written with ى, e.g. پاى آب Pā-ye Āb. After
|
51
|
-
final ى and ه it is expressed by writing hamzeh over the
|
52
|
-
character دهانۀ ممبر Dahāneh-ye Mambar.
|
53
|
-
|
54
|
-
- 3-To point Persian vowels two systems are in use that are
|
55
|
-
separated by a column in the table. The first system is a
|
56
|
-
Persian one while the other adheres to the Arabic
|
57
|
-
tradition. In normal spelling vowel points are not used.
|
58
|
-
|
59
|
-
tests:
|
60
|
-
- source: اَنجِيرة
|
61
|
-
expected: Anjīrah
|
62
|
-
|
63
|
-
- source: اِيْوَانِي
|
64
|
-
expected: Eyvānī
|
65
|
-
|
66
|
-
- source: آبَادَان
|
67
|
-
expected: Ābādān
|
68
|
-
|
69
|
-
- source: قُرآن
|
70
|
-
expected: Qor’ān
|
71
|
-
|
72
|
-
- source: مَآب
|
73
|
-
expected: Ma’āb
|
74
|
-
|
75
|
-
- source: مُحَمَّد
|
76
|
-
expected: Moḩammad
|
77
|
-
|
78
|
-
- source: كُوهِ مَرغُوب
|
79
|
-
expected: Kūh-e Marghūb
|
80
|
-
|
81
|
-
- source: پَايِ آب
|
82
|
-
expected: Pā-ye Āb
|
83
|
-
|
84
|
-
- source: جُويِ آس
|
85
|
-
expected: Jū-ye Ās
|
86
|
-
|
87
|
-
- source: دَهَانِهٴ مَمبَر
|
88
|
-
expected: Dahāneh-ye Mambar
|
89
|
-
|
90
|
-
- source: سَلَسِيٴ بُذُرگ
|
91
|
-
expected: Salasī-ye Boz̄org
|
92
|
-
|
93
|
-
- source: ذُو الفَقَار
|
94
|
-
expected: Z̄ū ol Faqār
|
95
|
-
|
96
|
-
|
97
|
-
map:
|
98
|
-
postrules:
|
99
|
-
- pattern: (?<=\b)(?<!\b[‘|’|'|-])[\u0061-\uFFFF]
|
100
|
-
result: "upcase"
|
101
|
-
|
102
|
-
- pattern: " Al"
|
103
|
-
result: " al"
|
104
|
-
|
105
|
-
- pattern: " Ol"
|
106
|
-
result: " ol"
|
107
|
-
|
108
|
-
characters:
|
109
|
-
|
110
|
-
'\u064e' : 'a' # َ fatha
|
111
|
-
'\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
|
112
|
-
'\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
|
113
|
-
'\u0650' : 'e' # ِ kasra
|
114
|
-
'\u064f' : 'o' # ُ damma
|
115
|
-
'\u0652' : '' # ْ sokoon, see Note B
|
116
|
-
|
117
|
-
'\u064e\u0627' : 'ā' # ـَا fatha followed by ا
|
118
|
-
'\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
|
119
|
-
'\b\u0622' : 'ā' # آ NOTE A
|
120
|
-
'\u0622' : '’ā' # آ
|
121
|
-
'\u0650\u064a' : 'ī' # ـِي kasra followed by ي
|
122
|
-
'\u064f\u0648' : 'ū' # ـُو damma followed by و
|
123
|
-
'[\u064e|\u0650]\u064a\u0652' : 'ey' # ـَيْ
|
124
|
-
'[\u064e|\u064f]\u0648\u0652' : 'ow' # ـَوْ
|
125
|
-
'\u0621' : '’' # ء
|
126
|
-
'\u2013' : '–'
|
127
|
-
'\u2013[\u0649|\u064a]\u0647' : '-īyeh'
|
128
|
-
'[\u0654|\u0674]' : '-e' # ٴ ezafeh
|
129
|
-
'(?<=[\u064a|\u0647])[\u0654|\u0674]' : '-ye' # ٴ ezafeh
|
130
|
-
'\u0650\b' : '-e' # ِ kasra
|
131
|
-
'[\u064a|\u06cc]\u0650\b' : '-ye' # ِ kasra
|
132
|
-
|
133
|
-
# NOTE C
|
134
|
-
'\u0628\u0651' : 'bb' # ب
|
135
|
-
'\u062a\u0651' : 'tt' # ت
|
136
|
-
'\u062b\u0651' : 's̄s̄' # ث
|
137
|
-
'\u062c\u0651' : 'jj' # ج
|
138
|
-
'\u062d\u0651' : 'ḩḩ' # ح
|
139
|
-
'\u062e\u0651' : 'kh' # خ
|
140
|
-
'\u062f\u0651' : 'dd' # د
|
141
|
-
'\u0630\u0651' : 'z̄z̄' # ذ
|
142
|
-
'\u0631\u0651' : 'rr' # ر
|
143
|
-
'\u0632\u0651' : 'zz' # ز
|
144
|
-
'\u0633\u0651' : 'ss' # س
|
145
|
-
'\u0634\u0651' : 'sh' # ش
|
146
|
-
'\u0635\u0651' : 'şş' # ص
|
147
|
-
'\u0636\u0651' : 'ẕẕ' # ض
|
148
|
-
'\u0637\u0651' : 'ţţ' # ط
|
149
|
-
'\u0638\u0651' : 'z̧z̧' # ظ
|
150
|
-
'\u063a\u0651' : 'gh' # غ
|
151
|
-
'\u0641\u0651' : 'ff' # ف
|
152
|
-
'\u0642\u0651' : 'qq' # ق
|
153
|
-
'\u0643\u0651' : 'kk' # ك
|
154
|
-
'\u0644\u0651' : 'll' # ل
|
155
|
-
'\u0645\u0651' : 'mm' # م
|
156
|
-
'\u0646\u0651' : 'nn' # ن
|
157
|
-
'\u0647\u0651' : 'hh' # ه
|
158
|
-
'\u0648\u0651' : 'vv' # و
|
159
|
-
'\u064a\u0651' : 'yy' # ي
|
160
|
-
|
161
|
-
# NOTE 1
|
162
|
-
# Sun letters
|
163
|
-
'\b\u0627\u0644\u062a' : 'ot t' # الت
|
164
|
-
'\b\u0627\u0644\u062b' : 'os̄ s̄' # الث
|
165
|
-
'\b\u0627\u0644\u062f' : 'od d' # الد
|
166
|
-
'\b\u0627\u0644\u0630' : 'oz̄ z̄' # الذ
|
167
|
-
'\b\u0627\u0644\u0631' : 'or r' # الر
|
168
|
-
'\b\u0627\u0644\u0632' : 'oz z' # الز
|
169
|
-
'\b\u0627\u0644\u0633' : 'os s' # الس
|
170
|
-
'\b\u0627\u0644\u0634' : 'osh sh' # الش
|
171
|
-
'\b\u0627\u0644\u0635' : 'oş ş' # الص
|
172
|
-
'\b\u0627\u0644\u0636' : 'oẕ ẕ' # الض
|
173
|
-
'\b\u0627\u0644\u0637' : 'oţ ţ' # الط
|
174
|
-
'\b\u0627\u0644\u0638' : 'oz̧ z̧' # الظ
|
175
|
-
'\b\u0627\u0644\u0644' : 'ol l' # الل
|
176
|
-
'\b\u0627\u0644\u0646' : 'on n' # الن
|
177
|
-
|
178
|
-
'\u0650\u064a\u0651' : 'īy' # ـِيَّ
|
179
|
-
'\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي
|
180
|
-
|
181
|
-
# ta' marboota
|
182
|
-
'\u0629' : 'at' # ة in the middle of the sentence
|
183
|
-
'\u0629$' : 'ah'
|
184
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
|
185
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
|
186
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
|
187
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
|
188
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
|
189
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
|
190
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
|
191
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
|
192
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
|
193
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
|
194
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
|
195
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
|
196
|
-
|
197
|
-
'\b\u0627\u0644' : 'al ' # ال
|
198
|
-
'\s\b\u0627\u0644' : ' ol ' # ال #special Rule 1
|
199
|
-
|
200
|
-
'\b\u0627' : '' # ا initial
|
201
|
-
'\u0627' : 'ā' # ا middial
|
202
|
-
'\u0627\b' : 'ā' # ا final
|
203
|
-
|
204
|
-
'\u0628' : 'b' # ب
|
205
|
-
'\u067E' : 'p' # پ
|
206
|
-
'\u062A' : 't' # ت
|
207
|
-
'\u062B' : 's̄' # ث
|
208
|
-
'\u062C' : 'j' # ج
|
209
|
-
'\u0686' : 'ch' # چ
|
210
|
-
'\u062D' : 'ḩ' # ح
|
211
|
-
'\u062E' : 'kh' # خ
|
212
|
-
'\u062F' : 'd' # د
|
213
|
-
'\u0630' : 'z̄' # ذ
|
214
|
-
'\u0631' : 'r' # ر
|
215
|
-
'\u0632' : 'z' # ز
|
216
|
-
'\u0698' : 'zh' # ژ
|
217
|
-
'\u0633' : 's' # س
|
218
|
-
'\u0634' : 'sh' # ش
|
219
|
-
'\u0635' : 'ş' # ص
|
220
|
-
'\u0636' : 'ẕ' # ض
|
221
|
-
'\u0637' : 'ţ' # ط
|
222
|
-
'\u0638' : 'z̧' # ظ
|
223
|
-
'\u0639' : '’' # ع
|
224
|
-
'\u063A' : 'gh' # غ
|
225
|
-
'\u0641' : 'f' # ف
|
226
|
-
'\u0642' : 'q' # ق
|
227
|
-
'\u0643' : 'k' # ك
|
228
|
-
'\u06A9' : 'k' # ک
|
229
|
-
'\u06AF' : 'g' # گ
|
230
|
-
'\u0644' : 'l' # ل
|
231
|
-
'\u0645' : 'm' # م
|
232
|
-
'\u0646' : 'n' # ن
|
233
|
-
'\u0648' : 'v' # و
|
234
|
-
'\u0647' : 'h' # ه
|
235
|
-
'\u0649' : 'y' # ي
|
236
|
-
'\u064a' : 'y' # ي
|