interscript 0.1.9 → 2.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/Gemfile +29 -0
- data/LICENSE.adoc +31 -0
- data/README.md +3 -0
- data/Rakefile +53 -0
- data/bin/console +14 -0
- data/bin/interscript +5 -0
- data/bin/maps_analyze_staging +168 -0
- data/bin/maps_debug_compilers +58 -0
- data/bin/maps_debug_ordering +88 -0
- data/bin/maps_debug_ruby_compile +24 -0
- data/bin/maps_debug_step_by_step +44 -0
- data/bin/maps_optimize_order +112 -0
- data/bin/maps_v1_analyze_regexps +45 -0
- data/bin/maps_v1_to_v2 +426 -0
- data/bin/setup +8 -0
- data/exe/interscript +6 -0
- data/interscript.gemspec +31 -0
- data/lib/interscript.rb +80 -135
- data/lib/interscript/command.rb +5 -5
- data/lib/interscript/compiler.rb +22 -0
- data/lib/interscript/compiler/javascript.rb +292 -0
- data/lib/interscript/compiler/ruby.rb +262 -0
- data/lib/interscript/dsl.rb +67 -0
- data/lib/interscript/dsl/aliases.rb +23 -0
- data/lib/interscript/dsl/document.rb +46 -0
- data/lib/interscript/dsl/group.rb +45 -0
- data/lib/interscript/dsl/group/parallel.rb +6 -0
- data/lib/interscript/dsl/items.rb +89 -0
- data/lib/interscript/dsl/metadata.rb +26 -0
- data/lib/interscript/dsl/stage.rb +6 -0
- data/lib/interscript/dsl/symbol_mm.rb +11 -0
- data/lib/interscript/dsl/tests.rb +12 -0
- data/lib/interscript/interpreter.rb +251 -0
- data/lib/interscript/node.rb +25 -0
- data/lib/interscript/node/alias_def.rb +15 -0
- data/lib/interscript/node/dependency.rb +13 -0
- data/lib/interscript/node/document.rb +45 -0
- data/lib/interscript/node/group.rb +34 -0
- data/lib/interscript/node/group/parallel.rb +9 -0
- data/lib/interscript/node/group/sequential.rb +2 -0
- data/lib/interscript/node/item.rb +52 -0
- data/lib/interscript/node/item/alias.rb +42 -0
- data/lib/interscript/node/item/any.rb +61 -0
- data/lib/interscript/node/item/capture.rb +50 -0
- data/lib/interscript/node/item/group.rb +51 -0
- data/lib/interscript/node/item/repeat.rb +40 -0
- data/lib/interscript/node/item/stage.rb +23 -0
- data/lib/interscript/node/item/string.rb +51 -0
- data/lib/interscript/node/metadata.rb +18 -0
- data/lib/interscript/node/rule.rb +6 -0
- data/lib/interscript/node/rule/funcall.rb +18 -0
- data/lib/interscript/node/rule/run.rb +15 -0
- data/lib/interscript/node/rule/sub.rb +65 -0
- data/lib/interscript/node/stage.rb +19 -0
- data/lib/interscript/node/tests.rb +15 -0
- data/lib/interscript/stdlib.rb +211 -0
- data/lib/interscript/utils/regexp_converter.rb +283 -0
- data/lib/interscript/version.rb +1 -1
- data/requirements.txt +1 -0
- metadata +73 -458
- data/README.adoc +0 -296
- data/aliases.json +0 -1
- data/lib/g2pwrapper.py +0 -34
- data/lib/interscript/fs.rb +0 -96
- data/lib/interscript/mapping.rb +0 -144
- data/lib/interscript/opal.rb +0 -196
- data/lib/interscript/opal/entrypoint.rb +0 -20
- data/lib/interscript/opal/exports.rb +0 -11
- data/lib/interscript/opal/maps.js.erb +0 -8
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
- data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
- data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
- data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
- data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
- data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
- data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
- data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
- data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
- data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
- data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
- data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
- data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
- data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
- data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
- data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
- data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
- data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
- data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
- data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
- data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
- data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
- data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
- data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
- data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
- data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
- data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
- data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
- data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
- data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
- data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
- data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
- data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
- data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
- data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
- data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
- data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
- data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
- data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
- data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
- data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
- data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
- data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
- data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
- data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
- data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
- data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
- data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
- data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
- data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
- data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
- data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
- data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
- data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
- data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
- data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
- data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
- data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
- data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
- data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
- data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
- data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
- data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
- data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
- data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
- data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
- data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
- data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
- data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
- data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
- data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
- data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
- data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
- data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
- data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
- data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
- data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
- data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
- data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
- data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
- data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
- data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
- data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
- data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
- data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
- data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
- data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
- data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
- data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
- data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
- data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
- data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
- data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
- data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
- data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
- data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
- data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
- data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
- data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
- data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
- data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
- data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
- data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
- data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
- data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
- data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
- data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
- data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
- data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
- data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
- data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
- data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
- data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
- data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
- data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
- data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
- data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
- data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
- data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
- data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
- data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
- data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
- data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
- data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
- data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
- data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
- data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
- data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
- data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
- data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
- data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
- data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
- data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
- data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
- data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
- data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
- data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
- data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
- data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
- data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
- data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
- data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
- data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
- data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
- data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
- data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
- data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
- data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
- data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
- data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
- data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
- data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
- data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
- data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
- data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
- data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
- data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
- data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
- data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
- data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
- data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
- data/spec/interscript/filenames_spec.rb +0 -21
- data/spec/interscript/mapping_spec.rb +0 -42
- data/spec/interscript_spec.rb +0 -37
- data/spec/spec_helper.rb +0 -3
@@ -1,166 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: bgnpcgn
|
3
|
-
id: 1965
|
4
|
-
language: iso-639-2:ukr
|
5
|
-
source_script: Cyrl
|
6
|
-
destination_script: Latn
|
7
|
-
name: BGN/PCGN 1965 System
|
8
|
-
alias:
|
9
|
-
ogc11122:
|
10
|
-
code: ukr_Cyrl2Latn_BGN_1965
|
11
|
-
description: Ukrainian Board on Geographic Names/Permanent Committee on Geographical Names for British Official Use (PCGN) 1965 System
|
12
|
-
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
|
13
|
-
creation_date: 1947
|
14
|
-
confirmation_date: 2019-06
|
15
|
-
description: |
|
16
|
-
The BGN/PCGN system for Ukrainian was designed for use in romanizing
|
17
|
-
names written in the Ukrainian alphabet. The Ukrainian alphabet
|
18
|
-
contains five characters not present in the Russian alphabet: ґ, є, і,
|
19
|
-
ї, and ’.
|
20
|
-
|
21
|
-
notes:
|
22
|
-
- The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ц, ш, and the character sequence тш.
|
23
|
-
- All apostrophes appearing in romanization are Unicode encoding 2019.
|
24
|
-
- The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
|
25
|
-
|
26
|
-
tests:
|
27
|
-
- source: Авдіївська Міськрада
|
28
|
-
expected: Avdiyivs’ka Mis’krada
|
29
|
-
- source: Бабаї
|
30
|
-
expected: Babayi
|
31
|
-
- source: Віленька
|
32
|
-
expected: Vilen’ka
|
33
|
-
- source: Гагарінський Район
|
34
|
-
expected: Haharins’kyy Rayon
|
35
|
-
- source: Довбушева Криниця
|
36
|
-
expected: Dovbusheva Krynytsya
|
37
|
-
- source: Дідівщина
|
38
|
-
expected: Didivshchyna
|
39
|
-
- source: Економічна
|
40
|
-
expected: Ekonomichna
|
41
|
-
- source: Єфросинівка
|
42
|
-
expected: Yefrosynivka
|
43
|
-
- source: Жигуліна Роща
|
44
|
-
expected: Zhyhulina Roshcha
|
45
|
-
- source: Загір’я
|
46
|
-
expected: Zahir”ya
|
47
|
-
- source: З’єднувальний Канал
|
48
|
-
expected: Z”yednuval’nyy Kanal
|
49
|
-
- source: Ивахи
|
50
|
-
expected: Yvakhy
|
51
|
-
- source: Івано-Франківська Міськрада
|
52
|
-
expected: Ivano-Frankivs’ka Mis’krada
|
53
|
-
- source: Їжаківка
|
54
|
-
expected: Yizhakivka
|
55
|
-
- source: Йосиповичі
|
56
|
-
expected: Yosypovychi
|
57
|
-
- source: Кабичівка
|
58
|
-
expected: Kabychivka
|
59
|
-
- source: Лазуровий Провулок
|
60
|
-
expected: Lazurovyy Provulok
|
61
|
-
- source: Мала Сейдеминуха
|
62
|
-
expected: Mala Seydemynukha
|
63
|
-
- source: Нагірний
|
64
|
-
expected: Nahirnyy
|
65
|
-
- source: Овер’янівське Озеро
|
66
|
-
expected: Over”yanivs’ke Ozero
|
67
|
-
- source: Павлопільське Водосховище
|
68
|
-
expected: Pavlopil’s’ke Vodoskhovyshche
|
69
|
-
- source: Приґородний
|
70
|
-
expected: Prygorodnyy
|
71
|
-
- source: Радгосп Правда
|
72
|
-
expected: Radhosp Pravda
|
73
|
-
- source: Садово-Хрустальненський
|
74
|
-
expected: Sadovo-Khrustal’nens’kyy
|
75
|
-
- source: Таратутине
|
76
|
-
expected: Taratutyne
|
77
|
-
- source: Улу-Узень
|
78
|
-
expected: Ulu-Uzen’
|
79
|
-
- source: Христофорівка
|
80
|
-
expected: Khrystoforivka
|
81
|
-
- source: Центральна Вулиця
|
82
|
-
expected: Tsentral’na Vulytsya
|
83
|
-
- source: Чайковичі
|
84
|
-
expected: Chaykovychi
|
85
|
-
- source: Шалаші
|
86
|
-
expected: Shalashi
|
87
|
-
- source: Щербинівка
|
88
|
-
expected: Shcherbynivka
|
89
|
-
- source: Южноукраїнська Міськрада
|
90
|
-
expected: Yuzhnoukrayins’ka Mis’krada
|
91
|
-
- source: Ясениця
|
92
|
-
expected: Yasenytsya
|
93
|
-
|
94
|
-
map:
|
95
|
-
rules:
|
96
|
-
- pattern: \b\u2019\b # ’ in the middle of a word -> ”
|
97
|
-
result: "\u201d"
|
98
|
-
|
99
|
-
characters:
|
100
|
-
"\u0430": 'a'
|
101
|
-
"\u0431": 'b'
|
102
|
-
"\u0432": 'v'
|
103
|
-
"\u0433": 'h'
|
104
|
-
"\u0434": 'd'
|
105
|
-
"\u0435": 'e'
|
106
|
-
"\u0436": 'zh'
|
107
|
-
"\u0437": 'z'
|
108
|
-
"\u0438": 'y'
|
109
|
-
"\u0439": 'y'
|
110
|
-
"\u043a": 'k'
|
111
|
-
"\u043b": 'l'
|
112
|
-
"\u043c": 'm'
|
113
|
-
"\u043d": 'n'
|
114
|
-
"\u043e": 'o'
|
115
|
-
"\u043f": 'p'
|
116
|
-
"\u0440": 'r'
|
117
|
-
"\u0441": 's'
|
118
|
-
"\u0442": 't'
|
119
|
-
"\u0443": 'u'
|
120
|
-
"\u0444": 'f'
|
121
|
-
"\u0445": 'kh'
|
122
|
-
"\u0446": 'ts'
|
123
|
-
"\u0447": 'ch'
|
124
|
-
"\u0448": 'sh'
|
125
|
-
"\u0449": 'shch'
|
126
|
-
"\u044c": "\u2019"
|
127
|
-
"\u044e": 'yu'
|
128
|
-
"\u044f": 'ya'
|
129
|
-
"\u0454": 'ye'
|
130
|
-
"\u0456": 'i'
|
131
|
-
"\u0457": 'yi'
|
132
|
-
"\u0491": 'g'
|
133
|
-
"\ufeff": ' '
|
134
|
-
"\u0404": 'Ye'
|
135
|
-
"\u0406": 'I'
|
136
|
-
"\u0407": 'Yi'
|
137
|
-
"\u0410": 'A'
|
138
|
-
"\u0411": 'B'
|
139
|
-
"\u0412": 'V'
|
140
|
-
"\u0413": 'H'
|
141
|
-
"\u0414": 'D'
|
142
|
-
"\u0415": 'E'
|
143
|
-
"\u0416": 'Zh'
|
144
|
-
"\u0417": 'Z'
|
145
|
-
"\u0418": 'Y'
|
146
|
-
"\u0419": 'Y'
|
147
|
-
"\u041a": 'K'
|
148
|
-
"\u041b": 'L'
|
149
|
-
"\u041c": 'M'
|
150
|
-
"\u041d": 'N'
|
151
|
-
"\u041e": 'O'
|
152
|
-
"\u041f": 'P'
|
153
|
-
"\u0420": 'R'
|
154
|
-
"\u0421": 'S'
|
155
|
-
"\u0422": 'T'
|
156
|
-
"\u0423": 'U'
|
157
|
-
"\u0424": 'F'
|
158
|
-
"\u0425": 'Kh'
|
159
|
-
"\u0426": 'Ts'
|
160
|
-
"\u0427": 'Ch'
|
161
|
-
"\u0428": 'Sh'
|
162
|
-
"\u0429": 'Shch'
|
163
|
-
"\u042c": "\u2019"
|
164
|
-
"\u042e": 'Yu'
|
165
|
-
"\u042f": 'Ya'
|
166
|
-
"\u0490": 'G'
|
@@ -1,119 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: bgnpcgn
|
3
|
-
id: 2019
|
4
|
-
language: iso-639-2:ukr
|
5
|
-
source_script: Cyrl
|
6
|
-
destination_script: Latn
|
7
|
-
name: BGN/PCGN 2019 Agreement
|
8
|
-
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
|
9
|
-
creation_date: 2019
|
10
|
-
confirmation_date: 2020-01
|
11
|
-
description: |
|
12
|
-
The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
|
13
|
-
in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
|
14
|
-
since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
|
15
|
-
|
16
|
-
notes:
|
17
|
-
- |
|
18
|
-
The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
|
19
|
-
of the national system within Ukraine. Note, however, that this system is not recommended for
|
20
|
-
reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
|
21
|
-
This system also lacks the methodology outlined in the 1965 System to provide additional
|
22
|
-
differentiation between digraphs and individual character sequences.
|
23
|
-
For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
|
24
|
-
sequences зг, кг, сг, тс, and тсг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
|
25
|
-
from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
|
26
|
-
the characters ж, х, ш, ц and the character sequence тш.
|
27
|
-
- To use the keyboard Unicode function, hold ALT and enter the sequence listed in the table.
|
28
|
-
- The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
|
29
|
-
- These characters differ significantly in romanization from the BGN/PCGN 1965 system.
|
30
|
-
|
31
|
-
tests:
|
32
|
-
- source: Алушта
|
33
|
-
expected: Alushta
|
34
|
-
- source: Борщагівка
|
35
|
-
expected: Borshchahivka
|
36
|
-
- source: Вишгород
|
37
|
-
expected: Vyshhorod
|
38
|
-
- source: Гадяч
|
39
|
-
expected: Hadiach
|
40
|
-
- source: Згорани
|
41
|
-
expected: Zghorany
|
42
|
-
- source: Ґалаґан
|
43
|
-
expected: Galagan
|
44
|
-
- source: Дон
|
45
|
-
expected: Don
|
46
|
-
- source: Рівне
|
47
|
-
expected: Rivne
|
48
|
-
- source: Єнакієве
|
49
|
-
expected: Yenakiieve
|
50
|
-
- source: Наєнко
|
51
|
-
expected: Naienko
|
52
|
-
- source: Житомир
|
53
|
-
expected: Zhytomyr
|
54
|
-
- source: Запоріжжя
|
55
|
-
expected: Zaporizhzhia
|
56
|
-
- source: Закарпаття
|
57
|
-
expected: Zakarpattia
|
58
|
-
- source: Медвин
|
59
|
-
expected: Medvyn
|
60
|
-
- source: Іршава
|
61
|
-
expected: Irshava
|
62
|
-
- source: Їжакевич
|
63
|
-
expected: Yizhakevych
|
64
|
-
- source: Кадіївка
|
65
|
-
expected: Kadiivka
|
66
|
-
- source: Йосипівка
|
67
|
-
expected: Yosypivka
|
68
|
-
- source: Стрий
|
69
|
-
expected: Stryi
|
70
|
-
- source: Київ
|
71
|
-
expected: Kyiv
|
72
|
-
- source: Лебедин
|
73
|
-
expected: Lebedyn
|
74
|
-
- source: Миколаїв
|
75
|
-
expected: Mykolaiv
|
76
|
-
- source: Ніжин
|
77
|
-
expected: Nizhyn
|
78
|
-
- source: Одеса
|
79
|
-
expected: Odesa
|
80
|
-
- source: Полтава
|
81
|
-
expected: Poltava
|
82
|
-
- source: Ромни
|
83
|
-
expected: Romny
|
84
|
-
- source: Суми
|
85
|
-
expected: Sumy
|
86
|
-
- source: Тетерів
|
87
|
-
expected: Teteriv
|
88
|
-
- source: Ужгород
|
89
|
-
expected: Uzhhorod
|
90
|
-
- source: Фастів
|
91
|
-
expected: Fastiv
|
92
|
-
- source: Харків
|
93
|
-
expected: Kharkiv
|
94
|
-
- source: Біла Церква
|
95
|
-
expected: Bila Tserkva
|
96
|
-
- source: Чернівці
|
97
|
-
expected: Chernivtsi
|
98
|
-
- source: Шостка
|
99
|
-
expected: Shostka
|
100
|
-
- source: Гоща
|
101
|
-
expected: Hoshcha
|
102
|
-
- source: Русь
|
103
|
-
expected: Rus
|
104
|
-
- source: Юрій
|
105
|
-
expected: Yurii
|
106
|
-
- source: Крюківка
|
107
|
-
expected: Kriukivka
|
108
|
-
- source: Яготин
|
109
|
-
expected: Yahotyn
|
110
|
-
- source: Ічня
|
111
|
-
expected: Ichnia
|
112
|
-
- source: Знам’янка
|
113
|
-
expected: Znamianka
|
114
|
-
|
115
|
-
map:
|
116
|
-
inherit: un-ukr-Cyrl-Latn-2012
|
117
|
-
|
118
|
-
characters:
|
119
|
-
"\u0027": '' # ' ->
|
@@ -1,459 +0,0 @@
|
|
1
|
-
---
|
2
|
-
authority_id: bgnpcgn
|
3
|
-
id: 2007
|
4
|
-
language: iso-639-2:urd
|
5
|
-
source_script: Arab
|
6
|
-
destination_script: Latn
|
7
|
-
name: BGN/PCGN Romanization System -- Urdu (2007)
|
8
|
-
alias:
|
9
|
-
ogc11122:
|
10
|
-
code: uas_Arab2Latn_BGN_2007
|
11
|
-
description: Unified Afghan Romanization System US Board on Geographic Names (BGN)/The Permanent Committee on Geographical Names (PCGN) 2007
|
12
|
-
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693788/ROMANIZATION_OF_URDU.pdf
|
13
|
-
creation_date: 2007
|
14
|
-
confirmation_date: 2017-11
|
15
|
-
description: |
|
16
|
-
The following is the approved romanization system for
|
17
|
-
deriving standard spellings of Urdu geographical names for
|
18
|
-
Pakistan. It was jointly adopted by BGN and PCGN at the
|
19
|
-
23rd BGN/PCGN Conference in Washington, DC, in 2007 and it
|
20
|
-
is based on the Hunterian romanization system for Urdu,
|
21
|
-
which has been used by the Surveys of India and Pakistan
|
22
|
-
for romanizing Urdu geographical names for more than one
|
23
|
-
hundred years. The BGN/PCGN system laid out below includes
|
24
|
-
diacritical marks in order that the original script can be
|
25
|
-
derived from the romanized form (i.e. it is reversible).
|
26
|
-
For desk users requiring a diacritic-free form, these
|
27
|
-
diacritics can simply be removed. In every case the same
|
28
|
-
basic Roman-script characters are kept as are used in the
|
29
|
-
Hunterian system. The BGN/PCGN forms have further been
|
30
|
-
designed to harmonize with the BGN/PCGN Persian
|
31
|
-
romanization system.
|
32
|
-
notes:
|
33
|
-
- 1. When the vowel sign zīr ( ِ) occurs word-finally in the
|
34
|
-
first element of a compound, it is assumed to mark the
|
35
|
-
Persian izafat
|
36
|
-
morpheme, and is romanized -e, not i.
|
37
|
-
- 2. The source of almost all example names is the 1951
|
38
|
-
Census of Pakistan, Village List, Northwest Frontier
|
39
|
-
Province, Chitral
|
40
|
-
State. Office of the Provincial Superintendant of Census,
|
41
|
-
North-West Frontier Province, Peshawar.
|
42
|
-
- 3. No examples of aspirated dental r (rh, رھ) were found,
|
43
|
-
though this phoneme is assumed to be part of the phonology
|
44
|
-
of
|
45
|
-
Urdu, and was therefore left out of Table 2.
|
46
|
-
- 4. Note that the short vowels in the Urdu examples are not
|
47
|
-
pointed.
|
48
|
-
- 5. Occasionally, sequences of /z/ or /s/ plus /h/ may be
|
49
|
-
encountered, i.e. z·h, s·h. These may be romanized with the
|
50
|
-
Unicode
|
51
|
-
'center dot' (U+00B7) separating the two letters, to
|
52
|
-
distinguish them from the digraphs /zh/ and /sh/.
|
53
|
-
- Commented tests are blocked by this issue https://github.com/interscript/interscript/issues/572
|
54
|
-
depends on the different ways of handling ي to y or e AND و to u or o
|
55
|
-
|
56
|
-
|
57
|
-
tests:
|
58
|
-
# - source: بوغدِی
|
59
|
-
# expected: Boghdī
|
60
|
-
|
61
|
-
- source: پَالِير
|
62
|
-
expected: Pālīr
|
63
|
-
|
64
|
-
# - source: بیزوت كَلے
|
65
|
-
# expected: Bezot Kale
|
66
|
-
|
67
|
-
# - source: عَمَل كوٹ
|
68
|
-
# expected: ‘Amal Koṭ
|
69
|
-
|
70
|
-
- source: ثَابِر
|
71
|
-
expected: S̄ābir
|
72
|
-
|
73
|
-
- source: شَاه نَثَار ميلة
|
74
|
-
expected: Shāh Nas̄ār Mylah
|
75
|
-
|
76
|
-
# - source: بَرجُو ميلَه
|
77
|
-
# expected: Barjū Melah
|
78
|
-
|
79
|
-
- source: چَپرِی
|
80
|
-
expected: Chaprī
|
81
|
-
|
82
|
-
- source: أَحمَد خَان كَلے
|
83
|
-
expected: Aḩmad Khān Kale
|
84
|
-
|
85
|
-
# - source: آكَا خيل
|
86
|
-
# expected: Ākā Khel
|
87
|
-
|
88
|
-
- source: دُرَانِي
|
89
|
-
expected: Durānī
|
90
|
-
|
91
|
-
- source: ڈَنگِیلا
|
92
|
-
expected: Ḍangīlā
|
93
|
-
|
94
|
-
- source: ذَرَانِی
|
95
|
-
expected: Z̄arānī
|
96
|
-
|
97
|
-
- source: بُركِي
|
98
|
-
expected: Burkī
|
99
|
-
|
100
|
-
- source: گِیدَڑَه
|
101
|
-
expected: Gīdaṛah
|
102
|
-
|
103
|
-
- source: عَلِي زَائِي
|
104
|
-
expected: ‘Alī Zā’ī
|
105
|
-
|
106
|
-
# - source: ژوب
|
107
|
-
# expected: Zhob
|
108
|
-
|
109
|
-
- source: بِسَاتُو
|
110
|
-
expected: Bisātū
|
111
|
-
|
112
|
-
- source: أَحمَدِي شَامَا
|
113
|
-
expected: Aḩmadī Shāmā
|
114
|
-
|
115
|
-
- source: اَصَالَت كَلے
|
116
|
-
expected: Aşālat Kale
|
117
|
-
|
118
|
-
- source: خَضَر خَان
|
119
|
-
expected: Khaẕar Khān
|
120
|
-
|
121
|
-
- source: سُلْطَان
|
122
|
-
expected: Sulţān
|
123
|
-
|
124
|
-
- source: عَزَم سَيِّد نُور كَلے
|
125
|
-
expected: ‘Azam Sayyid Nūr Kale
|
126
|
-
|
127
|
-
# - source: عَلَم شير
|
128
|
-
# expected: ‘Alam Sher
|
129
|
-
|
130
|
-
- source: بغَاكِي
|
131
|
-
expected: Bghākī
|
132
|
-
|
133
|
-
# - source: مُظَفَر كوٹ
|
134
|
-
# expected: Muz̧afar Koṭ
|
135
|
-
|
136
|
-
- source: حَقدَرَه
|
137
|
-
expected: Ḩaqdarah
|
138
|
-
|
139
|
-
- source: کَچکِینَہ
|
140
|
-
expected: Kachkīnah
|
141
|
-
|
142
|
-
- source: بَاگَن
|
143
|
-
expected: Bāgan
|
144
|
-
|
145
|
-
- source: بُلبَلَک
|
146
|
-
expected: Bulbalak
|
147
|
-
|
148
|
-
- source: بِلیَامِین
|
149
|
-
expected: Bilyāmīn
|
150
|
-
|
151
|
-
- source: نَہر
|
152
|
-
expected: Nahr
|
153
|
-
|
154
|
-
# - source: جوکَالِیَاں
|
155
|
-
# expected: Jokālīāñ
|
156
|
-
|
157
|
-
- source: اَرَوْالِی
|
158
|
-
expected: Arawālī
|
159
|
-
|
160
|
-
# - source: هیروشاه
|
161
|
-
# expected: Heroshāh
|
162
|
-
|
163
|
-
- source: مَہردِی
|
164
|
-
expected: Mahrdī
|
165
|
-
|
166
|
-
- source: بَڑھ
|
167
|
-
expected: Baṛh
|
168
|
-
|
169
|
-
# - source: شِیوَاؤ
|
170
|
-
# expected: Shīwā’o
|
171
|
-
|
172
|
-
- source: یَاردَا کَلے
|
173
|
-
expected: Yārdā Kale
|
174
|
-
|
175
|
-
- source: بهَائِي خَان
|
176
|
-
expected: Bhā’ī Khān
|
177
|
-
|
178
|
-
- source: پھاشک
|
179
|
-
expected: Phāshk
|
180
|
-
|
181
|
-
- source: تھَلّ
|
182
|
-
expected: Thall
|
183
|
-
|
184
|
-
- source: پَٹھان ريَا
|
185
|
-
expected: Paṭhān Ryā
|
186
|
-
|
187
|
-
- source: جھِیل
|
188
|
-
expected: Jhīl
|
189
|
-
|
190
|
-
- source: غَزْنِي سْپِين
|
191
|
-
expected: Ghaznī Spīn
|
192
|
-
|
193
|
-
- source: بَادشَاه چھُم
|
194
|
-
expected: Bādshāh Chhum
|
195
|
-
|
196
|
-
- source: سِندھ
|
197
|
-
expected: Sindh
|
198
|
-
|
199
|
-
- source: ڈھَنڈ
|
200
|
-
expected: Ḍhanḍ
|
201
|
-
|
202
|
-
# - source: غوزگَڑھِی
|
203
|
-
# expected: Ghozgaṛhī
|
204
|
-
|
205
|
-
# - source: دوغَل گاکھَر
|
206
|
-
# expected: Doghal Gākhar
|
207
|
-
|
208
|
-
- source: خَان گھَڑِی
|
209
|
-
expected: Khān Ghaṛī
|
210
|
-
|
211
|
-
- source: غُلَامَک كَلے
|
212
|
-
expected: Ghulāmak Kale
|
213
|
-
|
214
|
-
# - source: کاراخیل
|
215
|
-
# expected: Kārākhel
|
216
|
-
|
217
|
-
- source: خَپیَنگا
|
218
|
-
expected: Khapyangā
|
219
|
-
|
220
|
-
- source: گَندَه كَلے
|
221
|
-
expected: Gandah Kale
|
222
|
-
|
223
|
-
# - source: گُلونَا ڈھيرِي
|
224
|
-
# expected: Gulonā Ḍherī
|
225
|
-
|
226
|
-
# - source: خيرَه دِين
|
227
|
-
# expected: Kherah Dīn
|
228
|
-
|
229
|
-
- source: مَورپِتھِی
|
230
|
-
expected: Maurpithī
|
231
|
-
|
232
|
-
- source: درے پلارِی
|
233
|
-
expected: Dre Plārī
|
234
|
-
|
235
|
-
- source: آگرَہ
|
236
|
-
expected: Āgrah
|
237
|
-
|
238
|
-
- source: ڈَنڈَر
|
239
|
-
expected: Ḍanḍar
|
240
|
-
|
241
|
-
# - source: گِیدو
|
242
|
-
# expected: Gīdo
|
243
|
-
|
244
|
-
- source: گُبازانَہ
|
245
|
-
expected: Gubāzānah
|
246
|
-
|
247
|
-
# - source: اُوشو
|
248
|
-
# expected: Ūsho
|
249
|
-
|
250
|
-
- source: حَےدَر عَلِی كَلے
|
251
|
-
expected: Ḩaidar ‘Alī Kale
|
252
|
-
|
253
|
-
- source: تَودَہ چِینَہ
|
254
|
-
expected: Taudah Chīnah
|
255
|
-
|
256
|
-
- source: مُوسى خَان كَلے
|
257
|
-
expected: Mūsá Khān Kale
|
258
|
-
|
259
|
-
- source: مُلَّا بَاغ
|
260
|
-
expected: Mullā Bāgh
|
261
|
-
|
262
|
-
map:
|
263
|
-
postrules:
|
264
|
-
- pattern: (?<=\b)(?<!\b[‘|’|'|-])[\u0061-\uFFFF]
|
265
|
-
result: "upcase"
|
266
|
-
# don't capitalize the definite article in the middle of a sentence
|
267
|
-
- pattern : ' At T' # الت
|
268
|
-
result: ' at T'
|
269
|
-
- pattern : ' As̄ S̄' # الث
|
270
|
-
result: ' as̄ S̄'
|
271
|
-
- pattern : ' Ad D' # الد
|
272
|
-
result: ' ad D'
|
273
|
-
- pattern : ' Az̄ Z̄' # الذ
|
274
|
-
result: ' az̄ Z̄'
|
275
|
-
- pattern : ' Ar R' # الر
|
276
|
-
result: ' ar R'
|
277
|
-
- pattern : ' Az Z' # الز
|
278
|
-
result: ' az Z'
|
279
|
-
- pattern : ' As S' # الس
|
280
|
-
result: ' as S'
|
281
|
-
- pattern : ' Ash Sh' # الش
|
282
|
-
result: ' ash Sh'
|
283
|
-
- pattern : ' Aş Ş' # الص
|
284
|
-
result: ' aş Ş'
|
285
|
-
- pattern : ' Aẕ Ẕ' # الض
|
286
|
-
result: ' aẕ Ẕ'
|
287
|
-
- pattern : ' Aţ Ţ' # الط
|
288
|
-
result: ' aţ Ţ'
|
289
|
-
- pattern : ' Az̧ Z̧' # الظ
|
290
|
-
result: ' az̧ Z̧'
|
291
|
-
- pattern : ' Al L' # الل
|
292
|
-
result: ' al L'
|
293
|
-
- pattern : ' An N' # الن
|
294
|
-
result: ' an N'
|
295
|
-
- pattern: " Al " # ال
|
296
|
-
result: " al "
|
297
|
-
characters:
|
298
|
-
# special rules
|
299
|
-
|
300
|
-
'\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
|
301
|
-
'\ufdf2': 'Allāh' # See note 5
|
302
|
-
|
303
|
-
# Vowels, Diphthongs, and Diacritical Marks
|
304
|
-
'\u064e' : 'a' # َ fatha
|
305
|
-
'\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
|
306
|
-
'\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
|
307
|
-
|
308
|
-
'\u0652' : '' # ْ sokoon
|
309
|
-
'\u0659': 'ê'
|
310
|
-
|
311
|
-
'\u0650[\u064a|\u06cc]' : 'ī' # ـِي kasra followed by ي
|
312
|
-
'\u0650' : 'i' # kasra
|
313
|
-
'\u06d2' : 'e' # ـے
|
314
|
-
|
315
|
-
'\u0622' : 'ā' # آ
|
316
|
-
'\u064e\u0627' : 'ā' # ـَا fatha followed by ا
|
317
|
-
'\u0627' : 'ā' # ا
|
318
|
-
'\b\u0627' : '' # ا
|
319
|
-
|
320
|
-
'\u0648' : 'o' # و # suspect
|
321
|
-
'\u064f' : 'u' # ُ damma
|
322
|
-
'\u064f\u0648' : 'ū' # ـُو damma followed by و
|
323
|
-
|
324
|
-
'\u064e\u06d2' : 'ai' # ـے
|
325
|
-
'\u064e\u0648' : 'au' # ـَو
|
326
|
-
'\u0670': 'á' # ىٰ
|
327
|
-
'\u0649': 'á' # ىٰ
|
328
|
-
|
329
|
-
# shadda
|
330
|
-
'\u0628\u0651' : 'bb' # ب
|
331
|
-
'\u062a\u0651' : 'tt' # ت
|
332
|
-
'\u062b\u0651' : 'thth' # ث
|
333
|
-
'\u062c\u0651' : 'jj' # ج
|
334
|
-
'\u062d\u0651' : 'ẖẖ' # ح
|
335
|
-
'\u062e\u0651' : 'khkh' # خ
|
336
|
-
'\u062f\u0651' : 'dd' # د
|
337
|
-
'\u0630\u0651' : 'z̄z̄' # ذ
|
338
|
-
'\u0631\u0651' : 'rr' # ر
|
339
|
-
'\u0632\u0651' : 'zz' # ز
|
340
|
-
'\u0633\u0651' : 'ss' # س
|
341
|
-
'\u0634\u0651' : 'sh' # ش
|
342
|
-
'\u0635\u0651' : 'şş' # ص
|
343
|
-
'\u0636\u0651' : 'ḏḏ' # ض
|
344
|
-
'\u0637\u0651' : 'ţţ' # ط
|
345
|
-
'\u0638\u0651' : 'z̧z̧' # ظ
|
346
|
-
'\u063a\u0651' : 'ghgh' # غ
|
347
|
-
'\u0641\u0651' : 'ff' # ف
|
348
|
-
'\u0642\u0651' : 'qq' # ق
|
349
|
-
'\u0643\u0651' : 'kk' # ك
|
350
|
-
'\u0644\u0651' : 'll' # ل
|
351
|
-
'\u0645\u0651' : 'mm' # م
|
352
|
-
'\u0646\u0651' : 'nn' # ن
|
353
|
-
'\u0647\u0651' : 'hh' # ه
|
354
|
-
'\u0648\u0651' : 'ww' # و
|
355
|
-
'[\u064a|\u06cc]\u0651' : 'yy' # ي
|
356
|
-
|
357
|
-
# NOTE 1
|
358
|
-
'\u0650\b' : '-e' # ِ kasra
|
359
|
-
'\u0674' : '-e' # ٴ
|
360
|
-
'\u0654' : '-e' # ٔ
|
361
|
-
|
362
|
-
'\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
|
363
|
-
'\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
|
364
|
-
'\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
|
365
|
-
'\u064e\u0648\u0652' : 'aw' # ـَوْ
|
366
|
-
'\u064e\u064a\u0652' : 'ay' # ـَيْ
|
367
|
-
'\u0650\u06cc\u0651\u064e' : 'īy' # ـِيَّ
|
368
|
-
'\u064e\u064a' : 'aī' # ـَي
|
369
|
-
'\u064e\u06cc' : 'aī' # ـَي
|
370
|
-
# - '-ye'
|
371
|
-
|
372
|
-
|
373
|
-
# ta' marboota
|
374
|
-
'\u0629' : 'at' # ة in the middle of the sentence
|
375
|
-
'\u0629$' : 'ah'
|
376
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
|
377
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
|
378
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
|
379
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
|
380
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
|
381
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
|
382
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
|
383
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
|
384
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
|
385
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
|
386
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
|
387
|
-
'(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
'\u0621' : '’' # ء
|
392
|
-
'\u0624' : '’' # ؤ
|
393
|
-
'\u0624\b' : '’o' # ؤ
|
394
|
-
'\u0626' : '’' # ئ
|
395
|
-
|
396
|
-
'\u0623' : '' # أ
|
397
|
-
'\u0625' : '' # إ
|
398
|
-
# See note B
|
399
|
-
'\b\u0627\u0644' : 'al ' # ال
|
400
|
-
# '\uFE8E' : '' # ﺎ
|
401
|
-
|
402
|
-
# Sun letters
|
403
|
-
'\b\u0627\u0644\u062a' : 'at t' # الت
|
404
|
-
'\b\u0627\u0644\u062b' : 'as̄ s̄' # الث
|
405
|
-
'\b\u0627\u0644\u062f' : 'ad d' # الد
|
406
|
-
'\b\u0627\u0644\u0630' : 'az̄ z̄' # الذ
|
407
|
-
'\b\u0627\u0644\u0631' : 'ar r' # الر
|
408
|
-
'\b\u0627\u0644\u0632' : 'az z' # الز
|
409
|
-
'\b\u0627\u0644\u0633' : 'as s' # الس
|
410
|
-
'\b\u0627\u0644\u0634' : 'ash sh' # الش
|
411
|
-
'\b\u0627\u0644\u0635' : 'aş ş' # الص
|
412
|
-
'\b\u0627\u0644\u0636' : 'aẕ ẕ' # الض
|
413
|
-
'\b\u0627\u0644\u0637' : 'aţ ţ' # الط
|
414
|
-
'\b\u0627\u0644\u0638' : 'az̧ z̧' # الظ
|
415
|
-
'\b\u0627\u0644\u0644' : 'al l' # الل
|
416
|
-
'\b\u0627\u0644\u0646' : 'an n' # الن
|
417
|
-
|
418
|
-
|
419
|
-
# consonant characters
|
420
|
-
|
421
|
-
'\u0628' : 'b' # ب
|
422
|
-
'\u067E' : 'p' # پ
|
423
|
-
'\u062a' : 't' # ت
|
424
|
-
'\u0679' : 'ṭ' # ٹ
|
425
|
-
'\u062B' : 's̄' # ث
|
426
|
-
'\u062c' : 'j' # ج
|
427
|
-
'\u0686' : 'ch' # چ
|
428
|
-
'\u062d' : 'ḩ' # ح
|
429
|
-
'\u062e' : 'kh' # خ
|
430
|
-
'\u062f' : 'd' # د
|
431
|
-
'\u0688' : 'ḍ' # ڈ
|
432
|
-
'\u0630' : 'z̄' # ذ
|
433
|
-
'\u0631' : 'r' # ر
|
434
|
-
'\u0691' : 'ṛ' # ڑ
|
435
|
-
'\u0632' : 'z' # ز
|
436
|
-
'\u0698' : 'zh' # ژ
|
437
|
-
'\u0633' : 's' # س
|
438
|
-
'\u0634' : 'sh' # ش
|
439
|
-
'\u0635' : 'ş' # ص
|
440
|
-
'\u0636' : 'ẕ' # ض
|
441
|
-
'\u0637' : 'ţ' # ط
|
442
|
-
'\u0638' : 'z̧' # ظ
|
443
|
-
'\u0639' : '‘' # ع
|
444
|
-
'\u063a' : 'gh' # غ
|
445
|
-
'\u0641' : 'f' # ف
|
446
|
-
'\u0642' : 'q' # ق
|
447
|
-
'\u0643' : 'k' # ك
|
448
|
-
'\u06A9' : 'k' # ک
|
449
|
-
'\u06AF' : 'g' # گ
|
450
|
-
'\u0644' : 'l' # ل
|
451
|
-
'\u0645' : 'm' # م
|
452
|
-
'\u0646' : 'n' # ن
|
453
|
-
'\u06BA' : 'ñ' # ں
|
454
|
-
'[\u0647|\u06c1|\u06be]' : 'h' # ه
|
455
|
-
'\u0648' : 'w' # و
|
456
|
-
'[\u064a|\u06cc]' : 'y' # ي
|
457
|
-
# '\u0649' : 'y' # ي
|
458
|
-
'\u06D0' : 'ē' # ې
|
459
|
-
'\u06CD' : 'êy' # ۍ
|