interscript 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/interscript.rb +5 -1
- data/lib/interscript/fs.rb +3 -1
- data/lib/interscript/mapping.rb +2 -2
- data/lib/interscript/opal.rb +5 -1
- data/lib/interscript/opal/maps.js.erb +7 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
- data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
- data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
- data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
- data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
- data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
- data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
- data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
- data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
- data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
- data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
- data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
- data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
- data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
- data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
- data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
- data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
- data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
- data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
- data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
- data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
- data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
- data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
- data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
- data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
- data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
- data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
- data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
- data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
- data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
- data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
- data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
- data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
- data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
- metadata +41 -15
@@ -9,10 +9,10 @@ url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/20th-gegn-docs/20th_gegn_WP
|
|
9
9
|
creation_date: 1998
|
10
10
|
description: |
|
11
11
|
The national system of romanization for Belarusian was approved by the State Committee on Land Resources,
|
12
|
-
Geodesy and Cartography, Republic of Belarus, on 20 March, 1998. This scheme was also supported by
|
12
|
+
Geodesy and Cartography, Republic of Belarus, on 20 March, 1998. This scheme was also supported by
|
13
13
|
the Y. Kolas Institute of Linguistics and the Republic Committee on Toponymy at the Belarusian Academy of Sciences.
|
14
|
-
While the system is still based on GOST 1983, it takes more precisely into account the peculiarities of
|
15
|
-
the Belarusian orthography. The system is reversible though there may exist some ambiguous consonant combinations.
|
14
|
+
While the system is still based on GOST 1983, it takes more precisely into account the peculiarities of
|
15
|
+
the Belarusian orthography. The system is reversible though there may exist some ambiguous consonant combinations.
|
16
16
|
|
17
17
|
tests: # the same as in by-bel-Cyrl-Latn-2007
|
18
18
|
- source: Аршанскi
|
@@ -107,7 +107,7 @@ tests: # the same as in by-bel-Cyrl-Latn-2007
|
|
107
107
|
expected: Viazynka
|
108
108
|
|
109
109
|
map:
|
110
|
-
inherit: gost-rus-
|
110
|
+
inherit: gost-rus-Cyrl-Latn-16876-71-1983
|
111
111
|
|
112
112
|
rules:
|
113
113
|
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0415 # Е after consonants
|
@@ -8,11 +8,11 @@ name: REGULATORY LEGAL ACTS OF THE NATIONAL BANK, NATIONAL ACADEMY OF SCIENCES O
|
|
8
8
|
url: http://www.pravo.by/pdf/2007-159/2007-159(027-028).pdf
|
9
9
|
creation_date: 2007
|
10
10
|
description: |
|
11
|
-
RESOLUTION OF THE STATE COMMITTEE
|
11
|
+
RESOLUTION OF THE STATE COMMITTEE
|
12
12
|
ON PROPERTY OF THE REPUBLIC OF BELARUS June 11, 2007 No. 38
|
13
13
|
|
14
|
-
8/16668 (06/18/2007) On amendments and additions to the Instructions
|
15
|
-
for the transliteration of geographical names of the
|
14
|
+
8/16668 (06/18/2007) On amendments and additions to the Instructions
|
15
|
+
for the transliteration of geographical names of the
|
16
16
|
Republic of Belarus in letters of the Latin alphabet
|
17
17
|
|
18
18
|
Based on the Regulation on the State Property Committee of the Republic of Belarus,
|
@@ -0,0 +1,33 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1997
|
4
|
+
language: nep
|
5
|
+
source_script: Deva
|
6
|
+
destination_script: Latn
|
7
|
+
name: Nepali Romanization, 1997
|
8
|
+
url: http://nationalgeoportal.gov.np/old/pdf/translation2.pdf
|
9
|
+
creation_date: 1997
|
10
|
+
description: |
|
11
|
+
Survey Department, Ministry of Land Management, Cooperatives and Poverty Alleviation, Government of Nepal.
|
12
|
+
|
13
|
+
notes:
|
14
|
+
- |
|
15
|
+
ं (anusvara) is rendered by
|
16
|
+
ṅ before क, ख, ग, and घ
|
17
|
+
ñ before च, छ, ज, and झ
|
18
|
+
ṇ before ट, ठ, ड, and ढ
|
19
|
+
n before त, थ, द, and ध
|
20
|
+
ṁ before य, र, ल, व, श, ष, स
|
21
|
+
|
22
|
+
tests:
|
23
|
+
- source: "दुःख"
|
24
|
+
expected: "duhkh"
|
25
|
+
|
26
|
+
map:
|
27
|
+
|
28
|
+
inherit: "bgnpcgn-nep-Deva-Latn-2011"
|
29
|
+
|
30
|
+
characters:
|
31
|
+
|
32
|
+
# Bisarga
|
33
|
+
'ः': 'h'
|
@@ -5,7 +5,7 @@ language: ell
|
|
5
5
|
source_script: Grek
|
6
6
|
destination_script: Latn
|
7
7
|
name: ELOT 743:1982 (transliteration)
|
8
|
-
url:
|
8
|
+
url:
|
9
9
|
creation_date: 1982
|
10
10
|
description: |
|
11
11
|
ELOT 743:1982 transliteration table for Greek
|
@@ -22,7 +22,7 @@ tests:
|
|
22
22
|
|
23
23
|
expected: |
|
24
24
|
Éna práma mónon me parakíni̱se ki eména na grápso̱ óti toúti̱n ti̱n patrída ti̱n échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai fto̱choí kai politikoí kai stratio̱tikoí kai oi pléon mikróteroi ánthro̱poi; ósoi ago̱nistí̱kamen, analógo̱s o katheís, échomen na zí̱somen edó̱. To loipón doulépsamen óloi mazí, na ti̱n fylámen ki óloi mazí kai na mi̱n légei oúte o dynatós «egó̱» oúte o adýnatos. Xérete póte na légei o katheís «egó̱»? Ótan ago̱nisteí mónos tou kai fkiásei í̱ chalásei, na légei «egó̱»; ótan ómo̱s ago̱nízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó̱». Kai eis to exí̱s na máthomen gnó̱si̱, an thélomen na fkiásomen cho̱rión, na zí̱somen óloi mazí.
|
25
|
-
|
25
|
+
|
26
26
|
Giánni̱s Makrygiánni̱s.
|
27
27
|
|
28
28
|
|
@@ -70,7 +70,7 @@ tests:
|
|
70
70
|
expected: Taÿ́getos
|
71
71
|
- source: σπρέυ
|
72
72
|
expected: spréy
|
73
|
-
|
73
|
+
|
74
74
|
- source: Αθήνα
|
75
75
|
expected: Athí̱na
|
76
76
|
- source: Άγιον Όρος
|
@@ -566,7 +566,7 @@ map:
|
|
566
566
|
- pattern: (?<=[Οο])\u03C5 # υ (after Ο)
|
567
567
|
result: u
|
568
568
|
- pattern: (?<=[Οο])\u03CD # ύ (after Ο)
|
569
|
-
result: ú
|
569
|
+
result: ú
|
570
570
|
- pattern: (?<=[ΆάΈέΉήΌό])\u03A5 # Άυ, Έυ, Ήυ, Όυ
|
571
571
|
result: Υ
|
572
572
|
- pattern: (?<=[ΆάΈέΉήΌό])\u03C5 # Άυ, Έυ, Ήυ, Όυ
|
@@ -682,4 +682,3 @@ map:
|
|
682
682
|
|
683
683
|
"\u0387": ";" # ·
|
684
684
|
"\u00B7": ";" # ·
|
685
|
-
|
@@ -5,7 +5,7 @@ language: ell
|
|
5
5
|
source_script: Grek
|
6
6
|
destination_script: Latn
|
7
7
|
name: ELOT 743:1982
|
8
|
-
url:
|
8
|
+
url:
|
9
9
|
creation_date: 1982
|
10
10
|
description: |
|
11
11
|
ELOT 743:1982 transcription table for Greek.
|
@@ -20,7 +20,7 @@ tests:
|
|
20
20
|
|
21
21
|
expected: |
|
22
22
|
Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrída tin échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai ftochoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o katheís, échomen na zísomen edó. To loipón doulépsamen óloi mazí, na tin fylámen ki óloi mazí kai na min légei oúte o dynatós «egó» oúte o adýnatos. Xérete póte na légei o katheís «egó»? Ótan agonisteí mónos tou kai fkiásei í chalásei, na légei «egó»; ótan ómos agonízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó». Kai eis to exís na máthomen gnósi, an thélomen na fkiásomen chorión, na zísomen óloi mazí.
|
23
|
-
|
23
|
+
|
24
24
|
Giánnis Makrygiánnis.
|
25
25
|
|
26
26
|
- source: ΑΘΗΝΑ
|
@@ -67,7 +67,7 @@ tests:
|
|
67
67
|
expected: Taÿ́getos
|
68
68
|
- source: σπρέυ
|
69
69
|
expected: spréy
|
70
|
-
|
70
|
+
|
71
71
|
- source: Αθήνα
|
72
72
|
expected: Athína
|
73
73
|
- source: Άγιον Όρος
|
@@ -563,7 +563,7 @@ map:
|
|
563
563
|
- pattern: (?<=[Οο])\u03C5 # υ (after Ο)
|
564
564
|
result: u
|
565
565
|
- pattern: (?<=[Οο])\u03CD # ύ (after Ο)
|
566
|
-
result: ú
|
566
|
+
result: ú
|
567
567
|
- pattern: (?<=[ΆάΈέΉήΌό])\u03A5 # Άυ, Έυ, Ήυ, Όυ
|
568
568
|
result: Υ
|
569
569
|
- pattern: (?<=[ΆάΈέΉήΌό])\u03C5 # Άυ, Έυ, Ήυ, Όυ
|
@@ -678,4 +678,3 @@ map:
|
|
678
678
|
|
679
679
|
"\u0387": ";" # ·
|
680
680
|
"\u00B7": ";" # ·
|
681
|
-
|
@@ -8,7 +8,7 @@ name: On approval of the Instructions for the transliteration of geographical na
|
|
8
8
|
url: https://registr.by/doc/103003
|
9
9
|
creation_date: 2000
|
10
10
|
description: |
|
11
|
-
Act name:
|
11
|
+
Act name:
|
12
12
|
On approval of the Instructions for the transliteration of geographical names of
|
13
13
|
the Republic of Belarus in letters of the Latin alphabet
|
14
14
|
Type of act, adoption authority, date and number of adoption (publication):
|
@@ -24,7 +24,7 @@ description: |
|
|
24
24
|
In 1978, COMECON adopted GOST 16876-71 with minor modifications as its official transliteration standard,
|
25
25
|
under the name of SEV 1362-78 (Russian: СЭВ 1362-78).
|
26
26
|
|
27
|
-
In 1982, In accordance with Order No. 169 of April 16, 1982, GOST 16876-71 / ST SEV 1362-78 was put into effect on May 1, 1982.
|
27
|
+
In 1982, in accordance with Order No. 169 of April 16, 1982, GOST 16876-71 / ST SEV 1362-78 was put into effect on May 1, 1982.
|
28
28
|
|
29
29
|
In 1983, in accordance with Order No. 231 of May 16, 1983, additional guidelines were released (check notes[2])
|
30
30
|
|
@@ -0,0 +1,323 @@
|
|
1
|
+
---
|
2
|
+
authority_id: iso
|
3
|
+
id: 233-1984
|
4
|
+
language: ara
|
5
|
+
source_script: Arab
|
6
|
+
destination_script: Latn
|
7
|
+
name: ISO 233:1984 Documentation — Transliteration of Arabic characters into Latin characters
|
8
|
+
url:
|
9
|
+
- https://www.iso.org/standard/4117.html
|
10
|
+
- http://transliteration.eki.ee/pdf/Arabic_2.2.pdf
|
11
|
+
- http://www.eki.ee/wgrs/rom1_ar.pdf
|
12
|
+
creation_date: 1984
|
13
|
+
confirmation date: 2018-06
|
14
|
+
description: |
|
15
|
+
ISO 233:1984 is one of a series of International Standards dealing with
|
16
|
+
the conversion of systems of writing, following the
|
17
|
+
principles of stringent conversion in order to permit
|
18
|
+
international information exchange. Its aim is to provide a
|
19
|
+
means for international communication of written messages
|
20
|
+
in a form which permits the automatic transmission and
|
21
|
+
reconstitution of these by men or machines. Cancels and
|
22
|
+
replaces ISO Recommendation R 233-1961
|
23
|
+
notes:
|
24
|
+
- |
|
25
|
+
The transliteration ISO 233:1984 WRT ara-arab-latn-2017 gives every character and diacritical mark a unique
|
26
|
+
equivalent and e.g. long vowels in Arabic ā, ī and ū are consequently written a’, iy and uw
|
27
|
+
respectively in the ISO transliteration. Other main correspondences:
|
28
|
+
ث is ṯ instead of th
|
29
|
+
ج is ǧ instead of j
|
30
|
+
ح is ḥ instead of ẖ
|
31
|
+
خ is ẖ instead of kh
|
32
|
+
ذ is ḏ instead of dh
|
33
|
+
ش is š instead of sh
|
34
|
+
ص is ṣ instead of s̱
|
35
|
+
ض is ḍ instead of ḏ
|
36
|
+
ط is ṭ instead of ṯ
|
37
|
+
ظ is ẓ instead of d͟h
|
38
|
+
غ is ġ instead of gh
|
39
|
+
ة is ẗ instead of h/t
|
40
|
+
ى is ỳ
|
41
|
+
ـِي is iy instead of ī
|
42
|
+
ـُو is uw instead of ū
|
43
|
+
ـَا is a’ instead of ā
|
44
|
+
ـَى is aỳ instead of á
|
45
|
+
|
46
|
+
tests:
|
47
|
+
|
48
|
+
- source: مِصر
|
49
|
+
expected: Miṣr
|
50
|
+
|
51
|
+
- source: قَطَر
|
52
|
+
expected: Qaṭar
|
53
|
+
|
54
|
+
- source: الجُمهُورِيَّة العِرَاقِيَّة
|
55
|
+
expected: Al Ǧumhuwriyaẗ al ‘Ira’qiyaẗ
|
56
|
+
|
57
|
+
- source: جُمهُورِيَّة مِصر العَرَبِيَّة
|
58
|
+
expected: Ǧumhuwriyaẗ Miṣr al ‘Arabiyaẗ
|
59
|
+
|
60
|
+
- source: الرِيَاض
|
61
|
+
expected: Ar Riya’ḍ
|
62
|
+
|
63
|
+
- source: الشارِقة
|
64
|
+
expected: Aš Šâriqaẗ
|
65
|
+
|
66
|
+
map:
|
67
|
+
postrules:
|
68
|
+
- pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
|
69
|
+
result: "upcase"
|
70
|
+
# don't capitalize the definite article in the middle of a sentence
|
71
|
+
- pattern : ' At T' # الت
|
72
|
+
result: ' at T'
|
73
|
+
- pattern : ' Aṯ Ṯ' # الث
|
74
|
+
result: ' aṯ Ṯ'
|
75
|
+
- pattern : ' Ad D' # الد
|
76
|
+
result: ' ad D'
|
77
|
+
- pattern : ' Aḏ Ḏ' # الذ
|
78
|
+
result: ' aḏ Ḏ'
|
79
|
+
- pattern : ' Ar R' # الر
|
80
|
+
result: ' ar R'
|
81
|
+
- pattern : ' Az Z' # الز
|
82
|
+
result: ' az Z'
|
83
|
+
- pattern : ' As S' # الس
|
84
|
+
result: ' as S'
|
85
|
+
- pattern : ' Aš Š' # الش
|
86
|
+
result: ' aš Š'
|
87
|
+
- pattern : ' Aṣ Ṣ' # الص
|
88
|
+
result: ' aṣ Ṣ'
|
89
|
+
- pattern : ' Aḍ Ḍ' # الض
|
90
|
+
result: ' aḍ Ḍ'
|
91
|
+
- pattern : ' Aṭ Ṭ' # الط
|
92
|
+
result: ' aṭ Ṭ'
|
93
|
+
- pattern : ' Aẓ Ẓ' # الظ
|
94
|
+
result: ' aẓ Ẓ'
|
95
|
+
- pattern : ' Al L' # الل
|
96
|
+
result: ' al L'
|
97
|
+
- pattern : ' an n' # الن
|
98
|
+
result: ' an N'
|
99
|
+
- pattern: " Al " # ال
|
100
|
+
result: " al "
|
101
|
+
|
102
|
+
characters:
|
103
|
+
|
104
|
+
# pointing
|
105
|
+
'\u064e' : 'a' # َ fatha
|
106
|
+
'\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
|
107
|
+
'\u0650' : 'i' # ِ kasra
|
108
|
+
'\u064f' : 'u' # ُ damma
|
109
|
+
'\u0652' : '' # ْ sokoon, see note A below
|
110
|
+
|
111
|
+
# special pointed letters
|
112
|
+
# special pointed letters
|
113
|
+
'\u0639\u064e' : '‘a' # عَ
|
114
|
+
'\u0639\u0650' : '‘i' # عِ
|
115
|
+
'\u0639\u064f' : '‘ū' # عُ
|
116
|
+
# handle MacOS regex difference
|
117
|
+
'\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
|
118
|
+
|
119
|
+
'\u0650\u064a' : 'iy' # ـِي kasra followed by ي
|
120
|
+
'\u0650\u064a\u0651\u064e' : 'iy' # ـِيَّ
|
121
|
+
'\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي
|
122
|
+
'\u064e\u0627' : 'a’' # ـَا fatha followed by ا
|
123
|
+
'\u064e\u0649' : 'aỳ' # ـَى fatha followed by ى which is ا not ي
|
124
|
+
'\u064f\u0648' : 'uw' # ـُو damma followed by و
|
125
|
+
'\u064e\u0648\u0652' : 'aw' # ـَوْ
|
126
|
+
'\u064e\u064a\u0652' : 'ay' # ـَيْ
|
127
|
+
|
128
|
+
# Sun letters
|
129
|
+
|
130
|
+
'\b\u0627\u0644\u062a' : 'at t' # الت
|
131
|
+
'\b\u0627\u0644\u062b' : 'aṯ ṯ' # الث
|
132
|
+
'\b\u0627\u0644\u062f' : 'ad d' # الد
|
133
|
+
'\b\u0627\u0644\u0630' : 'aḏ ḏ' # الذ
|
134
|
+
'\b\u0627\u0644\u0631' : 'ar r' # الر
|
135
|
+
'\b\u0627\u0644\u0632' : 'az z' # الز
|
136
|
+
'\b\u0627\u0644\u0633' : 'as s' # الس
|
137
|
+
'\b\u0627\u0644\u0634' : 'aš š' # الش
|
138
|
+
'\b\u0627\u0644\u0635' : 'aṣ ṣ' # الص
|
139
|
+
'\b\u0627\u0644\u0636' : 'aḍ ḍ' # الض
|
140
|
+
'\b\u0627\u0644\u0637' : 'aṭ ṭ' # الط
|
141
|
+
'\b\u0627\u0644\u0638' : 'aẓ ẓ' # الظ
|
142
|
+
'\b\u0627\u0644\u0644' : 'al l' # الل
|
143
|
+
'\b\u0627\u0644\u0646' : 'an n' # الن
|
144
|
+
|
145
|
+
# ta' marboota in iso-233-1984 is all the same `aẗ`
|
146
|
+
'\u0629' : 'aẗ' # ة in the middle of the sentence
|
147
|
+
|
148
|
+
# Shadda
|
149
|
+
|
150
|
+
|
151
|
+
'\u0628\u0651' : 'bb' # ب
|
152
|
+
'\u062a\u0651' : 'tt' # ت
|
153
|
+
'\u062b\u0651' : 'ṯṯ' # ث
|
154
|
+
'\u062c\u0651' : 'ǧǧ' # ج
|
155
|
+
'\u062d\u0651' : 'ḥḥ' # ح
|
156
|
+
'\u062e\u0651' : 'ẖẖ' # خ
|
157
|
+
'\u062f\u0651' : 'dd' # د
|
158
|
+
'\u0630\u0651' : 'ḏḏ' # ذ
|
159
|
+
'\u0631\u0651' : 'rr' # ر
|
160
|
+
'\u0632\u0651' : 'zz' # ز
|
161
|
+
'\u0633\u0651' : 'ss' # س
|
162
|
+
'\u0634\u0651' : 'šš' # ش
|
163
|
+
'\u0635\u0651' : 'ṣṣ' # ص
|
164
|
+
'\u0636\u0651' : 'ḍḍ' # ض
|
165
|
+
'\u0637\u0651' : 'ṭṭ' # ط
|
166
|
+
'\u0638\u0651' : 'ẓẓ' # ظ
|
167
|
+
'\u063a\u0651' : 'ġġ' # غ
|
168
|
+
'\u0641\u0651' : 'ff' # ف
|
169
|
+
'\u0642\u0651' : 'qq' # ق
|
170
|
+
'\u0643\u0651' : 'kk' # ك
|
171
|
+
'\u0644\u0651' : 'll' # ل
|
172
|
+
'\u0645\u0651' : 'mm' # م
|
173
|
+
'\u0646\u0651' : 'nn' # ن
|
174
|
+
'\u0647\u0651' : 'hh' # ه
|
175
|
+
'\u0648\u0651' : 'ww' # و
|
176
|
+
'\u064a\u0651' : 'yy' # ي
|
177
|
+
|
178
|
+
|
179
|
+
'\u0622' : '’â' # آ
|
180
|
+
|
181
|
+
'\u0627' : 'â' # ا
|
182
|
+
|
183
|
+
'\u0649' : 'ỳ' # ى
|
184
|
+
|
185
|
+
'\u0626' : "'" # ئ
|
186
|
+
|
187
|
+
|
188
|
+
'\u0621' : # ء
|
189
|
+
- '’'
|
190
|
+
- '' # see note A
|
191
|
+
|
192
|
+
'\u0623' : 'a' # أ
|
193
|
+
|
194
|
+
# See note B
|
195
|
+
'\b\u0627\u0644' : 'al ' # ال
|
196
|
+
# '\uFE8E' : '' # ﺎ
|
197
|
+
|
198
|
+
'\u0628' : 'b' # ب
|
199
|
+
'\uFE91' : 'b' # ﺑ
|
200
|
+
'\uFE92' : 'b' # ﺒ
|
201
|
+
'\uFE90' : 'b' # ﺐ
|
202
|
+
|
203
|
+
# See note C
|
204
|
+
'\u062a' : 't' # ت
|
205
|
+
'\ufe97' : 't' # ﺗ
|
206
|
+
'\ufe98' : 't' # ﺘ
|
207
|
+
'\ufe96' : 't' # ﺖ
|
208
|
+
|
209
|
+
'\u062b' : 'ṯ' # ث
|
210
|
+
'\ufe9b' : 'ṯ' # ﺛ
|
211
|
+
'\ufe9c' : 'ṯ' # ﺜ
|
212
|
+
'\ufe9a' : 'ṯ' # ﺚ
|
213
|
+
|
214
|
+
'\u062c' : 'ǧ' # ج
|
215
|
+
'\ufe9f' : 'ǧ' # ﺟ
|
216
|
+
'\ufea0' : 'ǧ' # ﺠ
|
217
|
+
'\ufe9e' : 'ǧ' # ﺞ
|
218
|
+
|
219
|
+
'\u062d' : 'ḥ' # ح
|
220
|
+
'\ufea3' : 'ḥ' # ﺣ
|
221
|
+
'\ufea4' : 'ḥ' # ﺤ
|
222
|
+
'\ufea2' : 'ḥ' # ﺢ
|
223
|
+
|
224
|
+
'\u062e' : 'ẖ' # خ
|
225
|
+
'\ufea7' : 'ẖ' # ﺧ
|
226
|
+
'\ufea8' : 'ẖ' # ﺨ
|
227
|
+
'\ufea6' : 'ẖ' # ﺦ
|
228
|
+
|
229
|
+
'\u062f' : 'd' # د
|
230
|
+
'\ufeaa' : 'd' # ﺪ
|
231
|
+
|
232
|
+
'\u0630' : 'ḏ' # ذ
|
233
|
+
'\ufeac' : 'ḏ' # ﺬ
|
234
|
+
|
235
|
+
'\u0631' : 'r' # ر
|
236
|
+
'\ufeae' : 'r' # ﺮ
|
237
|
+
|
238
|
+
'\u0632' : 'z' # ز
|
239
|
+
'\ufeb0' : 'z' # ﺰ
|
240
|
+
|
241
|
+
'\u0633' : 's' # س
|
242
|
+
'\ufeb3' : 's' # ﺳ
|
243
|
+
'\ufeb4' : 's' # ﺴ
|
244
|
+
'\ufeb2' : 's' # ﺲ
|
245
|
+
|
246
|
+
'\u0634' : 'š' # ش
|
247
|
+
'\ufeb7' : 'š' # ﺷ
|
248
|
+
'\ufeb8' : 'š' # ﺸ
|
249
|
+
'\ufeb6' : 'š' # ﺶ
|
250
|
+
|
251
|
+
'\u0635' : 'ṣ' # ص
|
252
|
+
'\ufebb' : 'ṣ' # ﺻ
|
253
|
+
'\ufebc' : 'ṣ' # ﺼ
|
254
|
+
'\ufeba' : 'ṣ' # ﺺ
|
255
|
+
|
256
|
+
'\u0636' : 'ḍ' # ض
|
257
|
+
'\ufebf' : 'ḍ' # ﺿ
|
258
|
+
'\ufec0' : 'ḍ' # ﻀ
|
259
|
+
'\ufebe' : 'ḍ' # ﺾ
|
260
|
+
|
261
|
+
'\u0637' : 'ṭ' # ط
|
262
|
+
'\ufec3' : 'ṭ' # ﻃ
|
263
|
+
'\ufec4' : 'ṭ' # ﻄ
|
264
|
+
'\ufec2' : 'ṭ' # ﻂ
|
265
|
+
|
266
|
+
'\u0638' : 'ẓ' # ظ
|
267
|
+
'\ufec7' : 'ẓ' # ﻇ
|
268
|
+
'\ufec8' : 'ẓ' # ﻈ
|
269
|
+
'\ufec6' : 'ẓ' # ﻆ
|
270
|
+
|
271
|
+
'\u0639' : '‘' # ع
|
272
|
+
'\ufecb' : '‘' # ﻋ
|
273
|
+
'\ufecc' : '‘' # ﻌ
|
274
|
+
'\ufeca' : '‘' # ﻊ
|
275
|
+
|
276
|
+
'\u063a' : 'ġ' # غ
|
277
|
+
'\ufecf' : 'ġ' # ﻏ
|
278
|
+
'\ufed0' : 'ġ' # ﻐ
|
279
|
+
'\ufece' : 'ġ' # ﻎ
|
280
|
+
|
281
|
+
'\u0641' : 'f' # ف
|
282
|
+
'\ufed3' : 'f' # ﻓ
|
283
|
+
'\ufed4' : 'f' # ﻔ
|
284
|
+
'\ufed2' : 'f' # ﻒ
|
285
|
+
|
286
|
+
'\u0642' : 'q' # ق
|
287
|
+
'\ufed7' : 'q' # ﻗ
|
288
|
+
'\ufed8' : 'q' # ﻘ
|
289
|
+
'\ufed6' : 'q' # ﻖ
|
290
|
+
|
291
|
+
'\u0643' : 'k' # ك
|
292
|
+
'\ufedb' : 'k' # ﻛ
|
293
|
+
'\ufedc' : 'k' # ﻜ
|
294
|
+
'\ufeda' : 'k' # ﻚ
|
295
|
+
|
296
|
+
'\u0644' : 'l' # ل
|
297
|
+
'\ufedf' : 'l' # ﻟ
|
298
|
+
'\ufee0' : 'l' # ﻠ
|
299
|
+
'\ufede' : 'l' # ﻞ
|
300
|
+
|
301
|
+
'\u0645' : 'm' # م
|
302
|
+
'\ufee3' : 'm' # ﻣ
|
303
|
+
'\ufee4' : 'm' # ﻤ
|
304
|
+
'\ufee2' : 'm' # ﻢ
|
305
|
+
|
306
|
+
'\u0646' : 'n' # ن
|
307
|
+
'\ufee7' : 'n' # ﻧ
|
308
|
+
'\ufee8' : 'n' # ﻨ
|
309
|
+
'\ufee6' : 'n' # ﻦ
|
310
|
+
|
311
|
+
# See note C
|
312
|
+
'\u0647' : 'h' # ه
|
313
|
+
'\ufeeb' : 'h' # ﻫ
|
314
|
+
'\ufeec' : 'h' # ﻬ
|
315
|
+
'\ufeea' : 'h' # ﻪ
|
316
|
+
|
317
|
+
'\u0648' : 'w' # و
|
318
|
+
'\ufeee' : 'w' # ﻮ
|
319
|
+
|
320
|
+
'\u064a' : 'y' # ي
|
321
|
+
'\ufef3' : 'y' # ﻳ
|
322
|
+
'\ufef4' : 'y' # ﻴ
|
323
|
+
'\ufef1' : 'y' # ﻱ
|