interscript 0.1.7 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +1 -3
- data/aliases.json +1 -0
- data/lib/interscript.rb +8 -3
- data/lib/interscript/fs.rb +27 -0
- data/lib/interscript/mapping.rb +3 -1
- data/lib/interscript/opal.rb +142 -3
- data/lib/interscript/opal/entrypoint.rb +8 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +2 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
- data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
- data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
- data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
- data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
- data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
- data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
- data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/spec/interscript/filenames_spec.rb +6 -369
- data/spec/interscript_spec.rb +10 -2
- metadata +50 -7
- data/lib/interscript/opal/map_translate.rb +0 -7
@@ -40,9 +40,11 @@ tests:
|
|
40
40
|
- source: "ନବନିଯୁକ୍ତ ଓଡିଶା କଂଗ୍ରେସ ପ୍ରଭାରୀ ଏ.ଚେଲ୍ଲା କୁମାରଙ୍କୁ କରୋନା"
|
41
41
|
expected: "nbniyukt ŏḍiśā kṅgrēs prbhārī ē.cēllā kumārṅku krŏnā"
|
42
42
|
- source: "ଦିଲ୍ଲୀ: ଦିନ ଦ୍ବିପହରରେ ଗାଡ଼ି ଉପରକୁ ଦୁର୍ବୃତ୍ତ ଚଳାଇଲେ ୮ ରାଉଣ୍ଡ ଗୁଳି: ଚାଳକଙ୍କ ମୃତ୍ୟୁ"
|
43
|
-
expected: "dillī: din dbiphrrē gād̂i uprku durbṛtt cḷāilē
|
43
|
+
expected: "dillī: din dbiphrrē gād̂i uprku durbṛtt cḷāilē 8 rāuṇḍ guḷi: cāḷkṅk mṛtẏu"
|
44
44
|
- source: "ବୟସରେ ଆର ପାରିକୁ ଚାଲିଗଲେ କଣ୍ଠଶିଳ୍ପୀ ଅନୁରାଧା ପୋଡୱାଲଙ୍କ ପୁଅ ଆଦିତ୍ୟ"
|
45
45
|
expected: "bẏsrē ār pāriku cāliglē kṇṭhśiḷpī anurādhā pēāḍୱālṅk pua āditẏ"
|
46
|
+
- source: "୦୧୭୧୬୪୨୯୭୦୦"
|
47
|
+
expected: "01716429700"
|
46
48
|
|
47
49
|
map:
|
48
50
|
|
@@ -157,4 +159,17 @@ map:
|
|
157
159
|
'଼': ''
|
158
160
|
'।': '.'
|
159
161
|
"": ''# Used for joining
|
160
|
-
"": ''# Used for non joining
|
162
|
+
"": ''# Used for non joining
|
163
|
+
|
164
|
+
# Numbers
|
165
|
+
|
166
|
+
'୦': '0'
|
167
|
+
'୧': '1'
|
168
|
+
'୨': '2'
|
169
|
+
'୩': '3'
|
170
|
+
'୪': '4'
|
171
|
+
'୫': '5'
|
172
|
+
'୬': '6'
|
173
|
+
'୭': '7'
|
174
|
+
'୮': '8'
|
175
|
+
'୯': '9'
|
@@ -2,7 +2,7 @@
|
|
2
2
|
authority_id: iso
|
3
3
|
id: 15919-2001
|
4
4
|
language: iso-639-2:kan
|
5
|
-
source_script:
|
5
|
+
source_script: kana
|
6
6
|
destination_script: Latn
|
7
7
|
name: "Information and documentation — Transliteration of Devanagari and related Indic scripts into Latin characters"
|
8
8
|
url: https://www.chatranjali.fr/Scripts/Standards/ISO15919.pdf
|
@@ -1,5 +1,5 @@
|
|
1
1
|
---
|
2
|
-
authority_id:
|
2
|
+
authority_id: masm
|
3
3
|
id: 2012
|
4
4
|
language: iso-639-2:mon
|
5
5
|
source_script: Cyrl
|
@@ -13,7 +13,7 @@ description: |
|
|
13
13
|
for transliteration from Cyrillic Mongolian in Latin alphabet. It repeats and supplements the previous
|
14
14
|
standard dating from 2003 (MNS 5217:2003).
|
15
15
|
|
16
|
-
This standard differs from the ISO 9 system for Cyrillic adopted by Russia, Armenia, Azerbaijan,
|
16
|
+
This standard differs from the ISO 9 system for Cyrillic adopted by Russia, Armenia, Azerbaijan,
|
17
17
|
Belarus, Kazakhstan, Kyrgyzstan, Uzbekistan, Tajikistan and Turkmenistan.
|
18
18
|
|
19
19
|
tests:
|
@@ -0,0 +1,200 @@
|
|
1
|
+
---
|
2
|
+
authority_id: mv
|
3
|
+
id: 1987
|
4
|
+
language: iso-639-2:div
|
5
|
+
source_script: Thaa
|
6
|
+
destination_script: Latn
|
7
|
+
name: Maldivian (Divehi) Maldivian Government 1987 system
|
8
|
+
url: https://www.eki.ee/wgrs/rom2_dv.htm
|
9
|
+
creation_date: 1987
|
10
|
+
description: |
|
11
|
+
In 1987 the Maldivian government adopted a system for the romanization of Maldivian. It was also approved by the BGN and the PCGN in 1988.
|
12
|
+
Maldivian is written from right to left. Vowels are marked as diacritical marks accompanying a consonant character.
|
13
|
+
|
14
|
+
notes:
|
15
|
+
- The character އ is not romanized. If it bears a vowel character, that vowel character alone is romanized, e.g. އިރުގައި irugai.
|
16
|
+
- Characters އް and ށް word-finally and before a digraph are romanized h, e.g. ވޭވަށް Veyvah, ކެރެއްދޫ Kerehdhoo. In other cases,
|
17
|
+
when އް and ށް are used, the romanized value of the following consonant character is doubled, e.g. ބައްޓެށް batteh, ކަޅެހުއްޓާ Kalhehuttaa.
|
18
|
+
- The character ނ is romanized n’ if used without any vowel or auxiliary sign. For example kan’du (ކަނޑު).
|
19
|
+
- The character އ is not romanized, but see note 1.
|
20
|
+
- Character ތް is romanized iy, e.g. ޒިޔާރަތްފުށި Ziyaaraiyfushi.
|
21
|
+
|
22
|
+
tests:
|
23
|
+
- source: "އެނބޫދޫ"
|
24
|
+
expected: "en’boodhoo"
|
25
|
+
- source: "ކަޅެހުއްޓާ"
|
26
|
+
expected: "kalhehuttaa"
|
27
|
+
- source: "ކެރެށްދޫ"
|
28
|
+
expected: "kerehdhoo"
|
29
|
+
- source: "ވޭވައް"
|
30
|
+
expected: "veyvah"
|
31
|
+
- source: "ކަނޑުފުށި"
|
32
|
+
expected: "kan’dufushi"
|
33
|
+
- source: "ޒިޔާރަތްފުށި"
|
34
|
+
expected: "ziyaaraiyfushi"
|
35
|
+
- source: "ރައްކާތެރިކުރުމާއި"
|
36
|
+
expected: "rakkaatherikurumaai"
|
37
|
+
- source: "ދަރިވަރެއްގެވެސް"
|
38
|
+
expected: "dharivareggeves"
|
39
|
+
- source: "ދަރިވަރުންނާއި"
|
40
|
+
expected: "dharivarun’n’aai"
|
41
|
+
- source: "ރަށްރަށުގައި"
|
42
|
+
expected: "rarrashugai"
|
43
|
+
- source: "ޑިޕާޓްމަންޓުން"
|
44
|
+
expected: "dipaatman’tun’"
|
45
|
+
- source: "ހޯދިފައިނުވާ"
|
46
|
+
expected: "hoadhifain’uvaa"
|
47
|
+
|
48
|
+
|
49
|
+
map:
|
50
|
+
|
51
|
+
rules:
|
52
|
+
# Note 4
|
53
|
+
- pattern: \u0787\u07b0(?=[ހ])
|
54
|
+
result: "h"
|
55
|
+
- pattern: \u0787\u07b0(?=[ށ])
|
56
|
+
result: "h"
|
57
|
+
- pattern: \u0787\u07b0(?=[ނ])
|
58
|
+
result: "n"
|
59
|
+
- pattern: \u0787\u07b0(?=[ރ])
|
60
|
+
result: "r"
|
61
|
+
- pattern: \u0787\u07b0(?=[ބ])
|
62
|
+
result: "b"
|
63
|
+
- pattern: \u0787\u07b0(?=[ޅ])
|
64
|
+
result: "h"
|
65
|
+
- pattern: \u0787\u07b0(?=[ކ])
|
66
|
+
result: "k"
|
67
|
+
- pattern: \u0787\u07b0(?=[ވ])
|
68
|
+
result: "v"
|
69
|
+
- pattern: \u0787\u07b0(?=[މ])
|
70
|
+
result: "m"
|
71
|
+
- pattern: \u0787\u07b0(?=[ފ])
|
72
|
+
result: "f"
|
73
|
+
- pattern: \u0787\u07b0(?=[ދ])
|
74
|
+
result: "h"
|
75
|
+
- pattern: \u0787\u07b0(?=[ތ])
|
76
|
+
result: "h"
|
77
|
+
- pattern: \u0787\u07b0(?=[ލ])
|
78
|
+
result: "l"
|
79
|
+
- pattern: \u0787\u07b0(?=[ގ])
|
80
|
+
result: "g"
|
81
|
+
- pattern: \u0787\u07b0(?=[ޏ])
|
82
|
+
result: "h"
|
83
|
+
- pattern: \u0787\u07b0(?=[ސ])
|
84
|
+
result: "s"
|
85
|
+
- pattern: \u0787\u07b0(?=[ޑ])
|
86
|
+
result: "d"
|
87
|
+
- pattern: \u0787\u07b0(?=[ޖ])
|
88
|
+
result: "j"
|
89
|
+
- pattern: \u0787\u07b0(?=[ޗ])
|
90
|
+
result: "h"
|
91
|
+
- pattern: \u0787\u07b0(?=[ޒ])
|
92
|
+
result: "z"
|
93
|
+
- pattern: \u0787\u07b0(?=[ޓ])
|
94
|
+
result: "t"
|
95
|
+
- pattern: \u0787\u07b0(?=[ޕ])
|
96
|
+
result: "p"
|
97
|
+
- pattern: \u0787\u07b0(?=[ޔ])
|
98
|
+
result: "y"
|
99
|
+
|
100
|
+
- pattern: \u0781\u07b0(?=[ހ])
|
101
|
+
result: "h"
|
102
|
+
- pattern: \u0781\u07b0(?=[ށ])
|
103
|
+
result: "h"
|
104
|
+
- pattern: \u0781\u07b0(?=[ނ])
|
105
|
+
result: "n"
|
106
|
+
- pattern: \u0781\u07b0(?=[ރ])
|
107
|
+
result: "r"
|
108
|
+
- pattern: \u0781\u07b0(?=[ބ])
|
109
|
+
result: "b"
|
110
|
+
- pattern: \u0781\u07b0(?=[ޅ])
|
111
|
+
result: "h"
|
112
|
+
- pattern: \u0781\u07b0(?=[ކ])
|
113
|
+
result: "k"
|
114
|
+
- pattern: \u0781\u07b0(?=[ވ])
|
115
|
+
result: "v"
|
116
|
+
- pattern: \u0781\u07b0(?=[މ])
|
117
|
+
result: "m"
|
118
|
+
- pattern: \u0781\u07b0(?=[ފ])
|
119
|
+
result: "f"
|
120
|
+
- pattern: \u0781\u07b0(?=[ދ])
|
121
|
+
result: "h"
|
122
|
+
- pattern: \u0781\u07b0(?=[ތ])
|
123
|
+
result: "h"
|
124
|
+
- pattern: \u0781\u07b0(?=[ލ])
|
125
|
+
result: "l"
|
126
|
+
- pattern: \u0781\u07b0(?=[ގ])
|
127
|
+
result: "g"
|
128
|
+
- pattern: \u0781\u07b0(?=[ޏ])
|
129
|
+
result: "h"
|
130
|
+
- pattern: \u0781\u07b0(?=[ސ])
|
131
|
+
result: "s"
|
132
|
+
- pattern: \u0781\u07b0(?=[ޑ])
|
133
|
+
result: "d"
|
134
|
+
- pattern: \u0781\u07b0(?=[ޖ])
|
135
|
+
result: "j"
|
136
|
+
- pattern: \u0781\u07b0(?=[ޗ])
|
137
|
+
result: "h"
|
138
|
+
- pattern: \u0781\u07b0(?=[ޒ])
|
139
|
+
result: "z"
|
140
|
+
- pattern: \u0781\u07b0(?=[ޓ])
|
141
|
+
result: "t"
|
142
|
+
- pattern: \u0781\u07b0(?=[ޕ])
|
143
|
+
result: "p"
|
144
|
+
- pattern: \u0781\u07b0(?=[ޔ])
|
145
|
+
result: "y"
|
146
|
+
|
147
|
+
- pattern: \u0787\u07b0(?=\b)
|
148
|
+
result: 'h'
|
149
|
+
- pattern: \u0781\u07b0(?=\b)
|
150
|
+
result: 'h'
|
151
|
+
|
152
|
+
# Note 5
|
153
|
+
- pattern: \u0782(?!=[\u07a6\u07a7\u07a8\u07a9\u07aa\u07ab\u07ac\u07ad\u07ae\u07af\u07B0])
|
154
|
+
result: "n’"
|
155
|
+
|
156
|
+
# Note 6
|
157
|
+
- pattern: \u078C(?=\u07B0)
|
158
|
+
result: "iy"
|
159
|
+
|
160
|
+
characters:
|
161
|
+
# Vowels
|
162
|
+
"\u07a6" : "a"
|
163
|
+
"\u07a7" : "aa"
|
164
|
+
"\u07a8" : "i"
|
165
|
+
"\u07a9" : "ee"
|
166
|
+
"\u07aa" : "u"
|
167
|
+
"\u07ab" : "oo"
|
168
|
+
"\u07ac" : "e"
|
169
|
+
"\u07ad" : "ey"
|
170
|
+
"\u07ae" : "o"
|
171
|
+
"\u07af" : "oa"
|
172
|
+
"\u07B0" : "" # not romanized but see notes 4 & 6
|
173
|
+
|
174
|
+
|
175
|
+
# Consonants
|
176
|
+
"ހ": "h"
|
177
|
+
"ށ": "sh" # Note 4
|
178
|
+
"ނ": "n" # Note 5
|
179
|
+
"ރ": "r"
|
180
|
+
"ބ": "b"
|
181
|
+
"ޅ": "lh"
|
182
|
+
"ކ": "k"
|
183
|
+
"އ": "" # Notes 3 & 4
|
184
|
+
"ވ": "v"
|
185
|
+
"މ": "m"
|
186
|
+
"ފ": "f"
|
187
|
+
"ދ": "dh"
|
188
|
+
"ތ": "th" # Note 6
|
189
|
+
"ލ": "l"
|
190
|
+
"ގ": "g"
|
191
|
+
"ޏ": "gn"
|
192
|
+
"ސ": "s"
|
193
|
+
"ޑ": "d"
|
194
|
+
"ޖ": "j"
|
195
|
+
"ޗ": "ch"
|
196
|
+
"ޒ": "z"
|
197
|
+
"ޓ": "t"
|
198
|
+
"ޕ": "p"
|
199
|
+
"ޔ": "y"
|
200
|
+
|
@@ -0,0 +1,137 @@
|
|
1
|
+
---
|
2
|
+
authority_id: odni
|
3
|
+
id: 2004
|
4
|
+
language: ics-630-01:ara
|
5
|
+
source_script: Arab
|
6
|
+
destination_script: Latn
|
7
|
+
name: Arabic Personal Names Office of the Director Of National Intelligence 2004 System
|
8
|
+
url: https://github.com/interscript/interscript-private-references/blob/master/odni/Arabic_IC_Standard.doc
|
9
|
+
creation_date: 2004
|
10
|
+
confirmation date: 2004-06
|
11
|
+
description:
|
12
|
+
notes:
|
13
|
+
- Long/Short Vowels Long and short vowels are not
|
14
|
+
distinguished in this system Samir (could be Saamir or
|
15
|
+
Samiir in Arabic).
|
16
|
+
|
17
|
+
- Double consonants Double consonants represented by the
|
18
|
+
Arabic shaddah are shown in most cases (e.g., Hassan,
|
19
|
+
Muhammad). Exceptions ’ayn and consonants represented by
|
20
|
+
digraphs are not doubled (e.g., al-Qadhafi [not
|
21
|
+
al-Qadhdhafi], Mubashir [not Mubashshir]).
|
22
|
+
|
23
|
+
- Hamzah (glottal stop) The hamzah is represented by an
|
24
|
+
apostrophe (’). Note that this is the same symbol used to
|
25
|
+
represent another consonant, the ’ayn.
|
26
|
+
|
27
|
+
- Ta’ marbutah (feminine ending marker) On the construct
|
28
|
+
form or when pronounced “t”, it is represented with a roman
|
29
|
+
t. In all other cases, it is represented with an h.
|
30
|
+
|
31
|
+
- Digraphs No distinction is made between digraphs such as
|
32
|
+
sh and single contiguous letters (e.g., s followed by h).
|
33
|
+
|
34
|
+
- Definite article “al” (‘the’) Follows Arabic spelling
|
35
|
+
rather than pronunciation. That is, sun letter assimilation
|
36
|
+
is not shown in the Romanized form (e.g., ’Abd-al-Rahman,
|
37
|
+
not ’Abd-ar-Rahman).
|
38
|
+
|
39
|
+
- Diphthongs the second element of the diphthong is
|
40
|
+
represented by a y or a w (rather than an i or a u)
|
41
|
+
Haytham, Faysal, Tawfiq, Rawdah.
|
42
|
+
|
43
|
+
- Hyphens Hyphens (-) are used to connect name elements
|
44
|
+
within a name ’Abd-al-Rahman, Abu-al-Bashar, Bin-Ladin.
|
45
|
+
Exceptions Names that incorporate “Allah” as part of the
|
46
|
+
name (e.g., ’Abdallah, Nasrallah), names marked by the
|
47
|
+
lineage/family marker “Al” (e.g., Al Thani) are not
|
48
|
+
hyphenated.
|
49
|
+
|
50
|
+
- The definite article, “al”, within name phrases, is
|
51
|
+
Romanized as al and not as ul Nur-al-Din (not Nur-ul-Din).
|
52
|
+
It is not capitalized when name-initial.
|
53
|
+
|
54
|
+
- Names that incorporate Allah as part of the name retain the
|
55
|
+
a of Allah rather than a grammatical marker u ’Abdallah (
|
56
|
+
not ’Abdullah).
|
57
|
+
|
58
|
+
- Foreign names borrowed or appearing in Arabic are spelled
|
59
|
+
according to the standard Western tradition Georges,
|
60
|
+
Michel. However, names of non-Arabic origin no longer
|
61
|
+
considered foreign by Arabic speakers follow the IC
|
62
|
+
conventions Butrus (not Peter).
|
63
|
+
|
64
|
+
- Prefix بن (bin ‘son of’) is Romanized Bin unless written
|
65
|
+
with an alif, in which case it is Romanized as Ibn. The
|
66
|
+
colloquial form Bu (‘father’) should not be standardized as
|
67
|
+
Abu. These prefixes are capitalized.
|
68
|
+
|
69
|
+
- In general, Romanization follows the Modern Standard
|
70
|
+
Arabic (MSA) form rather than local pronunciation
|
71
|
+
standards. For example, the letter ج (jim) is represented
|
72
|
+
as a j even when pronounced as a “g” (e.g., Egyptian Gamal
|
73
|
+
is Romanized as Jamal).
|
74
|
+
|
75
|
+
tests:
|
76
|
+
|
77
|
+
- source: مِصر
|
78
|
+
expected: Miṣr
|
79
|
+
|
80
|
+
- source: قَطَر
|
81
|
+
expected: Qaṭar
|
82
|
+
|
83
|
+
- source: المَغرِب
|
84
|
+
expected: Al Maghrib
|
85
|
+
|
86
|
+
- source: الجُمهُورِيَّة العِراقِيَّة
|
87
|
+
expected: Al Jumhuriyah al ’Iraqiyah
|
88
|
+
|
89
|
+
- source: جُمهُورِيَّة العِراق
|
90
|
+
expected: Jumhuriyat al ’Iraq
|
91
|
+
|
92
|
+
- source: جُمهُورِيَّة مِصر العَرَبِيَّة
|
93
|
+
expected: Jumhuriyat Miṣr al ’Arabiyah
|
94
|
+
|
95
|
+
- source: بَغداد
|
96
|
+
expected: Baghdad
|
97
|
+
|
98
|
+
- source: تُونِس
|
99
|
+
expected: Tunis
|
100
|
+
|
101
|
+
- source: حَسّان
|
102
|
+
expected: Hassan
|
103
|
+
|
104
|
+
- source: مُحَمَّد
|
105
|
+
expected: Muhammad
|
106
|
+
|
107
|
+
- source: القَذَّافِي
|
108
|
+
expected: Al Qadhafi
|
109
|
+
|
110
|
+
- source: مُبَشِّر
|
111
|
+
expected: Mubashir
|
112
|
+
|
113
|
+
- source: الجَزائِر
|
114
|
+
expected: Al Jaza’ir
|
115
|
+
|
116
|
+
- source: عَبدالرَحمَن
|
117
|
+
expected: ’Abd al Rahman
|
118
|
+
|
119
|
+
- source: هَيْثَم
|
120
|
+
expected: Haytham
|
121
|
+
|
122
|
+
- source: فَيْصَل
|
123
|
+
expected: Fayṣal
|
124
|
+
|
125
|
+
- source: تَوْفِيق
|
126
|
+
expected: Tawfiq
|
127
|
+
|
128
|
+
- source: رَوْضَة
|
129
|
+
expected: Rawḍah
|
130
|
+
|
131
|
+
- source: نُورُالدِين
|
132
|
+
expected: Nur al Din
|
133
|
+
|
134
|
+
- source: عَبدُاللَّه
|
135
|
+
expected: ’Abdallah
|
136
|
+
map:
|
137
|
+
inherit: odni-ara-Arab-Latn-2015
|
@@ -1,10 +1,10 @@
|
|
1
1
|
---
|
2
|
-
authority_id:
|
2
|
+
authority_id: odni
|
3
3
|
id: 2017
|
4
4
|
language: ics-630-01:ara
|
5
5
|
source_script: Arab
|
6
6
|
destination_script: Latn
|
7
|
-
name:
|
7
|
+
name: Office of the Director Of National Intelligence Arabic Personal Names 2015 System
|
8
8
|
url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20A%20-%20Arabic_Personal_Names_FLTS%20(U).pdf
|
9
9
|
creation_date: 2017
|
10
10
|
confirmation date: 2018-06
|
@@ -131,10 +131,10 @@ notes: |
|
|
131
131
|
tests:
|
132
132
|
|
133
133
|
- source: مِصر
|
134
|
-
expected:
|
134
|
+
expected: Miṣr
|
135
135
|
|
136
136
|
- source: قَطَر
|
137
|
-
expected:
|
137
|
+
expected: Qaṭar
|
138
138
|
|
139
139
|
- source: المَغرِب
|
140
140
|
expected: Al Maghrib
|
@@ -146,7 +146,7 @@ tests:
|
|
146
146
|
expected: Jumhuriyat al ’Iraq
|
147
147
|
|
148
148
|
- source: جُمهُورِيَّة مِصر العَرَبِيَّة
|
149
|
-
expected: Jumhuriyat
|
149
|
+
expected: Jumhuriyat Miṣr al ’Arabiyah
|
150
150
|
|
151
151
|
- source: بَغداد
|
152
152
|
expected: Baghdad
|
@@ -170,22 +170,22 @@ tests:
|
|
170
170
|
expected: Al Jaza’ir
|
171
171
|
|
172
172
|
- source: عَبدالرَحمَن
|
173
|
-
expected: ’Abd
|
173
|
+
expected: ’Abd al Rahman
|
174
174
|
|
175
175
|
- source: هَيْثَم
|
176
176
|
expected: Haytham
|
177
177
|
|
178
178
|
- source: فَيْصَل
|
179
|
-
expected:
|
179
|
+
expected: Fayṣal
|
180
180
|
|
181
181
|
- source: تَوْفِيق
|
182
182
|
expected: Tawfiq
|
183
183
|
|
184
184
|
- source: رَوْضَة
|
185
|
-
expected:
|
185
|
+
expected: Rawḍah
|
186
186
|
|
187
187
|
- source: نُورُالدِين
|
188
|
-
expected: Nur
|
188
|
+
expected: Nur al Din
|
189
189
|
|
190
190
|
- source: عَبدُاللَّه
|
191
191
|
expected: ’Abdallah
|
@@ -195,8 +195,6 @@ map:
|
|
195
195
|
result: "upcase"
|
196
196
|
- pattern: " Al " # ال
|
197
197
|
result: " al "
|
198
|
-
- pattern: "-Al-" # ال
|
199
|
-
result: "-al-"
|
200
198
|
|
201
199
|
# don't capitalize defined article in the middle of a sentence
|
202
200
|
|
@@ -254,10 +252,10 @@ map:
|
|
254
252
|
'\u0632\u0651' : 'zz' # ز
|
255
253
|
'\u0633\u0651' : 'ss' # س
|
256
254
|
'\u0634\u0651' : 'sh' # ش
|
257
|
-
'\u0635\u0651' : '
|
258
|
-
'\u0636\u0651' : '
|
259
|
-
'\u0637\u0651' : '
|
260
|
-
'\u0638\u0651' : '
|
255
|
+
'\u0635\u0651' : 'ṣṣ' # ص
|
256
|
+
'\u0636\u0651' : 'ḍḍ' # ض
|
257
|
+
'\u0637\u0651' : 'ṭṭ' # ط
|
258
|
+
'\u0638\u0651' : 'ẓẓ' # ظ
|
261
259
|
'\u063a\u0651' : 'gh' # غ
|
262
260
|
'\u0641\u0651' : 'ff' # ف
|
263
261
|
'\u0642\u0651' : 'qq' # ق
|
@@ -280,146 +278,38 @@ map:
|
|
280
278
|
- ''
|
281
279
|
|
282
280
|
'\b\u0627\u0644' : 'al ' # ال
|
283
|
-
'\B\u064f?\u0627\u0644' : '
|
281
|
+
'\B\u064f?\u0627\u0644' : ' al ' # ال in middle of composite name
|
282
|
+
|
284
283
|
# '\uFE8E' : '' # ﺎ
|
285
284
|
|
286
285
|
|
287
286
|
'\u0623' : '' # أ
|
288
287
|
'\b\u0627' : '' # ا
|
289
288
|
'\u0627' : 'a' # ا
|
290
|
-
|
291
289
|
'\u0628' : 'b' # ب
|
292
|
-
'\uFE91' : 'b' # ﺑ
|
293
|
-
'\uFE92' : 'b' # ﺒ
|
294
|
-
'\uFE90' : 'b' # ﺐ
|
295
|
-
|
296
290
|
'\u062a' : 't' # ت
|
297
|
-
'\ufe97' : 't' # ﺗ
|
298
|
-
'\ufe98' : 't' # ﺘ
|
299
|
-
'\ufe96' : 't' # ﺖ
|
300
|
-
|
301
291
|
'\u062b' : 'th' # ث
|
302
|
-
'\ufe9b' : 'th' # ﺛ
|
303
|
-
'\ufe9c' : 'th' # ﺜ
|
304
|
-
'\ufe9a' : 'th' # ﺚ
|
305
|
-
|
306
292
|
'\u062c' : 'j' # ج
|
307
|
-
'\ufe9f' : 'j' # ﺟ
|
308
|
-
'\ufea0' : 'j' # ﺠ
|
309
|
-
'\ufe9e' : 'j' # ﺞ
|
310
|
-
|
311
293
|
'\u062d' : 'h' # ح
|
312
|
-
'\ufea3' : 'h' # ﺣ
|
313
|
-
'\ufea4' : 'h' # ﺤ
|
314
|
-
'\ufea2' : 'h' # ﺢ
|
315
|
-
|
316
294
|
'\u062e' : 'kh' # خ
|
317
|
-
'\ufea7' : 'kh' # ﺧ
|
318
|
-
'\ufea8' : 'kh' # ﺨ
|
319
|
-
'\ufea6' : 'kh' # ﺦ
|
320
|
-
|
321
295
|
'\u062f' : 'd' # د
|
322
|
-
'\ufeaa' : 'd' # ﺪ
|
323
|
-
|
324
296
|
'\u0630' : 'dh' # ذ
|
325
|
-
'\ufeac' : 'dh' # ﺬ
|
326
|
-
|
327
297
|
'\u0631' : 'r' # ر
|
328
|
-
'\ufeae' : 'r' # ﺮ
|
329
|
-
|
330
298
|
'\u0632' : 'z' # ز
|
331
|
-
'\ufeb0' : 'z' # ﺰ
|
332
|
-
|
333
299
|
'\u0633' : 's' # س
|
334
|
-
'\ufeb3' : 's' # ﺳ
|
335
|
-
'\ufeb4' : 's' # ﺴ
|
336
|
-
'\ufeb2' : 's' # ﺲ
|
337
|
-
|
338
300
|
'\u0634' : 'sh' # ش
|
339
|
-
'\
|
340
|
-
'\
|
341
|
-
'\
|
342
|
-
|
343
|
-
'\u0635' : 's' # ص
|
344
|
-
'\ufebb' : 's' # ﺻ
|
345
|
-
'\ufebc' : 's' # ﺼ
|
346
|
-
'\ufeba' : 's' # ﺺ
|
347
|
-
|
348
|
-
'\u0636' : 'd' # ض
|
349
|
-
'\ufebf' : 'd' # ﺿ
|
350
|
-
'\ufec0' : 'd' # ﻀ
|
351
|
-
'\ufebe' : 'd' # ﺾ
|
352
|
-
|
353
|
-
'\u0637' : 't' # ط
|
354
|
-
'\ufec3' : 't' # ﻃ
|
355
|
-
'\ufec4' : 't' # ﻄ
|
356
|
-
'\ufec2' : 't' # ﻂ
|
357
|
-
|
358
|
-
'\u0638' : 'z' # ظ
|
359
|
-
'\ufec7' : 'z' # ﻇ
|
360
|
-
'\ufec8' : 'z' # ﻈ
|
361
|
-
'\ufec6' : 'z' # ﻆ
|
362
|
-
|
301
|
+
'\u0635' : 'ṣ' # ص
|
302
|
+
'\u0636' : 'ḍ' # ض
|
303
|
+
'\u0637' : 'ṭ' # ط
|
304
|
+
'\u0638' : 'ẓ' # ظ
|
363
305
|
'\u0639' : '’' # ع
|
364
|
-
'\ufecb' : '’' # ﻋ
|
365
|
-
'\ufecc' : '’' # ﻌ
|
366
|
-
'\ufeca' : '’' # ﻊ
|
367
|
-
|
368
306
|
'\u063a' : 'gh' # غ
|
369
|
-
'\ufecf' : 'gh' # ﻏ
|
370
|
-
'\ufed0' : 'gh' # ﻐ
|
371
|
-
'\ufece' : 'gh' # ﻎ
|
372
|
-
|
373
307
|
'\u0641' : 'f' # ف
|
374
|
-
'\ufed3' : 'f' # ﻓ
|
375
|
-
'\ufed4' : 'f' # ﻔ
|
376
|
-
'\ufed2' : 'f' # ﻒ
|
377
|
-
|
378
308
|
'\u0642' : 'q' # ق
|
379
|
-
'\ufed7' : 'q' # ﻗ
|
380
|
-
'\ufed8' : 'q' # ﻘ
|
381
|
-
'\ufed6' : 'q' # ﻖ
|
382
|
-
|
383
309
|
'\u0643' : 'k' # ك
|
384
|
-
'\ufedb' : 'k' # ﻛ
|
385
|
-
'\ufedc' : 'k' # ﻜ
|
386
|
-
'\ufeda' : 'k' # ﻚ
|
387
|
-
|
388
310
|
'\u0644' : 'l' # ل
|
389
|
-
'\ufedf' : 'l' # ﻟ
|
390
|
-
'\ufee0' : 'l' # ﻠ
|
391
|
-
'\ufede' : 'l' # ﻞ
|
392
|
-
|
393
311
|
'\u0645' : 'm' # م
|
394
|
-
'\ufee3' : 'm' # ﻣ
|
395
|
-
'\ufee4' : 'm' # ﻤ
|
396
|
-
'\ufee2' : 'm' # ﻢ
|
397
|
-
|
398
312
|
'\u0646' : 'n' # ن
|
399
|
-
'\ufee7' : 'n' # ﻧ
|
400
|
-
'\ufee8' : 'n' # ﻨ
|
401
|
-
'\ufee6' : 'n' # ﻦ
|
402
|
-
|
403
|
-
# See note C
|
404
313
|
'\u0647' : 'h' # ه
|
405
|
-
'\ufeeb' : 'h' # ﻫ
|
406
|
-
'\ufeec' : 'h' # ﻬ
|
407
|
-
'\ufeea' : 'h' # ﻪ
|
408
|
-
|
409
314
|
'\u0648' : 'w' # و
|
410
|
-
'\
|
411
|
-
|
412
|
-
'\u064a' : 'y' # ي
|
413
|
-
'\ufef3' : 'y' # ﻳ
|
414
|
-
'\ufef4' : 'y' # ﻴ
|
415
|
-
'\ufef1' : 'y' # ﻱ
|
416
|
-
|
417
|
-
# (A) Not romanized word-initially.
|
418
|
-
|
419
|
-
# (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
|
420
|
-
|
421
|
-
# (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṯah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t, instead.
|
422
|
-
|
423
|
-
|
424
|
-
# Vowels, diphthongs and diacritical marks
|
425
|
-
# (ـ stands for any consonant)
|
315
|
+
'\u064a' : 'y' # ي
|