interscript 0.1.7 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -1,8 +1,8 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: iso-639-2:knd
5
- source_script: Knda
4
+ language: iso-639-2:kan
5
+ source_script: Kana
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Kannada Romanization
8
8
  #url:
@@ -40,9 +40,11 @@ tests:
40
40
  - source: "ନବନିଯୁକ୍ତ ଓଡିଶା କଂଗ୍ରେସ ପ୍ରଭାରୀ ଏ.ଚେଲ୍ଲା କୁମାରଙ୍କୁ କରୋନା"
41
41
  expected: "nbniyukt ŏḍiśā kṅgrēs prbhārī ē.cēllā kumārṅku krŏnā"
42
42
  - source: "ଦିଲ୍ଲୀ: ଦିନ ଦ୍ବିପହରରେ ଗାଡ଼ି ଉପରକୁ ଦୁର୍ବୃତ୍ତ ଚଳାଇଲେ ୮ ରାଉଣ୍ଡ ଗୁଳି: ଚାଳକଙ୍କ ମୃତ୍ୟୁ"
43
- expected: "dillī: din dbiphrrē gād̂i uprku durbṛtt cḷāilē rāuṇḍ guḷi: cāḷkṅk mṛtẏu"
43
+ expected: "dillī: din dbiphrrē gād̂i uprku durbṛtt cḷāilē 8 rāuṇḍ guḷi: cāḷkṅk mṛtẏu"
44
44
  - source: "ବୟସରେ ଆର ପାରିକୁ ଚାଲିଗଲେ କଣ୍ଠଶିଳ୍ପୀ ଅନୁରାଧା ପୋଡୱାଲଙ୍କ ପୁଅ ଆଦିତ୍ୟ"
45
45
  expected: "bẏsrē ār pāriku cāliglē kṇṭhśiḷpī anurādhā pēāḍୱālṅk pua āditẏ"
46
+ - source: "୦୧୭୧୬୪୨୯୭୦୦"
47
+ expected: "01716429700"
46
48
 
47
49
  map:
48
50
 
@@ -157,4 +159,17 @@ map:
157
159
  '଼': ''
158
160
  '।': '.'
159
161
  "‍": ''# Used for joining
160
- "‌": ''# Used for non joining
162
+ "‌": ''# Used for non joining
163
+
164
+ # Numbers
165
+
166
+ '୦': '0'
167
+ '୧': '1'
168
+ '୨': '2'
169
+ '୩': '3'
170
+ '୪': '4'
171
+ '୫': '5'
172
+ '୬': '6'
173
+ '୭': '7'
174
+ '୮': '8'
175
+ '୯': '9'
@@ -94,7 +94,7 @@ map:
94
94
  result: ' aẓ Ẓ'
95
95
  - pattern : ' Al L' # الل
96
96
  result: ' al L'
97
- - pattern : ' an n' # الن
97
+ - pattern : ' An N' # الن
98
98
  result: ' an N'
99
99
  - pattern: " Al " # ال
100
100
  result: " al "
@@ -2,7 +2,7 @@
2
2
  authority_id: iso
3
3
  id: 15919-2001
4
4
  language: iso-639-2:kan
5
- source_script: Knda
5
+ source_script: Kana
6
6
  destination_script: Latn
7
7
  name: "Information and documentation — Transliteration of Devanagari and related Indic scripts into Latin characters"
8
8
  url: https://www.chatranjali.fr/Scripts/Standards/ISO15919.pdf
@@ -1,5 +1,5 @@
1
1
  ---
2
- authority_id: mns
2
+ authority_id: masm
3
3
  id: 2012
4
4
  language: iso-639-2:mon
5
5
  source_script: Cyrl
@@ -13,7 +13,7 @@ description: |
13
13
  for transliteration from Cyrillic Mongolian in Latin alphabet. It repeats and supplements the previous
14
14
  standard dating from 2003 (MNS 5217:2003).
15
15
 
16
- This standard differs from the ISO 9 system for Cyrillic adopted by Russia, Armenia, Azerbaijan,
16
+ This standard differs from the ISO 9 system for Cyrillic adopted by Russia, Armenia, Azerbaijan,
17
17
  Belarus, Kazakhstan, Kyrgyzstan, Uzbekistan, Tajikistan and Turkmenistan.
18
18
 
19
19
  tests:
@@ -1,5 +1,5 @@
1
1
  ---
2
- authority_id: mns
2
+ authority_id: masm
3
3
  id: 2012
4
4
  language: iso-639-2:mon
5
5
  source_script: Latn
@@ -0,0 +1,200 @@
1
+ ---
2
+ authority_id: mv
3
+ id: 1987
4
+ language: iso-639-2:div
5
+ source_script: Thaa
6
+ destination_script: Latn
7
+ name: Maldivian (Divehi) Maldivian Government 1987 system
8
+ url: https://www.eki.ee/wgrs/rom2_dv.htm
9
+ creation_date: 1987
10
+ description: |
11
+ In 1987 the Maldivian government adopted a system for the romanization of Maldivian. It was also approved by the BGN and the PCGN in 1988.
12
+ Maldivian is written from right to left. Vowels are marked as diacritical marks accompanying a consonant character.
13
+
14
+ notes:
15
+ - The character އ is not romanized. If it bears a vowel character, that vowel character alone is romanized, e.g. އިރުގައި irugai.
16
+ - Characters އް and ށް word-finally and before a digraph are romanized h, e.g. ވޭވަށް Veyvah, ކެރެއްދޫ Kerehdhoo. In other cases,
17
+ when އް and ށް are used, the romanized value of the following consonant character is doubled, e.g. ބައްޓެށް batteh, ކަޅެހުއްޓާ Kalhehuttaa.
18
+ - The character ނ is romanized n’ if used without any vowel or auxiliary sign. For example kan’du (ކަނޑު).
19
+ - The character އ is not romanized but see note 1
20
+ - Character ތް is romanized iy, e.g. ޒިޔާރަތްފުށި Ziyaaraiyfushi.
21
+
22
+ tests:
23
+ - source: "އެނބޫދޫ"
24
+ expected: "en’boodhoo"
25
+ - source: "ކަޅެހުއްޓާ"
26
+ expected: "kalhehuttaa"
27
+ - source: "ކެރެށްދޫ"
28
+ expected: "kerehdhoo"
29
+ - source: "ވޭވައް"
30
+ expected: "veyvah"
31
+ - source: "ކަނޑުފުށި"
32
+ expected: "kan’dufushi"
33
+ - source: "ޒިޔާރަތްފުށި"
34
+ expected: "ziyaaraiyfushi"
35
+ - source: "ރައްކާތެރިކުރުމާއި"
36
+ expected: "rakkaatherikurumaai"
37
+ - source: "ދަރިވަރެއްގެވެސް"
38
+ expected: "dharivareggeves"
39
+ - source: "ދަރިވަރުންނާއި"
40
+ expected: "dharivarun’n’aai"
41
+ - source: "ރަށްރަށުގައި"
42
+ expected: "rarrashugai"
43
+ - source: "ޑިޕާޓްމަންޓުން"
44
+ expected: "dipaatman’tun’"
45
+ - source: "ހޯދިފައިނުވާ"
46
+ expected: "hoadhifain’uvaa"
47
+
48
+
49
+ map:
50
+
51
+ rules:
52
+ # Note 4
53
+ - pattern: \u0787\u07b0(?=[ހ])
54
+ result: "h"
55
+ - pattern: \u0787\u07b0(?=[ށ])
56
+ result: "h"
57
+ - pattern: \u0787\u07b0(?=[ނ])
58
+ result: "n"
59
+ - pattern: \u0787\u07b0(?=[ރ])
60
+ result: "r"
61
+ - pattern: \u0787\u07b0(?=[ބ])
62
+ result: "b"
63
+ - pattern: \u0787\u07b0(?=[ޅ])
64
+ result: "h"
65
+ - pattern: \u0787\u07b0(?=[ކ])
66
+ result: "k"
67
+ - pattern: \u0787\u07b0(?=[ވ])
68
+ result: "v"
69
+ - pattern: \u0787\u07b0(?=[މ])
70
+ result: "m"
71
+ - pattern: \u0787\u07b0(?=[ފ])
72
+ result: "f"
73
+ - pattern: \u0787\u07b0(?=[ދ])
74
+ result: "h"
75
+ - pattern: \u0787\u07b0(?=[ތ])
76
+ result: "h"
77
+ - pattern: \u0787\u07b0(?=[ލ])
78
+ result: "l"
79
+ - pattern: \u0787\u07b0(?=[ގ])
80
+ result: "g"
81
+ - pattern: \u0787\u07b0(?=[ޏ])
82
+ result: "h"
83
+ - pattern: \u0787\u07b0(?=[ސ])
84
+ result: "s"
85
+ - pattern: \u0787\u07b0(?=[ޑ])
86
+ result: "d"
87
+ - pattern: \u0787\u07b0(?=[ޖ])
88
+ result: "j"
89
+ - pattern: \u0787\u07b0(?=[ޗ])
90
+ result: "h"
91
+ - pattern: \u0787\u07b0(?=[ޒ])
92
+ result: "z"
93
+ - pattern: \u0787\u07b0(?=[ޓ])
94
+ result: "t"
95
+ - pattern: \u0787\u07b0(?=[ޕ])
96
+ result: "p"
97
+ - pattern: \u0787\u07b0(?=[ޔ])
98
+ result: "y"
99
+
100
+ - pattern: \u0781\u07b0(?=[ހ])
101
+ result: "h"
102
+ - pattern: \u0781\u07b0(?=[ށ])
103
+ result: "h"
104
+ - pattern: \u0781\u07b0(?=[ނ])
105
+ result: "n"
106
+ - pattern: \u0781\u07b0(?=[ރ])
107
+ result: "r"
108
+ - pattern: \u0781\u07b0(?=[ބ])
109
+ result: "b"
110
+ - pattern: \u0781\u07b0(?=[ޅ])
111
+ result: "h"
112
+ - pattern: \u0781\u07b0(?=[ކ])
113
+ result: "k"
114
+ - pattern: \u0781\u07b0(?=[ވ])
115
+ result: "v"
116
+ - pattern: \u0781\u07b0(?=[މ])
117
+ result: "m"
118
+ - pattern: \u0781\u07b0(?=[ފ])
119
+ result: "f"
120
+ - pattern: \u0781\u07b0(?=[ދ])
121
+ result: "h"
122
+ - pattern: \u0781\u07b0(?=[ތ])
123
+ result: "h"
124
+ - pattern: \u0781\u07b0(?=[ލ])
125
+ result: "l"
126
+ - pattern: \u0781\u07b0(?=[ގ])
127
+ result: "g"
128
+ - pattern: \u0781\u07b0(?=[ޏ])
129
+ result: "h"
130
+ - pattern: \u0781\u07b0(?=[ސ])
131
+ result: "s"
132
+ - pattern: \u0781\u07b0(?=[ޑ])
133
+ result: "d"
134
+ - pattern: \u0781\u07b0(?=[ޖ])
135
+ result: "j"
136
+ - pattern: \u0781\u07b0(?=[ޗ])
137
+ result: "h"
138
+ - pattern: \u0781\u07b0(?=[ޒ])
139
+ result: "z"
140
+ - pattern: \u0781\u07b0(?=[ޓ])
141
+ result: "t"
142
+ - pattern: \u0781\u07b0(?=[ޕ])
143
+ result: "p"
144
+ - pattern: \u0781\u07b0(?=[ޔ])
145
+ result: "y"
146
+
147
+ - pattern: \u0787\u07b0(?=\b)
148
+ result: 'h'
149
+ - pattern: \u0781\u07b0(?=\b)
150
+ result: 'h'
151
+
152
+ # Note 5
153
+ - pattern: \u0782(?!=[\u07a6\u07a7\u07a8\u07a9\u07aa\u07ab\u07ac\u07ad\u07ae\u07af\u07B0])
154
+ result: "n’"
155
+
156
+ # Note 6
157
+ - pattern: \u078C(?=\u07B0)
158
+ result: "iy"
159
+
160
+ characters:
161
+ # Vowels
162
+ "\u07a6" : "a"
163
+ "\u07a7" : "aa"
164
+ "\u07a8" : "i"
165
+ "\u07a9" : "ee"
166
+ "\u07aa" : "u"
167
+ "\u07ab" : "oo"
168
+ "\u07ac" : "e"
169
+ "\u07ad" : "ey"
170
+ "\u07ae" : "o"
171
+ "\u07af" : "oa"
172
+ "\u07B0" : "" # not romanized but see notes 4 & 6
173
+
174
+
175
+ # Consonants
176
+ "ހ": "h"
177
+ "ށ": "sh" # Note 4
178
+ "ނ": "n" # Note 5
179
+ "ރ": "r"
180
+ "ބ": "b"
181
+ "ޅ": "lh"
182
+ "ކ": "k"
183
+ "އ": "" # Notes 3 & 4
184
+ "ވ": "v"
185
+ "މ": "m"
186
+ "ފ": "f"
187
+ "ދ": "dh"
188
+ "ތ": "th" # Note 6
189
+ "ލ": "l"
190
+ "ގ": "g"
191
+ "ޏ": "gn"
192
+ "ސ": "s"
193
+ "ޑ": "d"
194
+ "ޖ": "j"
195
+ "ޗ": "ch"
196
+ "ޒ": "z"
197
+ "ޓ": "t"
198
+ "ޕ": "p"
199
+ "ޔ": "y"
200
+
@@ -0,0 +1,137 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2004
4
+ language: ics-630-01:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Arabic Personal Names Office of the Director Of National Intelligence 2004 System
8
+ url: https://github.com/interscript/interscript-private-references/blob/master/odni/Arabic_IC_Standard.doc
9
+ creation_date: 2004
10
+ confirmation date: 2004-06
11
+ description:
12
+ notes:
13
+ - Long/Short Vowels Long and short vowels are not
14
+ distinguished in this system Samir (could be Saamir or
15
+ Samiir in Arabic).
16
+
17
+ - Double consonants Double consonants represented by the
18
+ Arabic shaddah are shown in most cases (e.g., Hassan,
19
+ Muhammad). Exceptions ’ayn and consonants represented by
20
+ digraphs are not doubled (e.g., al-Qadhafi [not
21
+ al-Qadhdhafi], Mubashir [not Mubashshir]).
22
+
23
+ - Hamzah (glottal stop) The hamzah is represented by an
24
+ apostrophe (’). Note that this is the same symbol used to
25
+ represent another consonant, the ’ayn.
26
+
27
+ - Ta’ marbutah (feminine ending marker) On the construct
28
+ form or when pronounced “t”, it is represented with a roman
29
+ t. In all other cases, it is represented with an h.
30
+
31
+ - Digraphs No distinction is made between digraphs such as
32
+ sh and single contiguous letters (e.g., s followed by h).
33
+
34
+ - Definite article “al” (‘the’) Follows Arabic spelling
35
+ rather than pronunciation. That is, sun letter assimilation
36
+ is not shown in the Romanized form (e.g., ’Abd-al-Rahman,
37
+ not ’Abd-ar-Rahman).
38
+
39
+ - Diphthongs the second element of the diphthong is
40
+ represented by a y or a w (rather than an i or a u)
41
+ Haytham, Faysal, Tawfiq, Rawdah.
42
+
43
+ - Hyphens Hyphens (-) are used to connect name elements
44
+ within a name ’Abd-al-Rahman, Abu-al-Bashar, Bin-Ladin.
45
+ Exceptions Names that incorporate “Allah” as part of the
46
+ name (e.g., ’Abdallah, Nasrallah), names marked by the
47
+ lineage/family marker “Al” (e.g., Al Thani) are not
48
+ hyphenated.
49
+
50
+ - The definite article, “al”, within name phrases, is
51
+ Romanized as al and not as ul Nur-al-Din (not Nur-ul-Din).
52
+ It is not capitalized when name-initial.
53
+
54
+ - Names that incorporate Allah as part of the name retain the
55
+ a of Allah rather than a grammatical marker u ’Abdallah (
56
+ not ’Abdullah).
57
+
58
+ - Foreign names borrowed or appearing in Arabic are spelled
59
+ according to the standard Western tradition Georges,
60
+ Michel. However, names of non-Arabic origin no longer
61
+ considered foreign by Arabic speakers follow the IC
62
+ conventions Butrus (not Peter).
63
+
64
+ - Prefix ‫بن‬ (bin ‘son of’) is Romanized Bin unless written
65
+ with an alif, in which case it is Romanized as Ibn. The
66
+ colloquial form Bu (‘father’) should not be standardized as
67
+ Abu. These prefixes are capitalized.
68
+
69
+ - In general, Romanization follows the Modern Standard
70
+ Arabic (MSA) form rather than local pronunciation
71
+ standards. For example, the letter ‫ج‬ (jim) is represented
72
+ as a j even when pronounced as a “g” (e.g., Egyptian Gamal
73
+ is Romanized as Jamal).
74
+
75
+ tests:
76
+
77
+ - source: مِصر
78
+ expected: Miṣr
79
+
80
+ - source: قَطَر
81
+ expected: Qaṭar
82
+
83
+ - source: المَغرِب
84
+ expected: Al Maghrib
85
+
86
+ - source: الجُمهُورِيَّة العِراقِيَّة
87
+ expected: Al Jumhuriyah al ’Iraqiyah
88
+
89
+ - source: جُمهُورِيَّة العِراق
90
+ expected: Jumhuriyat al ’Iraq
91
+
92
+ - source: جُمهُورِيَّة مِصر العَرَبِيَّة
93
+ expected: Jumhuriyat Miṣr al ’Arabiyah
94
+
95
+ - source: بَغداد
96
+ expected: Baghdad
97
+
98
+ - source: تُونِس
99
+ expected: Tunis
100
+
101
+ - source: حَسّان
102
+ expected: Hassan
103
+
104
+ - source: مُحَمَّد
105
+ expected: Muhammad
106
+
107
+ - source: القَذَّافِي
108
+ expected: Al Qadhafi
109
+
110
+ - source: مُبَشِّر
111
+ expected: Mubashir
112
+
113
+ - source: الجَزائِر
114
+ expected: Al Jaza’ir
115
+
116
+ - source: عَبدالرَحمَن
117
+ expected: ’Abd al Rahman
118
+
119
+ - source: هَيْثَم
120
+ expected: Haytham
121
+
122
+ - source: فَيْصَل
123
+ expected: Fayṣal
124
+
125
+ - source: تَوْفِيق
126
+ expected: Tawfiq
127
+
128
+ - source: رَوْضَة
129
+ expected: Rawḍah
130
+
131
+ - source: نُورُالدِين
132
+ expected: Nur al Din
133
+
134
+ - source: عَبدُاللَّه
135
+ expected: ’Abdallah
136
+ map:
137
+ inherit: odni-ara-Arab-Latn-2015
@@ -1,10 +1,10 @@
1
1
  ---
2
- authority_id: ungegn
2
+ authority_id: odni
3
3
  id: 2017
4
4
  language: ics-630-01:ara
5
5
  source_script: Arab
6
6
  destination_script: Latn
7
- name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
7
+ name: Office of the Director Of National Intelligence Arabic Personal Names 2015 System
8
8
  url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20A%20-%20Arabic_Personal_Names_FLTS%20(U).pdf
9
9
  creation_date: 2017
10
10
  confirmation date: 2018-06
@@ -131,10 +131,10 @@ notes: |
131
131
  tests:
132
132
 
133
133
  - source: مِصر
134
- expected: Misr
134
+ expected: Miṣr
135
135
 
136
136
  - source: قَطَر
137
- expected: Qatar
137
+ expected: Qaṭar
138
138
 
139
139
  - source: المَغرِب
140
140
  expected: Al Maghrib
@@ -146,7 +146,7 @@ tests:
146
146
  expected: Jumhuriyat al ’Iraq
147
147
 
148
148
  - source: جُمهُورِيَّة مِصر العَرَبِيَّة
149
- expected: Jumhuriyat Misr al ’Arabiyah
149
+ expected: Jumhuriyat Miṣr al ’Arabiyah
150
150
 
151
151
  - source: بَغداد
152
152
  expected: Baghdad
@@ -170,22 +170,22 @@ tests:
170
170
  expected: Al Jaza’ir
171
171
 
172
172
  - source: عَبدالرَحمَن
173
- expected: ’Abd-al-Rahman
173
+ expected: ’Abd al Rahman
174
174
 
175
175
  - source: هَيْثَم
176
176
  expected: Haytham
177
177
 
178
178
  - source: فَيْصَل
179
- expected: Faysal
179
+ expected: Fayṣal
180
180
 
181
181
  - source: تَوْفِيق
182
182
  expected: Tawfiq
183
183
 
184
184
  - source: رَوْضَة
185
- expected: Rawdah
185
+ expected: Rawḍah
186
186
 
187
187
  - source: نُورُالدِين
188
- expected: Nur-al-Din
188
+ expected: Nur al Din
189
189
 
190
190
  - source: عَبدُاللَّه
191
191
  expected: ’Abdallah
@@ -195,8 +195,6 @@ map:
195
195
  result: "upcase"
196
196
  - pattern: " Al " # ال
197
197
  result: " al "
198
- - pattern: "-Al-" # ال
199
- result: "-al-"
200
198
 
201
199
  # don't capitalize defined article in the middle of a sentence
202
200
 
@@ -254,10 +252,10 @@ map:
254
252
  '\u0632\u0651' : 'zz' # ز
255
253
  '\u0633\u0651' : 'ss' # س
256
254
  '\u0634\u0651' : 'sh' # ش
257
- '\u0635\u0651' : 'ss' # ص
258
- '\u0636\u0651' : 'dd' # ض
259
- '\u0637\u0651' : 'tt' # ط
260
- '\u0638\u0651' : 'zz' # ظ
255
+ '\u0635\u0651' : 'ṣṣ' # ص
256
+ '\u0636\u0651' : 'ḍḍ' # ض
257
+ '\u0637\u0651' : 'ṭṭ' # ط
258
+ '\u0638\u0651' : 'ẓẓ' # ظ
261
259
  '\u063a\u0651' : 'gh' # غ
262
260
  '\u0641\u0651' : 'ff' # ف
263
261
  '\u0642\u0651' : 'qq' # ق
@@ -280,146 +278,38 @@ map:
280
278
  - ''
281
279
 
282
280
  '\b\u0627\u0644' : 'al ' # ال
283
- '\B\u064f?\u0627\u0644' : '-al-' # ال in middle of composite name
281
+ '\B\u064f?\u0627\u0644' : ' al ' # ال in middle of composite name
282
+
284
283
  # '\uFE8E' : '' # ﺎ
285
284
 
286
285
 
287
286
  '\u0623' : '' # أ
288
287
  '\b\u0627' : '' # ا
289
288
  '\u0627' : 'a' # ا
290
-
291
289
  '\u0628' : 'b' # ب
292
- '\uFE91' : 'b' # ﺑ
293
- '\uFE92' : 'b' # ﺒ
294
- '\uFE90' : 'b' # ﺐ
295
-
296
290
  '\u062a' : 't' # ت
297
- '\ufe97' : 't' # ﺗ
298
- '\ufe98' : 't' # ﺘ
299
- '\ufe96' : 't' # ﺖ
300
-
301
291
  '\u062b' : 'th' # ث
302
- '\ufe9b' : 'th' # ﺛ
303
- '\ufe9c' : 'th' # ﺜ
304
- '\ufe9a' : 'th' # ﺚ
305
-
306
292
  '\u062c' : 'j' # ج
307
- '\ufe9f' : 'j' # ﺟ
308
- '\ufea0' : 'j' # ﺠ
309
- '\ufe9e' : 'j' # ﺞ
310
-
311
293
  '\u062d' : 'h' # ح
312
- '\ufea3' : 'h' # ﺣ
313
- '\ufea4' : 'h' # ﺤ
314
- '\ufea2' : 'h' # ﺢ
315
-
316
294
  '\u062e' : 'kh' # خ
317
- '\ufea7' : 'kh' # ﺧ
318
- '\ufea8' : 'kh' # ﺨ
319
- '\ufea6' : 'kh' # ﺦ
320
-
321
295
  '\u062f' : 'd' # د
322
- '\ufeaa' : 'd' # ﺪ
323
-
324
296
  '\u0630' : 'dh' # ذ
325
- '\ufeac' : 'dh' # ﺬ
326
-
327
297
  '\u0631' : 'r' # ر
328
- '\ufeae' : 'r' # ﺮ
329
-
330
298
  '\u0632' : 'z' # ز
331
- '\ufeb0' : 'z' # ﺰ
332
-
333
299
  '\u0633' : 's' # س
334
- '\ufeb3' : 's' # ﺳ
335
- '\ufeb4' : 's' # ﺴ
336
- '\ufeb2' : 's' # ﺲ
337
-
338
300
  '\u0634' : 'sh' # ش
339
- '\ufeb7' : 'sh' #
340
- '\ufeb8' : 'sh' #
341
- '\ufeb6' : 'sh' #
342
-
343
- '\u0635' : 's' # ص
344
- '\ufebb' : 's' # ﺻ
345
- '\ufebc' : 's' # ﺼ
346
- '\ufeba' : 's' # ﺺ
347
-
348
- '\u0636' : 'd' # ض
349
- '\ufebf' : 'd' # ﺿ
350
- '\ufec0' : 'd' # ﻀ
351
- '\ufebe' : 'd' # ﺾ
352
-
353
- '\u0637' : 't' # ط
354
- '\ufec3' : 't' # ﻃ
355
- '\ufec4' : 't' # ﻄ
356
- '\ufec2' : 't' # ﻂ
357
-
358
- '\u0638' : 'z' # ظ
359
- '\ufec7' : 'z' # ﻇ
360
- '\ufec8' : 'z' # ﻈ
361
- '\ufec6' : 'z' # ﻆ
362
-
301
+ '\u0635' : 'ṣ' # ص
302
+ '\u0636' : 'ḍ' # ض
303
+ '\u0637' : 'ṭ' # ط
304
+ '\u0638' : 'ẓ' # ظ
363
305
  '\u0639' : '’' # ع
364
- '\ufecb' : '’' # ﻋ
365
- '\ufecc' : '’' # ﻌ
366
- '\ufeca' : '’' # ﻊ
367
-
368
306
  '\u063a' : 'gh' # غ
369
- '\ufecf' : 'gh' # ﻏ
370
- '\ufed0' : 'gh' # ﻐ
371
- '\ufece' : 'gh' # ﻎ
372
-
373
307
  '\u0641' : 'f' # ف
374
- '\ufed3' : 'f' # ﻓ
375
- '\ufed4' : 'f' # ﻔ
376
- '\ufed2' : 'f' # ﻒ
377
-
378
308
  '\u0642' : 'q' # ق
379
- '\ufed7' : 'q' # ﻗ
380
- '\ufed8' : 'q' # ﻘ
381
- '\ufed6' : 'q' # ﻖ
382
-
383
309
  '\u0643' : 'k' # ك
384
- '\ufedb' : 'k' # ﻛ
385
- '\ufedc' : 'k' # ﻜ
386
- '\ufeda' : 'k' # ﻚ
387
-
388
310
  '\u0644' : 'l' # ل
389
- '\ufedf' : 'l' # ﻟ
390
- '\ufee0' : 'l' # ﻠ
391
- '\ufede' : 'l' # ﻞ
392
-
393
311
  '\u0645' : 'm' # م
394
- '\ufee3' : 'm' # ﻣ
395
- '\ufee4' : 'm' # ﻤ
396
- '\ufee2' : 'm' # ﻢ
397
-
398
312
  '\u0646' : 'n' # ن
399
- '\ufee7' : 'n' # ﻧ
400
- '\ufee8' : 'n' # ﻨ
401
- '\ufee6' : 'n' # ﻦ
402
-
403
- # See note C
404
313
  '\u0647' : 'h' # ه
405
- '\ufeeb' : 'h' # ﻫ
406
- '\ufeec' : 'h' # ﻬ
407
- '\ufeea' : 'h' # ﻪ
408
-
409
314
  '\u0648' : 'w' # و
410
- '\ufeee' : 'w' #
411
-
412
- '\u064a' : 'y' # ي
413
- '\ufef3' : 'y' # ﻳ
414
- '\ufef4' : 'y' # ﻴ
415
- '\ufef1' : 'y' # ﻱ
416
-
417
- # (A) Not romanized word-initially.
418
-
419
- # (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
420
-
421
- # (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṯah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t, instead.
422
-
423
-
424
- # Vowels, diphthongs and diacritical marks
425
- # (ـ stands for any consonant)
315
+ '\u064a' : 'y' # ي