interscript 0.1.7 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,90 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2005
4
+ language: iso-639-2:bul
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Intelligence Community (IC) Standard for the Transliteration of Bulgarian Personal Names
8
+ creation_date: 2005
9
+ description:
10
+
11
+ tests:
12
+ - source: Добри Христов
13
+ expected: Dobri Khristov
14
+ - source: болгарица
15
+ expected: bolgaritsa
16
+ - source: български език
17
+ expected: bulgarski ezik
18
+ - source: българска азбука
19
+ expected: bulgarska azbuka
20
+ - source: град
21
+ expected: grad
22
+ - source: аз държа
23
+ expected: az durzha
24
+ - source: Ядеш хляба с чубрица
25
+ expected: Yadesh khlyaba s chubritsa
26
+
27
+ map:
28
+ characters:
29
+ '\u0410': 'A'
30
+ '\u0411': 'B'
31
+ '\u0412': 'V'
32
+ '\u0413': 'G'
33
+ '\u0414': 'D'
34
+ '\u0415': 'E'
35
+ '\u0416': 'Zh'
36
+ '\u0417': 'Z'
37
+ '\u0418': 'I'
38
+ '\u0419': 'Y'
39
+ '\u041a': 'K'
40
+ '\u041b': 'L'
41
+ '\u041c': 'M'
42
+ '\u041d': 'N'
43
+ '\u041e': 'O'
44
+ '\u041f': 'P'
45
+ '\u0420': 'R'
46
+ '\u0421': 'S'
47
+ '\u0422': 'T'
48
+ '\u0423': 'U'
49
+ '\u0424': 'F'
50
+ '\u0425': 'Kh'
51
+ '\u0426': 'Ts'
52
+ '\u0427': 'Ch'
53
+ '\u0428': 'Sh'
54
+ '\u0429': 'Sht'
55
+ '\u042a': 'U'
56
+ '\u042c': 'Y'
57
+ '\u042e': 'Yu'
58
+ '\u042f': 'Ya'
59
+
60
+ '\u0430': 'a'
61
+ '\u0431': 'b'
62
+ '\u0432': 'v'
63
+ '\u0433': 'g'
64
+ '\u0434': 'd'
65
+ '\u0435': 'e'
66
+ '\u0436': 'zh'
67
+ '\u0437': 'z'
68
+ '\u0438': 'i'
69
+ '\u0439': 'y'
70
+ '\u043a': 'k'
71
+ '\u043b': 'l'
72
+ '\u043c': 'm'
73
+ '\u043d': 'n'
74
+ '\u043e': 'o'
75
+ '\u043f': 'p'
76
+ '\u0440': 'r'
77
+ '\u0441': 's'
78
+ '\u0442': 't'
79
+ '\u0443': 'u'
80
+ '\u0444': 'f'
81
+ '\u0445': 'kh'
82
+ '\u0446': 'ts'
83
+ '\u0447': 'ch'
84
+ '\u0448': 'sh'
85
+ '\u0449': 'sht'
86
+ '\u044a': 'u'
87
+ '\u044c': 'y'
88
+ '\u044e': 'yu'
89
+ '\u044f': 'ya'
90
+
@@ -0,0 +1,276 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2004
4
+ language: iso-639-2:fas
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Intelligence Community (IC) Standard for the Transliteration of Farsi (Persian) Personal Names (2004)
8
+ url: https://github.com/interscript/interscript-private-references/blob/master/odni/Farsi_(Persian)_%26_Dari_IC_Standards.doc
9
+ creation_date: 2004
10
+ confirmation_date: 2004-11
11
+ description: |
12
+
13
+ notes:
14
+ - Long/short vowels:- There is no distinction made in Roman
15
+ between long and short a:- E.g., Parvas (first a is short,
16
+ second is long).
17
+ - Double consonants:- Double consonants represented by the
18
+ tashdid are shown by doubling the Roman letter:-
19
+ Mo'azzami. Exceptions:- Ain and consonants represented by
20
+ Roman digraphs (e.g., sh, ch) are not doubled:- Mobasher [
21
+ not:- Mobashsher]. Double letters are only used for
22
+ tashdid (thus, Hosein [not Hossein]) or to reflect the ‘sun
23
+ letter’ assimilation (see below).
24
+ - Hamzeh:- The hamzeh is represented name-internally by an
25
+ apostrophe, as is the ain. Name-initially, however,
26
+ neither hamzeh nor ain are indicated in transliteration (
27
+ e.g., Abdorrahman, not 'Abdorrahman).
28
+ - Digraphs:- No distinction is drawn in Roman between
29
+ digraphs such as sh and single contiguous letters (e.g., s
30
+ followed by h).
31
+ - Arabic definite article "al" ('the'):- Common in many
32
+ names borrowed from Arabic, the transliteration should
33
+ follow the Arabic rules for “sun letter” assimilation in
34
+ spoken form and reflect the nominative case. That is:-
35
+ Abdorrahman, not Abd al-Rahman. Note also that the
36
+ “Abdollah” and “Abdol + attribute of Allah” names are
37
+ written as one unanalyzed word, as are other names that
38
+ contain the definite article:- Shamsoddin (not Shams al-
39
+ Din), Nezamoddin, etc.
40
+ - Diphthongs:- Diphthongs are written ei and ow, as in,
41
+ respectively:- Hosein; Khosrow.
42
+ - Yeh maqsura (final yeh pronounced as “a”):- should be
43
+ written as “a” as in “Musa”.
44
+
45
+ - Special Rules
46
+
47
+ - Hyphens:- A hyphen is used to indicate the ezafeh
48
+ construction:- Arshad-e Ameri
49
+ - Borrowed names that incorporate the name of God (Allah)
50
+ are transliterated as one word, with the letter "o":- E.g.,
51
+ Abdollah, Ayatollah, Azizollah.
52
+ - Foreign names borrowed or appearing in Farsi are spelled
53
+ according to the standard Western tradition (even if there
54
+ is an Arabic or Farsi version of the same name):- Joseph,
55
+ Michael.
56
+ - Common suffixes, such as nia, pur, fard, far, abad,
57
+ zadeh, khah, and nezhad as well as nesbeh (‘relationship’ (
58
+ to place of birth, etc.)) names derived with these
59
+ suffixes (e.g., nezhadi, abadi) are written as part of the
60
+ name:-
61
+
62
+ asa Mehrasa
63
+ baksh Tajbaksh
64
+ dust Rafighdust
65
+ far Parvizfar
66
+ fard Akhavanfard
67
+ gar Fuladgar
68
+ gol Zarringol
69
+ kar Parhizkar
70
+ khah Vatankhah
71
+ khu Nikkhu
72
+ mand Purmand
73
+ mehr Zadmehr
74
+ nezhad Niknezhad
75
+ nia Montajebnia
76
+ parast Khodaparast
77
+ parvar Golparvar
78
+ pur Mohteshemipur
79
+ tabar Shayestehtabar
80
+ yar Mohammadyar
81
+ zadeh Vakilzadeh
82
+
83
+ abadi Salehabadi
84
+ khani Alikhani
85
+ nezhadi Niknezhadi
86
+
87
+ - Note also that yar can function as a prefix and, as such,
88
+ should be affixed directly to the name:-
89
+
90
+ yar Yarmohammadi, Yarshater
91
+
92
+ - This is in contrast with hyphenated names such as Raja’i-
93
+ Khorasani, Tabataba’i-Shirazi, Soleimani-Maimandi, etc.
94
+
95
+ tests:
96
+ - source: مُوسَى
97
+ expected: musa
98
+
99
+ - source: مُؤمِن
100
+ expected: mo’men
101
+
102
+ - source: رِضايي
103
+ expected: reza’i
104
+
105
+ - source: مُبَشِّر
106
+ expected: mobasher
107
+
108
+ - source: حَسَّان
109
+ expected: hassan
110
+
111
+ - source: حَسَن
112
+ expected: hasan
113
+
114
+ - source: صَفَّار
115
+ expected: saffar
116
+
117
+ - source: صَفَر
118
+ expected: safar
119
+
120
+ map:
121
+ characters:
122
+ # special rules
123
+
124
+ '\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
125
+ '\ufdf2': 'Allah' # See note 5
126
+ '\s\u0627\u0644\u0644\u0651\u064e\u0647': 'ollah' # NOTE 9
127
+
128
+ '\u0652' : '' # ْ sokoon
129
+ '\u0659': 'ê'
130
+
131
+ '\u064e\u064a\u0652' : 'ay' # ـَيْ
132
+ '\u0649\u0670': 'á' # ىٰ
133
+ '\u0674': '-e' # ٴ
134
+ '\u0654': '-e' # ٔ
135
+ # - '-ye'
136
+
137
+
138
+ # ta' marboota
139
+ '\u0629' : 'eh'
140
+
141
+
142
+
143
+ '\u0626' : '’' # ئ
144
+ '\u0624' : '’' # ؤ
145
+ '\u0623' : '' # أ
146
+ '\u0625': '' # إ
147
+
148
+ # See note B
149
+ '\b\u0627\u0644' : 'al ' # ال
150
+ '\b\u0622\u0644' : 'Al ' # ‫آل‬
151
+ # '\uFE8E' : '' # ﺎ
152
+
153
+ # Sun letters
154
+ '\b\u0627\u0644\u062a' : 'at t' # الت
155
+ '\b\u0627\u0644\u062b' : 'as s' # الث
156
+ '\b\u0627\u0644\u062f' : 'ad d' # الد
157
+ '\b\u0627\u0644\u0630' : 'az z' # الذ
158
+ '\b\u0627\u0644\u0631' : 'ar r' # الر
159
+ '\b\u0627\u0644\u0632' : 'az z' # الز
160
+ '\b\u0627\u0644\u0633' : 'as s' # الس
161
+ '\b\u0627\u0644\u0634' : 'ash sh' # الش
162
+ '\b\u0627\u0644\u0635' : 'as s' # الص
163
+ '\b\u0627\u0644\u0636' : 'az z' # الض
164
+ '\b\u0627\u0644\u0637' : 'at t' # الط
165
+ '\b\u0627\u0644\u0638' : 'az z' # الظ
166
+ '\b\u0627\u0644\u0644' : 'al l' # الل
167
+ '\b\u0627\u0644\u0646' : 'an n' # الن
168
+
169
+ # Farsi Vowel (Pointing)
170
+ '\u0622' : 'a' # آ alef maddeh
171
+ '\u064e' : 'a' # َ fatha
172
+ '(?<=\u064e)\u0627' : '' # ا
173
+ '(?<!\b)\u0627' : 'a' # ا
174
+ '\b\u0627\u064e' : 'a' # ا initial followed by fatha
175
+ '\b\u0627\u064f' : 'o' # ا initial followed by damma
176
+ '\b\u0627\u0650' : 'e' # ِ ا initial followed by kasra
177
+
178
+ '\u064f' : 'o' # damma
179
+ '\u064f\u0648' : 'u' # ـُو damma followed by و
180
+ # '\u064e\u0648' : 'ow' # ـَو
181
+ # '\u064e\u0648\u0652' : 'aw' # ـَوْ
182
+
183
+
184
+ '\u0650' : 'e' # kasra
185
+ '\u0650\u064a' : 'i' # ـِي kasra followed by ي
186
+ '\u0650\u06cc' : 'i' # ـِي kasra followed by ي
187
+ '\u0650\u064a\u0651\u064e' : 'iy' # ـِيَّ
188
+ '\u0650\u06cc\u0651\u064e' : 'iy' # ـِيَّ
189
+ '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
190
+ # '\u064e\u064a' : 'aī' # ـَي
191
+ # '\u064e\u06cc' : 'aī' # ـَي
192
+ # '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
193
+
194
+ # additional symbols
195
+
196
+ # shadda
197
+
198
+ '\u0628\u0651' : 'bb' # ب
199
+ '\u062a\u0651' : 'tt' # ت
200
+ '\u062b\u0651' : 'ss' # ث
201
+ '\u062c\u0651' : 'jj' # ج
202
+ '\u062d\u0651' : 'hh' # ح
203
+ '\u062e\u0651' : 'kh' # خ
204
+ '\u062f\u0651' : 'dd' # د
205
+ '\u0630\u0651' : 'zz' # ذ
206
+ '\u0631\u0651' : 'rr' # ر
207
+ '\u0632\u0651' : 'zz' # ز
208
+ '\u0633\u0651' : 'ss' # س
209
+ '\u0634\u0651' : 'sh' # ش
210
+ '\u0635\u0651' : 'ss' # ص
211
+ '\u0636\u0651' : 'zz' # ض
212
+ '\u0637\u0651' : 'tt' # ط
213
+ '\u0638\u0651' : 'zz' # ظ
214
+ '\u063a\u0651' : 'gh' # غ
215
+ '\u0641\u0651' : 'ff' # ف
216
+ '\u0642\u0651' : 'gh' # ق
217
+ '\u0643\u0651' : 'kk' # ك
218
+ '\u0644\u0651' : 'll' # ل
219
+ '\u0645\u0651' : 'mm' # م
220
+ '\u0646\u0651' : 'nn' # ن
221
+ '\u0647\u0651' : 'hh' # ه
222
+ '\u0648\u0651' : 'vv' # و
223
+ '\u064a\u0651' : 'yy' # ي
224
+
225
+ '(?<=\b)\u0621': '' # ء
226
+ '\u0621': '’' # ء
227
+
228
+ # FROM NOTES
229
+
230
+ '\u064e\u0649' : 'a' # ـَى fatha followed by ى which is ا not ي
231
+ '\u0649' : 'a' # ى alef maqsura NOTE-1
232
+
233
+ '\u064a\u064a' : '’i' # NOTE 4 (2)
234
+ '\u06cc\u06cc' : '’i'
235
+
236
+ '\u0627\u064a\b' : '’i' # NOTE 4 (3)
237
+ '\u0627\u06cc\b' : '’i'
238
+
239
+ # Farsi consonant characters
240
+
241
+ '\u0628' : 'b' # ب
242
+ '\u067E' : 'p' # پ
243
+ '\u062a' : 't' # ت
244
+ '\u062B' : 's' # ث
245
+ '\u062c' : 'j' # ج
246
+ '\u0686' : 'ch' # ‫چ‬
247
+ '\u062d' : 'h' # ح
248
+ '\u062e' : 'kh' # خ
249
+ '\u062f' : 'd' # د
250
+ '\u0630' : 'z' # ذ
251
+ '\u0631' : 'r' # ر
252
+ '\u0632' : 'z' # ز
253
+ '\u0698' : 'zh' # ‫ژ‬
254
+ '\u0633' : 's' # س
255
+ '\u0634' : 'sh' # ش
256
+ '\u0635' : 's' # ص
257
+ '\u0636' : 'z' # ض
258
+ '\u0637' : 't' # ط
259
+ '\u0638' : 'z' # ظ
260
+ '\u0639' : '‘' # ع
261
+ '(?<=\b)\u0639' : '' # ع not represented initially
262
+ '\u063a' : 'gh' # غ
263
+ '\u0641' : 'f' # ف
264
+ '\u0642' : 'gh' # ق
265
+ '\u0643' : 'k' # ك
266
+ '\u06A9' : 'k' # ک
267
+ '\u06AF' : 'g' # ‫گ‬
268
+ '\u0644' : 'l' # ل
269
+ '\u0645' : 'm' # م
270
+ '\u0646' : 'n' # ن
271
+ '\u0647' : 'h' # ه
272
+ '\u0648' : 'v' # و
273
+ '\u064a' : 'y' # ي
274
+ '\u0649' : 'y' # ي
275
+ '\u06D0' : 'ē' # ې
276
+ '\u06CD' : 'êy' # ‫ۍ‬
@@ -0,0 +1,182 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2004
4
+ language: iso-639-2:hin
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: IC STANDARDS FOR TRANSLITERATION OF HINDI AND URDU PERSONAL NAMES
8
+ url: https://github.com/interscript/interscript-private-references/blob/master/odni/Hindi_and_Urdu_IC_Standard.doc
9
+ creation_date: 2004
10
+ description: |
11
+ IC STANDARDS FOR TRANSLITERATION OF HINDI AND URDU PERSONAL NAMES
12
+
13
+ notes:
14
+ - |
15
+ Long/Short Vowels: Long and short vowels are not distinguished in the system:
16
+ The borrowed Arabic name Samir could represent two distinct names, one with a
17
+ long /a/ (Saamir) and one with a long /i/ (Samiir). One solution would be to use
18
+ /ee/ to stand for the long /i/, as is often done (Sameer). The IC Standard will not
19
+ distinguish between these.
20
+ - |
21
+ No distinction is made between: retroflex and non-retroflex consonants; and
22
+ nasalized vowels and vowels followed by /n/.
23
+ - |
24
+ A distinction is drawn between Urdu letters qaf and kaf (and correspondingly,
25
+ Hindi qa and ka).
26
+ - |
27
+ A distinction is drawn between nonaspirated (e.g., /d/) and aspirated consonants
28
+ (e.g., /dh/), with the exception of ch/chh, both represented by /ch/.
29
+ - |
30
+ Digraphs: No distinction is made between digraphs such as /sh/ and single
31
+ contiguous letters such as /s/ followed by /h/.
32
+ - |
33
+ Hyphens: Hyphens (-) are NOT used to connect name elements within a name:
34
+ Abdur Rahman. The single exception to this is the izafat (i.e., linking vowel in
35
+ noun-link-modifier construction of Persian origin), which does show a hyphen
36
+ before the /e/ and a following space: Koh-e Nur (‘mountain of light’), “Jaish-e
37
+ xx” (‘Army of xx’ construction).
38
+ - |
39
+ Names incorporating “din” are written as one unit: Azermuddin, Badruddin,
40
+ Faizuddin, Salahuddin.
41
+ - |
42
+ Names that incorporate Allah as part of the name show the Arabic grammatical
43
+ marker /u/ rather than the /a/ of Allah: Abdullah (not Abdallah).
44
+ - |
45
+ Inherent short vowel /a/ in Devanagari is represented with an /a/ in Roman. Final
46
+ consonants are assumed not to have a short /a/ (e.g., masc. name Ram Lal, not
47
+ Rama Lala).
48
+ - |
49
+ As a general rule, Devanagari va is transcribed as a /v/: Vijay, Vishal, etc.
50
+ Exception: /sw/ combination: Saraswati, Krishnaswami. Urdu wau, however, is
51
+ transcribed as /w/: Wasim, Walid.
52
+
53
+ tests:
54
+ - source: "दिल्ली"
55
+ expected: "dilli"
56
+ - source: "भारत"
57
+ expected: "bhart"
58
+ - source: "विजय"
59
+ expected: "vijy"
60
+ - source: "विशाल"
61
+ expected: "vishal"
62
+ - source: "अब्दुल्ला"
63
+ expected: "abdulla"
64
+ - source: "संख्या"
65
+ expected: "snkhya"
66
+ - source: "संख्या"
67
+ expected: "snkhya"
68
+ - source: "समीर"
69
+ expected: "smir"
70
+ - source: "सरस्वती"
71
+ expected: "srsvti"
72
+ - source: "कृष्णास्वामी"
73
+ expected: "krishnasvami"
74
+
75
+ map:
76
+
77
+ characters:
78
+
79
+ #Independent vowel characters
80
+ 'अ': 'a'
81
+ 'आ': 'a'
82
+ 'इ': 'i'
83
+ 'ई': 'i'
84
+ 'उ': 'u'
85
+ 'ऊ': 'u'
86
+ 'ऋ': 'ri'
87
+ 'ऌ': 'l̤'
88
+ 'ए': 'e'
89
+ 'ऐ': 'ai'
90
+ 'ओ': 'o'
91
+ 'ऑ': 'au'
92
+ 'औ': 'au'
93
+
94
+ #Dependent Vowels
95
+ 'ा': "a"
96
+ 'ि': "i"
97
+ 'ी': "i"
98
+ 'ु': "u"
99
+ 'ू': "u"
100
+ 'ृ': "ri"
101
+ 'े': "e"
102
+ 'ै': "ai"
103
+ 'ॅ': "ai"
104
+ 'ो': "o"
105
+ 'ौ': "au"
106
+ 'ॉ': "au"
107
+
108
+
109
+ # Consonants
110
+
111
+ # Gutturals
112
+ 'क': 'k'
113
+ 'क्ष': 'ksha'
114
+ 'क़': 'q'
115
+ 'ख': 'kh'
116
+ 'ख़': 'kh'
117
+ 'ग': 'g'
118
+ 'ग़': 'gh'
119
+ 'घ': 'gh'
120
+ 'ङ': 'n'
121
+
122
+ # Palatals
123
+ 'च': 'ch'
124
+ 'छ': 'ch'
125
+ 'ज': 'j'
126
+ 'ज़': 'z'
127
+ 'ज्ञ': 'gya'
128
+ 'झ': 'jh'
129
+ 'ञ': 'n'
130
+
131
+ # Cerebrals
132
+ 'ट': 't'
133
+ 'ठ': 'th'
134
+ 'ड': 'd'
135
+ 'ड़': 'r'
136
+ 'ढ़': 'rh'
137
+ 'ढ': 'dh'
138
+ 'ण': 'n'
139
+
140
+ # Dentals
141
+ 'त': 't'
142
+ 'थ': 'th'
143
+ 'द': 'd'
144
+ 'ध': 'dh'
145
+ 'न': 'n'
146
+
147
+ # Labials
148
+ 'प': 'p'
149
+ 'फ़': 'f'
150
+ 'फ': 'ph'
151
+ 'ब': 'b'
152
+ 'भ': 'bh'
153
+ 'म': 'm'
154
+
155
+ # Semivowels
156
+ 'य': 'y'
157
+ 'र': 'r'
158
+ 'ल': 'l'
159
+ 'व': 'v'
160
+
161
+ # Sibilants
162
+ 'श': 'sh'
163
+ 'ष': 'sh'
164
+ 'स': 's'
165
+
166
+
167
+ # Aspirate
168
+ 'ह': 'h'
169
+
170
+ # Anusvāra
171
+ 'ं': 'n'
172
+
173
+ # Anunāsika
174
+ 'ँ': 'n'
175
+
176
+ # halanta
177
+ '्': ''
178
+
179
+ # bisharga
180
+ 'ः': 'h'
181
+
182
+ '़': ''