interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,90 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2005
4
+ language: iso-639-2:bul
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Intelligence Community (IC) Standard for the Transliteration of Bulgarian Personal Names
8
+ creation_date: 2005
9
+ description:
10
+
11
+ tests:
12
+ - source: Добри Христов
13
+ expected: Dobri Khristov
14
+ - source: болгарица
15
+ expected: bolgaritsa
16
+ - source: български език
17
+ expected: bulgarski ezik
18
+ - source: българска азбука
19
+ expected: bulgarska azbuka
20
+ - source: град
21
+ expected: grad
22
+ - source: аз държа
23
+ expected: az durzha
24
+ - source: Ядеш хляба с чубрица
25
+ expected: Yadesh khlyaba s chubritsa
26
+
27
+ map:
28
+ characters:
29
+ '\u0410': 'A'
30
+ '\u0411': 'B'
31
+ '\u0412': 'V'
32
+ '\u0413': 'G'
33
+ '\u0414': 'D'
34
+ '\u0415': 'E'
35
+ '\u0416': 'Zh'
36
+ '\u0417': 'Z'
37
+ '\u0418': 'I'
38
+ '\u0419': 'Y'
39
+ '\u041a': 'K'
40
+ '\u041b': 'L'
41
+ '\u041c': 'M'
42
+ '\u041d': 'N'
43
+ '\u041e': 'O'
44
+ '\u041f': 'P'
45
+ '\u0420': 'R'
46
+ '\u0421': 'S'
47
+ '\u0422': 'T'
48
+ '\u0423': 'U'
49
+ '\u0424': 'F'
50
+ '\u0425': 'Kh'
51
+ '\u0426': 'Ts'
52
+ '\u0427': 'Ch'
53
+ '\u0428': 'Sh'
54
+ '\u0429': 'Sht'
55
+ '\u042a': 'U'
56
+ '\u042c': 'Y'
57
+ '\u042e': 'Yu'
58
+ '\u042f': 'Ya'
59
+
60
+ '\u0430': 'a'
61
+ '\u0431': 'b'
62
+ '\u0432': 'v'
63
+ '\u0433': 'g'
64
+ '\u0434': 'd'
65
+ '\u0435': 'e'
66
+ '\u0436': 'zh'
67
+ '\u0437': 'z'
68
+ '\u0438': 'i'
69
+ '\u0439': 'y'
70
+ '\u043a': 'k'
71
+ '\u043b': 'l'
72
+ '\u043c': 'm'
73
+ '\u043d': 'n'
74
+ '\u043e': 'o'
75
+ '\u043f': 'p'
76
+ '\u0440': 'r'
77
+ '\u0441': 's'
78
+ '\u0442': 't'
79
+ '\u0443': 'u'
80
+ '\u0444': 'f'
81
+ '\u0445': 'kh'
82
+ '\u0446': 'ts'
83
+ '\u0447': 'ch'
84
+ '\u0448': 'sh'
85
+ '\u0449': 'sht'
86
+ '\u044a': 'u'
87
+ '\u044c': 'y'
88
+ '\u044e': 'yu'
89
+ '\u044f': 'ya'
90
+
@@ -0,0 +1,276 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2004
4
+ language: iso-639-2:fas
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Intelligence Community (IC) Standard for the Transliteration of Farsi (Persian) Personal Names (2004)
8
+ url: https://github.com/interscript/interscript-private-references/blob/master/odni/Farsi_(Persian)_%26_Dari_IC_Standards.doc
9
+ creation_date: 2004
10
+ confirmation_date: 2004-11
11
+ description: |
12
+
13
+ notes:
14
+ - Long/short vowels:- There is no distinction made in Roman
15
+ between long and short a:- E.g., Parvas (first a is short,
16
+ second is long).
17
+ - Double consonants:- Double consonants represented by the
18
+ tashdid are shown by doubling the Roman letter:-
19
+ Mo'azzami. Exceptions:- Ain and consonants represented by
20
+ Roman digraphs (e.g., sh, ch) are not doubled:- Mobasher [
21
+ not:- Mobashsher]. Double letters are only used for
22
+ tashdid (thus, Hosein [not Hossein]) or to reflect the ‘sun
23
+ letter’ assimilation (see below).
24
+ - Hamzeh:- The hamzeh is represented name-internally by an
25
+ apostrophe, as is the ain. Name-initially, however,
26
+ neither hamzeh nor ain are indicated in transliteration (
27
+ e.g., Abdorrahman, not 'Abdorrahman).
28
+ - Digraphs:- No distinction is drawn in Roman between
29
+ digraphs such as sh and single contiguous letters (e.g., s
30
+ followed by h).
31
+ - Arabic definite article "al" ('the'):- Common in many
32
+ names borrowed from Arabic, the transliteration should
33
+ follow the Arabic rules for “sun letter” assimilation in
34
+ spoken form and reflect the nominative case. That is:-
35
+ Abdorrahman, not Abd al-Rahman. Note also that the
36
+ “Abdollah” and “Abdol + attribute of Allah” names are
37
+ written as one unanalyzed word, as are other names that
38
+ contain the definite article:- Shamsoddin (not Shams al-
39
+ Din), Nezamoddin, etc.
40
+ - Diphthongs:- Diphthongs are written ei and ow, as in,
41
+ respectively:- Hosein; Khosrow.
42
+ - Yeh maqsura (final yeh pronounced as “a”):- should be
43
+ written as “a” as in “Musa”.
44
+
45
+ - Special Rules
46
+
47
+ - Hyphens:- A hyphen is used to indicate the ezafeh
48
+ construction:- Arshad-e Ameri
49
+ - Borrowed names that incorporate the name of God (Allah)
50
+ are transliterated as one word, with the letter "o":- E.g.,
51
+ Abdollah, Ayatollah, Azizollah.
52
+ - Foreign names borrowed or appearing in Farsi are spelled
53
+ according to the standard Western tradition (even if there
54
+ is an Arabic or Farsi version of the same name):- Joseph,
55
+ Michael.
56
+ - Common suffixes, such as nia, pur, fard, far, abad,
57
+ zadeh, khah, and nezhad as well as nesbeh (‘relationship’ (
58
+ to place of birth, etc.)) names derived with these
59
+ suffixes (e.g., nezhadi, abadi) are written as part of the
60
+ name:-
61
+
62
+ asa Mehrasa
63
+ baksh Tajbaksh
64
+ dust Rafighdust
65
+ far Parvizfar
66
+ fard Akhavanfard
67
+ gar Fuladgar
68
+ gol Zarringol
69
+ kar Parhizkar
70
+ khah Vatankhah
71
+ khu Nikkhu
72
+ mand Purmand
73
+ mehr Zadmehr
74
+ nezhad Niknezhad
75
+ nia Montajebnia
76
+ parast Khodaparast
77
+ parvar Golparvar
78
+ pur Mohteshemipur
79
+ tabar Shayestehtabar
80
+ yar Mohammadyar
81
+ zadeh Vakilzadeh
82
+
83
+ abadi Salehabadi
84
+ khani Alikhani
85
+ nezhadi Niknezhadi
86
+
87
+ - Note also that yar can function as a prefix and, as such,
88
+ should be affixed directly to the name:-
89
+
90
+ yar Yarmohammadi, Yarshater
91
+
92
+ - This is in contrast with hyphenated names such as Raja’i-
93
+ Khorasani, Tabataba’i-Shirazi, Soleimani-Maimandi, etc.
94
+
95
+ tests:
96
+ - source: مُوسَى
97
+ expected: musa
98
+
99
+ - source: مُؤمِن
100
+ expected: mo’men
101
+
102
+ - source: رِضايي
103
+ expected: reza’i
104
+
105
+ - source: مُبَشِّر
106
+ expected: mobasher
107
+
108
+ - source: حَسَّان
109
+ expected: hassan
110
+
111
+ - source: حَسَن
112
+ expected: hasan
113
+
114
+ - source: صَفَّار
115
+ expected: saffar
116
+
117
+ - source: صَفَر
118
+ expected: safar
119
+
120
+ map:
121
+ characters:
122
+ # special rules
123
+
124
+ '\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
125
+ '\ufdf2': 'Allah' # See note 5
126
+ '\s\u0627\u0644\u0644\u0651\u064e\u0647': 'ollah' # NOTE 9
127
+
128
+ '\u0652' : '' # ْ sokoon
129
+ '\u0659': 'ê'
130
+
131
+ '\u064e\u064a\u0652' : 'ay' # ـَيْ
132
+ '\u0649\u0670': 'á' # ىٰ
133
+ '\u0674': '-e' # ٴ
134
+ '\u0654': '-e' # ٔ
135
+ # - '-ye'
136
+
137
+
138
+ # ta' marboota
139
+ '\u0629' : 'eh'
140
+
141
+
142
+
143
+ '\u0626' : '’' # ئ
144
+ '\u0624' : '’' # ؤ
145
+ '\u0623' : '' # أ
146
+ '\u0625': '' # إ
147
+
148
+ # See note B
149
+ '\b\u0627\u0644' : 'al ' # ال
150
+ '\b\u0622\\u0644' : 'Al ' # ‫آل‬
151
+ # '\uFE8E' : '' # ﺎ
152
+
153
+ # Sun letters
154
+ '\b\u0627\u0644\u062a' : 'at t' # الت
155
+ '\b\u0627\u0644\u062b' : 'as s' # الث
156
+ '\b\u0627\u0644\u062f' : 'ad d' # الد
157
+ '\b\u0627\u0644\u0630' : 'az z' # الذ
158
+ '\b\u0627\u0644\u0631' : 'ar r' # الر
159
+ '\b\u0627\u0644\u0632' : 'az z' # الز
160
+ '\b\u0627\u0644\u0633' : 'as s' # الس
161
+ '\b\u0627\u0644\u0634' : 'ash sh' # الش
162
+ '\b\u0627\u0644\u0635' : 'as s' # الص
163
+ '\b\u0627\u0644\u0636' : 'az z' # الض
164
+ '\b\u0627\u0644\u0637' : 'at t' # الط
165
+ '\b\u0627\u0644\u0638' : 'az z' # الظ
166
+ '\b\u0627\u0644\u0644' : 'al l' # الل
167
+ '\b\u0627\u0644\u0646' : 'an n' # الن
168
+
169
+ # Farsi Vowel (Pointing)
170
+ '\u0622' : 'a' # آ alef maddeh
171
+ '\u064e' : 'a' # َ fatha
172
+ '(?<=\u064e)\u0627' : '' # ا
173
+ '(?<!\b)\u0627' : 'a' # ا
174
+ '\b\u0627\u064e' : 'a' # ا initial followed by fatha
175
+ '\b\u0627\u064f' : 'o' # ا initial followed by damma
176
+ '\b\u0627\u0650' : 'e' # ِ ا initial followed by kasra
177
+
178
+ '\u064f' : 'o' # damma
179
+ '\u064f\u0648' : 'u' # ـُو damma followed by و
180
+ # '\u064e\u0648' : 'ow' # ـَو
181
+ # '\u064e\u0648\u0652' : 'aw' # ـَوْ
182
+
183
+
184
+ '\u0650' : 'e' # kasra
185
+ '\u0650\u064a' : 'i' # ـِي kasra followed by ي
186
+ '\u0650\u06cc' : 'i' # ـِي kasra followed by ي
187
+ '\u0650\u064a\u0651\u064e' : 'iy' # ـِيَّ
188
+ '\u0650\u06cc\u0651\u064e' : 'iy' # ـِيَّ
189
+ '\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي
190
+ # '\u064e\u064a' : 'aī' # ـَي
191
+ # '\u064e\u06cc' : 'aī' # ـَي
192
+ # '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
193
+
194
+ # additional symbols
195
+
196
+ # shadda
197
+
198
+ '\u0628\u0651' : 'bb' # ب
199
+ '\u062a\u0651' : 'tt' # ت
200
+ '\u062b\u0651' : 'ss' # ث
201
+ '\u062c\u0651' : 'jj' # ج
202
+ '\u062d\u0651' : 'hh' # ح
203
+ '\u062e\u0651' : 'kh' # خ
204
+ '\u062f\u0651' : 'dd' # د
205
+ '\u0630\u0651' : 'zz' # ذ
206
+ '\u0631\u0651' : 'rr' # ر
207
+ '\u0632\u0651' : 'zz' # ز
208
+ '\u0633\u0651' : 'ss' # س
209
+ '\u0634\u0651' : 'sh' # ش
210
+ '\u0635\u0651' : 'ss' # ص
211
+ '\u0636\u0651' : 'zz' # ض
212
+ '\u0637\u0651' : 'tt' # ط
213
+ '\u0638\u0651' : 'zz' # ظ
214
+ '\u063a\u0651' : 'gh' # غ
215
+ '\u0641\u0651' : 'ff' # ف
216
+ '\u0642\u0651' : 'gh' # ق
217
+ '\u0643\u0651' : 'kk' # ك
218
+ '\u0644\u0651' : 'll' # ل
219
+ '\u0645\u0651' : 'mm' # م
220
+ '\u0646\u0651' : 'nn' # ن
221
+ '\u0647\u0651' : 'hh' # ه
222
+ '\u0648\u0651' : 'vv' # و
223
+ '\u064a\u0651' : 'yy' # ي
224
+
225
+ '(?<=\b)\u0621': '' # ء
226
+ '\u0621': '’' # ء
227
+
228
+ # FROM NOTES
229
+
230
+ '\u064e\u0649' : 'a' # ـَى fatha followed by ى which is ا not ي
231
+ '\u0649' : 'a' # ى alef maqsura NOTE-1
232
+
233
+ '\u064a\u064a' : '’i' # NOTE 4 (2)
234
+ '\u06cc\u06cc' : '’i'
235
+
236
+ '\u0627\u064a\b' : '’i' # NOTE 4 (3)
237
+ '\u0627\u06cc\b' : '’i'
238
+
239
+ # Farsi consonant characters
240
+
241
+ '\u0628' : 'b' # ب
242
+ '\u067E' : 'p' # پ
243
+ '\u062a' : 't' # ت
244
+ '\u062B' : 's' # ث
245
+ '\u062c' : 'j' # ج
246
+ '\u0686' : 'ch' # ‫چ‬
247
+ '\u062d' : 'h' # ح
248
+ '\u062e' : 'kh' # خ
249
+ '\u062f' : 'd' # د
250
+ '\u0630' : 'z' # ذ
251
+ '\u0631' : 'r' # ر
252
+ '\u0632' : 'z' # ز
253
+ '\u0698' : 'zh' # ‫ژ‬
254
+ '\u0633' : 's' # س
255
+ '\u0634' : 'sh' # ش
256
+ '\u0635' : 's' # ص
257
+ '\u0636' : 'z' # ض
258
+ '\u0637' : 't' # ط
259
+ '\u0638' : 'z' # ظ
260
+ '\u0639' : '‘' # ع
261
+ '(?<=\b)\u0639' : '' # ع not represented initially
262
+ '\u063a' : 'gh' # غ
263
+ '\u0641' : 'f' # ف
264
+ '\u0642' : 'gh' # ق
265
+ '\u0643' : 'k' # ك
266
+ '\u06A9' : 'k' # ک
267
+ '\u06AF' : 'g' # ‫گ‬
268
+ '\u0644' : 'l' # ل
269
+ '\u0645' : 'm' # م
270
+ '\u0646' : 'n' # ن
271
+ '\u0647' : 'h' # ه
272
+ '\u0648' : 'v' # و
273
+ '\u064a' : 'y' # ي
274
+ '\u0649' : 'y' # ي
275
+ '\u06D0' : 'ē' # ې
276
+ '\u06CD' : 'êy' # ‫ۍ‬
@@ -0,0 +1,182 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2004
4
+ language: iso-639-2:hin
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: IC STANDARDS FOR TRANSLITERATION OF HINDI AND URDU PERSONAL NAMES
8
+ url: https://github.com/interscript/interscript-private-references/blob/master/odni/Hindi_and_Urdu_IC_Standard.doc
9
+ creation_date: 2004
10
+ description: |
11
+ IC STANDARDS FOR TRANSLITERATION OF HINDI AND URDU PERSONAL NAMES
12
+
13
+ notes:
14
+ - |
15
+ Long/Short Vowels: Long and short vowels are not distinguished in the system:
16
+ The borrowed Arabic name Samir could represent two distinct names, one with a
17
+ long /a/ (Saamir) and one with a long /i/ (Samiir). One solution would be to use
18
+ /ee/ to stand for the long /i/, as is often done (Sameer). The IC Standard will not
19
+ distinguish between these.
20
+ - |
21
+ No distinction is made between: retroflex and non-retroflex consonants; and
22
+ nasalized vowels and vowels followed by /n/.
23
+ - |
24
+ A distinction is drawn between Urdu letters qaf and kaf (and correspondingly,
25
+ Hindi qa and ka).
26
+ - |
27
+ A distinction is drawn between aspirated (e.g., /dh/) and nonaspirated consonants
28
+ (e.g., /d/), with the exception of ch/chh, both represented by /ch/.
29
+ - |
30
+ Digraphs: No distinction is made between digraphs such as /sh/ and single
31
+ contiguous letters such as /s/ followed by /h/.
32
+ - |
33
+ Hyphens: Hyphens (-) are NOT used to connect name elements within a name:
34
+ Abdur Rahman. The single exception to this is the izafat (i.e., linking vowel in
35
+ noun-link-modifier construction of Persian origin), which does show a hyphen
36
+ before the /e/ and a following space: Koh-e Nur (‘mountain of light’), “Jaish-e
37
+ xx” (‘Army of xx’ construction).
38
+ - |
39
+ Names incorporating “din” are written as one unit: Azermuddin, Badruddin,
40
+ Faizuddin, Salahuddin.
41
+ - |
42
+ Names that incorporate Allah as part of the name show the Arabic grammatical
43
+ marker /u/ rather than the /a/ of Allah: Abdullah (not Abdallah).
44
+ - |
45
+ Inherent short vowel /a/ in Devanagari is represented with an /a/ in Roman. Final
46
+ consonants are assumed not to have a short /a/ (e.g., masc. name Ram Lal, not
47
+ Rama Lala).
48
+ - |
49
+ As a general rule, Devanagari va is transcribed as a /v/: Vijay, Vishal, etc.
50
+ Exception: /sw/ combination: Saraswati, Krishnaswami. Urdu wau, however, is
51
+ transcribed as /w/: Wasim, Walid.
52
+
53
+ tests:
54
+ - source: "दिल्ली"
55
+ expected: "dilli"
56
+ - source: "भारत"
57
+ expected: "bhart"
58
+ - source: "विजय"
59
+ expected: "vijy"
60
+ - source: "विशाल"
61
+ expected: "vishal"
62
+ - source: "अब्दुल्ला"
63
+ expected: "abdulla"
64
+ - source: "संख्या"
65
+ expected: "snkhya"
66
+ - source: "संख्या"
67
+ expected: "snkhya"
68
+ - source: "समीर"
69
+ expected: "smir"
70
+ - source: "सरस्वती"
71
+ expected: "srsvti"
72
+ - source: "कृष्णास्वामी"
73
+ expected: "krishnasvami"
74
+
75
+ map:
76
+
77
+ characters:
78
+
79
+ #Independent vowel characters
80
+ 'अ': 'a'
81
+ 'आ': 'a'
82
+ 'इ': 'i'
83
+ 'ई': 'i'
84
+ 'उ': 'u'
85
+ 'ऊ': 'u'
86
+ 'ऋ': 'ri'
87
+ 'ऌ': 'l̤'
88
+ 'ए': 'e'
89
+ 'ऐ': 'ai'
90
+ 'ओ': 'o'
91
+ 'ऑ': 'au'
92
+ 'औ': 'au'
93
+
94
+ #Dependent Vowels
95
+ 'ा': "a"
96
+ 'ि': "i"
97
+ 'ी': "i"
98
+ 'ु': "u"
99
+ 'ू': "u"
100
+ 'ृ': "ri"
101
+ 'े': "e"
102
+ 'ै': "ai"
103
+ 'ॅ': "ai"
104
+ 'ो': "o"
105
+ 'ौ': "au"
106
+ 'ॉ': "au"
107
+
108
+
109
+ # Consonants
110
+
111
+ # Gutturals
112
+ 'क': 'k'
113
+ 'क्ष': 'ksha'
114
+ 'क़': 'q'
115
+ 'ख': 'kh'
116
+ 'ख़': 'kh'
117
+ 'ग': 'g'
118
+ 'ग़': 'gh'
119
+ 'घ': 'gh'
120
+ 'ङ': 'n'
121
+
122
+ # Palatals
123
+ 'च': 'ch'
124
+ 'छ': 'ch'
125
+ 'ज': 'j'
126
+ 'ज़': 'z'
127
+ 'झ': 'gya'
128
+ 'झ': 'jh'
129
+ 'ञ': 'n'
130
+
131
+ # Cerebrals
132
+ 'ट': 't'
133
+ 'ठ': 'th'
134
+ 'ड': 'd'
135
+ 'ड़': 'r'
136
+ 'ढ़': 'rh'
137
+ 'ढ': 'dh'
138
+ 'ण': 'n'
139
+
140
+ # Dentals
141
+ 'त': 't'
142
+ 'थ': 'th'
143
+ 'द': 'd'
144
+ 'ध': 'dh'
145
+ 'न': 'n'
146
+
147
+ # Labials
148
+ 'प': 'p'
149
+ 'फ़': 'f'
150
+ 'फ': 'ph'
151
+ 'ब': 'b'
152
+ 'भ': 'bh'
153
+ 'म': 'm'
154
+
155
+ # Semivowels
156
+ 'य': 'y'
157
+ 'र': 'r'
158
+ 'ल': 'l'
159
+ 'व': 'v'
160
+
161
+ # Sibilants
162
+ 'श': 'sh'
163
+ 'ष': 'sh'
164
+ 'स': 's'
165
+
166
+
167
+ # Aspirate
168
+ 'ह': 'h'
169
+
170
+ # Anusvāra
171
+ 'ं': 'n'
172
+
173
+ # Anunāsika
174
+ 'ँ': 'n'
175
+
176
+ # halanta
177
+ '्': ''
178
+
179
+ # bisharga
180
+ 'ः': 'h'
181
+
182
+ '़': ''