interscript 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +1 -3
- data/aliases.json +1 -0
- data/lib/interscript.rb +8 -3
- data/lib/interscript/fs.rb +27 -0
- data/lib/interscript/mapping.rb +3 -1
- data/lib/interscript/opal.rb +142 -3
- data/lib/interscript/opal/entrypoint.rb +8 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +2 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
- data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
- data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
- data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
- data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
- data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
- data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
- data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/spec/interscript/filenames_spec.rb +6 -369
- data/spec/interscript_spec.rb +10 -2
- metadata +50 -7
- data/lib/interscript/opal/map_translate.rb +0 -7
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: odni
|
|
3
|
+
id: 2005
|
|
4
|
+
language: iso-639-2:bul
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Intelligence Community (IC) Standard for the Transliteration of Bulgarian Personal Names
|
|
8
|
+
creation_date: 2005
|
|
9
|
+
description:
|
|
10
|
+
|
|
11
|
+
tests:
|
|
12
|
+
- source: Добри Христов
|
|
13
|
+
expected: Dobri Khristov
|
|
14
|
+
- source: болгарица
|
|
15
|
+
expected: bolgaritsa
|
|
16
|
+
- source: български език
|
|
17
|
+
expected: bulgarski ezik
|
|
18
|
+
- source: българска азбука
|
|
19
|
+
expected: bulgarska azbuka
|
|
20
|
+
- source: град
|
|
21
|
+
expected: grad
|
|
22
|
+
- source: аз държа
|
|
23
|
+
expected: az durzha
|
|
24
|
+
- source: Ядеш хляба с чубрица
|
|
25
|
+
expected: Yadesh khlyaba s chubritsa
|
|
26
|
+
|
|
27
|
+
map:
|
|
28
|
+
characters:
|
|
29
|
+
'\u0410': 'A'
|
|
30
|
+
'\u0411': 'B'
|
|
31
|
+
'\u0412': 'V'
|
|
32
|
+
'\u0413': 'G'
|
|
33
|
+
'\u0414': 'D'
|
|
34
|
+
'\u0415': 'E'
|
|
35
|
+
'\u0416': 'Zh'
|
|
36
|
+
'\u0417': 'Z'
|
|
37
|
+
'\u0418': 'I'
|
|
38
|
+
'\u0419': 'Y'
|
|
39
|
+
'\u041a': 'K'
|
|
40
|
+
'\u041b': 'L'
|
|
41
|
+
'\u041c': 'M'
|
|
42
|
+
'\u041d': 'N'
|
|
43
|
+
'\u041e': 'O'
|
|
44
|
+
'\u041f': 'P'
|
|
45
|
+
'\u0420': 'R'
|
|
46
|
+
'\u0421': 'S'
|
|
47
|
+
'\u0422': 'T'
|
|
48
|
+
'\u0423': 'U'
|
|
49
|
+
'\u0424': 'F'
|
|
50
|
+
'\u0425': 'Kh'
|
|
51
|
+
'\u0426': 'Ts'
|
|
52
|
+
'\u0427': 'Ch'
|
|
53
|
+
'\u0428': 'Sh'
|
|
54
|
+
'\u0429': 'Sht'
|
|
55
|
+
'\u042a': 'U'
|
|
56
|
+
'\u042c': 'Y'
|
|
57
|
+
'\u042e': 'Yu'
|
|
58
|
+
'\u042f': 'Ya'
|
|
59
|
+
|
|
60
|
+
'\u0430': 'a'
|
|
61
|
+
'\u0431': 'b'
|
|
62
|
+
'\u0432': 'v'
|
|
63
|
+
'\u0433': 'g'
|
|
64
|
+
'\u0434': 'd'
|
|
65
|
+
'\u0435': 'e'
|
|
66
|
+
'\u0436': 'zh'
|
|
67
|
+
'\u0437': 'z'
|
|
68
|
+
'\u0438': 'i'
|
|
69
|
+
'\u0439': 'y'
|
|
70
|
+
'\u043a': 'k'
|
|
71
|
+
'\u043b': 'l'
|
|
72
|
+
'\u043c': 'm'
|
|
73
|
+
'\u043d': 'n'
|
|
74
|
+
'\u043e': 'o'
|
|
75
|
+
'\u043f': 'p'
|
|
76
|
+
'\u0440': 'r'
|
|
77
|
+
'\u0441': 's'
|
|
78
|
+
'\u0442': 't'
|
|
79
|
+
'\u0443': 'u'
|
|
80
|
+
'\u0444': 'f'
|
|
81
|
+
'\u0445': 'kh'
|
|
82
|
+
'\u0446': 'ts'
|
|
83
|
+
'\u0447': 'ch'
|
|
84
|
+
'\u0448': 'sh'
|
|
85
|
+
'\u0449': 'sht'
|
|
86
|
+
'\u044a': 'u'
|
|
87
|
+
'\u044c': 'y'
|
|
88
|
+
'\u044e': 'yu'
|
|
89
|
+
'\u044f': 'ya'
|
|
90
|
+
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: odni
|
|
3
|
+
id: 2004
|
|
4
|
+
language: iso-639-2:fas
|
|
5
|
+
source_script: Arab
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Intelligence Community (IC) Standard for the Transliteration of Farsi (Persian) Personal Names (2004)
|
|
8
|
+
url: https://github.com/interscript/interscript-private-references/blob/master/odni/Farsi_(Persian)_%26_Dari_IC_Standards.doc
|
|
9
|
+
creation_date: 2004
|
|
10
|
+
confirmation_date: 2004-11
|
|
11
|
+
description: |
|
|
12
|
+
|
|
13
|
+
notes:
|
|
14
|
+
- Long/short vowels:- There is no distinction made in Roman
|
|
15
|
+
between long and short a:- E.g., Parvas (first a is short,
|
|
16
|
+
second is long).
|
|
17
|
+
- Double consonants:- Double consonants represented by the
|
|
18
|
+
tashdid are shown by doubling the Roman letter:-
|
|
19
|
+
Mo'azzami. Exceptions:- Ain and consonants represented by
|
|
20
|
+
Roman digraphs (e.g., sh, ch) are not doubled:- Mobasher [
|
|
21
|
+
not:- Mobashsher]. Double letters are only used for
|
|
22
|
+
tashdid (thus, Hosein [not Hossein]) or to reflect the ‘sun
|
|
23
|
+
letter’ assimilation (see below).
|
|
24
|
+
- Hamzeh:- The hamzeh is represented name-internally by an
|
|
25
|
+
apostrophe, as is the ain. Name-initially, however,
|
|
26
|
+
neither hamzeh nor ain are indicated in transliteration (
|
|
27
|
+
e.g., Abdorrahman, not 'Abdorrahman).
|
|
28
|
+
- Digraphs:- No distinction is drawn in Roman between
|
|
29
|
+
digraphs such as sh and single contiguous letters (e.g., s
|
|
30
|
+
followed by h).
|
|
31
|
+
- Arabic definite article "al" ('the'):- Common in many
|
|
32
|
+
names borrowed from Arabic, the transliteration should
|
|
33
|
+
follow the Arabic rules for “sun letter” assimilation in
|
|
34
|
+
spoken form and reflect the nominative case. That is:-
|
|
35
|
+
Abdorrahman, not Abd al-Rahman. Note also that the
|
|
36
|
+
“Abdollah” and “Abdol + attribute of Allah” names are
|
|
37
|
+
written as one unanalyzed word, as are other names that
|
|
38
|
+
contain the definite article:- Shamsoddin (not Shams al-
|
|
39
|
+
Din), Nezamoddin, etc.
|
|
40
|
+
- Diphthongs:- Diphthongs are written ei and ow, as in,
|
|
41
|
+
respectively:- Hosein; Khosrow.
|
|
42
|
+
- Yeh maqsura (final yeh pronounced as “a”):- should be
|
|
43
|
+
written as “a” as in “Musa”.
|
|
44
|
+
|
|
45
|
+
- Special Rules
|
|
46
|
+
|
|
47
|
+
- Hyphens:- A hyphen is used to indicate the ezafeh
|
|
48
|
+
construction:- Arshad-e Ameri
|
|
49
|
+
- Borrowed names that incorporate the name of God (Allah)
|
|
50
|
+
are transliterated as one word, with the letter "o":- E.g.,
|
|
51
|
+
Abdollah, Ayatollah, Azizollah.
|
|
52
|
+
- Foreign names borrowed or appearing in Farsi are spelled
|
|
53
|
+
according to the standard Western tradition (even if there
|
|
54
|
+
is an Arabic or Farsi version of the same name):- Joseph,
|
|
55
|
+
Michael.
|
|
56
|
+
- Common suffixes, such as nia, pur, fard, far, abad,
|
|
57
|
+
zadeh, khah, and nezhad as well as nesbeh (‘relationship’ (
|
|
58
|
+
to place of birth, etc.)) names derived with these
|
|
59
|
+
suffixes (e.g., nezhadi, abadi) are written as part of the
|
|
60
|
+
name:-
|
|
61
|
+
|
|
62
|
+
asa Mehrasa
|
|
63
|
+
baksh Tajbaksh
|
|
64
|
+
dust Rafighdust
|
|
65
|
+
far Parvizfar
|
|
66
|
+
fard Akhavanfard
|
|
67
|
+
gar Fuladgar
|
|
68
|
+
gol Zarringol
|
|
69
|
+
kar Parhizkar
|
|
70
|
+
khah Vatankhah
|
|
71
|
+
khu Nikkhu
|
|
72
|
+
mand Purmand
|
|
73
|
+
mehr Zadmehr
|
|
74
|
+
nezhad Niknezhad
|
|
75
|
+
nia Montajebnia
|
|
76
|
+
parast Khodaparast
|
|
77
|
+
parvar Golparvar
|
|
78
|
+
pur Mohteshemipur
|
|
79
|
+
tabar Shayestehtabar
|
|
80
|
+
yar Mohammadyar
|
|
81
|
+
zadeh Vakilzadeh
|
|
82
|
+
|
|
83
|
+
abadi Salehabadi
|
|
84
|
+
khani Alikhani
|
|
85
|
+
nezhadi Niknezhadi
|
|
86
|
+
|
|
87
|
+
- Note also that yar can function as a prefix and, as such,
|
|
88
|
+
should be affixed directly to the name:-
|
|
89
|
+
|
|
90
|
+
yar Yarmohammadi, Yarshater
|
|
91
|
+
|
|
92
|
+
- This is in contrast with hyphenated names such as Raja’i-
|
|
93
|
+
Khorasani, Tabataba’i-Shirazi, Soleimani-Maimandi, etc.
|
|
94
|
+
|
|
95
|
+
tests:
|
|
96
|
+
- source: مُوسَى
|
|
97
|
+
expected: musa
|
|
98
|
+
|
|
99
|
+
- source: مُؤمِن
|
|
100
|
+
expected: mo’men
|
|
101
|
+
|
|
102
|
+
- source: رِضايي
|
|
103
|
+
expected: reza’i
|
|
104
|
+
|
|
105
|
+
- source: مُبَشِّر
|
|
106
|
+
expected: mobasher
|
|
107
|
+
|
|
108
|
+
- source: حَسَّان
|
|
109
|
+
expected: hassan
|
|
110
|
+
|
|
111
|
+
- source: حَسَن
|
|
112
|
+
expected: hasan
|
|
113
|
+
|
|
114
|
+
- source: صَفَّار
|
|
115
|
+
expected: saffar
|
|
116
|
+
|
|
117
|
+
- source: صَفَر
|
|
118
|
+
expected: safar
|
|
119
|
+
|
|
120
|
+
map:
|
|
121
|
+
characters:
|
|
122
|
+
# special rules
|
|
123
|
+
|
|
124
|
+
'\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
|
|
125
|
+
'\ufdf2': 'Allah' # See note 5
|
|
126
|
+
'\s\u0627\u0644\u0644\u0651\u064e\u0647': 'ollah' # NOTE 9
|
|
127
|
+
|
|
128
|
+
'\u0652' : '' # ْ sokoon
|
|
129
|
+
'\u0659': 'ê'
|
|
130
|
+
|
|
131
|
+
'\u064e\u064a\u0652' : 'ay' # ـَيْ
|
|
132
|
+
'\u0649\u0670': 'á' # ىٰ
|
|
133
|
+
'\u0674': '-e' # ٴ
|
|
134
|
+
'\u0654': '-e' # ٔ
|
|
135
|
+
# - '-ye'
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ta' marboota
|
|
139
|
+
'\u0629' : 'eh'
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
'\u0626' : '’' # ئ
|
|
144
|
+
'\u0624' : '’' # ؤ
|
|
145
|
+
'\u0623' : '' # أ
|
|
146
|
+
'\u0625': '' # إ
|
|
147
|
+
|
|
148
|
+
# See note B
|
|
149
|
+
'\b\u0627\u0644' : 'al ' # ال
|
|
150
|
+
'\b\u0622\u0644' : 'Al ' # آل
|
|
151
|
+
# '\uFE8E' : '' # ﺎ
|
|
152
|
+
|
|
153
|
+
# Sun letters
|
|
154
|
+
'\b\u0627\u0644\u062a' : 'at t' # الت
|
|
155
|
+
'\b\u0627\u0644\u062b' : 'as s' # الث
|
|
156
|
+
'\b\u0627\u0644\u062f' : 'ad d' # الد
|
|
157
|
+
'\b\u0627\u0644\u0630' : 'az z' # الذ
|
|
158
|
+
'\b\u0627\u0644\u0631' : 'ar r' # الر
|
|
159
|
+
'\b\u0627\u0644\u0632' : 'az z' # الز
|
|
160
|
+
'\b\u0627\u0644\u0633' : 'as s' # الس
|
|
161
|
+
'\b\u0627\u0644\u0634' : 'ash sh' # الش
|
|
162
|
+
'\b\u0627\u0644\u0635' : 'as s' # الص
|
|
163
|
+
'\b\u0627\u0644\u0636' : 'az z' # الض
|
|
164
|
+
'\b\u0627\u0644\u0637' : 'at t' # الط
|
|
165
|
+
'\b\u0627\u0644\u0638' : 'az z' # الظ
|
|
166
|
+
'\b\u0627\u0644\u0644' : 'al l' # الل
|
|
167
|
+
'\b\u0627\u0644\u0646' : 'an n' # الن
|
|
168
|
+
|
|
169
|
+
# Farsi Vowel (Pointing)
|
|
170
|
+
'\u0622' : 'a' # آ alef maddeh
|
|
171
|
+
'\u064e' : 'a' # َ fatha
|
|
172
|
+
'(?<=\u064e)\u0627' : '' # ا
|
|
173
|
+
'(?<!\b)\u0627' : 'a' # ا
|
|
174
|
+
'\b\u0627\u064e' : 'a' # ا initial followed by fatha
|
|
175
|
+
'\b\u0627\u064f' : 'o' # ا initial followed by damma
|
|
176
|
+
'\b\u0627\u0650' : 'e' # ِ ا initial followed by kasra
|
|
177
|
+
|
|
178
|
+
'\u064f' : 'o' # damma
|
|
179
|
+
'\u064f\u0648' : 'u' # ـُو damma followed by و
|
|
180
|
+
# '\u064e\u0648' : 'ow' # ـَو
|
|
181
|
+
# '\u064e\u0648\u0652' : 'aw' # ـَوْ
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
'\u0650' : 'e' # kasra
|
|
185
|
+
'\u0650\u064a' : 'i' # ـِي kasra followed by ي
|
|
186
|
+
'\u0650\u06cc' : 'i' # ـِي kasra followed by ي
|
|
187
|
+
'\u0650\u064a\u0651\u064e' : 'iy' # ـِيَّ
|
|
188
|
+
'\u0650\u06cc\u0651\u064e' : 'iy' # ـِيَّ
|
|
189
|
+
'\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
|
|
190
|
+
# '\u064e\u064a' : 'aī' # ـَي
|
|
191
|
+
# '\u064e\u06cc' : 'aī' # ـَي
|
|
192
|
+
# '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
|
|
193
|
+
|
|
194
|
+
# additional symbols
|
|
195
|
+
|
|
196
|
+
# shadda
|
|
197
|
+
|
|
198
|
+
'\u0628\u0651' : 'bb' # ب
|
|
199
|
+
'\u062a\u0651' : 'tt' # ت
|
|
200
|
+
'\u062b\u0651' : 'ss' # ث
|
|
201
|
+
'\u062c\u0651' : 'jj' # ج
|
|
202
|
+
'\u062d\u0651' : 'hh' # ح
|
|
203
|
+
'\u062e\u0651' : 'kh' # خ
|
|
204
|
+
'\u062f\u0651' : 'dd' # د
|
|
205
|
+
'\u0630\u0651' : 'zz' # ذ
|
|
206
|
+
'\u0631\u0651' : 'rr' # ر
|
|
207
|
+
'\u0632\u0651' : 'zz' # ز
|
|
208
|
+
'\u0633\u0651' : 'ss' # س
|
|
209
|
+
'\u0634\u0651' : 'sh' # ش
|
|
210
|
+
'\u0635\u0651' : 'ss' # ص
|
|
211
|
+
'\u0636\u0651' : 'zz' # ض
|
|
212
|
+
'\u0637\u0651' : 'tt' # ط
|
|
213
|
+
'\u0638\u0651' : 'zz' # ظ
|
|
214
|
+
'\u063a\u0651' : 'gh' # غ
|
|
215
|
+
'\u0641\u0651' : 'ff' # ف
|
|
216
|
+
'\u0642\u0651' : 'gh' # ق
|
|
217
|
+
'\u0643\u0651' : 'kk' # ك
|
|
218
|
+
'\u0644\u0651' : 'll' # ل
|
|
219
|
+
'\u0645\u0651' : 'mm' # م
|
|
220
|
+
'\u0646\u0651' : 'nn' # ن
|
|
221
|
+
'\u0647\u0651' : 'hh' # ه
|
|
222
|
+
'\u0648\u0651' : 'vv' # و
|
|
223
|
+
'\u064a\u0651' : 'yy' # ي
|
|
224
|
+
|
|
225
|
+
'(?<=\b)\u0621': '' # ء
|
|
226
|
+
'\u0621': '’' # ء
|
|
227
|
+
|
|
228
|
+
# FROM NOTES
|
|
229
|
+
|
|
230
|
+
'\u064e\u0649' : 'a' # ـَى fatha followed by ى which is ا not ي
|
|
231
|
+
'\u0649' : 'a' # ى alef maqsura NOTE-1
|
|
232
|
+
|
|
233
|
+
'\u064a\u064a' : '’i' # NOTE 4 (2)
|
|
234
|
+
'\u06cc\u06cc' : '’i'
|
|
235
|
+
|
|
236
|
+
'\u0627\u064a\b' : '’i' # NOTE 4 (3)
|
|
237
|
+
'\u0627\u06cc\b' : '’i'
|
|
238
|
+
|
|
239
|
+
# Farsi consonant characters
|
|
240
|
+
|
|
241
|
+
'\u0628' : 'b' # ب
|
|
242
|
+
'\u067E' : 'p' # پ
|
|
243
|
+
'\u062a' : 't' # ت
|
|
244
|
+
'\u062B' : 's' # ث
|
|
245
|
+
'\u062c' : 'j' # ج
|
|
246
|
+
'\u0686' : 'ch' # چ
|
|
247
|
+
'\u062d' : 'h' # ح
|
|
248
|
+
'\u062e' : 'kh' # خ
|
|
249
|
+
'\u062f' : 'd' # د
|
|
250
|
+
'\u0630' : 'z' # ذ
|
|
251
|
+
'\u0631' : 'r' # ر
|
|
252
|
+
'\u0632' : 'z' # ز
|
|
253
|
+
'\u0698' : 'zh' # ژ
|
|
254
|
+
'\u0633' : 's' # س
|
|
255
|
+
'\u0634' : 'sh' # ش
|
|
256
|
+
'\u0635' : 's' # ص
|
|
257
|
+
'\u0636' : 'z' # ض
|
|
258
|
+
'\u0637' : 't' # ط
|
|
259
|
+
'\u0638' : 'z' # ظ
|
|
260
|
+
'\u0639' : '‘' # ع
|
|
261
|
+
'(?<=\b)\u0639' : '' # ع not represented initially
|
|
262
|
+
'\u063a' : 'gh' # غ
|
|
263
|
+
'\u0641' : 'f' # ف
|
|
264
|
+
'\u0642' : 'gh' # ق
|
|
265
|
+
'\u0643' : 'k' # ك
|
|
266
|
+
'\u06A9' : 'k' # ک
|
|
267
|
+
'\u06AF' : 'g' # گ
|
|
268
|
+
'\u0644' : 'l' # ل
|
|
269
|
+
'\u0645' : 'm' # م
|
|
270
|
+
'\u0646' : 'n' # ن
|
|
271
|
+
'\u0647' : 'h' # ه
|
|
272
|
+
'\u0648' : 'v' # و
|
|
273
|
+
'\u064a' : 'y' # ي
|
|
274
|
+
'\u0649' : 'y' # ي
|
|
275
|
+
'\u06D0' : 'ē' # ې
|
|
276
|
+
'\u06CD' : 'êy' # ۍ
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: odni
|
|
3
|
+
id: 2004
|
|
4
|
+
language: iso-639-2:hin
|
|
5
|
+
source_script: Deva
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: IC STANDARDS FOR TRANSLITERATION OF HINDI AND URDU PERSONAL NAMES
|
|
8
|
+
url: https://github.com/interscript/interscript-private-references/blob/master/odni/Hindi_and_Urdu_IC_Standard.doc
|
|
9
|
+
creation_date: 2004
|
|
10
|
+
description: |
|
|
11
|
+
IC STANDARDS FOR TRANSLITERATION OF HINDI AND URDU PERSONAL NAMES
|
|
12
|
+
|
|
13
|
+
notes:
|
|
14
|
+
- |
|
|
15
|
+
Long/Short Vowels: Long and short vowels are not distinguished in the system:
|
|
16
|
+
The borrowed Arabic name Samir could represent two distinct names, one with a
|
|
17
|
+
long /a/ (Saamir) and one with a long /i/ (Samiir). One solution would be to use
|
|
18
|
+
/ee/ to stand for the long /i/, as is often done (Sameer). The IC Standard will not
|
|
19
|
+
distinguish between these.
|
|
20
|
+
- |
|
|
21
|
+
No distinction is made between: retroflex and non-retroflex consonants; and
|
|
22
|
+
nasalized vowels and vowels followed by /n/.
|
|
23
|
+
- |
|
|
24
|
+
A distinction is drawn between Urdu letters qaf and kaf (and correspondingly,
|
|
25
|
+
Hindi qa and ka).
|
|
26
|
+
- |
|
|
27
|
+
A distinction is drawn between nonaspirated (e.g., /d/) and aspirated consonants
|
|
28
|
+
(e.g., /dh/), with the exception of ch/chh, both represented by /ch/.
|
|
29
|
+
- |
|
|
30
|
+
Digraphs: No distinction is made between digraphs such as /sh/ and single
|
|
31
|
+
contiguous letters such as /s/ followed by /h/.
|
|
32
|
+
- |
|
|
33
|
+
Hyphens: Hyphens (-) are NOT used to connect name elements within a name:
|
|
34
|
+
Abdur Rahman. The single exception to this is the izafat (i.e., linking vowel in
|
|
35
|
+
noun-link-modifier construction of Persian origin), which does show a hyphen
|
|
36
|
+
before the /e/ and a following space: Koh-e Nur (‘mountain of light’), “Jaish-e
|
|
37
|
+
xx” (‘Army of xx’ construction).
|
|
38
|
+
- |
|
|
39
|
+
Names incorporating “din” are written as one unit: Azermuddin, Badruddin,
|
|
40
|
+
Faizuddin, Salahuddin.
|
|
41
|
+
- |
|
|
42
|
+
Names that incorporate Allah as part of the name show the Arabic grammatical
|
|
43
|
+
marker /u/ rather than the /a/ of Allah: Abdullah (not Abdallah).
|
|
44
|
+
- |
|
|
45
|
+
Inherent short vowel /a/ in Devanagari is represented with an /a/ in Roman. Final
|
|
46
|
+
consonants are assumed not to have a short /a/ (e.g., masc. name Ram Lal, not
|
|
47
|
+
Rama Lala).
|
|
48
|
+
- |
|
|
49
|
+
As a general rule, Devanagari va is transcribed as a /v/: Vijay, Vishal, etc.
|
|
50
|
+
Exception: /sw/ combination: Saraswati, Krishnaswami. Urdu wau, however, is
|
|
51
|
+
transcribed as /w/: Wasim, Walid.
|
|
52
|
+
|
|
53
|
+
tests:
|
|
54
|
+
- source: "दिल्ली"
|
|
55
|
+
expected: "dilli"
|
|
56
|
+
- source: "भारत"
|
|
57
|
+
expected: "bhart"
|
|
58
|
+
- source: "विजय"
|
|
59
|
+
expected: "vijy"
|
|
60
|
+
- source: "विशाल"
|
|
61
|
+
expected: "vishal"
|
|
62
|
+
- source: "अब्दुल्ला"
|
|
63
|
+
expected: "abdulla"
|
|
64
|
+
- source: "संख्या"
|
|
65
|
+
expected: "snkhya"
|
|
66
|
+
- source: "संख्या"
|
|
67
|
+
expected: "snkhya"
|
|
68
|
+
- source: "समीर"
|
|
69
|
+
expected: "smir"
|
|
70
|
+
- source: "सरस्वती"
|
|
71
|
+
expected: "srsvti"
|
|
72
|
+
- source: "कृष्णास्वामी"
|
|
73
|
+
expected: "krishnasvami"
|
|
74
|
+
|
|
75
|
+
map:
|
|
76
|
+
|
|
77
|
+
characters:
|
|
78
|
+
|
|
79
|
+
#Independent vowel characters
|
|
80
|
+
'अ': 'a'
|
|
81
|
+
'आ': 'a'
|
|
82
|
+
'इ': 'i'
|
|
83
|
+
'ई': 'i'
|
|
84
|
+
'उ': 'u'
|
|
85
|
+
'ऊ': 'u'
|
|
86
|
+
'ऋ': 'ri'
|
|
87
|
+
'ऌ': 'l̤'
|
|
88
|
+
'ए': 'e'
|
|
89
|
+
'ऐ': 'ai'
|
|
90
|
+
'ओ': 'o'
|
|
91
|
+
'ऑ': 'au'
|
|
92
|
+
'औ': 'au'
|
|
93
|
+
|
|
94
|
+
#Dependent Vowels
|
|
95
|
+
'ा': "a"
|
|
96
|
+
'ि': "i"
|
|
97
|
+
'ी': "i"
|
|
98
|
+
'ु': "u"
|
|
99
|
+
'ू': "u"
|
|
100
|
+
'ृ': "ri"
|
|
101
|
+
'े': "e"
|
|
102
|
+
'ै': "ai"
|
|
103
|
+
'ॅ': "ai"
|
|
104
|
+
'ो': "o"
|
|
105
|
+
'ौ': "au"
|
|
106
|
+
'ॉ': "au"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# Consonants
|
|
110
|
+
|
|
111
|
+
# Gutturals
|
|
112
|
+
'क': 'k'
|
|
113
|
+
'क्ष': 'ksha'
|
|
114
|
+
'क़': 'q'
|
|
115
|
+
'ख': 'kh'
|
|
116
|
+
'ख़': 'kh'
|
|
117
|
+
'ग': 'g'
|
|
118
|
+
'ग़': 'gh'
|
|
119
|
+
'घ': 'gh'
|
|
120
|
+
'ङ': 'n'
|
|
121
|
+
|
|
122
|
+
# Palatals
|
|
123
|
+
'च': 'ch'
|
|
124
|
+
'छ': 'ch'
|
|
125
|
+
'ज': 'j'
|
|
126
|
+
'ज़': 'z'
|
|
127
|
+
'ज्ञ': 'gya'
|
|
128
|
+
'झ': 'jh'
|
|
129
|
+
'ञ': 'n'
|
|
130
|
+
|
|
131
|
+
# Cerebrals
|
|
132
|
+
'ट': 't'
|
|
133
|
+
'ठ': 'th'
|
|
134
|
+
'ड': 'd'
|
|
135
|
+
'ड़': 'r'
|
|
136
|
+
'ढ़': 'rh'
|
|
137
|
+
'ढ': 'dh'
|
|
138
|
+
'ण': 'n'
|
|
139
|
+
|
|
140
|
+
# Dentals
|
|
141
|
+
'त': 't'
|
|
142
|
+
'थ': 'th'
|
|
143
|
+
'द': 'd'
|
|
144
|
+
'ध': 'dh'
|
|
145
|
+
'न': 'n'
|
|
146
|
+
|
|
147
|
+
# Labials
|
|
148
|
+
'प': 'p'
|
|
149
|
+
'फ़': 'f'
|
|
150
|
+
'फ': 'ph'
|
|
151
|
+
'ब': 'b'
|
|
152
|
+
'भ': 'bh'
|
|
153
|
+
'म': 'm'
|
|
154
|
+
|
|
155
|
+
# Semivowels
|
|
156
|
+
'य': 'y'
|
|
157
|
+
'र': 'r'
|
|
158
|
+
'ल': 'l'
|
|
159
|
+
'व': 'v'
|
|
160
|
+
|
|
161
|
+
# Sibilants
|
|
162
|
+
'श': 'sh'
|
|
163
|
+
'ष': 'sh'
|
|
164
|
+
'स': 's'
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Aspirate
|
|
168
|
+
'ह': 'h'
|
|
169
|
+
|
|
170
|
+
# Anusvāra
|
|
171
|
+
'ं': 'n'
|
|
172
|
+
|
|
173
|
+
# Anunāsika
|
|
174
|
+
'ँ': 'n'
|
|
175
|
+
|
|
176
|
+
# halanta
|
|
177
|
+
'्': ''
|
|
178
|
+
|
|
179
|
+
# bisharga
|
|
180
|
+
'ः': 'h'
|
|
181
|
+
|
|
182
|
+
'़': ''
|