interscript 0.1.7 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +1 -3
- data/aliases.json +1 -0
- data/lib/interscript.rb +8 -3
- data/lib/interscript/fs.rb +27 -0
- data/lib/interscript/mapping.rb +3 -1
- data/lib/interscript/opal.rb +142 -3
- data/lib/interscript/opal/entrypoint.rb +8 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +2 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
- data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
- data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
- data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
- data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
- data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
- data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
- data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/spec/interscript/filenames_spec.rb +6 -369
- data/spec/interscript_spec.rb +10 -2
- metadata +50 -7
- data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,21 @@
|
|
1
|
+
---
|
2
|
+
authority_id: odni
|
3
|
+
id: 2005
|
4
|
+
language: iso-639-2:mkd
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: Standards for the transliteration of Macedonian personal names in written reports and products
|
8
|
+
creation_date: 2005
|
9
|
+
confirmation_date: 2005
|
10
|
+
description: |
|
11
|
+
Office of the Director of National Intelligence Macedonian Personal Names 2005 System
|
12
|
+
|
13
|
+
tests:
|
14
|
+
- source: Билјана
|
15
|
+
expected: Biljana
|
16
|
+
- source: Душко
|
17
|
+
expected: Dushko
|
18
|
+
|
19
|
+
map:
|
20
|
+
inherit: odni-mkd-Cyrl-Latn-2015
|
21
|
+
rules:
|
@@ -0,0 +1,123 @@
|
|
1
|
+
---
|
2
|
+
authority_id: odni
|
3
|
+
id: 2004
|
4
|
+
language: iso-639-3:prs
|
5
|
+
source_script: Arab
|
6
|
+
destination_script: Latn
|
7
|
+
name: Intelligence Community (IC) Standard for the Transliteration of Dari Personal Names (2004)
|
8
|
+
url: https://github.com/interscript/interscript-private-references/blob/master/odni/Farsi_(Persian)_%26_Dari_IC_Standards.doc
|
9
|
+
creation_date: 2004
|
10
|
+
confirmation_date: 2004-11
|
11
|
+
description: |
|
12
|
+
|
13
|
+
notes:
|
14
|
+
- This standard is intended only for those Afghan names
|
15
|
+
that have a common bond or similarity with Iranian or
|
16
|
+
Arabic names. They should not, for example, be used for
|
17
|
+
Pashto names, for which a separate standard should be used.
|
18
|
+
- Long/short vowels:- There is no distinction made in Roman
|
19
|
+
between long and short a:- E.g., Farhad (first a is short,
|
20
|
+
second is long).
|
21
|
+
- Double consonants:- Double consonants represented by the
|
22
|
+
tashdid are shown by doubling the Roman letter:- Mohammad.
|
23
|
+
Exceptions:- Consonants represented by Roman digraphs (
|
24
|
+
e.g., sh, ch) are not doubled:- Mobasher [not:- Mobashsher]
|
25
|
+
. Double letters are only used for tashdid (thus, Hosein [
|
26
|
+
not Hossein]) or to reflect the ‘sun letter’ assimilation (
|
27
|
+
see below).
|
28
|
+
- Hamzeh:- The hamzeh is represented name-internally by an
|
29
|
+
apostrophe, as is the ain. Name-initially, however,
|
30
|
+
neither hamzeh nor ain are indicated in transliteration (
|
31
|
+
e.g., Abdorrahman, not 'Abdorrahman).
|
32
|
+
- Digraphs:- No distinction is drawn in Roman between
|
33
|
+
digraphs such as sh and single contiguous letters (e.g., s
|
34
|
+
followed by h).
|
35
|
+
- Arabic definite article "al" ('the'):- Common in many
|
36
|
+
names borrowed from Arabic, the transliteration should show
|
37
|
+
the 'sun letter' assimilation rather than the “l” for the
|
38
|
+
lam. That is:- Abdorrahman. Note also that the "Abdol +
|
39
|
+
attribute of Allah" names are written as one unanalyzed
|
40
|
+
word, as are other names that contain the definite
|
41
|
+
article:- Shamsoddin (not Shams al-Din), Nezamoddin, etc.
|
42
|
+
- Diphthongs:- Diphthongs are written ei and ow
|
43
|
+
respectively:- Hosein; Khosrow.
|
44
|
+
- Yeh maqsura (final yeh pronounced as “a”):- should be
|
45
|
+
written as “a” as in “Musa”.
|
46
|
+
|
47
|
+
- Special Rules
|
48
|
+
|
49
|
+
- Hyphens:- A hyphen is used to indicate the ezafeh
|
50
|
+
construction:- Arshad-e Ameri
|
51
|
+
- Borrowed names that incorporate the name of God (Allah)
|
52
|
+
are transliterated as one word, with the letter "o":- E.g.,
|
53
|
+
Abdollah, Ayatollah, Azizollah.
|
54
|
+
- Foreign names borrowed or appearing in Dari are spelled
|
55
|
+
according to the standard Western tradition (even if there
|
56
|
+
is an Arabic or Dari version of the same name):- Joseph,
|
57
|
+
Michael.
|
58
|
+
- Common suffixes, such as gol, pur, mand, yar, zadeh,
|
59
|
+
etc., as well as nesbeh (‘relationship’ (to place of birth,
|
60
|
+
etc.)) names derived with these suffixes (e.g., abadi) are
|
61
|
+
written as part of the name:-
|
62
|
+
|
63
|
+
gol Parigol, Ziagol
|
64
|
+
pur Shahpur, Mehrpur
|
65
|
+
mand Gulahmand
|
66
|
+
yar Aminyar
|
67
|
+
zadeh Ismailzadeh, Karimzadeh
|
68
|
+
|
69
|
+
abadi Kamalabadi
|
70
|
+
|
71
|
+
|
72
|
+
tests:
|
73
|
+
- source: مُوسَى
|
74
|
+
expected: musa
|
75
|
+
|
76
|
+
- source: مُؤمِن
|
77
|
+
expected: momen
|
78
|
+
|
79
|
+
- source: رِضايي
|
80
|
+
expected: rezai
|
81
|
+
|
82
|
+
- source: مُبَشِّر
|
83
|
+
expected: mobasher
|
84
|
+
|
85
|
+
- source: حَسَّان
|
86
|
+
expected: hassan
|
87
|
+
|
88
|
+
- source: حَسَن
|
89
|
+
expected: hasan
|
90
|
+
|
91
|
+
- source: صَفَّار
|
92
|
+
expected: saffar
|
93
|
+
|
94
|
+
- source: صَفَر
|
95
|
+
expected: safar
|
96
|
+
|
97
|
+
map:
|
98
|
+
inherit: odni-fas-Arab-Latn-2004
|
99
|
+
characters:
|
100
|
+
|
101
|
+
'\u0626' : '' # ئ
|
102
|
+
'\u0624' : '' # ؤ
|
103
|
+
|
104
|
+
# shadda
|
105
|
+
|
106
|
+
'\u0642\u0651' : 'qq' # ق
|
107
|
+
'\u0648\u0651' : 'ww' # و
|
108
|
+
|
109
|
+
'\u0621': '' # ء
|
110
|
+
|
111
|
+
# FROM NOTES
|
112
|
+
|
113
|
+
'\u064a\u064a' : 'i' # NOTE 4 (2)
|
114
|
+
'\u06cc\u06cc' : 'i'
|
115
|
+
|
116
|
+
'\u0627\u064a\b' : 'i' # NOTE 4 (3)
|
117
|
+
'\u0627\u06cc\b' : 'i'
|
118
|
+
|
119
|
+
# Farsi consonant characters
|
120
|
+
|
121
|
+
'\u0639' : '' # ع # new
|
122
|
+
'\u0642' : 'q' # ق
|
123
|
+
'\u0648' : 'w' # و
|
File without changes
|
@@ -0,0 +1,36 @@
|
|
1
|
+
---
|
2
|
+
authority_id: odni
|
3
|
+
id: 2005
|
4
|
+
language: iso-639-2:srp
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: Office of the Director of National Intelligence Serbian Personal Names 2005 System
|
8
|
+
creation_date: 2005
|
9
|
+
confirmation_date: 2005
|
10
|
+
description: |
|
11
|
+
Office of the Director of National Intelligence Serbian Personal Names 2005 System
|
12
|
+
|
13
|
+
notes:
|
14
|
+
|
15
|
+
tests:
|
16
|
+
- source: Гојко Митић
|
17
|
+
expected: Gojko Mitic
|
18
|
+
- source: Горња Ваганица
|
19
|
+
expected: Gornja Vaganica
|
20
|
+
- source: Довиђења
|
21
|
+
expected: Dovidjenja
|
22
|
+
- source: Ћао! Здраво!
|
23
|
+
expected: Cao! Zdravo!
|
24
|
+
- source: Кључ
|
25
|
+
expected: Kljuc
|
26
|
+
- source: Цигарете
|
27
|
+
expected: Cigarete
|
28
|
+
- source: Пролеће
|
29
|
+
expected: Prolece
|
30
|
+
- source: Понедељак
|
31
|
+
expected: Ponedeljak
|
32
|
+
- source: Горња Ваганица
|
33
|
+
expected: Gornja Vaganica
|
34
|
+
|
35
|
+
map:
|
36
|
+
inherit: odni-srp-Cyrl-Latn-2015
|
@@ -0,0 +1,170 @@
|
|
1
|
+
---
|
2
|
+
authority_id: odni
|
3
|
+
id: 2015
|
4
|
+
language: iso-639-2:tuk
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: Standards for the transliteration of Turkmen personal names in written reports and products
|
8
|
+
url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20T%20-%20Turkmen_Personal_Names_FLTS%20(U).pdf
|
9
|
+
source: ICS-630-01 Annex T
|
10
|
+
creation_date: 2015
|
11
|
+
confirmation_date: 2015
|
12
|
+
description: |
|
13
|
+
This system is the Intelligence Community standard for the transliteration of Turkmen person
|
14
|
+
names that will be applied to all final written reports and products for IC consumers. It is not
|
15
|
+
intended to eliminate variations of a name that can contribute forensic information. Rather, it is to
|
16
|
+
provide an IC standard Romanized (English) transliteration from Turkmen that can then be linked
|
17
|
+
to forensic information in ways that will help identify the referent of the name.
|
18
|
+
|
19
|
+
In cases where an individual’s name has already been transliterated in a variant spelling, the IC
|
20
|
+
Standard spelling should appear first, followed by the variant spelling(s) in parentheses at the first
|
21
|
+
usage. In addition, if the original Cyrillic-script spelling is known, that spelling should also
|
22
|
+
appear in parentheses following the name, if possible, following best practices of the issuing
|
23
|
+
organization and taking into consideration information system capabilities. For example:
|
24
|
+
Azat Muhadov (also seen as Azat Muhadow, Азат Мухадов). This convention is designed to
|
25
|
+
ensure that vital forensic information is not lost.
|
26
|
+
|
27
|
+
For names of persons who are known to not be part of the Turkmen-speaking community, use the
|
28
|
+
relevant IC transliteration standard for names from that language (e.g., Yitzhak). A translator’s
|
29
|
+
note may be used to clarify the known origin of the person. Spell names of individuals from
|
30
|
+
languages that are written in Roman letters as they are spelled in those languages (e.g., George
|
31
|
+
Clooney, Jorge Garcia, Georges Pompidou).
|
32
|
+
|
33
|
+
In the case of active senior government officials in the on-line CIA World Factbook and the on-
|
34
|
+
line directory of Chiefs of State and Cabinet Members of Foreign Governments, the spellings
|
35
|
+
given in these on-line reference works should be used in place of the IC Standard. For any
|
36
|
+
individual who has at one time been listed in the Factbook or Chiefs of State directory but who no
|
37
|
+
longer appears in those resources (i.e. is no longer a government official), the IC Standard
|
38
|
+
spelling should appear first, with the spelling, if known, as it previously appeared in those
|
39
|
+
resources listed within parentheses at the first usage.
|
40
|
+
|
41
|
+
The primary goal is to produce a consistent Romanized transcription of names that is specifically
|
42
|
+
readable to the English-speaking non-specialist. The system uses the 26 letters of the standard
|
43
|
+
(English) Roman alphabet. Some ambiguities in the Romanized form will occur without the use
|
44
|
+
of diacritics. However, within the context of a report, where additional information about the
|
45
|
+
individual is provided, the referent will be clearly identified. This system will be used in
|
46
|
+
conjunction with on-line tools, name dictionaries, and lists containing conventional spellings of
|
47
|
+
names of well-known individuals.
|
48
|
+
|
49
|
+
notes:
|
50
|
+
- Transliterate double digraphs as a single digraph, i.e. шш -> sh, not shsh
|
51
|
+
- In the Roman, no distinction is made between digraphs such as 'sh' and single contiguous letters, (e.g. 's' followed by 'h').
|
52
|
+
- The Cyrillic ъ and ь are not transliterated, but instead are left out of the transliteration.
|
53
|
+
|
54
|
+
tests:
|
55
|
+
- source: Акгюль
|
56
|
+
expected: Akgyul
|
57
|
+
- source: Акгыз
|
58
|
+
expected: Akgyz
|
59
|
+
- source: Арсланбек
|
60
|
+
expected: Arslanbek
|
61
|
+
- source: Берди
|
62
|
+
expected: Berdi
|
63
|
+
- source: Дидар
|
64
|
+
expected: Didar
|
65
|
+
- source: Гөзел
|
66
|
+
expected: Gozel
|
67
|
+
- source: Гуля
|
68
|
+
expected: Gulya
|
69
|
+
- source: Гюля
|
70
|
+
expected: Gyulya
|
71
|
+
- source: Мәхри
|
72
|
+
expected: Mahri
|
73
|
+
- source: Майса
|
74
|
+
expected: Maysa
|
75
|
+
- source: Мырат
|
76
|
+
expected: Myrat
|
77
|
+
- source: Өвез
|
78
|
+
expected: Ovez
|
79
|
+
- source: Рашит
|
80
|
+
expected: Rashit
|
81
|
+
- source: Сапармырат
|
82
|
+
expected: Saparmyrat
|
83
|
+
|
84
|
+
map:
|
85
|
+
rules:
|
86
|
+
- pattern: "\u0448\u0448" # шш -> sh
|
87
|
+
result: sh
|
88
|
+
- pattern: "\u0428\u0448" # Шш -> Sh
|
89
|
+
result: Sh
|
90
|
+
- pattern: "\u0428\u0428" # ШШ -> SH
|
91
|
+
result: SH
|
92
|
+
- pattern: "\u0448\u0428" # шШ -> sH
|
93
|
+
result: sH
|
94
|
+
- pattern: "\u042A|\u044A|\u042C|\u044C" # remove Ъ, ъ, Ь and ь (see notes: neither sign is transliterated)
|
95
|
+
result: ''
|
96
|
+
|
97
|
+
characters:
|
98
|
+
'\u0410': 'A' # А
|
99
|
+
'\u0411': 'B' # Б
|
100
|
+
'\u0412': 'V' # В
|
101
|
+
'\u0413': 'G' # Г
|
102
|
+
'\u0414': 'D' # Д
|
103
|
+
'\u0415': 'E' # Е
|
104
|
+
'\u0401': 'Yo' # Ё
|
105
|
+
'\u0416': 'Zh' # Ж
|
106
|
+
'\u0496': 'J' # Җ
|
107
|
+
'\u0417': 'Z' # З
|
108
|
+
'\u0418': 'I' # И
|
109
|
+
'\u0419': 'Y' # Й
|
110
|
+
'\u041A': 'K' # К
|
111
|
+
'\u041B': 'L' # Л
|
112
|
+
'\u041C': 'M' # М
|
113
|
+
'\u041D': 'N' # Н
|
114
|
+
'\u04A2': 'Ng' # Ң
|
115
|
+
'\u041E': 'O' # О
|
116
|
+
'\u04E8': 'O' # Ө
|
117
|
+
'\u041F': 'P' # П
|
118
|
+
'\u0420': 'R' # Р
|
119
|
+
'\u0421': 'S' # С
|
120
|
+
'\u0422': 'T' # Т
|
121
|
+
'\u0423': 'U' # У
|
122
|
+
'\u04AE': 'U' # Ү
|
123
|
+
'\u0424': 'F' # Ф
|
124
|
+
'\u0425': 'H' # Х
|
125
|
+
'\u0426': 'Ts' # Ц
|
126
|
+
'\u0427': 'Ch' # Ч
|
127
|
+
'\u0428': 'Sh' # Ш
|
128
|
+
'\u0429': 'Shch' # Щ
|
129
|
+
'\u042B': 'Y' # Ы
|
130
|
+
'\u042D': 'E' # Э
|
131
|
+
'\u04D8': 'A' # Ә
|
132
|
+
'\u042E': 'Yu' # Ю
|
133
|
+
'\u042F': 'Ya' # Я
|
134
|
+
|
135
|
+
'\u0430': 'a' # а
|
136
|
+
'\u0431': 'b' # б
|
137
|
+
'\u0432': 'v' # в
|
138
|
+
'\u0433': 'g' # г
|
139
|
+
'\u0434': 'd' # д
|
140
|
+
'\u0435': 'e' # е
|
141
|
+
'\u0451': 'yo' # ё
|
142
|
+
'\u0436': 'zh' # ж
|
143
|
+
'\u0497': 'j' # җ
|
144
|
+
'\u0437': 'z' # з
|
145
|
+
'\u0438': 'i' # и
|
146
|
+
'\u0439': 'y' # й
|
147
|
+
'\u043A': 'k' # к
|
148
|
+
'\u043B': 'l' # л
|
149
|
+
'\u043C': 'm' # м
|
150
|
+
'\u043D': 'n' # н
|
151
|
+
'\u04A3': 'ng' # ң
|
152
|
+
'\u043E': 'o' # о
|
153
|
+
'\u04E9': 'o' # ө
|
154
|
+
'\u043F': 'p' # п
|
155
|
+
'\u0440': 'r' # р
|
156
|
+
'\u0441': 's' # с
|
157
|
+
'\u0442': 't' # т
|
158
|
+
'\u0443': 'u' # у
|
159
|
+
'\u04AF': 'u' # ү
|
160
|
+
'\u0444': 'f' # ф
|
161
|
+
'\u0445': 'h' # х
|
162
|
+
'\u0446': 'ts' # ц
|
163
|
+
'\u0447': 'ch' # ч
|
164
|
+
'\u0448': 'sh' # ш
|
165
|
+
'\u0449': 'shch' # щ
|
166
|
+
'\u044B': 'y' # ы
|
167
|
+
'\u044D': 'e' # э
|
168
|
+
'\u04D9': 'a' # ә
|
169
|
+
'\u044E': 'yu' # ю
|
170
|
+
'\u044F': 'ya' # я
|
@@ -0,0 +1,223 @@
|
|
1
|
+
---
|
2
|
+
authority_id: un
|
3
|
+
id: 1972
|
4
|
+
language: iso-639-2:asm
|
5
|
+
source_script: Beng
|
6
|
+
destination_script: Latn
|
7
|
+
name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Assamese Romanization, Version 4.0
|
8
|
+
url: https://www.eki.ee/wgrs/rom1_as.htm
|
9
|
+
creation_date: 1972
|
10
|
+
confirmation_date: 2016
|
11
|
+
description: |
|
12
|
+
The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
|
13
|
+
based on a report prepared by D. N. Sharma. The tables and their corrections were published in
|
14
|
+
volume II of the conference reports.
|
15
|
+
|
16
|
+
There is no evidence of the use of the system either in India or in international cartographic products.
|
17
|
+
|
18
|
+
Assamese (Asamīyā) uses an alphasyllabic script whereby each character represents a syllable rather
|
19
|
+
than one sound. Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially)
|
20
|
+
and in an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous but the user
|
21
|
+
would have to recognize many ligatures not given in the original table. The system is mostly reversible but there
|
22
|
+
exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters) and consonants
|
23
|
+
(ligatures vs. character sequences).
|
24
|
+
|
25
|
+
References
|
26
|
+
|
27
|
+
Second United Nations Conference on the Standardization of Geographical Names.
|
28
|
+
London, 10–31 May 1972. Vol. II. Technical papers. United Nations. New York 1974, pp. 141–142.
|
29
|
+
|
30
|
+
Third United Nations Conference on the Standardization of Geographical Names. Athens,
|
31
|
+
17 August – 7 September 1977. Vol. II, Technical papers, pp. 393 etc.
|
32
|
+
|
33
|
+
notes:
|
34
|
+
- |
|
35
|
+
ু Exceptions: গু gu; রু ru; শু shu; হু hu; ন্তু ntu; স্তু stu.
|
36
|
+
- |
|
37
|
+
ূ Exceptions: রূ rū.
|
38
|
+
- |
|
39
|
+
ৃ Exceptions: হৃ hṛ.
|
40
|
+
- |
|
41
|
+
্ Pronunciation without a vowel; special form: ৎ t.
|
42
|
+
- |
|
43
|
+
Dotted variants of the characters: ড় ṙa; ঢ় ṙha; য় ya.
|
44
|
+
|
45
|
+
tests:
|
46
|
+
- source: "অসমীয়া কবিতা"
|
47
|
+
expected: "asamīyā kabitā"
|
48
|
+
- source: "কবিৰ আজি জন্মদিন"
|
49
|
+
expected: "kabira āji janmadina"
|
50
|
+
- source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
|
51
|
+
expected: "beruṭata emāhara pāchhate punara bhayaṁkara agnikāṇḍa"
|
52
|
+
- source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
|
53
|
+
expected: "bhaṅāra biruddhe āvedana dākhila kaṁganāra"
|
54
|
+
- source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
|
55
|
+
expected: "āpuni paṙhi bhāla pāba parā bātari"
|
56
|
+
- source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
|
57
|
+
expected: "shrīrāmapurata garubharti ṭrāka jabda, dujanaka āṭaka"
|
58
|
+
- source: "কেনে আছে প্ৰাক্তন"
|
59
|
+
expected: "kene āchhe prāktana"
|
60
|
+
- source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
|
61
|
+
expected: "kamumbāira meyarara dehata kobhiḍa pajiṭibha"
|
62
|
+
- source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
|
63
|
+
expected: "ṭuiṭāraj̱oge khoda sadarī kare ei kathā"
|
64
|
+
- source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
|
65
|
+
expected: "lakhimapura jilāra nārāyaṇapurara barapathārata āji prashānti dhāma nāmere ekhana bṛddhāshramara shubhārambha karā haya"
|
66
|
+
|
67
|
+
map:
|
68
|
+
rules:
|
69
|
+
- pattern: ([ক]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
70
|
+
result: 'k'
|
71
|
+
- pattern: ([খ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
72
|
+
result: 'kh'
|
73
|
+
- pattern: ([গ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
74
|
+
result: 'g'
|
75
|
+
- pattern: ([ঘ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
76
|
+
result: 'gh'
|
77
|
+
- pattern: ([ঙ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
78
|
+
result: 'ṅ'
|
79
|
+
- pattern: ([চ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
80
|
+
result: 'ch'
|
81
|
+
- pattern: ([ছ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
82
|
+
result: 'chh'
|
83
|
+
- pattern: ([জ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
84
|
+
result: 'j'
|
85
|
+
- pattern: ([ঝ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
86
|
+
result: 'jh'
|
87
|
+
- pattern: ([ঞ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
88
|
+
result: 'ñ'
|
89
|
+
- pattern: ([ট]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
90
|
+
result: 'ṭ'
|
91
|
+
- pattern: ([ঠ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
92
|
+
result: 'ṭh'
|
93
|
+
- pattern: ([ড]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
94
|
+
result: 'ḍ'
|
95
|
+
- pattern: ([ঢ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
96
|
+
result: 'ḍh'
|
97
|
+
- pattern: ([ণ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
98
|
+
result: 'ṇ'
|
99
|
+
- pattern: ([ত]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
100
|
+
result: 't'
|
101
|
+
- pattern: ([থ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
102
|
+
result: 'th'
|
103
|
+
- pattern: ([দ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
104
|
+
result: 'd'
|
105
|
+
- pattern: ([ধ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
106
|
+
result: 'dh'
|
107
|
+
- pattern: ([ন]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
108
|
+
result: 'n'
|
109
|
+
- pattern: ([প]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
110
|
+
result: 'p'
|
111
|
+
- pattern: ([ফ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
112
|
+
result: 'ph'
|
113
|
+
- pattern: ([ব]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
114
|
+
result: 'b'
|
115
|
+
- pattern: ([ভ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
116
|
+
result: 'bh'
|
117
|
+
- pattern: ([ম]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
118
|
+
result: 'm'
|
119
|
+
- pattern: ([য]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
120
|
+
result: 'j̱'
|
121
|
+
- pattern: ([ৰ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
122
|
+
result: 'r'
|
123
|
+
- pattern: ([ল]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
124
|
+
result: 'l'
|
125
|
+
- pattern: ([ৱ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
126
|
+
result: 'v'
|
127
|
+
- pattern: ([শ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
128
|
+
result: 'sh'
|
129
|
+
- pattern: ([ষ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
130
|
+
result: 'ṣh'
|
131
|
+
- pattern: ([স]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
132
|
+
result: 's'
|
133
|
+
- pattern: ([হ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
134
|
+
result: 'h'
|
135
|
+
- pattern: ([ড়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
136
|
+
result: 'ṙ'
|
137
|
+
- pattern: ([ঢ়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
138
|
+
result: 'ṙh'
|
139
|
+
- pattern: ([য়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
140
|
+
result: 'y'
|
141
|
+
- pattern: ([ড়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
142
|
+
result: 'ṙ'
|
143
|
+
- pattern: ([ঢ়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
144
|
+
result: 'ṙh'
|
145
|
+
- pattern: ([য়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
|
146
|
+
result: 'y'
|
147
|
+
|
148
|
+
characters:
|
149
|
+
|
150
|
+
# I. Independent vowel characters
|
151
|
+
'অ': 'a'
|
152
|
+
'আ': 'ā'
|
153
|
+
'ই': 'i'
|
154
|
+
'ঈ': 'ī'
|
155
|
+
'উ': 'u'
|
156
|
+
'ঊ': 'ū'
|
157
|
+
'ঋ': 'ṛ'
|
158
|
+
'এ': 'e'
|
159
|
+
'ঐ': 'ai'
|
160
|
+
'ও': 'o'
|
161
|
+
'ঔ': 'au'
|
162
|
+
|
163
|
+
# II. Abbreviated vowel characters
|
164
|
+
'\u09be': 'ā'
|
165
|
+
'\u09bf': 'i'
|
166
|
+
'\u09c0': 'ī'
|
167
|
+
'\u09c1': 'u'
|
168
|
+
'\u09c2': 'ū'
|
169
|
+
'\u09c3': 'ṛ'
|
170
|
+
'\u09c7': 'e'
|
171
|
+
'\u09c8': 'ai'
|
172
|
+
'\u09cb': 'o'
|
173
|
+
'\u09cc': 'au'
|
174
|
+
|
175
|
+
# III. Other symbols
|
176
|
+
'\u0982': 'ṁ'
|
177
|
+
'\u0981': 'm̐'
|
178
|
+
'\u0983': 'ḥ'
|
179
|
+
'\u09cd': ''
|
180
|
+
|
181
|
+
# IV. Consonant characters
|
182
|
+
'ক': 'ka'
|
183
|
+
'খ': 'kha'
|
184
|
+
'গ': 'ga'
|
185
|
+
'ঘ': 'gha'
|
186
|
+
'ঙ': 'ṅa'
|
187
|
+
'চ': 'cha'
|
188
|
+
'ছ': 'chha'
|
189
|
+
'জ': 'ja'
|
190
|
+
'ঝ': 'jha'
|
191
|
+
'ঞ': 'ña'
|
192
|
+
'ট': 'ṭa'
|
193
|
+
'ঠ': 'ṭha'
|
194
|
+
'ড': 'ḍa'
|
195
|
+
'ঢ': 'ḍha'
|
196
|
+
'ণ': 'ṇa'
|
197
|
+
'ত': 'ta'
|
198
|
+
'থ': 'tha'
|
199
|
+
'দ': 'da'
|
200
|
+
'ধ': 'dha'
|
201
|
+
'ন': 'na'
|
202
|
+
'প': 'pa'
|
203
|
+
'ফ': 'pha'
|
204
|
+
'ব': 'ba'
|
205
|
+
'ভ': 'bha'
|
206
|
+
'ম': 'ma'
|
207
|
+
'য': 'j̱a'
|
208
|
+
'ৰ': 'ra'
|
209
|
+
'ল': 'la'
|
210
|
+
'ৱ': 'va'
|
211
|
+
'শ': 'sha'
|
212
|
+
'ষ': 'ṣha'
|
213
|
+
'স': 'sa'
|
214
|
+
'হ': 'ha'
|
215
|
+
'ৎ': 't'
|
216
|
+
|
217
|
+
# Note V Dotted variants
|
218
|
+
'ড়': 'ṙa'
|
219
|
+
'ঢ়': 'ṙha'
|
220
|
+
'য়': 'ya'
|
221
|
+
'য়': 'ya'
|
222
|
+
'ড়': 'ṙa'
|
223
|
+
'ঢ়': 'ṙha'
|