interscript-maps 2.2.1 → 2.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/interscript-maps.gemspec +1 -1
- data/interscript-maps.yaml +30 -0
- data/maps/alalc-div-Thaa-Latn-2012.imp +74 -0
- data/maps/alalc-kat-Geok-Latn-1997.imp +1 -2
- data/maps/alalc-kat-Geok-Latn-2011.imp +31 -0
- data/maps/alalc-kat-Geor-Latn-1997.imp +1 -2
- data/maps/alalc-kat-Geor-Latn-2011.imp +52 -0
- data/maps/alalc-mal-Mlym-Latn-2012.imp +1 -1
- data/maps/bas-rus-Cyrl-Latn-2017-bss.imp +1 -1
- data/maps/bas-rus-Cyrl-Latn-2017-oss.imp +1 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.imp +1 -1
- data/maps/bgn-kor-Hang-Latn-1943.imp +1 -1
- data/maps/bgn-kor-Kore-Latn-1943.imp +1 -1
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.imp +1 -1
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.imp +4 -1
- data/maps/bgnpcgn-che-Cyrl-Latn-2008.imp +2 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.imp +1 -1
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.imp +2 -2
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.imp +2 -3
- data/maps/bis-asm-Beng-Latn-13194-1991.imp +1 -1
- data/maps/bis-ben-Beng-Latn-13194-1991.imp +1 -1
- data/maps/bis-dev-Deva-Latn-13194-1991.imp +1 -1
- data/maps/bis-guj-Gujr-Latn-13194-1991.imp +1 -1
- data/maps/bis-kan-Kana-Latn-13194-1991.imp +1 -1
- data/maps/bis-mlm-Mlym-Latn-13194-1991.imp +1 -1
- data/maps/bis-ori-Orya-Latn-13194-1991.imp +1 -1
- data/maps/bis-pnj-Guru-Latn-13194-1991.imp +1 -1
- data/maps/bis-tel-Telu-Latn-13194-1991.imp +1 -1
- data/maps/bis-tml-Taml-Latn-13194-1991.imp +1 -1
- data/maps/din-hin-Deva-Latn-33904-2018.imp +1 -1
- data/maps/din-kat-Geor-Latn-32707-2010.imp +12 -12
- data/maps/din-mar-Deva-Latn-33904-2018.imp +1 -1
- data/maps/din-nep-Deva-Latn-33904-2018.imp +1 -1
- data/maps/din-pli-Deva-Latn-33904-2018.imp +1 -1
- data/maps/din-pra-Deva-Latn-33904-2018.imp +1 -1
- data/maps/din-san-Deva-Latn-33904-2018.imp +1 -1
- data/maps/elot-ell-Grek-Latn-743-1982-tl.imp +1 -2
- data/maps/elot-ell-Grek-Latn-743-1982-ts.imp +1 -1
- data/maps/elot-ell-Grek-Latn-743-2001-tl.imp +1 -1
- data/maps/elot-ell-Grek-Latn-743-2001-ts.imp +1 -1
- data/maps/iso-ara-Arab-Latn-233-1984.imp +1 -1
- data/maps/iso-ara-Arab-Latn-233-2-1993.imp +1 -1
- data/maps/iso-asm-Beng-Latn-15919-2001.imp +1 -1
- data/maps/iso-ben-Beng-Latn-15919-2001.imp +1 -1
- data/maps/iso-ell-Grek-Latn-843-1997-t1.imp +1 -1
- data/maps/iso-ell-Grek-Latn-843-1997-t2.imp +1 -1
- data/maps/iso-guj-Gujr-Latn-15919-2001.imp +1 -1
- data/maps/iso-hin-Deva-Latn-15919-2001.imp +1 -1
- data/maps/iso-inc-Deva-Latn-15919-2001.imp +1 -1
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.imp +1 -1
- data/maps/iso-kan-Kana-Latn-15919-2001.imp +1 -1
- data/maps/iso-kat-Geor-Latn-9984-1996.imp +12 -12
- data/maps/iso-kor-Hang-Latn-1996-method1.imp +1 -1
- data/maps/iso-kor-Hang-Latn-1996-method2.imp +1 -1
- data/maps/iso-mal-Mlym-Latn-15919-2001.imp +2 -2
- data/maps/iso-mar-Deva-Latn-15919-2001.imp +1 -1
- data/maps/iso-nep-Deva-Latn-15919-2001.imp +1 -1
- data/maps/iso-ori-Orya-Latn-15919-2001.imp +1 -1
- data/maps/iso-pan-Guru-Latn-15919-2001.imp +1 -1
- data/maps/iso-pli-Beng-Latn-15919-2001.imp +1 -1
- data/maps/iso-pli-Deva-Latn-15919-2001.imp +1 -1
- data/maps/iso-pli-Sinh-Latn-15919-2001.imp +1 -1
- data/maps/iso-pli-Thai-Latn-15919-2001.imp +1 -1
- data/maps/iso-pra-Deva-Latn-15919-2001.imp +1 -1
- data/maps/iso-prs-Arab-Latn-233-3-1999.imp +2 -2
- data/maps/iso-rus-Cyrl-Latn-9-1995.imp +1 -1
- data/maps/iso-san-Deva-Latn-15919-2001.imp +1 -1
- data/maps/iso-tam-Taml-Latn-15919-2001.imp +1 -1
- data/maps/iso-tel-Telu-Latn-15919-2001.imp +1 -1
- data/maps/odni-ara-Arab-Latn-2004.imp +0 -1
- data/maps/odni-ara-Arab-Latn-2015.imp +2 -2
- data/maps/odni-aze-Cyrl-Latn-2015.imp +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.imp +2 -4
- data/maps/odni-bul-Cyrl-Latn-2015.imp +1 -3
- data/maps/odni-che-Cyrl-Latn-2015.imp +1 -2
- data/maps/odni-fas-Arab-Latn-2004.imp +0 -1
- data/maps/odni-fas-Arab-Latn-2015.imp +3 -3
- data/maps/odni-hin-Deva-Latn-2004.imp +0 -1
- data/maps/odni-hin-Deva-Latn-2015.imp +2 -2
- data/maps/odni-kat-Geor-Latn-2015.imp +2 -3
- data/maps/odni-kaz-Cyrl-Latn-2015.imp +1 -1
- data/maps/odni-kir-Cyrl-Latn-2015.imp +1 -1
- data/maps/odni-kor-Hang-Latn-2015.imp +2 -2
- data/maps/odni-mkd-Cyrl-Latn-2015.imp +1 -1
- data/maps/odni-prs-Arab-Latn-2004.imp +0 -1
- data/maps/odni-prs-Arab-Latn-2015.imp +2 -2
- data/maps/odni-pus-Arab-Latn-2011.imp +1 -2
- data/maps/odni-rus-Cyrl-Latn-2015.imp +1 -3
- data/maps/odni-srp-Cyrl-Latn-2005.imp +0 -1
- data/maps/odni-srp-Cyrl-Latn-2015.imp +1 -3
- data/maps/odni-tat-Cyrl-Latn-2015.imp +1 -1
- data/maps/odni-tgk-Cyrl-Latn-2015.imp +1 -1
- data/maps/odni-tuk-Cyrl-Latn-2015.imp +1 -2
- data/maps/odni-uig-Cyrl-Latn-2015.imp +1 -1
- data/maps/odni-ukr-Cyrl-Latn-2015.imp +2 -4
- data/maps/odni-urd-Arab-Latn-2015.imp +2 -3
- data/maps/odni-uzb-Cyrl-Latn-2015.imp +1 -3
- data/maps/ses-ara-Arab-Latn-1930.imp +3 -46
- data/maps/un-ara-Arab-Latn-1971.imp +1 -1
- data/maps/un-ara-Arab-Latn-1972.imp +1 -1
- data/maps/un-ara-Arab-Latn-2017.imp +2 -2
- data/maps/un-hin-Deva-Latn-2016.imp +1 -1
- data/maps/un-kan-Kana-Latn-2016.imp +1 -1
- data/maps/un-mar-Deva-Latn-2016.imp +1 -1
- data/maps/un-nep-Deva-Latn-1972.imp +1 -1
- data/maps/un-nep-Deva-Latn-2013.imp +1 -1
- data/maps/un-ori-Orya-Latn-1972.imp +1 -1
- data/maps/un-rus-Cyrl-Latn-1987.imp +1 -1
- data/maps/un-sin-Sinh-Latn-1972.imp +15 -3
- data/maps/un-urd-Arab-Latn-1972.imp +1 -1
- data/maps/var-ara-Arab-Arab-rababa.imp +25 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.imp +1 -1
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.imp +1 -1
- data/maps/var-kor-Hang-Latn-mr-1939.imp +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.imp +1 -1
- data/maps/var-mar-Deva-Latn-hunterian-1872.imp +0 -1
- data/maps/var-mon-Mong-Latn-1930.imp +1 -1
- data/maps/var-mon-Mong-Latn-lessing.imp +2 -2
- data/maps/var-mon-Mong-Latn-vpmc.imp +2 -2
- data/maps/var-pra-Deva-Latn-iast-1912.imp +1 -1
- data/maps/var-san-Deva-Latn-iast-1912.imp +1 -1
- metadata +5 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea020ba190accdefcf21f2d09e0ffd3b0c67561f36df93961f692152a356547f
|
4
|
+
data.tar.gz: 17fb0c0f6ef53c620f261caa215c5b20c1401e98949b9f191208d2df81c17ba5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5a546886594fedd2bfccf0d6aa91e768959439b0989cba8e2f0d4a4f3f494aaac79020e9c15d1cba64973fcfb90f05c5cf0d09697f530027833703c64dad282
|
7
|
+
data.tar.gz: 3d1eed5cff23dc6ad83ca6620f5c7c86faef60616a927c549d6f49bc2bd4e4437c78be9aeb4a9a0eaefbb4bc7a87090605c6f425ca38c31a4949403716fe2bf6
|
data/interscript-maps.gemspec
CHANGED
data/interscript-maps.yaml
CHANGED
@@ -16,6 +16,36 @@ staging:
|
|
16
16
|
secryst-models:
|
17
17
|
- https://raw.githubusercontent.com/secryst/data-thai-interscript/master/index.yaml
|
18
18
|
|
19
|
+
# Here we describe Rababa configs for the maps that are required
|
20
|
+
rababa-configs:
|
21
|
+
"200":
|
22
|
+
model: https://github.com/secryst/rababa-models/releases/download/0.1/diacritization_model_max_len_200.onnx
|
23
|
+
config:
|
24
|
+
session_name: base
|
25
|
+
text_encoder: ArabicEncoderWithStartSymbol
|
26
|
+
text_cleaner: valid_arabic_cleaners
|
27
|
+
max_len: 200
|
28
|
+
batch_size: 32
|
29
|
+
|
30
|
+
# This instruction denotes that the following maps are incompatible with certain
|
31
|
+
# compilers and platforms and shouldn't be tested or compiled in.
|
32
|
+
skip:
|
33
|
+
Interscript::Compiler::Javascript:
|
34
|
+
- var-ara-Arab-Arab-rababa
|
35
|
+
# It works for detection, but takes too much time and doesn't make sense for
|
36
|
+
# the main use case of Detector
|
37
|
+
Interscript::Detector:
|
38
|
+
- var-ara-Arab-Arab-rababa
|
39
|
+
# Windows currently fails tests for rababa
|
40
|
+
mswin64:
|
41
|
+
- var-ara-Arab-Arab-rababa
|
42
|
+
mswin32:
|
43
|
+
- var-ara-Arab-Arab-rababa
|
44
|
+
mingw32:
|
45
|
+
- var-ara-Arab-Arab-rababa
|
46
|
+
cygwin:
|
47
|
+
- var-ara-Arab-Arab-rababa
|
48
|
+
|
19
49
|
# This part registers aliases. Those were previously a part of maps themselves,
|
20
50
|
# moving this here saves us from a burden of having to load all the maps to
|
21
51
|
# locate one that we are trying to access.
|
@@ -0,0 +1,74 @@
|
|
1
|
+
metadata {
|
2
|
+
authority_id: alalc
|
3
|
+
id: 2012
|
4
|
+
language: iso-639-2:div
|
5
|
+
source_script: Thaa
|
6
|
+
destination_script: Latn
|
7
|
+
name: Romanization Table -- Divehi (2012)
|
8
|
+
url: https://www.loc.gov/catdir/cpso/romanization/divehi.pdf
|
9
|
+
creation_date: 2012
|
10
|
+
description: |
|
11
|
+
ALA-Library of Congress Divehi Romanization 2012 System
|
12
|
+
|
13
|
+
notes:
|
14
|
+
|
15
|
+
- |
|
16
|
+
Romanize ށް as ḫ when it doubles the following consonant or is used as a glottal stop.
|
17
|
+
aḫvana އަށްވަނަ
|
18
|
+
maśaḫ މަށަށް
|
19
|
+
- |
|
20
|
+
When used in medial position without ް (sukūn), romanize ނ as ṁ.
|
21
|
+
aṁga އަނގަ
|
22
|
+
haṁdu ހަނދު
|
23
|
+
- |
|
24
|
+
Romanization of އ.
|
25
|
+
(a) When used in the initial position with any vowel sign, do not romanize.
|
26
|
+
ata އަތަ
|
27
|
+
idu އިދު
|
28
|
+
umuru އުމުރު
|
29
|
+
egahugi އެގަހުގި
|
30
|
+
(b) When used in the medial position with any vowel sign, romanize as ’.
|
31
|
+
ha’hūnu ހައިހޫނު
|
32
|
+
fa’isa ފައިސަ
|
33
|
+
k’īn ކްއީން
|
34
|
+
(c) When a consonant follows އް in medial position, double it in romanization.
|
35
|
+
cappalu ޗައްޕަލު
|
36
|
+
appacci އައްޕައްޗި
|
37
|
+
(d) When used in final position with ް (sukūn), romanize as h.
|
38
|
+
boh ބޮއް
|
39
|
+
biheh ބިހެއް
|
40
|
+
- |
|
41
|
+
Romanize ތް followed by another ތ as t̤ .
|
42
|
+
at̤teri އަތްތެރި
|
43
|
+
- |
|
44
|
+
Only the vowel forms that appear at the beginning of a syllable are listed.
|
45
|
+
When the vowels follow a consonant, އ is not used and the vowel signs are added to the consonant forms.
|
46
|
+
Do not distinguish between the two in romanization.
|
47
|
+
- |
|
48
|
+
ް (called sukūn) generally indicates omission of an inherent vowel associated with a consonant.
|
49
|
+
For its other uses, see Notes 1, 3, and 4.
|
50
|
+
}
|
51
|
+
|
52
|
+
tests {
|
53
|
+
test "މަށަށް", "maśaḫ"
|
54
|
+
test "އަނގަ", "aṁga"
|
55
|
+
test "ހަނދު", "haṁdu"
|
56
|
+
test "އަތަ", "ata"
|
57
|
+
test "އިދު", "idu"
|
58
|
+
test "އުމުރު", "umuru"
|
59
|
+
test "އެގަހުގި", "egahugi"
|
60
|
+
test "ފައިސަ", "faʼisa"
|
61
|
+
test "ބޮއް", "boh"
|
62
|
+
test "ބިހެއް", "biheh"
|
63
|
+
test "އަތްތެރި", "at̤teri"
|
64
|
+
test "ޗައްޕަލު", "cappalu"
|
65
|
+
test "އައްޕައްޗި", "appacci"
|
66
|
+
}
|
67
|
+
|
68
|
+
dependency "alalc-div-Thaa-Latn-1997", as: thaalatn
|
69
|
+
|
70
|
+
stage {
|
71
|
+
|
72
|
+
run map.thaalatn.stage.main
|
73
|
+
|
74
|
+
}
|
@@ -5,12 +5,11 @@ metadata {
|
|
5
5
|
source_script: Geok
|
6
6
|
destination_script: Latn
|
7
7
|
name: Romanization Table -- Georgian Khutsuri (1997)
|
8
|
-
url:
|
8
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/georgian.pdf
|
9
9
|
creation_date: 1997
|
10
10
|
confirmation_date: 1997
|
11
11
|
description: |
|
12
12
|
Values are shown for the Khutsuri alphabet.
|
13
|
-
|
14
13
|
notes:
|
15
14
|
}
|
16
15
|
|
@@ -0,0 +1,31 @@
|
|
1
|
+
metadata {
|
2
|
+
authority_id: alalc
|
3
|
+
id: 2011
|
4
|
+
language: iso-639-2:kat
|
5
|
+
source_script: Geok
|
6
|
+
destination_script: Latn
|
7
|
+
name: Romanization Table -- Georgian Khutsuri (2011)
|
8
|
+
url: https://www.loc.gov/catdir/cpso/romanization/georgian.pdf
|
9
|
+
creation_date: 2011
|
10
|
+
confirmation_date: 2011
|
11
|
+
description: |
|
12
|
+
Values are shown for the Khutsuri alphabet.
|
13
|
+
notes:
|
14
|
+
}
|
15
|
+
|
16
|
+
tests {
|
17
|
+
test "ႼႨႢႬႨ", "CIGNI"
|
18
|
+
test "ⴜⴈⴂⴌⴈ", "cigni"
|
19
|
+
test "ႱႭႪႭႫႭႬ", "SOLOMON"
|
20
|
+
test "ⴑⴍⴊⴍⴋⴍⴌ", "solomon"
|
21
|
+
test "ႠႡႰႠჀႠႫ", "ABRAHAM"
|
22
|
+
}
|
23
|
+
|
24
|
+
|
25
|
+
dependency "alalc-kat-Geok-Latn-1997", as: geoklatn
|
26
|
+
|
27
|
+
stage {
|
28
|
+
|
29
|
+
run map.geoklatn.stage.main
|
30
|
+
|
31
|
+
}
|
@@ -5,13 +5,12 @@ metadata {
|
|
5
5
|
source_script: Geor
|
6
6
|
destination_script: Latn
|
7
7
|
name: Romanization Table -- Georgian Mkhedruli (1997)
|
8
|
-
url:
|
8
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/georgian.pdf
|
9
9
|
creation_date: 1997
|
10
10
|
confirmation_date: 1997
|
11
11
|
description: |
|
12
12
|
Values are shown for the older Khutsuri and the modern Mkhedruli alphabets.
|
13
13
|
There are no upper case letters in Mkhedruli.
|
14
|
-
|
15
14
|
notes:
|
16
15
|
}
|
17
16
|
|
@@ -0,0 +1,52 @@
|
|
1
|
+
metadata {
|
2
|
+
authority_id: alalc
|
3
|
+
id: 2011
|
4
|
+
language: iso-639-2:kat
|
5
|
+
source_script: Geor
|
6
|
+
destination_script: Latn
|
7
|
+
name: Romanization Table -- Georgian Mkhedruli (2011)
|
8
|
+
url: https://www.loc.gov/catdir/cpso/romanization/georgian.pdf
|
9
|
+
creation_date: 2011
|
10
|
+
confirmation_date: 2011
|
11
|
+
description: |
|
12
|
+
Values are shown for the older Khutsuri and the modern Mkhedruli alphabets.
|
13
|
+
There are no upper case letters in Mkhedruli.
|
14
|
+
notes:
|
15
|
+
}
|
16
|
+
|
17
|
+
tests {
|
18
|
+
test "ხაოფსე", "xaopʻse"
|
19
|
+
test "ჭლოუ", "člou"
|
20
|
+
test "ჩოხულდი", "čʻoxuldi"
|
21
|
+
test "ქვემო ლინდა", "kʻvemo linda"
|
22
|
+
test "ტამკვაჩ იგვავერა", "tamkvačʻ igvavera"
|
23
|
+
test "სვანეთი", "svanetʻi"
|
24
|
+
test "საცხვარისი", "sacʻxvarisi"
|
25
|
+
test "მუხრან-თელეთი", "muxran-tʻeletʻi"
|
26
|
+
test "მუცდი", "mucʻdi"
|
27
|
+
test "ლეჩხუმი", "lečʻxumi"
|
28
|
+
test "ვერხნაია მწარა", "verxnaia mcara"
|
29
|
+
test "ეგრისის ქედი", "egrisis kʻedi"
|
30
|
+
test "დოჩარიფშა", "dočʻaripʻša"
|
31
|
+
test "ბოლოკო", "boloko"
|
32
|
+
test "აჭანდარა", "ačandara"
|
33
|
+
test "აუალიცა", "aualicʻa"
|
34
|
+
test "აკალამრა", "akalamra"
|
35
|
+
test "ლასილი", "lasili"
|
36
|
+
test "გუბაზეული", "gubazeuli"
|
37
|
+
test "ბაყაყი", "baqaqi"
|
38
|
+
test "ძროხა", "żroxa"
|
39
|
+
test "ჰაერი", "haeri"
|
40
|
+
test "ჟოლო", "žolo"
|
41
|
+
test "ჯართი", "jartʻi"
|
42
|
+
test "ღრმაღელე", "ġrmaġele"
|
43
|
+
}
|
44
|
+
|
45
|
+
dependency "alalc-kat-Geor-Latn-1997", as: georlatn
|
46
|
+
|
47
|
+
stage {
|
48
|
+
|
49
|
+
run map.georlatn.stage.main
|
50
|
+
|
51
|
+
}
|
52
|
+
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:mal
|
5
5
|
source_script: Mlym
|
6
6
|
destination_script: Latn
|
7
|
-
name: Malayalam
|
7
|
+
name: Romanization Table -- Malayalam (2012)
|
8
8
|
url: https://www.loc.gov/catdir/cpso/romanization/malayalam.pdf
|
9
9
|
creation_date: 2012
|
10
10
|
description: |
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:rus
|
5
5
|
source_script: Cyrl
|
6
6
|
destination_script: Latn
|
7
|
-
name: Streamlined Romanization of Russian Cyrillic
|
7
|
+
name: Streamlined Romanization of Russian Cyrillic (Basic Streamlined System)
|
8
8
|
url: https://www.researchgate.net/publication/318402098
|
9
9
|
creation_date: 2017-07
|
10
10
|
description: |
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:rus
|
5
5
|
source_script: Cyrl
|
6
6
|
destination_script: Latn
|
7
|
-
name: Streamlined Romanization of Russian Cyrillic
|
7
|
+
name: Streamlined Romanization of Russian Cyrillic (Optimized Streamlined System)
|
8
8
|
url: https://www.researchgate.net/publication/318402098
|
9
9
|
creation_date: 2017-07
|
10
10
|
description: |
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:kor
|
5
5
|
source_script: Hang
|
6
6
|
destination_script: Latn
|
7
|
-
name:
|
7
|
+
name: Korean McCune-Reischauer (Hangul to Latin) (1943 Agreement)
|
8
8
|
url:
|
9
9
|
creation_date: 1943
|
10
10
|
adoption_date:
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:kor
|
5
5
|
source_script: Kore
|
6
6
|
destination_script: Latn
|
7
|
-
name:
|
7
|
+
name: Korean McCune-Reischauer (Korean to Latin) (1943 Agreement)
|
8
8
|
url:
|
9
9
|
creation_date: 1943
|
10
10
|
adoption_date:
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:rus
|
5
5
|
source_script: Cyrl
|
6
6
|
destination_script: Latn
|
7
|
-
name:
|
7
|
+
name: Table of Correspondences for Bashkir (Cyrillic-Roman) (2007 Agreement)
|
8
8
|
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
|
9
9
|
creation_date: 2007
|
10
10
|
confirmation_date: 2019
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:bel
|
5
5
|
source_script: Cyrl
|
6
6
|
destination_script: Latn
|
7
|
-
name:
|
7
|
+
name: Romanization of Belarusian (1979)
|
8
8
|
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811510/ROMANIZATION_OF_BELARUSIAN.pdf
|
9
9
|
creation_date: 1979
|
10
10
|
description: |
|
@@ -12,6 +12,9 @@ metadata {
|
|
12
12
|
romanizing names written in the Belarusian Cyrillic alphabet. The Belarusian alphabet contains three
|
13
13
|
characters not present in the Russian alphabet: і, ў, and ’.
|
14
14
|
|
15
|
+
United States Board on Geographic Names Foreign Names Committee Staff, 1994.
|
16
|
+
Romanization Systems and Roman-Script Spelling Conventions, p. 23.
|
17
|
+
|
15
18
|
notes:
|
16
19
|
- The character sequences зг, кг, сг, тс and цг may be romanized z·h, k·h, s·h, t·s and ts·h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
|
17
20
|
- All apostrophes appearing in romanization are Unicode encoding 2019.
|
@@ -4,7 +4,8 @@ metadata {
|
|
4
4
|
language: iso-639-2:che
|
5
5
|
source_script: Cyrl
|
6
6
|
destination_script: Latn
|
7
|
-
name:
|
7
|
+
name: Table of Correspondences for Chechen (Cyrillic-Roman) (2008 Agreement)
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/835782/TABLE_OF_CORRESPONDENCES_FOR_CHECHEN.pdf
|
8
9
|
creation_date: 2008
|
9
10
|
confirmation_date: 2019-07
|
10
11
|
description: |
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:kor
|
5
5
|
source_script: Hang
|
6
6
|
destination_script: Latn
|
7
|
-
name: Romanization of Korean
|
7
|
+
name: Romanization of Korean for DPRK (Hangul-Latin) (1945 Agreement)
|
8
8
|
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
|
9
9
|
creation_date: 1945
|
10
10
|
adoption_date:
|
@@ -4,14 +4,14 @@ metadata {
|
|
4
4
|
language: iso-639-2:kor
|
5
5
|
source_script: Hang
|
6
6
|
destination_script: Latn
|
7
|
-
name: Romanization of Korean
|
7
|
+
name: Romanization of Korean for ROK (Hangul-Latin) (2011 Agreement, MOCT System 2000)
|
8
8
|
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693724/ROMANIZATION_OF_KOREAN-__MOCT_for_ROK.pdf
|
9
9
|
creation_date: 2011
|
10
10
|
adoption_date: 2011
|
11
11
|
description:
|
12
12
|
|
13
13
|
notes:
|
14
|
-
|
14
|
+
- BGN/PCGN 2011 Agreement
|
15
15
|
}
|
16
16
|
|
17
17
|
tests {
|
@@ -4,9 +4,8 @@ metadata {
|
|
4
4
|
language: iso-639-2:kor
|
5
5
|
source_script: Kore
|
6
6
|
destination_script: Latn
|
7
|
-
name: Romanization of Korean
|
7
|
+
name: Romanization of Korean for ROK (Korean-Latin) (2011 Agreement, MOCT System 2000)
|
8
8
|
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693724/ROMANIZATION_OF_KOREAN-__MOCT_for_ROK.pdf
|
9
|
-
url: ""
|
10
9
|
creation_date: 2011
|
11
10
|
adoption_date: 2011
|
12
11
|
description: |
|
@@ -14,7 +13,7 @@ metadata {
|
|
14
13
|
Intended to provide a means for international communication of written documents.
|
15
14
|
|
16
15
|
notes:
|
17
|
-
|
16
|
+
- BGN/PCGN 2011 Agreement
|
18
17
|
}
|
19
18
|
|
20
19
|
tests {
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:asm
|
5
5
|
source_script: Beng
|
6
6
|
destination_script: Latn
|
7
|
-
name: Indian script code for information interchange
|
7
|
+
name: Indian script code for information interchange (ISCII) -- Assamese Romanization
|
8
8
|
|
9
9
|
#url:
|
10
10
|
creation_date: 1991
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:ben
|
5
5
|
source_script: Beng
|
6
6
|
destination_script: Latn
|
7
|
-
name: Indian script code for information interchange
|
7
|
+
name: Indian script code for information interchange (ISCII) -- Bengali Romanization
|
8
8
|
|
9
9
|
#url:
|
10
10
|
creation_date: 1991
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:dev
|
5
5
|
source_script: Deva
|
6
6
|
destination_script: Latn
|
7
|
-
name: Indian script code for information interchange
|
7
|
+
name: Indian script code for information interchange (ISCII) -- Devanagari Romanization
|
8
8
|
|
9
9
|
#url:
|
10
10
|
creation_date: 1991
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:guj
|
5
5
|
source_script: Gujr
|
6
6
|
destination_script: Latn
|
7
|
-
name: Indian script code for information interchange
|
7
|
+
name: Indian script code for information interchange (ISCII) -- Gujarati Romanization
|
8
8
|
|
9
9
|
#url:
|
10
10
|
creation_date: 1991
|
@@ -4,7 +4,7 @@ metadata {
|
|
4
4
|
language: iso-639-2:kan
|
5
5
|
source_script: Kana
|
6
6
|
destination_script: Latn
|
7
|
-
name: Indian script code for information interchange
|
7
|
+
name: Indian script code for information interchange (ISCII) -- Kannada Romanization
|
8
8
|
|
9
9
|
#url:
|
10
10
|
creation_date: 1991
|