interscript 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +246 -14
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +140 -16
- data/lib/interscript/command.rb +27 -0
- data/lib/interscript/mapping.rb +125 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
- data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +20 -5
- data/spec/spec_helper.rb +3 -1
- metadata +149 -24
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 1997
|
|
4
|
+
language: mkd
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Serbian and Macedonian Romanization, ALA-LC 1997 System
|
|
8
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/serbian.pdf
|
|
9
|
+
creation_date: 1997
|
|
10
|
+
description: ALA-LC Romanization table for Serbian and Macedonian.
|
|
11
|
+
|
|
12
|
+
notes:
|
|
13
|
+
- Special characters in romanization
|
|
14
|
+
Đ - D with crossbar (upper case). USMARC hexadecimal code A3.
|
|
15
|
+
đ - d with crossbar (lower case). USMARC hexadecimal code B3.
|
|
16
|
+
|
|
17
|
+
- Character modifiers in romanization
|
|
18
|
+
´ - acute. USMARC hexadecimal code E2.
|
|
19
|
+
ˇ - hachek. USMARC hexadecimal code E9.
|
|
20
|
+
|
|
21
|
+
tests:
|
|
22
|
+
- source: Општина Ердут
|
|
23
|
+
expected: Opština Erdut
|
|
24
|
+
- source: Општина Двор
|
|
25
|
+
expected: Opština Dvor
|
|
26
|
+
- source: ЛУЃЕ луѓе
|
|
27
|
+
expected: LUǴE luǵe
|
|
28
|
+
- source: ЅВЕЗДА ѕвезда Ѕвезда
|
|
29
|
+
expected: DZVEZDA dzvezda Dzvezda
|
|
30
|
+
- source: ЌАРУВАЊЕ ќарување
|
|
31
|
+
expected: ḰARUVANJE ḱaruvanje
|
|
32
|
+
|
|
33
|
+
map:
|
|
34
|
+
postrules:
|
|
35
|
+
# DZ
|
|
36
|
+
- pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
|
|
37
|
+
result: "DZ"
|
|
38
|
+
#LJ
|
|
39
|
+
- pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
|
|
40
|
+
result: "LJ"
|
|
41
|
+
#NJ
|
|
42
|
+
- pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
|
|
43
|
+
result: "NJ"
|
|
44
|
+
#DŽ
|
|
45
|
+
- pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
|
|
46
|
+
result: "DŽ"
|
|
47
|
+
|
|
48
|
+
characters:
|
|
49
|
+
"\u0410": "A"
|
|
50
|
+
"\u0411": "B"
|
|
51
|
+
"\u0412": "V"
|
|
52
|
+
"\u0413": "G"
|
|
53
|
+
"\u0403": "\u01F4" # Ǵ
|
|
54
|
+
"\u0414": "D"
|
|
55
|
+
"\u0402": "\u0110" # Đ
|
|
56
|
+
"\u0415": "E"
|
|
57
|
+
"\u0416": "\u005a\u030c" # Ž
|
|
58
|
+
"\u0417": "Z"
|
|
59
|
+
"\u0405": "Dz"
|
|
60
|
+
"\u0418": "I"
|
|
61
|
+
"\u0408": "J"
|
|
62
|
+
"\u041A": "K"
|
|
63
|
+
"\u040C": "\u1E30" # Ḱ
|
|
64
|
+
"\u041B": "L"
|
|
65
|
+
"\u0409": "Lj"
|
|
66
|
+
"\u041C": "M"
|
|
67
|
+
"\u041D": "N"
|
|
68
|
+
"\u040A": "Nj"
|
|
69
|
+
"\u041E": "O"
|
|
70
|
+
"\u041F": "P"
|
|
71
|
+
"\u0420": "R"
|
|
72
|
+
"\u0421": "S"
|
|
73
|
+
"\u0422": "T"
|
|
74
|
+
"\u040B": "\u0043\u0301" # Ć
|
|
75
|
+
"\u0423": "U"
|
|
76
|
+
"\u0424": "F"
|
|
77
|
+
"\u0425": "H"
|
|
78
|
+
"\u0426": "C"
|
|
79
|
+
"\u0427": "\u0043\u030c" # Č
|
|
80
|
+
"\u040F": "D\u007a\u030c" # Dž
|
|
81
|
+
"\u0428": "\u0053\u030c" # Š
|
|
82
|
+
"\u0430": "a"
|
|
83
|
+
"\u0431": "b"
|
|
84
|
+
"\u0432": "v"
|
|
85
|
+
"\u0433": "g"
|
|
86
|
+
"\u0453": "\u01F5" # ǵ
|
|
87
|
+
"\u0434": "d"
|
|
88
|
+
"\u0452": "\u0111" # đ
|
|
89
|
+
"\u0435": "e"
|
|
90
|
+
"\u0436": "\u007a\u030c" # ž
|
|
91
|
+
"\u0437": "z"
|
|
92
|
+
"\u0455": "dz"
|
|
93
|
+
"\u0438": "i"
|
|
94
|
+
"\u0458": "j"
|
|
95
|
+
"\u043A": "k"
|
|
96
|
+
"\u045C": "\u1E31" # ḱ
|
|
97
|
+
"\u043B": "l"
|
|
98
|
+
"\u0459": "lj"
|
|
99
|
+
"\u043C": "m"
|
|
100
|
+
"\u043D": "n"
|
|
101
|
+
"\u045A": "nj"
|
|
102
|
+
"\u043E": "o"
|
|
103
|
+
"\u043F": "p"
|
|
104
|
+
"\u0440": "r"
|
|
105
|
+
"\u0441": "s"
|
|
106
|
+
"\u0442": "t"
|
|
107
|
+
"\u045B": "\u0063\u0301" # ć
|
|
108
|
+
"\u0443": "u"
|
|
109
|
+
"\u0444": "f"
|
|
110
|
+
"\u0445": "h"
|
|
111
|
+
"\u0446": "c"
|
|
112
|
+
"\u0447": "\u0063\u030c" # č
|
|
113
|
+
"\u045F": "d\u007a\u030c" # dž
|
|
114
|
+
"\u0448": "\u0073\u030c" # š
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 1997
|
|
4
|
+
language: srp
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Serbian and Macedonian Romanization, ALA-LC 1997 System
|
|
8
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/serbian.pdf
|
|
9
|
+
creation_date: 1997
|
|
10
|
+
description: ALA-LC Romanization table for Serbian and Macedonian.
|
|
11
|
+
|
|
12
|
+
notes:
|
|
13
|
+
- Special characters in romanization
|
|
14
|
+
Đ - D with crossbar (upper case). USMARC hexadecimal code A3.
|
|
15
|
+
đ - d with crossbar (lower case). USMARC hexadecimal code B3.
|
|
16
|
+
|
|
17
|
+
- Character modifiers in romanization
|
|
18
|
+
´ - acute. USMARC hexadecimal code E2.
|
|
19
|
+
ˇ - hachek. USMARC hexadecimal code E9.
|
|
20
|
+
|
|
21
|
+
tests:
|
|
22
|
+
- source: Општина Ердут
|
|
23
|
+
expected: Opština Erdut
|
|
24
|
+
- source: Општина Двор
|
|
25
|
+
expected: Opština Dvor
|
|
26
|
+
- source: ЛУЃЕ луѓе
|
|
27
|
+
expected: LUǴE luǵe
|
|
28
|
+
- source: ЅВЕЗДА ѕвезда Ѕвезда
|
|
29
|
+
expected: DZVEZDA dzvezda Dzvezda
|
|
30
|
+
- source: ЌАРУВАЊЕ ќарување
|
|
31
|
+
expected: ḰARUVANJE ḱaruvanje
|
|
32
|
+
|
|
33
|
+
map:
|
|
34
|
+
postrules:
|
|
35
|
+
# DZ
|
|
36
|
+
- pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
|
|
37
|
+
result: "DZ"
|
|
38
|
+
#LJ
|
|
39
|
+
- pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
|
|
40
|
+
result: "LJ"
|
|
41
|
+
#NJ
|
|
42
|
+
- pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
|
|
43
|
+
result: "NJ"
|
|
44
|
+
#DŽ
|
|
45
|
+
- pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
|
|
46
|
+
result: "DŽ"
|
|
47
|
+
|
|
48
|
+
characters:
|
|
49
|
+
"\u0410": "A"
|
|
50
|
+
"\u0411": "B"
|
|
51
|
+
"\u0412": "V"
|
|
52
|
+
"\u0413": "G"
|
|
53
|
+
"\u0403": "\u01F4" # Ǵ
|
|
54
|
+
"\u0414": "D"
|
|
55
|
+
"\u0402": "\u0110" # Đ
|
|
56
|
+
"\u0415": "E"
|
|
57
|
+
"\u0416": "\u005a\u030c" # Ž
|
|
58
|
+
"\u0417": "Z"
|
|
59
|
+
"\u0405": "Dz"
|
|
60
|
+
"\u0418": "I"
|
|
61
|
+
"\u0408": "J"
|
|
62
|
+
"\u041A": "K"
|
|
63
|
+
"\u040C": "\u1E30" # Ḱ
|
|
64
|
+
"\u041B": "L"
|
|
65
|
+
"\u0409": "Lj"
|
|
66
|
+
"\u041C": "M"
|
|
67
|
+
"\u041D": "N"
|
|
68
|
+
"\u040A": "Nj"
|
|
69
|
+
"\u041E": "O"
|
|
70
|
+
"\u041F": "P"
|
|
71
|
+
"\u0420": "R"
|
|
72
|
+
"\u0421": "S"
|
|
73
|
+
"\u0422": "T"
|
|
74
|
+
"\u040B": "\u0043\u0301" # Ć
|
|
75
|
+
"\u0423": "U"
|
|
76
|
+
"\u0424": "F"
|
|
77
|
+
"\u0425": "H"
|
|
78
|
+
"\u0426": "C"
|
|
79
|
+
"\u0427": "\u0043\u030c" # Č
|
|
80
|
+
"\u040F": "D\u007a\u030c" # Dž
|
|
81
|
+
"\u0428": "\u0053\u030c" # Š
|
|
82
|
+
"\u0430": "a"
|
|
83
|
+
"\u0431": "b"
|
|
84
|
+
"\u0432": "v"
|
|
85
|
+
"\u0433": "g"
|
|
86
|
+
"\u0453": "\u01F5" # ǵ
|
|
87
|
+
"\u0434": "d"
|
|
88
|
+
"\u0452": "\u0111" # đ
|
|
89
|
+
"\u0435": "e"
|
|
90
|
+
"\u0436": "\u007a\u030c" # ž
|
|
91
|
+
"\u0437": "z"
|
|
92
|
+
"\u0455": "dz"
|
|
93
|
+
"\u0438": "i"
|
|
94
|
+
"\u0458": "j"
|
|
95
|
+
"\u043A": "k"
|
|
96
|
+
"\u045C": "\u1E31" # ḱ
|
|
97
|
+
"\u043B": "l"
|
|
98
|
+
"\u0459": "lj"
|
|
99
|
+
"\u043C": "m"
|
|
100
|
+
"\u043D": "n"
|
|
101
|
+
"\u045A": "nj"
|
|
102
|
+
"\u043E": "o"
|
|
103
|
+
"\u043F": "p"
|
|
104
|
+
"\u0440": "r"
|
|
105
|
+
"\u0441": "s"
|
|
106
|
+
"\u0442": "t"
|
|
107
|
+
"\u045B": "\u0063\u0301" # ć
|
|
108
|
+
"\u0443": "u"
|
|
109
|
+
"\u0444": "f"
|
|
110
|
+
"\u0445": "h"
|
|
111
|
+
"\u0446": "c"
|
|
112
|
+
"\u0447": "\u0063\u030c" # č
|
|
113
|
+
"\u045F": "d\u007a\u030c" # dž
|
|
114
|
+
"\u0448": "\u0073\u030c" # š
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 2013
|
|
4
|
+
language: srp
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: ROMANIZATION OF SERBIAN, ALA-LC 2013 System
|
|
8
|
+
url: https://www.loc.gov/catdir/cpso/romanization/serbian.pdf
|
|
9
|
+
creation_date: 2013
|
|
10
|
+
description: |
|
|
11
|
+
Serbian uses the Cyrillic and Roman alphabets. When the Roman alphabet is used it is the same as Croatian.
|
|
12
|
+
|
|
13
|
+
tests:
|
|
14
|
+
- source: Шупља Стена
|
|
15
|
+
expected: Šuplja Stena
|
|
16
|
+
- source: Чукарица
|
|
17
|
+
expected: Čukarica
|
|
18
|
+
- source: Црна Трава
|
|
19
|
+
expected: Crna Trava
|
|
20
|
+
- source: Херцег Нови
|
|
21
|
+
expected: Herceg Novi
|
|
22
|
+
- source: Улцињ
|
|
23
|
+
expected: Ulcinj
|
|
24
|
+
- source: Ужице
|
|
25
|
+
expected: Užice
|
|
26
|
+
- source: Тресаначка Река
|
|
27
|
+
expected: Tresanačka Reka
|
|
28
|
+
- source: Сјеница
|
|
29
|
+
expected: Sjenica
|
|
30
|
+
- source: Рожаје
|
|
31
|
+
expected: Rožaje
|
|
32
|
+
- source: Пљевља
|
|
33
|
+
expected: Pljevlja
|
|
34
|
+
- source: Оџаци
|
|
35
|
+
expected: Odžaci
|
|
36
|
+
- source: Никшић
|
|
37
|
+
expected: Nikšić
|
|
38
|
+
- source: Медвеђа
|
|
39
|
+
expected: Medveđa
|
|
40
|
+
- source: Лозница
|
|
41
|
+
expected: Loznica
|
|
42
|
+
- source: Књажевац
|
|
43
|
+
expected: Knjaževac
|
|
44
|
+
- source: Зрењанин
|
|
45
|
+
expected: Zrenjanin
|
|
46
|
+
- source: Житорађа
|
|
47
|
+
expected: Žitorađa
|
|
48
|
+
- source: Ервеник
|
|
49
|
+
expected: Ervenik
|
|
50
|
+
- source: Доње Љупче
|
|
51
|
+
expected: Donje Ljupče
|
|
52
|
+
- source: Гусиње
|
|
53
|
+
expected: Gusinje
|
|
54
|
+
- source: ГУСИЊЕ
|
|
55
|
+
expected: GUSINJE
|
|
56
|
+
- source: Врњачка Бања
|
|
57
|
+
expected: Vrnjačka Banja
|
|
58
|
+
- source: Бијело Поље
|
|
59
|
+
expected: Bijelo Polje
|
|
60
|
+
- source: Алибунар
|
|
61
|
+
expected: Alibunar
|
|
62
|
+
|
|
63
|
+
map:
|
|
64
|
+
postrules:
|
|
65
|
+
#LJ
|
|
66
|
+
- pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
|
|
67
|
+
result: "LJ"
|
|
68
|
+
#NJ
|
|
69
|
+
- pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
|
|
70
|
+
result: "NJ"
|
|
71
|
+
#DŽ
|
|
72
|
+
- pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
|
|
73
|
+
result: "DŽ"
|
|
74
|
+
|
|
75
|
+
characters:
|
|
76
|
+
"\u0410": "A"
|
|
77
|
+
"\u0411": "B"
|
|
78
|
+
"\u0412": "V"
|
|
79
|
+
"\u0413": "G"
|
|
80
|
+
"\u0414": "D"
|
|
81
|
+
"\u0402": "\u0110" # Đ
|
|
82
|
+
"\u0415": "E"
|
|
83
|
+
"\u0416": "\u005a\u030c" # Ž
|
|
84
|
+
"\u0417": "Z"
|
|
85
|
+
"\u0418": "I"
|
|
86
|
+
"\u0408": "J"
|
|
87
|
+
"\u041A": "K"
|
|
88
|
+
"\u041B": "L"
|
|
89
|
+
"\u0409": "Lj"
|
|
90
|
+
"\u041C": "M"
|
|
91
|
+
"\u041D": "N"
|
|
92
|
+
"\u040A": "Nj"
|
|
93
|
+
"\u041E": "O"
|
|
94
|
+
"\u041F": "P"
|
|
95
|
+
"\u0420": "R"
|
|
96
|
+
"\u0421": "S"
|
|
97
|
+
"\u0422": "T"
|
|
98
|
+
"\u040B": "\u0043\u0301" # Ć
|
|
99
|
+
"\u0423": "U"
|
|
100
|
+
"\u0424": "F"
|
|
101
|
+
"\u0425": "H"
|
|
102
|
+
"\u0426": "C"
|
|
103
|
+
"\u0427": "\u0043\u030c" # Č
|
|
104
|
+
"\u040F": "D\u007a\u030c" # Dž
|
|
105
|
+
"\u0428": "\u0053\u030c" # Š
|
|
106
|
+
"\u0430": "a"
|
|
107
|
+
"\u0431": "b"
|
|
108
|
+
"\u0432": "v"
|
|
109
|
+
"\u0433": "g"
|
|
110
|
+
"\u0434": "d"
|
|
111
|
+
"\u0452": "\u0111" # đ
|
|
112
|
+
"\u0435": "e"
|
|
113
|
+
"\u0436": "\u007a\u030c" # ž
|
|
114
|
+
"\u0437": "z"
|
|
115
|
+
"\u0438": "i"
|
|
116
|
+
"\u0458": "j"
|
|
117
|
+
"\u043A": "k"
|
|
118
|
+
"\u043B": "l"
|
|
119
|
+
"\u0459": "lj"
|
|
120
|
+
"\u043C": "m"
|
|
121
|
+
"\u043D": "n"
|
|
122
|
+
"\u045A": "nj"
|
|
123
|
+
"\u043E": "o"
|
|
124
|
+
"\u043F": "p"
|
|
125
|
+
"\u0440": "r"
|
|
126
|
+
"\u0441": "s"
|
|
127
|
+
"\u0442": "t"
|
|
128
|
+
"\u045B": "\u0063\u0301" # ć
|
|
129
|
+
"\u0443": "u"
|
|
130
|
+
"\u0444": "f"
|
|
131
|
+
"\u0445": "h"
|
|
132
|
+
"\u0446": "c"
|
|
133
|
+
"\u0447": "\u0063\u030c" # č
|
|
134
|
+
"\u045F": "d\u007a\u030c" # dž
|
|
135
|
+
"\u0448": "\u0073\u030c" # š
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 1997
|
|
4
|
+
language: ukr
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: ALA-LC Romanization System 1997
|
|
8
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/ukrainia.pdf
|
|
9
|
+
creation_date: 1997
|
|
10
|
+
description: |
|
|
11
|
+
The ALA-LC Romanization Tables, published by the American Library Association (1885)
|
|
12
|
+
and Library of Congress (1905). Used to represent bibliographic information by US and
|
|
13
|
+
Canadian libraries, by the British Library since 1975, and in North American publications.
|
|
14
|
+
The latest 1997 revision is very similar to the 1905 version.
|
|
15
|
+
|
|
16
|
+
notes:
|
|
17
|
+
- The z͡h ligature is necessary to distinguish ж from the combination зг.
|
|
18
|
+
- The t͡s ligature is necessary to distinguish ц from the combination тс.
|
|
19
|
+
|
|
20
|
+
tests:
|
|
21
|
+
- source: Автономна Республіка Крим
|
|
22
|
+
expected: Avtonomna Respublika Krym
|
|
23
|
+
- source: Висунь
|
|
24
|
+
expected: Vysunʹ
|
|
25
|
+
- source: Долинське
|
|
26
|
+
expected: Dolynsʹke
|
|
27
|
+
- source: Дубище
|
|
28
|
+
expected: Dubyshche
|
|
29
|
+
- source: Єнакієве
|
|
30
|
+
expected: I͡enakii͡eve
|
|
31
|
+
- source: Зупиночний Пункт Мокіївці
|
|
32
|
+
expected: Zupynochnyĭ Punkt Mokiïvt͡si
|
|
33
|
+
- source: Іванівщина
|
|
34
|
+
expected: Ivanivshchyna
|
|
35
|
+
- source: Киликиїв
|
|
36
|
+
expected: Kylykyïv
|
|
37
|
+
- source: Кожанка
|
|
38
|
+
expected: Koz͡hanka
|
|
39
|
+
- source: Краснянка
|
|
40
|
+
expected: Krasni͡anka
|
|
41
|
+
- source: Краснівка
|
|
42
|
+
expected: Krasnivka
|
|
43
|
+
- source: Мале Микільське
|
|
44
|
+
expected: Male Mykilʹsʹke
|
|
45
|
+
- source: Маломиколаївка
|
|
46
|
+
expected: Malomykolaïvka
|
|
47
|
+
- source: Нове Село
|
|
48
|
+
expected: Nove Selo
|
|
49
|
+
- source: Новопавлівка
|
|
50
|
+
expected: Novopavlivka
|
|
51
|
+
- source: Новошичі
|
|
52
|
+
expected: Novoshychi
|
|
53
|
+
- source: Новоєфремівка
|
|
54
|
+
expected: Novoi͡efremivka
|
|
55
|
+
- source: Одеська Область
|
|
56
|
+
expected: Odesʹka Oblastʹ
|
|
57
|
+
- source: Орлівське
|
|
58
|
+
expected: Orlivsʹke
|
|
59
|
+
- source: Раневичі
|
|
60
|
+
expected: Ranevychi
|
|
61
|
+
- source: Рокувата
|
|
62
|
+
expected: Rokuvata
|
|
63
|
+
- source: Рудаєве
|
|
64
|
+
expected: Rudai͡eve
|
|
65
|
+
- source: Сахнівці
|
|
66
|
+
expected: Sakhnivt͡si
|
|
67
|
+
- source: Тернівка
|
|
68
|
+
expected: Ternivka
|
|
69
|
+
- source: Турбівка
|
|
70
|
+
expected: Turbivka
|
|
71
|
+
- source: Херсонська Область
|
|
72
|
+
expected: Khersonsʹka Oblastʹ
|
|
73
|
+
|
|
74
|
+
map:
|
|
75
|
+
characters:
|
|
76
|
+
"\u0410": "A" # А
|
|
77
|
+
"\u0411": "B" # Б
|
|
78
|
+
"\u0412": "V" # В
|
|
79
|
+
"\u0413": "H" # Г
|
|
80
|
+
"\u0490": "G" # Ґ
|
|
81
|
+
"\u0414": "D" # Д
|
|
82
|
+
"\u0415": "E" # Е
|
|
83
|
+
"\u0404": "I\u0361e" # Є -> I͡e
|
|
84
|
+
"\u0416": "Z\u0361h" # Ж -> Z͡h
|
|
85
|
+
"\u0417": "Z" # З
|
|
86
|
+
"\u0418": "Y" # И
|
|
87
|
+
"\u0406": "I" # І
|
|
88
|
+
"\u0407": "I\u0308" # Ї -> Ï
|
|
89
|
+
"\u0419": "I\u0306" # Й -> Ĭ
|
|
90
|
+
"\u041a": "K" # К
|
|
91
|
+
"\u041b": "L" # Л
|
|
92
|
+
"\u041c": "M" # М
|
|
93
|
+
"\u041d": "N" # Н
|
|
94
|
+
"\u041e": "O" # О
|
|
95
|
+
"\u041f": "P" # П
|
|
96
|
+
"\u0420": "R" # Р
|
|
97
|
+
"\u0421": "S" # С
|
|
98
|
+
"\u0422": "T" # Т
|
|
99
|
+
"\u0423": "U" # У
|
|
100
|
+
"\u0424": "F" # Ф
|
|
101
|
+
"\u0425": "Kh" # Х
|
|
102
|
+
"\u0426": "T\u0361s" # Ц -> T͡s
|
|
103
|
+
"\u0427": "Ch" # Ч
|
|
104
|
+
"\u0428": "Sh" # Ш
|
|
105
|
+
"\u0429": "Shch" # Щ
|
|
106
|
+
"\u042e": "I\u0361u" # Ю -> I͡u
|
|
107
|
+
"\u042f": "I\u0361a" # Я -> I͡a
|
|
108
|
+
"\u042c": "\u02B9" # Ь -> ʹ
|
|
109
|
+
"\u0430": "a" # а
|
|
110
|
+
"\u0431": "b" # б
|
|
111
|
+
"\u0432": "v" # в
|
|
112
|
+
"\u0433": "h" # г
|
|
113
|
+
"\u0491": "g" # ґ
|
|
114
|
+
"\u0434": "d" # д
|
|
115
|
+
"\u0435": "e" # е
|
|
116
|
+
"\u0454": "i\u0361e" # є -> i͡e
|
|
117
|
+
"\u0436": "z\u0361h" # ж -> z͡h
|
|
118
|
+
"\u0437": "z" # з
|
|
119
|
+
"\u0438": "y" # и
|
|
120
|
+
"\u0456": "i" # і
|
|
121
|
+
"\u0457": "i\u0308" # ї -> ï
|
|
122
|
+
"\u0439": "i\u0306" # й -> ĭ
|
|
123
|
+
"\u043a": "k" # к
|
|
124
|
+
"\u043b": "l" # л
|
|
125
|
+
"\u043c": "m" # м
|
|
126
|
+
"\u043d": "n" # н
|
|
127
|
+
"\u043e": "o" # о
|
|
128
|
+
"\u043f": "p" # п
|
|
129
|
+
"\u0440": "r" # р
|
|
130
|
+
"\u0441": "s" # с
|
|
131
|
+
"\u0442": "t" # т
|
|
132
|
+
"\u0443": "u" # у
|
|
133
|
+
"\u0444": "f" # ф
|
|
134
|
+
"\u0445": "kh" # х
|
|
135
|
+
"\u0446": "t\u0361s" # ц -> t͡s
|
|
136
|
+
"\u0447": "ch" # ч
|
|
137
|
+
"\u0448": "sh" # ш
|
|
138
|
+
"\u0449": "shch" # щ
|
|
139
|
+
"\u044e": "i\u0361u" # ю -> i͡u
|
|
140
|
+
"\u044f": "i\u0361a" # я -> i͡a
|
|
141
|
+
"\u044c": "\u02B9" # ь -> ʹ
|