interscript 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,114 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Serbian and Macedonian Romanization, ALA-LC 1997 System
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/serbian.pdf
9
+ creation_date: 1997
10
+ description: ALA-LC Romanization table for Serbian and Macedonian.
11
+
12
+ notes:
13
+ - Special characters in romanization
14
+ Đ - D with crossbar (upper case). USMARC hexadecimal code A3.
15
+ đ - d with crossbar (lower case). USMARC hexadecimal code B3.
16
+
17
+ - Character modifiers in romanization
18
+ ´ - acute. USMARC hexadecimal code E2.
19
+ ˇ - hachek. USMARC hexadecimal code E9.
20
+
21
+ tests:
22
+ - source: Општина Ердут
23
+ expected: Opština Erdut
24
+ - source: Општина Двор
25
+ expected: Opština Dvor
26
+ - source: ЛУЃЕ луѓе
27
+ expected: LUǴE luǵe
28
+ - source: ЅВЕЗДА ѕвезда Ѕвезда
29
+ expected: DZVEZDA dzvezda Dzvezda
30
+ - source: ЌАРУВАЊЕ ќарување
31
+ expected: ḰARUVANJE ḱaruvanje
32
+
33
+ map:
34
+ postrules:
35
+ # DZ
36
+ - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
37
+ result: "DZ"
38
+ #LJ
39
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
40
+ result: "LJ"
41
+ #NJ
42
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
43
+ result: "NJ"
44
+ #DŽ
45
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
46
+ result: "DŽ"
47
+
48
+ characters:
49
+ "\u0410": "A"
50
+ "\u0411": "B"
51
+ "\u0412": "V"
52
+ "\u0413": "G"
53
+ "\u0403": "\u01F4" # Ǵ
54
+ "\u0414": "D"
55
+ "\u0402": "\u0110" # Đ
56
+ "\u0415": "E"
57
+ "\u0416": "\u005a\u030c" # Ž
58
+ "\u0417": "Z"
59
+ "\u0405": "Dz"
60
+ "\u0418": "I"
61
+ "\u0408": "J"
62
+ "\u041A": "K"
63
+ "\u040C": "\u1E30" # Ḱ
64
+ "\u041B": "L"
65
+ "\u0409": "Lj"
66
+ "\u041C": "M"
67
+ "\u041D": "N"
68
+ "\u040A": "Nj"
69
+ "\u041E": "O"
70
+ "\u041F": "P"
71
+ "\u0420": "R"
72
+ "\u0421": "S"
73
+ "\u0422": "T"
74
+ "\u040B": "\u0043\u0301" # Ć
75
+ "\u0423": "U"
76
+ "\u0424": "F"
77
+ "\u0425": "H"
78
+ "\u0426": "C"
79
+ "\u0427": "\u0043\u030c" # Č
80
+ "\u040F": "D\u007a\u030c" # Dž
81
+ "\u0428": "\u0053\u030c" # Š
82
+ "\u0430": "a"
83
+ "\u0431": "b"
84
+ "\u0432": "v"
85
+ "\u0433": "g"
86
+ "\u0453": "\u01F5" # ǵ
87
+ "\u0434": "d"
88
+ "\u0452": "\u0111" # đ
89
+ "\u0435": "e"
90
+ "\u0436": "\u007a\u030c" # ž
91
+ "\u0437": "z"
92
+ "\u0455": "dz"
93
+ "\u0438": "i"
94
+ "\u0458": "j"
95
+ "\u043A": "k"
96
+ "\u045C": "\u1E31" # ḱ
97
+ "\u043B": "l"
98
+ "\u0459": "lj"
99
+ "\u043C": "m"
100
+ "\u043D": "n"
101
+ "\u045A": "nj"
102
+ "\u043E": "o"
103
+ "\u043F": "p"
104
+ "\u0440": "r"
105
+ "\u0441": "s"
106
+ "\u0442": "t"
107
+ "\u045B": "\u0063\u0301" # ć
108
+ "\u0443": "u"
109
+ "\u0444": "f"
110
+ "\u0445": "h"
111
+ "\u0446": "c"
112
+ "\u0447": "\u0063\u030c" # č
113
+ "\u045F": "d\u007a\u030c" # dž
114
+ "\u0448": "\u0073\u030c" # š
@@ -0,0 +1,114 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: srp
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Serbian and Macedonian Romanization, ALA-LC 1997 System
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/serbian.pdf
9
+ creation_date: 1997
10
+ description: ALA-LC Romanization table for Serbian and Macedonian.
11
+
12
+ notes:
13
+ - Special characters in romanization
14
+ Đ - D with crossbar (upper case). USMARC hexadecimal code A3.
15
+ đ - d with crossbar (lower case). USMARC hexadecimal code B3.
16
+
17
+ - Character modifiers in romanization
18
+ ´ - acute. USMARC hexadecimal code E2.
19
+ ˇ - hachek. USMARC hexadecimal code E9.
20
+
21
+ tests:
22
+ - source: Општина Ердут
23
+ expected: Opština Erdut
24
+ - source: Општина Двор
25
+ expected: Opština Dvor
26
+ - source: ЛУЃЕ луѓе
27
+ expected: LUǴE luǵe
28
+ - source: ЅВЕЗДА ѕвезда Ѕвезда
29
+ expected: DZVEZDA dzvezda Dzvezda
30
+ - source: ЌАРУВАЊЕ ќарување
31
+ expected: ḰARUVANJE ḱaruvanje
32
+
33
+ map:
34
+ postrules:
35
+ # DZ
36
+ - pattern: "((?<=[[:upper:]])Dz(?=[[:upper:]])?|(?<=[[:upper:]])?Dz(?=[[:upper:]]))"
37
+ result: "DZ"
38
+ #LJ
39
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
40
+ result: "LJ"
41
+ #NJ
42
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
43
+ result: "NJ"
44
+ #DŽ
45
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
46
+ result: "DŽ"
47
+
48
+ characters:
49
+ "\u0410": "A"
50
+ "\u0411": "B"
51
+ "\u0412": "V"
52
+ "\u0413": "G"
53
+ "\u0403": "\u01F4" # Ǵ
54
+ "\u0414": "D"
55
+ "\u0402": "\u0110" # Đ
56
+ "\u0415": "E"
57
+ "\u0416": "\u005a\u030c" # Ž
58
+ "\u0417": "Z"
59
+ "\u0405": "Dz"
60
+ "\u0418": "I"
61
+ "\u0408": "J"
62
+ "\u041A": "K"
63
+ "\u040C": "\u1E30" # Ḱ
64
+ "\u041B": "L"
65
+ "\u0409": "Lj"
66
+ "\u041C": "M"
67
+ "\u041D": "N"
68
+ "\u040A": "Nj"
69
+ "\u041E": "O"
70
+ "\u041F": "P"
71
+ "\u0420": "R"
72
+ "\u0421": "S"
73
+ "\u0422": "T"
74
+ "\u040B": "\u0043\u0301" # Ć
75
+ "\u0423": "U"
76
+ "\u0424": "F"
77
+ "\u0425": "H"
78
+ "\u0426": "C"
79
+ "\u0427": "\u0043\u030c" # Č
80
+ "\u040F": "D\u007a\u030c" # Dž
81
+ "\u0428": "\u0053\u030c" # Š
82
+ "\u0430": "a"
83
+ "\u0431": "b"
84
+ "\u0432": "v"
85
+ "\u0433": "g"
86
+ "\u0453": "\u01F5" # ǵ
87
+ "\u0434": "d"
88
+ "\u0452": "\u0111" # đ
89
+ "\u0435": "e"
90
+ "\u0436": "\u007a\u030c" # ž
91
+ "\u0437": "z"
92
+ "\u0455": "dz"
93
+ "\u0438": "i"
94
+ "\u0458": "j"
95
+ "\u043A": "k"
96
+ "\u045C": "\u1E31" # ḱ
97
+ "\u043B": "l"
98
+ "\u0459": "lj"
99
+ "\u043C": "m"
100
+ "\u043D": "n"
101
+ "\u045A": "nj"
102
+ "\u043E": "o"
103
+ "\u043F": "p"
104
+ "\u0440": "r"
105
+ "\u0441": "s"
106
+ "\u0442": "t"
107
+ "\u045B": "\u0063\u0301" # ć
108
+ "\u0443": "u"
109
+ "\u0444": "f"
110
+ "\u0445": "h"
111
+ "\u0446": "c"
112
+ "\u0447": "\u0063\u030c" # č
113
+ "\u045F": "d\u007a\u030c" # dž
114
+ "\u0448": "\u0073\u030c" # š
@@ -0,0 +1,135 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2013
4
+ language: srp
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF SERBIAN, ALA-LC 2013 System
8
+ url: https://www.loc.gov/catdir/cpso/romanization/serbian.pdf
9
+ creation_date: 2013
10
+ description: |
11
+ Serbian uses the Cyrillic and Roman alphabets. When the Roman alphabet is used it is the same as Croatian.
12
+
13
+ tests:
14
+ - source: Шупља Стена
15
+ expected: Šuplja Stena
16
+ - source: Чукарица
17
+ expected: Čukarica
18
+ - source: Црна Трава
19
+ expected: Crna Trava
20
+ - source: Херцег Нови
21
+ expected: Herceg Novi
22
+ - source: Улцињ
23
+ expected: Ulcinj
24
+ - source: Ужице
25
+ expected: Užice
26
+ - source: Тресаначка Река
27
+ expected: Tresanačka Reka
28
+ - source: Сјеница
29
+ expected: Sjenica
30
+ - source: Рожаје
31
+ expected: Rožaje
32
+ - source: Пљевља
33
+ expected: Pljevlja
34
+ - source: Оџаци
35
+ expected: Odžaci
36
+ - source: Никшић
37
+ expected: Nikšić
38
+ - source: Медвеђа
39
+ expected: Medveđa
40
+ - source: Лозница
41
+ expected: Loznica
42
+ - source: Књажевац
43
+ expected: Knjaževac
44
+ - source: Зрењанин
45
+ expected: Zrenjanin
46
+ - source: Житорађа
47
+ expected: Žitorađa
48
+ - source: Ервеник
49
+ expected: Ervenik
50
+ - source: Доње Љупче
51
+ expected: Donje Ljupče
52
+ - source: Гусиње
53
+ expected: Gusinje
54
+ - source: ГУСИЊЕ
55
+ expected: GUSINJE
56
+ - source: Врњачка Бања
57
+ expected: Vrnjačka Banja
58
+ - source: Бијело Поље
59
+ expected: Bijelo Polje
60
+ - source: Алибунар
61
+ expected: Alibunar
62
+
63
+ map:
64
+ postrules:
65
+ #LJ
66
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
67
+ result: "LJ"
68
+ #NJ
69
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
70
+ result: "NJ"
71
+ #DŽ
72
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
73
+ result: "DŽ"
74
+
75
+ characters:
76
+ "\u0410": "A"
77
+ "\u0411": "B"
78
+ "\u0412": "V"
79
+ "\u0413": "G"
80
+ "\u0414": "D"
81
+ "\u0402": "\u0110" # Đ
82
+ "\u0415": "E"
83
+ "\u0416": "\u005a\u030c" # Ž
84
+ "\u0417": "Z"
85
+ "\u0418": "I"
86
+ "\u0408": "J"
87
+ "\u041A": "K"
88
+ "\u041B": "L"
89
+ "\u0409": "Lj"
90
+ "\u041C": "M"
91
+ "\u041D": "N"
92
+ "\u040A": "Nj"
93
+ "\u041E": "O"
94
+ "\u041F": "P"
95
+ "\u0420": "R"
96
+ "\u0421": "S"
97
+ "\u0422": "T"
98
+ "\u040B": "\u0043\u0301" # Ć
99
+ "\u0423": "U"
100
+ "\u0424": "F"
101
+ "\u0425": "H"
102
+ "\u0426": "C"
103
+ "\u0427": "\u0043\u030c" # Č
104
+ "\u040F": "D\u007a\u030c" # Dž
105
+ "\u0428": "\u0053\u030c" # Š
106
+ "\u0430": "a"
107
+ "\u0431": "b"
108
+ "\u0432": "v"
109
+ "\u0433": "g"
110
+ "\u0434": "d"
111
+ "\u0452": "\u0111" # đ
112
+ "\u0435": "e"
113
+ "\u0436": "\u007a\u030c" # ž
114
+ "\u0437": "z"
115
+ "\u0438": "i"
116
+ "\u0458": "j"
117
+ "\u043A": "k"
118
+ "\u043B": "l"
119
+ "\u0459": "lj"
120
+ "\u043C": "m"
121
+ "\u043D": "n"
122
+ "\u045A": "nj"
123
+ "\u043E": "o"
124
+ "\u043F": "p"
125
+ "\u0440": "r"
126
+ "\u0441": "s"
127
+ "\u0442": "t"
128
+ "\u045B": "\u0063\u0301" # ć
129
+ "\u0443": "u"
130
+ "\u0444": "f"
131
+ "\u0445": "h"
132
+ "\u0446": "c"
133
+ "\u0447": "\u0063\u030c" # č
134
+ "\u045F": "d\u007a\u030c" # dž
135
+ "\u0448": "\u0073\u030c" # š
@@ -0,0 +1,141 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization System 1997
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/ukrainia.pdf
9
+ creation_date: 1997
10
+ description: |
11
+ The ALA-LC Romanization Tables, published by the American Library Association (1885)
12
+ and Library of Congress (1905). Used to represent bibliographic information by US and
13
+ Canadian libraries, by the British Library since 1975, and in North American publications.
14
+ The latest 1997 revision is very similar to the 1905 version.
15
+
16
+ notes:
17
+ - The z͡h ligature is necessary to distinguish ж from the combination зг.
18
+ - The t͡s ligature is necessary to distinguish ц from the combination тс.
19
+
20
+ tests:
21
+ - source: Автономна Республіка Крим
22
+ expected: Avtonomna Respublika Krym
23
+ - source: Висунь
24
+ expected: Vysunʹ
25
+ - source: Долинське
26
+ expected: Dolynsʹke
27
+ - source: Дубище
28
+ expected: Dubyshche
29
+ - source: Єнакієве
30
+ expected: I͡enakii͡eve
31
+ - source: Зупиночний Пункт Мокіївці
32
+ expected: Zupynochnyĭ Punkt Mokiïvt͡si
33
+ - source: Іванівщина
34
+ expected: Ivanivshchyna
35
+ - source: Киликиїв
36
+ expected: Kylykyïv
37
+ - source: Кожанка
38
+ expected: Koz͡hanka
39
+ - source: Краснянка
40
+ expected: Krasni͡anka
41
+ - source: Краснівка
42
+ expected: Krasnivka
43
+ - source: Мале Микільське
44
+ expected: Male Mykilʹsʹke
45
+ - source: Маломиколаївка
46
+ expected: Malomykolaïvka
47
+ - source: Нове Село
48
+ expected: Nove Selo
49
+ - source: Новопавлівка
50
+ expected: Novopavlivka
51
+ - source: Новошичі
52
+ expected: Novoshychi
53
+ - source: Новоєфремівка
54
+ expected: Novoi͡efremivka
55
+ - source: Одеська Область
56
+ expected: Odesʹka Oblastʹ
57
+ - source: Орлівське
58
+ expected: Orlivsʹke
59
+ - source: Раневичі
60
+ expected: Ranevychi
61
+ - source: Рокувата
62
+ expected: Rokuvata
63
+ - source: Рудаєве
64
+ expected: Rudai͡eve
65
+ - source: Сахнівці
66
+ expected: Sakhnivt͡si
67
+ - source: Тернівка
68
+ expected: Ternivka
69
+ - source: Турбівка
70
+ expected: Turbivka
71
+ - source: Херсонська Область
72
+ expected: Khersonsʹka Oblastʹ
73
+
74
+ map:
75
+ characters:
76
+ "\u0410": "A" # А
77
+ "\u0411": "B" # Б
78
+ "\u0412": "V" # В
79
+ "\u0413": "H" # Г
80
+ "\u0490": "G" # Ґ
81
+ "\u0414": "D" # Д
82
+ "\u0415": "E" # Е
83
+ "\u0404": "I\u0361e" # Є -> I͡e
84
+ "\u0416": "Z\u0361h" # Ж -> Z͡h
85
+ "\u0417": "Z" # З
86
+ "\u0418": "Y" # И
87
+ "\u0406": "I" # І
88
+ "\u0407": "I\u0308" # Ї -> Ï
89
+ "\u0419": "I\u0306" # Й -> Ĭ
90
+ "\u041a": "K" # К
91
+ "\u041b": "L" # Л
92
+ "\u041c": "M" # М
93
+ "\u041d": "N" # Н
94
+ "\u041e": "O" # О
95
+ "\u041f": "P" # П
96
+ "\u0420": "R" # Р
97
+ "\u0421": "S" # С
98
+ "\u0422": "T" # Т
99
+ "\u0423": "U" # У
100
+ "\u0424": "F" # Ф
101
+ "\u0425": "Kh" # Х
102
+ "\u0426": "T\u0361s" # Ц -> T͡s
103
+ "\u0427": "Ch" # Ч
104
+ "\u0428": "Sh" # Ш
105
+ "\u0429": "Shch" # Щ
106
+ "\u042e": "I\u0361u" # Ю -> I͡u
107
+ "\u042f": "I\u0361a" # Я -> I͡a
108
+ "\u042c": "\u02B9" # Ь -> ʹ
109
+ "\u0430": "a" # а
110
+ "\u0431": "b" # б
111
+ "\u0432": "v" # в
112
+ "\u0433": "h" # г
113
+ "\u0491": "g" # ґ
114
+ "\u0434": "d" # д
115
+ "\u0435": "e" # е
116
+ "\u0454": "i\u0361e" # є -> i͡e
117
+ "\u0436": "z\u0361h" # ж -> z͡h
118
+ "\u0437": "z" # з
119
+ "\u0438": "y" # и
120
+ "\u0456": "i" # і
121
+ "\u0457": "i\u0308" # ї -> ï
122
+ "\u0439": "i\u0306" # й -> ĭ
123
+ "\u043a": "k" # к
124
+ "\u043b": "l" # л
125
+ "\u043c": "m" # м
126
+ "\u043d": "n" # н
127
+ "\u043e": "o" # о
128
+ "\u043f": "p" # п
129
+ "\u0440": "r" # р
130
+ "\u0441": "s" # с
131
+ "\u0442": "t" # т
132
+ "\u0443": "u" # у
133
+ "\u0444": "f" # ф
134
+ "\u0445": "kh" # х
135
+ "\u0446": "t\u0361s" # ц -> t͡s
136
+ "\u0447": "ch" # ч
137
+ "\u0448": "sh" # ш
138
+ "\u0449": "shch" # щ
139
+ "\u044e": "i\u0361u" # ю -> i͡u
140
+ "\u044f": "i\u0361a" # я -> i͡a
141
+ "\u044c": "\u02B9" # ь -> ʹ