interscript 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,125 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization Table -- Byelorussian (1997)
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/beloruss.pdf
9
+ creation_date: 1997
10
+
11
+ notes:
12
+ - The letter Ґ is found in Old Belarusian and in modern publications in Tarashkevitsa orthography.
13
+ - Do not confuse with the digraph кг (also romanized as “kh”). Manual review may be needed when transcribing data in vernacular characters in order to distinguish х from кг.
14
+ - The apostrophe (´) is not transliterated.
15
+ - Soft sign (prime) is USMARC hexadecimal code A7 ~ U+02B9 Unicode character
16
+
17
+ tests:
18
+ # https://en.wikipedia.org/wiki/Romanization_of_Belarusian#Examples
19
+ # https://img.tyt.by/n/02/d/belorusskiy_translit.jpg - with issues
20
+ - source: Беларусь
21
+ expected: Belarusʹ
22
+ - source: Магілёў
23
+ expected: Mahili͡oŭ
24
+ - source: Лукашэнка
25
+ expected: Lukashėnka
26
+ - source: сям´я
27
+ expected: si͡ami͡a
28
+ - source: Уручча
29
+ expected: Uruchcha
30
+ - source: Барысаўскі тракт
31
+ expected: Barysaŭski trakt
32
+ - source: Усход
33
+ expected: Uskhod
34
+ - source: Маскоўская
35
+ expected: Maskoŭskai͡a
36
+ - source: Парк Чалюскінцаў
37
+ expected: Park Chali͡uskintsaŭ
38
+ - source: Акадэмія навук
39
+ expected: Akadėmii͡a navuk
40
+ - source: Плошча Якуба Коласа
41
+ expected: Ploshcha I͡Akuba Kolasa
42
+ - source: Плошча Перамогі
43
+ expected: Ploshcha Peramohi
44
+ - source: Кастрычніцкая
45
+ expected: Kastrychnitskai͡a
46
+ - source: Плошча Леніна
47
+ expected: Ploshcha Lenina
48
+ - source: Інстытут Культуры
49
+ expected: Instytut Kulʹtury
50
+
51
+ map:
52
+ characters:
53
+ # Apostrophes
54
+ '\u00B4' : '' # official english Apostrophe
55
+ '\u02BC' : ''
56
+ '\u0027' : '' # from belarussian keyboard
57
+
58
+ # Characters
59
+ '\u0410' : 'A' # A
60
+ '\u0411' : 'B' # Б
61
+ '\u0412' : 'V' # B
62
+ '\u0413' : 'H' # Г
63
+ '\u0490' : 'G' # Ґ
64
+ '\u0414' : 'D' # Д
65
+ '\u0415' : 'E' # Е
66
+ '\u0401' : "I\u0361O" # Ё : I͡O
67
+ '\u0416' : "Z\u0361H" # Ж : Z͡H
68
+ '\u0417' : 'Z' # З
69
+ '\u0406' : 'I' # І
70
+ '\u0419' : "\u012C" # Й : Ĭ
71
+ '\u041A' : 'K' # К
72
+ '\u041B' : 'L' # Л
73
+ '\u041C' : 'M' # М
74
+ '\u041D' : 'N' # Н
75
+ '\u041E' : 'O' # О
76
+ '\u041F' : 'P' # П
77
+ '\u0420' : 'R' # Р
78
+ '\u0421' : 'S' # С
79
+ '\u0422' : 'T' # Т
80
+ '\u0423' : 'U' # У
81
+ '\u040E' : "\u016C" # Ў : Ŭ
82
+ '\u0424' : 'F' # Ф
83
+ '\u0425' : 'Kh' # Х
84
+ '\u0426' : 'Ts' # Ц
85
+ '\u0427' : 'Ch' # Ч
86
+ '\u0428' : 'Sh' # Ш
87
+ '\u042B' : 'Y' # Ы
88
+ '\u042C' : "\u02B9" # Ь : ʹ
89
+ '\u042D' : "\u0116" # Э : Ė
90
+ '\u042E' : "I\u0361U" # Ю : I͡U
91
+ '\u042F' : "I\u0361A" # Я : I͡A
92
+
93
+ '\u0430' : 'a' # а
94
+ '\u0431' : 'b' # б
95
+ '\u0432' : 'v' # в
96
+ '\u0433' : 'h' # г
97
+ '\u0491' : 'g' # ґ
98
+ '\u0434' : 'd' # д
99
+ '\u0435' : 'e' # е
100
+ '\u0451' : "i\u0361o" # ё : i͡o
101
+ '\u0436' : "z\u0361h" # ж : z͡h
102
+ '\u0437' : 'z' # з
103
+ '\u0456' : 'i' # і
104
+ '\u0439' : "\u012D" # й : ĭ
105
+ '\u043A' : 'k' # к
106
+ '\u043B' : 'l' # л
107
+ '\u043C' : 'm' # м
108
+ '\u043D' : 'n' # н
109
+ '\u043E' : 'o' # о
110
+ '\u043F' : 'p' # п
111
+ '\u0440' : 'r' # р
112
+ '\u0441' : 's' # с
113
+ '\u0442' : 't' # т
114
+ '\u0443' : 'u' # у
115
+ '\u045E' : "\u016D" # ў : ŭ
116
+ '\u0444' : 'f' # ф
117
+ '\u0445' : 'kh' # х
118
+ '\u0446' : 'ts' # ц
119
+ '\u0447' : 'ch' # ч
120
+ '\u0448' : 'sh' # ш
121
+ '\u044B' : 'y' # ы
122
+ '\u044C' : "\u02B9" # ь : ʹ
123
+ '\u044D' : "\u0117" # э : ė
124
+ '\u044E' : "i\u0361u" # ю : i͡u
125
+ '\u044F' : "i\u0361a" # я : i͡a
@@ -0,0 +1,130 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2017
4
+ language: ben
5
+ source_script: Beng
6
+ destination_script: Latn
7
+ name: Bengali Romanization, 2017
8
+ url: https://www.loc.gov/catdir/cpso/romanization/bengali.pdf
9
+ creation_date: 2017
10
+ description: |
11
+ ALA-LC Romanization table for Bengali
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are
16
+ listed; the forms used for vowels following a consonant can be found in
17
+ grammars; no distinction between the two is made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters
21
+ and is supplied in transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript symbol ( ্ )
25
+ called hasanta or birāma.
26
+
27
+ - ব is used both as a labial and as a semivowel. When it occurs as the
28
+ second or subsequent consonant of a consonant cluster, it is
29
+ transliterated va. When ব is doubled, it is transliterated bba.
30
+
31
+ - Candrabindu before guttural, palatal, cerebral, and dental occlusives
32
+ is transliterated n̐. Before labials, sibilants, semivowels, the
33
+ aspirate, vowels, and in final position it is transliterated m̐.
34
+
35
+ - When doubled, abagraha is transliterated by two apostrophes ( ’’ ).
36
+
37
+ tests:
38
+ - source: "র্ক"
39
+ expected: "rka"
40
+ - source: "গ্র"
41
+ expected: "gra"
42
+ - source: "ত্য"
43
+ expected: "tya"
44
+
45
+ map:
46
+ inherit: "un-ben-Beng-Latn-2016"
47
+
48
+ characters:
49
+
50
+ # I. Vowels and Diphthongs (see Note 1)
51
+
52
+ 'অ': 'a'
53
+ 'ৠ': 'ṝ'
54
+ 'আ': 'ā'
55
+ 'ঌ': 'ḹ'
56
+ 'ই': 'i'
57
+ 'এ': 'e'
58
+ 'ঈ': 'ī'
59
+ 'ঐ': 'ai'
60
+ 'উ': 'u'
61
+ 'ও': 'o'
62
+ 'ঊ': 'ū'
63
+ 'ঔ': 'au'
64
+ 'ঋ': 'ṛ'
65
+
66
+ # II. Consonants (see Note 2)
67
+ # Gutturals
68
+ 'ক': 'ka'
69
+ 'খ': 'kha'
70
+ 'গ': 'ga'
71
+ 'ঘ': 'gha'
72
+ 'ঙ': 'ṅa'
73
+
74
+ # Palatals
75
+ 'চ': 'ca'
76
+ 'ছ': 'cha'
77
+ 'জ': 'ja'
78
+ 'ঝ': 'jha'
79
+ 'ঞ': 'ña'
80
+
81
+ # Cerebrals
82
+ 'ট': 'ṭa'
83
+ 'ঠ': 'ṭha'
84
+ 'ড': 'ḍa'
85
+ 'ড়': 'ṛa'
86
+ 'ঢ': 'ḍha'
87
+ 'ঢ়': 'ṛha'
88
+ 'ণ': 'ṇa'
89
+
90
+ # Dentals
91
+ 'ত': 'ta'
92
+ 'ৎ': 't'
93
+ 'থ': 'tha'
94
+ 'দ': 'da'
95
+ 'ধ': 'dha'
96
+ 'ন': 'na'
97
+
98
+ # Labials
99
+ 'প': 'pa'
100
+ 'ফ': 'pha'
101
+ 'ব': 'ba' # see Note 3
102
+ 'ভ': 'bha'
103
+ 'ম': 'ma'
104
+
105
+ # Semivowels
106
+ 'য': 'ya'
107
+ 'য়': 'ẏa'
108
+ 'র': 'ra'
109
+ 'ল': 'la'
110
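+ # 'ব': 'ba' # see Note 3 (same letter as the Labials entry; not repeated as a key because duplicate mapping keys are invalid YAML)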
111
+
112
+ # Sibilants
113
+ 'শ': 'śa'
114
+ 'ষ': 'sha'
115
+ 'স': 'sa'
116
+
117
+ # Aspirate
118
+ 'হ': 'ha'
119
+
120
+ # Anusvāra
121
+ 'ং': 'ṃ' # U+0982 (no leading space in the key, so the sign matches after a consonant)
122
+
123
+ # Bisarga
124
+ 'ঃ': 'ḥ' # U+0983 (no leading space in the key, so the sign matches after a consonant)
125
+
126
+ # Candrabindu (anunāsika) see note 4
127
+ '\u0981': 'n̐' # ঁ : n̐, m̐
128
+
129
+ # Abagraha (see Note 5)
130
+ 'ঽ': '’' # U+09BD Bengali abagraha (apostrophe)
@@ -0,0 +1,94 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: bul
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization Table -- Bulgarian (1997)
8
+ url: http://www.rechtertie.nl/databases/judd/downloads/Bulgarian.pdf
9
+ creation_date: 1997
10
+
11
+ notes:
12
+ - |
13
+ The letter Ъ in the initial and medial position is a vowel in Bulgarian and is romanized as ŭ.
14
+ It appears in the final position chiefly in texts published before 1945, in which it serves as
15
+ a hard sign and is romanized as ʺ.
16
+ - The letters Ѣ and Ѫ are found chiefly in texts published before the orthographic reform of 1945.
17
+
18
+ tests:
19
+ # missing
20
+
21
+ map:
22
+ rules:
23
+ # note[1]
24
+ - pattern: (?<=)\u042a(?=\b)
25
+ result: "\u02BA"
26
+ - pattern: (?<=)\u044a(?=\b)
27
+ result: "\u02BA"
28
+
29
+ characters:
30
+ '\u0410': 'A'
31
+ '\u0411': 'B'
32
+ '\u0412': 'V'
33
+ '\u0413': 'G'
34
+ '\u0414': 'D'
35
+ '\u0415': 'E'
36
+ '\u0416': 'Zh'
37
+ '\u0417': 'Z'
38
+ '\u0418': 'I'
39
+ '\u0419': "I\u0306" # Ĭ
40
+ '\u041a': 'K'
41
+ '\u041b': 'L'
42
+ '\u041c': 'M'
43
+ '\u041d': 'N'
44
+ '\u041e': 'O'
45
+ '\u041f': 'P'
46
+ '\u0420': 'R'
47
+ '\u0421': 'S'
48
+ '\u0422': 'T'
49
+ '\u0423': 'U'
50
+ '\u0424': 'F'
51
+ '\u0425': 'Kh'
52
+ '\u0426': "T\u0361S"
53
+ '\u0427': 'Ch'
54
+ '\u0428': 'Sh'
55
+ '\u0429': 'Sht'
56
+ '\u042a': "U\u0306" # or ʺ check note[1]
57
+ '\u042c': "\u02B9"
58
+ '\u0462': "I\u0361E" # Ѣ check note [2]
59
+ '\u042e': "I\u0361U"
60
+ '\u042f': "I\u0361A"
61
+ '\u046A': "U\u0310" # Ѫ check note [2]
62
+
63
+ '\u0430': 'a'
64
+ '\u0431': 'b'
65
+ '\u0432': 'v'
66
+ '\u0433': 'g'
67
+ '\u0434': 'd'
68
+ '\u0435': 'e'
69
+ '\u0436': 'zh'
70
+ '\u0437': 'z'
71
+ '\u0438': 'i'
72
+ '\u0439': "i\u0306" # ĭ
73
+ '\u043a': 'k'
74
+ '\u043b': 'l'
75
+ '\u043c': 'm'
76
+ '\u043d': 'n'
77
+ '\u043e': 'o'
78
+ '\u043f': 'p'
79
+ '\u0440': 'r'
80
+ '\u0441': 's'
81
+ '\u0442': 't'
82
+ '\u0443': 'u'
83
+ '\u0444': 'f'
84
+ '\u0445': 'kh'
85
+ '\u0446': "t\u0361s"
86
+ '\u0447': 'ch'
87
+ '\u0448': 'sh'
88
+ '\u0449': 'sht'
89
+ '\u044a': "u\u0306" # or ʺ check note[1]
90
+ '\u044c': "\u02B9"
91
+ '\u0463': "i\u0361e" # ѣ check note [2]
92
+ '\u044e': "i\u0361u"
93
+ '\u044f': "i\u0361a"
94
+ '\u046B': "u\u0310" # ѫ check note [2]
@@ -0,0 +1,625 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: Greek Romanization, 1997
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/greek.pdf
9
+ creation_date: 1997
10
+ description: |
11
+ ALA-LC Romanization table for Greek
12
+
13
+ notes:
14
+ - Applies to texts after 1453 (Modern Greek)
15
+ - This table presupposes monotonic accentuation; rough breathings are accordingly not addressed.
16
+ - The diphthong definition for upsilon is taken from the 2010 version
17
+ - Generalised gramma digraph rule to capitals
18
+
19
+ tests:
20
+
21
+ - source: |
22
+ Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
23
+
24
+ Γιάννης Μακρυγιάννης.
25
+
26
+ expected: |
27
+ Ena prama monon me parakinēse ki emena na grapsō oti toutēn tēn patrida tēn echomen oloi mazi, kai sophoi ki amatheis kai plousioi kai phtōchoi kai politikoi kai stratiōtikoi kai oi pleon mikroteroi anthrōpoi; osoi agōnistēkamen, analogōs o katheis, echomen na zēsomen edō. To loipon doulepsamen oloi mazi, na tēn phylamen ki oloi mazi kai na mēn legei oute o dynatos «egō» oute o adynatos. Xerete pote na legei o katheis «egō»? Otan agōnistei monos tou kai phkiasei ē chalasei, na legei «egō»; otan omōs agōnizontai polloi kai phkianoun, tote na lene «emeis». Eimaste eis to «emeis» ki ochi eis to «egō». Kai eis to exēs na mathomen gnōsē, an thelomen na phkiasomen chōrion, na zēsomen oloi mazi.
28
+
29
+ Giannēs Makrygiannēs.
30
+
31
+
32
+ - source: ΑΘΗΝΑ
33
+ expected: ATHĒNA
34
+ - source: μπαμπάκι
35
+ expected: bampaki
36
+ - source: νταντά
37
+ expected: ḏanta
38
+ - source: γκέγκε
39
+ expected: nkenke
40
+ - source: Γκαμπόν
41
+ expected: Nkampon
42
+ - source: Μάγχη
43
+ expected: Manchē
44
+ - source: κογξ
45
+ expected: konx
46
+ - source: υιός
47
+ expected: uios
48
+ - source: Υιός
49
+ expected: Uios
50
+ - source: νεράντζι
51
+ expected: nerantzi
52
+ - source: Γοίθιος
53
+ expected: Goithios
54
+ - source: μπέικον
55
+ expected: beikon
56
+ - source: μπέϊκον
57
+ expected: beikon
58
+ - source: βόλεϊ
59
+ expected: volei
60
+ - source: αθεΐα
61
+ expected: atheia
62
+ - source: Εϊγιαφιάτλαγιοκουτλ
63
+ expected: Eigiaphiatlagiokoutl
64
+ - source: Εΐτζι
65
+ expected: Eitzi
66
+ - source: Μυρτώο
67
+ expected: Myrtōo
68
+ - source: αέρας
69
+ expected: aeras
70
+ - source: γαυ γαυ
71
+ expected: gau gau
72
+ - source: Ταΰγετος
73
+ expected: Taygetos
74
+ - source: σπρέυ
75
+ expected: sprey
76
+
77
+ - source: Αθήνα
78
+ expected: Athēna
79
+ - source: Άγιον Όρος
80
+ expected: Agion Oros
81
+ - source: Άγραφα
82
+ expected: Agrapha
83
+ - source: Αγρίνιο
84
+ expected: Agrinio
85
+ - source: Αίγινα
86
+ expected: Aigina
87
+ - source: Αίγιο
88
+ expected: Aigio
89
+ - source: Αλεξανδρούπολη
90
+ expected: Alexandroupolē
91
+ - source: Αλεποχώρι
92
+ expected: Alepochōri
93
+ - source: Αμοργός
94
+ expected: Amorgos
95
+ - source: Άμφισσα
96
+ expected: Amphissa
97
+ - source: Αράχωβα
98
+ expected: Arachōva
99
+ - source: Άργος
100
+ expected: Argos
101
+ - source: Αρκαδία
102
+ expected: Arkadia
103
+ - source: Άρτα
104
+ expected: Arta
105
+ - source: Βελούχι
106
+ expected: Velouchi
107
+ - source: Βέροια
108
+ expected: Veroia
109
+ - source: Βοιωτία
110
+ expected: Voiōtia
111
+ - source: Βόλος
112
+ expected: Volos
113
+ - source: Βόνιτσα
114
+ expected: Vonitsa
115
+ - source: Γαλαξίδι
116
+ expected: Galaxidi
117
+ - source: Γαλάτσι
118
+ expected: Galatsi
119
+ - source: Γιαννιτσά
120
+ expected: Giannitsa
121
+ - source: Γλυφάδα
122
+ expected: Glyphada
123
+ - source: Γρανίτσα
124
+ expected: Granitsa
125
+ - source: Γρεβενά
126
+ expected: Grevena
127
+ - source: Γύθειο
128
+ expected: Gytheio
129
+ - source: Διόνυσος
130
+ expected: Dionysos
131
+ - source: Δίστομο
132
+ expected: Distomo
133
+ - source: Δολιανά
134
+ expected: Doliana
135
+ - source: Δράμα
136
+ expected: Drama
137
+ - source: Δωδεκάνησα
138
+ expected: Dōdekanēsa
139
+ - source: Έδεσσα
140
+ expected: Edessa
141
+ - source: Ελευσίνα
142
+ expected: Eleusina
143
+ - source: Επίδαυρος
144
+ expected: Epidauros
145
+ - source: Επτάνησα
146
+ expected: Eptanēsa
147
+ - source: Ερμούπολη
148
+ expected: Ermoupolē
149
+ - source: Εύβοια
150
+ expected: Euvoia
151
+ - source: Ζάκυνθος
152
+ expected: Zakynthos
153
+ - source: Ήπειρος
154
+ expected: Ēpeiros
155
+ - source: Ηράκλειο
156
+ expected: Ērakleio
157
+ - source: Θάσος
158
+ expected: Thasos
159
+ - source: Θεσσαλονίκη
160
+ expected: Thessalonikē
161
+ - source: Θεσσαλία
162
+ expected: Thessalia
163
+ - source: Θεσπρωτία
164
+ expected: Thesprōtia
165
+ - source: Θήβα
166
+ expected: Thēva
167
+ - source: Θράκη
168
+ expected: Thrakē
169
+ - source: Ιθάκη
170
+ expected: Ithakē
171
+ - source: Ίος
172
+ expected: Ios
173
+ - source: Ιωάννινα
174
+ expected: Iōannina
175
+ - source: Καβάλα
176
+ expected: Kavala
177
+ - source: Καλάβρυτα
178
+ expected: Kalavryta
179
+ - source: Καλαμάτα
180
+ expected: Kalamata
181
+ - source: Καλαμπάκα
182
+ expected: Kalampaka
183
+ - source: Καλύβια
184
+ expected: Kalyvia
185
+ - source: Κάλυμνος
186
+ expected: Kalymnos
187
+ - source: Καρδίτσα
188
+ expected: Karditsa
189
+ - source: Καρπενήσι
190
+ expected: Karpenēsi
191
+ - source: Κάρυστος
192
+ expected: Karystos
193
+ - source: Καστελλόριζο
194
+ expected: Kastellorizo
195
+ - source: Καστοριά
196
+ expected: Kastoria
197
+ - source: Κατερίνη
198
+ expected: Katerinē
199
+ - source: Κάτω Αχαΐα
200
+ expected: Katō Achaia
201
+ - source: Κερατέα
202
+ expected: Keratea
203
+ - source: Κέρκυρα
204
+ expected: Kerkyra
205
+ - source: Κεφαλλονιά
206
+ expected: Kephallonia
207
+ - source: Κηφισιά
208
+ expected: Kēphisia
209
+ - source: Κιλκίς
210
+ expected: Kilkis
211
+ - source: Κοζάνη
212
+ expected: Kozanē
213
+ - source: Κολωνός
214
+ expected: Kolōnos
215
+ - source: Κομοτηνή
216
+ expected: Komotēnē
217
+ - source: Κόρινθος
218
+ expected: Korinthos
219
+ - source: Κορώνη
220
+ expected: Korōnē
221
+ - source: Κρανίδι
222
+ expected: Kranidi
223
+ - source: Κρέστενα
224
+ expected: Krestena
225
+ - source: Κρήτη
226
+ expected: Krētē
227
+ - source: Κύθηρα
228
+ expected: Kythēra
229
+ - source: Κυκλάδες
230
+ expected: Kyklades
231
+ - source: Κύμη
232
+ expected: Kymē
233
+ - source: Κυψέλη
234
+ expected: Kypselē
235
+ - source: Κως
236
+ expected: Kōs
237
+ - source: Λαγκαδάς
238
+ expected: Lankadas
239
+ - source: Λαμία
240
+ expected: Lamia
241
+ - source: Λάρισα
242
+ expected: Larisa
243
+ - source: Λαύριο
244
+ expected: Laurio
245
+ - source: Λέρος
246
+ expected: Leros
247
+ - source: Λέσβος
248
+ expected: Lesvos
249
+ - source: Λευκάδα
250
+ expected: Leukada
251
+ - source: Λήμνος
252
+ expected: Lēmnos
253
+ - source: Λιβαδειά
254
+ expected: Livadeia
255
+ - source: Μακεδονία
256
+ expected: Makedonia
257
+ - source: Μάνη
258
+ expected: Manē
259
+ - source: Μαραθώνας
260
+ expected: Marathōnas
261
+ - source: Μαρκόπουλο
262
+ expected: Markopoulo
263
+ - source: Μαρούσι
264
+ expected: Marousi
265
+ - source: Μέγαρα
266
+ expected: Megara
267
+ - source: Μεσολόγγι
268
+ expected: Mesolongi
269
+ - source: Μεταξουργείο
270
+ expected: Metaxourgeio
271
+ - source: Μέτσοβο
272
+ expected: Metsovo
273
+ - source: Μήλος
274
+ expected: Mēlos
275
+ - source: Μύκονος
276
+ expected: Mykonos
277
+ - source: Μυστράς
278
+ expected: Mystras
279
+ - source: Μυτιλήνη
280
+ expected: Mytilēnē
281
+ - source: Νάξος
282
+ expected: Naxos
283
+ - source: Νάουσα
284
+ expected: Naousa
285
+ - source: Ναύπακτος
286
+ expected: Naupaktos
287
+ - source: Ναύπλιο
288
+ expected: Nauplio
289
+ - source: Νέα Σμύρνη
290
+ expected: Nea Smyrnē
291
+ - source: Νίσυρος
292
+ expected: Nisyros
293
+ - source: Ξάνθη
294
+ expected: Xanthē
295
+ - source: Όλυμπος
296
+ expected: Olympos
297
+ - source: Παγκράτι
298
+ expected: Pankrati
299
+ - source: Παπάγου
300
+ expected: Papagou
301
+ - source: Πάρος
302
+ expected: Paros
303
+ - source: Πασαλιμάνι
304
+ expected: Pasalimani
305
+ - source: Πατήσια
306
+ expected: Patēsia
307
+ - source: Πάτμος
308
+ expected: Patmos
309
+ - source: Πάτρα
310
+ expected: Patra
311
+ - source: Πειραιάς
312
+ expected: Peiraias
313
+ - source: Πελοπόννησος
314
+ expected: Peloponnēsos
315
+ - source: Περιστέρι
316
+ expected: Peristeri
317
+ - source: Πεύκη
318
+ expected: Peukē
319
+ - source: Πήλιο
320
+ expected: Pēlio
321
+ - source: Πολύγυρος
322
+ expected: Polygyros
323
+ - source: Πόρος
324
+ expected: Poros
325
+ - source: Πρέβεζα
326
+ expected: Preveza
327
+ - source: Πτολεμαΐδα
328
+ expected: Ptolemaida
329
+ - source: Πύλος
330
+ expected: Pylos
331
+ - source: Πύργος
332
+ expected: Pyrgos
333
+ - source: Ρέθυμνο
334
+ expected: Rethymno
335
+ - source: Ρόδος
336
+ expected: Rodos
337
+ - source: Ρούμελη
338
+ expected: Roumelē
339
+ - source: Σαλαμίνα
340
+ expected: Salamina
341
+ - source: Σαμοθράκη
342
+ expected: Samothrakē
343
+ - source: Σάμος
344
+ expected: Samos
345
+ - source: Σαντορίνη
346
+ expected: Santorinē
347
+ - source: Σέρρες
348
+ expected: Serres
349
+ - source: Σίκινος
350
+ expected: Sikinos
351
+ - source: Σίφνος
352
+ expected: Siphnos
353
+ - source: Σκιάθος
354
+ expected: Skiathos
355
+ - source: Σκόπελος
356
+ expected: Skopelos
357
+ - source: Σούλι
358
+ expected: Souli
359
+ - source: Σπάρτη
360
+ expected: Spartē
361
+ - source: Στερεά Ελλάδα
362
+ expected: Sterea Ellada
363
+ - source: Στύρα
364
+ expected: Styra
365
+ - source: Σύμη
366
+ expected: Symē
367
+ - source: Σύρος
368
+ expected: Syros
369
+ - source: Σφακιά
370
+ expected: Sphakia
371
+ - source: Τήλος
372
+ expected: Tēlos
373
+ - source: Τήνος
374
+ expected: Tēnos
375
+ - source: Τρίκαλα
376
+ expected: Trikala
377
+ - source: Τρίπολη
378
+ expected: Tripolē
379
+ - source: Τσακωνιά
380
+ expected: Tsakōnia
381
+ - source: Ύδρα
382
+ expected: Ydra
383
+ - source: Φάληρο
384
+ expected: Phalēro
385
+ - source: Φλώρινα
386
+ expected: Phlōrina
387
+ - source: Φολέγανδρος
388
+ expected: Pholegandros
389
+ - source: Χάλκη
390
+ expected: Chalkē
391
+ - source: Χαλκίδα
392
+ expected: Chalkida
393
+ - source: Χαλάνδρι
394
+ expected: Chalandri
395
+ - source: Χαλκιδική
396
+ expected: Chalkidikē
397
+ - source: Χανιά
398
+ expected: Chania
399
+ - source: Χίος
400
+ expected: Chios
401
+ - source: Ψαρά
402
+ expected: Psara
403
+ - source: Αβάνα
404
+ expected: Avana
405
+ - source: Αγγλία
406
+ expected: Anglia
407
+ - source: Αϊβαλί
408
+ expected: Aivali
409
+ - source: Αλεξάνδρεια
410
+ expected: Alexandreia
411
+ - source: Άμστερνταμ
412
+ expected: Amsterntam
413
+ - source: Βαυαρία
414
+ expected: Vauaria
415
+ - source: Βενετία
416
+ expected: Venetia
417
+ - source: Βερολίνο
418
+ expected: Verolino
419
+ - source: Βερόνα
420
+ expected: Verona
421
+ - source: Βιέννη
422
+ expected: Viennē
423
+ - source: Γένοβα
424
+ expected: Genova
425
+ - source: Δουβλίνο
426
+ expected: Douvlino
427
+ - source: Καλαβρία
428
+ expected: Kalavria
429
+ - source: Καλιφόρνια
430
+ expected: Kaliphornia
431
+ - source: Καύκασος
432
+ expected: Kaukasos
433
+ - source: Κονγκό
434
+ expected: Konnko
435
+ - source: Κορσική
436
+ expected: Korsikē
437
+ - source: Κουρδιστάν
438
+ expected: Kourdistan
439
+ - source: Κωνσταντινούπολη
440
+ expected: Kōnstantinoupolē
441
+ - source: Κατεχόμενη Κύπρος
442
+ expected: Katechomenē Kypros
443
+ - source: Λαπωνία
444
+ expected: Lapōnia
445
+ - source: Λευκωσία
446
+ expected: Leukōsia
447
+ - source: Λιβόρνο
448
+ expected: Livorno
449
+ - source: Λονδίνο
450
+ expected: Londino
451
+ - source: Λυών
452
+ expected: Lyōn
453
+ - source: Μάλαγα
454
+ expected: Malaga
455
+ - source: Μασσαλία
456
+ expected: Massalia
457
+ - source: Μικρονησία
458
+ expected: Mikronēsia
459
+ - source: Μιλάνο
460
+ expected: Milano
461
+ - source: Μόσχα
462
+ expected: Moscha
463
+ - source: Μπολόνια
464
+ expected: Bolonia
465
+ - source: Νάπολη
466
+ expected: Napolē
467
+ - source: Νταγκεστάν
468
+ expected: Ḏankestan
469
+ - source: Νέα Υόρκη
470
+ expected: Nea Yorkē
471
+ - source: Οξφόρδη
472
+ expected: Oxphordē
473
+ - source: Ουαλία
474
+ expected: Oualia
475
+ - source: Παρίσι
476
+ expected: Parisi
477
+ - source: Πάφος
478
+ expected: Paphos
479
+ - source: Πολυνησία
480
+ expected: Polynēsia
481
+ - source: Ρώμη
482
+ expected: Rōmē
483
+ - source: Σαμάρεια
484
+ expected: Samareia
485
+ - source: Σικελία
486
+ expected: Sikelia
487
+ - source: Σκανδιναβία
488
+ expected: Skandinavia
489
+ - source: Σκόπια
490
+ expected: Skopia
491
+ - source: Σκωτία
492
+ expected: Skōtia
493
+ - source: Σμύρνη
494
+ expected: Smyrnē
495
+ - source: Ταϊτή
496
+ expected: Taitē
497
+ - source: Ταταρστάν
498
+ expected: Tatarstan
499
+ - source: Τζαμάικα
500
+ expected: Tzamaika
501
+ - source: Τηλλυρία
502
+ expected: Tēllyria
503
+ - source: Τιρόλο
504
+ expected: Tirolo
505
+ - source: Τορίνο
506
+ expected: Torino
507
+ - source: Φανάρι
508
+ expected: Phanari
509
+ - source: Φλωρεντία
510
+ expected: Phlōrentia
511
+ - source: Χαβάη
512
+ expected: Chavaē
513
+ - source: Χονγκ Κονγκ
514
+ expected: Chonnk Konnk
515
+
516
+ map:
517
+ # https://en.wikipedia.org/wiki/Romanization_of_Greek
518
+ rules:
519
+ - pattern: (?<=[ΑαΕεΟοΗηΩω])\u03A5 # Υ (after Α, Ε, Ο, Η, Ω)
520
+ result: U
521
+ - pattern: (?<=[ΑαΕεΟοΗηΩω])\u03C5 # υ (after Α, Ε, Ο, Η, Ω)
522
+ result: u
523
+ - pattern: (?<=[ΑαΕεΟοΗηΩω])\u03CD # ύ (after Α, Ε, Ο, Η, Ω)
524
+ result: u
525
+ - pattern: \u03A5(?=[Ιιί]) # Υ (before Ι)
526
+ result: U
527
+ - pattern: \u03C5(?=[Ιιί]) # υ (before Ι)
528
+ result: u
529
+ - pattern: \u0393(?=[γΓκΚξΞχΧ]) # Γ (before Γ, Κ, Ξ, Χ)
530
+ result: N
531
+ - pattern: \u03B3(?=[γΓκΚξΞχΧ]) # γ (before Γ, Κ, Ξ, Χ)
532
+ result: n
533
+ - pattern: (?<=\b)\u039D\u03A4 # ΝΤ
534
+ result: Ḏ
535
+ - pattern: (?<=\b)\u039D\u03C4 # Ντ
536
+ result: Ḏ
537
+ - pattern: (?<=\b)\u03BD\u03C4 # ντ
538
+ result: ḏ
539
+ - pattern: (?<=\b)\u039C\u03A0 # ΜΠ
540
+ result: B
541
+ - pattern: (?<=\b)\u039C\u03C0 # Μπ
542
+ result: B
543
+ - pattern: (?<=\b)\u03BC\u03C0 # μπ
544
+ result: b
545
+ - pattern: \u037E # ;
546
+ result: "?"
547
+ - pattern: \u003B # ;
548
+ result: "?"
549
+
550
+ characters:
551
+ "\u0027": ""
552
+ "\u0386": "A" # Ά
553
+ "\u0391": "A" # Α
554
+ "\u0392": "V" # Β
555
+ "\u0393": "G" # Γ
556
+ "\u0394": "D" # Δ
557
+ "\u0395": "E" # Ε
558
+ "\u0396": "Z" # Ζ
559
+ "\u0397": "Ē" # Η
560
+ "\u0398": "Th" # Θ
561
+ "\u0399": "I" # Ι
562
+ "\u039A": "K" # Κ
563
+ "\u039B": "L" # Λ
564
+ "\u039C": "M" # Μ
565
+ "\u039D": "N" # Ν
566
+ "\u039E": "X" # Ξ
567
+ "\u039F": "O" # Ο
568
+ "\u03A0": "P" # Π
569
+ "\u03A1": "R" # Ρ
570
+ "\u03A3": "S" # Σ
571
+ "\u03A4": "T" # Τ
572
+ "\u03A5": "Y" # Υ
573
+ "\u03A6": "Ph" # Φ
574
+ "\u03A7": "Ch" # Χ
575
+ "\u03A8": "Ps" # Ψ
576
+ "\u03A9": "Ō" # Ω
577
+ "\u0388": "E" # Έ
578
+ "\u0389": "Ē" # Ή
579
+ "\u038A": "I" # Ί
580
+ "\u038C": "O" # Ό
581
+ "\u038E": "Y" # Ύ
582
+ "\u038F": "Ō" # Ώ
583
+ "\u03AA": "I" # Ϊ
584
+ "\u03AB": "Y" # Ϋ
585
+
586
+ "\u03AC": "a" # ά
587
+ "\u03B1": "a" # α
588
+ "\u03B2": "v" # β
589
+ "\u03B3": "g" # γ
590
+ "\u03B4": "d" # δ
591
+ "\u03B5": "e" # ε
592
+ "\u03B6": "z" # ζ
593
+ "\u03B7": "ē" # η
594
+ "\u03B8": "th" # θ
595
+ "\u03B9": "i" # ι
596
+ "\u03BA": "k" # κ
597
+ "\u03BB": "l" # λ
598
+ "\u03BC": "m" # μ
599
+ "\u03BD": "n" # ν
600
+ "\u03BE": "x" # ξ
601
+ "\u03BF": "o" # ο
602
+ "\u03C0": "p" # π
603
+ "\u03C1": "r" # ρ
604
+ "\u03C3": "s" # σ
605
+ "\u03C2": "s" # ς
606
+ "\u03C4": "t" # τ
607
+ "\u03C5": "y" # υ
608
+ "\u03C6": "ph" # φ
609
+ "\u03C7": "ch" # χ
610
+ "\u03C8": "ps" # ψ
611
+ "\u03C9": "ō" # ω
612
+ "\u03AD": "e" # έ
613
+ "\u03AE": "ē" # ή
614
+ "\u03AF": "i" # ί
615
+ "\u03CC": "o" # ό
616
+ "\u03CD": "y" # ύ
617
+ "\u03CE": "ō" # ώ
618
+ "\u03CA": "i" # ϊ
619
+ "\u03CB": "y" # ϋ
620
+ "\u0390": "i" # ΐ
621
+ "\u03B0": "y" # ΰ
622
+
623
+ "\u0387": ";" # ·
624
+ "\u00B7": ";" # ·
625
+