interscript 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,32 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1987
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: UNGEGN Greek v4.0
8
+ url: http://www.eki.ee/wgrs/rom1_el.htm
9
+ creation_date: 1987
10
+ description: |
11
+ UNGEGN Romanization table for Greek
12
+
13
+ note:
14
+ - Transliteration version (reversible). Also appears in Annex B of ISO 843:1997. Originates in v1.0 of UNGEGN transliteration, 1987. This likely was the intended system in ELOT 743:1982 for transliteration, but the reversible character diacritics are missing from the document, and the document does not indicate what they were supposed to be.
15
+
16
+ tests:
17
+
18
+ - source: |
19
+ Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
20
+
21
+ Γιάννης Μακρυγιάννης.
22
+
23
+ expected: |
24
+ Éna práma mónon me parakíni̱se ki eména na grápso̱ óti toúti̱n ti̱n patrída ti̱n échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai fto̱choí kai politikoí kai stratio̱tikoí kai oi pléon mikróteroi ánthro̱poi; ósoi ago̱nistí̱kamen, analógo̱s o katheís, échomen na zí̱somen edó̱. To loipón doulépsamen óloi mazí, na ti̱n fylámen ki óloi mazí kai na mi̱n légei oúte o dynatós «egó̱» oúte o adýnatos. Xérete póte na légei o katheís «egó̱»? Ótan ago̱nisteí mónos tou kai fkiásei í̱ chalásei, na légei «egó̱»; ótan ómo̱s ago̱nízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó̱». Kai eis to exí̱s na máthomen gnó̱si̱, an thélomen na fkiásomen cho̱rión, na zí̱somen óloi mazí.
25
+
26
+ Giánni̱s Makrygiánni̱s.
27
+
28
+ map:
29
+ character_separator: ""
30
+ word_separator: " "
31
+ inherit: "elot-ell-Grek-Latn-743-1982-tl"
32
+
@@ -0,0 +1,20 @@
1
+ ---
2
+ authority_id: un
3
+ id: 2016
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: UNGEGN Greek v4.0
8
+ url: http://www.eki.ee/wgrs/rom1_el.htm
9
+ creation_date: 2016
10
+ description: |
11
+ UNGEGN Romanization table for Greek
12
+
13
+ note:
14
+ - Transcription version
15
+
16
+ map:
17
+ character_separator: ""
18
+ word_separator: " "
19
+ inherit: "elot-ell-Grek-Latn-743-1982-ts"
20
+
@@ -0,0 +1,780 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1987
4
+ language: ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: UNGEGN Greek v1.0, Fifth United Nations Conference on the Standardization of Geographical Names. Writing Systems and Guides to Pronunciation Romanization > Conversion of the Greek alphabet into Latin characters. Paper submitted by Greece and Cyprus.
8
+ url: http://www.eki.ee/wgrs/rom1_el.htm
9
+ creation_date: 1987
10
+ description: |
11
+ UNGEGN Romanization table for Greek: Phonetic transcription
12
+
13
+
14
+ note:
15
+ - Also included in ISO 843:1997, Annex B, Column 5, and ELOT 743:1982, column 5.
16
+ - Corrected obvious errors, which occur every time the table has reappeared: χ > x, x > ks, oï > oi.
17
+ - The vowels are taken from the specification, but some are controversial: /ɑ ɛ/ but /o/.
18
+ - Stress is not indicated. (To do so in IPA would require syllabification in preprocessing, since stress is positioned at syllable breaks.)
19
+ - Followed specification in treating final μπ as b, but final ντ as nd. That distinction is dubious. (In ELOT 743:1982, both d and nd are erroneously marked as initial, and no final is given.)
20
+ - τζ is not correctly transcribed as dz: fixed
21
+ - not reducing geminated consonants: fixed
22
+
23
+ tests:
24
+
25
+ - source: |
26
+ Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί,
27
+
28
+ και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι·
29
+
30
+ όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ.
31
+
32
+ Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος.
33
+
34
+ Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»·
35
+
36
+ όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ».
37
+
38
+ Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
39
+
40
+ Γιάννης Μακρυγιάννης.
41
+
42
+ expected: |
43
+ ɛnɑ prɑmɑ monon mɛ pɑrɑkinisɛ ki ɛmɛnɑ nɑ ɣrɑpso oti tutin tin pɑtriðɑ tin ɛxomɛn oli mɑzi,
44
+
45
+ kɛ sofi ki ɑmɑθis kɛ plusii kɛ ftoxi kɛ politiki kɛ strɑtiotiki kɛ i plɛon mikrotɛri ɑnθropi;
46
+
47
+ osi ɑɣonistikɑmɛn, ɑnɑloɣos o kɑθis, ɛxomɛn nɑ zisomɛn ɛðo.
48
+
49
+ to lipon ðulɛpsɑmɛn oli mɑzi, nɑ tin filɑmɛn ki oli mɑzi kɛ nɑ min lɛɣi utɛ o ðinɑtos «ɛɣo» utɛ o ɑðinɑtos.
50
+
51
+ ksɛrɛtɛ potɛ nɑ lɛɣi o kɑθis «ɛɣo»? otɑn ɑɣonisti monos tu kɛ fkiɑsi i xɑlɑsi, nɑ lɛɣi «ɛɣo»;
52
+
53
+ otɑn omos ɑɣonizondɛ poli kɛ fkiɑnun, totɛ nɑ lɛnɛ «ɛmis». imɑstɛ is to «ɛmis» ki oxi is to «ɛɣo».
54
+
55
+ kɛ is to ɛksis nɑ mɑθomɛn ɣnosi, ɑn θɛlomɛn nɑ fkiɑsomɛn xorion, nɑ zisomɛn oli mɑzi.
56
+
57
+ ɣiɑnis mɑkriɣiɑnis.
58
+
59
+
60
+ - source: ΑΘΗΝΑ
61
+ expected: ɑθinɑ
62
+ - source: μπαμπάκι
63
+ expected: bɑmbɑki
64
+ - source: νταντά
65
+ expected: dɑndɑ
66
+ - source: γκέγκε
67
+ expected: ɡɛŋɡɛ
68
+ - source: Γκαμπόν
69
+ expected: ɡɑmbon
70
+ - source: Μάγχη
71
+ expected: mɑnxi
72
+ - source: κογξ
73
+ expected: konks
74
+ - source: υιός
75
+ expected: ios
76
+ - source: Υιός
77
+ expected: ios
78
+ - source: νεράντζι
79
+ expected: nɛrɑndzi
80
+ - source: Γοίθιος
81
+ expected: ɣiθios
82
+ - source: μπέικον
83
+ expected: bɛikon
84
+ - source: μπέϊκον
85
+ expected: bɛikon
86
+ - source: βόλεϊ
87
+ expected: volɛi
88
+ - source: αθεΐα
89
+ expected: ɑθɛiɑ
90
+ - source: Εϊγιαφιάτλαγιοκουτλ
91
+ expected: ɛiɣiɑfiɑtlɑɣiokutl
92
+ - source: Εΐτζι
93
+ expected: ɛidzi
94
+ - source: Μυρτώο
95
+ expected: mirtoo
96
+ - source: αέρας
97
+ expected: ɑɛrɑs
98
+ - source: γαυ γαυ
99
+ expected: ɣɑf ɣɑf
100
+ - source: Ταΰγετος
101
+ expected: tɑiɣɛtos
102
+ - source: σπρέυ
103
+ expected: sprɛi
104
+
105
+ - source: Αθήνα
106
+ expected: ɑθinɑ
107
+ - source: Άγιον Όρος
108
+ expected: ɑɣion oros
109
+ - source: Άγραφα
110
+ expected: ɑɣrɑfɑ
111
+ - source: Αγρίνιο
112
+ expected: ɑɣrinio
113
+ - source: Αίγινα
114
+ expected: ɛɣinɑ
115
+ - source: Αίγιο
116
+ expected: ɛɣio
117
+ - source: Αλεξανδρούπολη
118
+ expected: ɑlɛksɑnðrupoli
119
+ - source: Αλεποχώρι
120
+ expected: ɑlɛpoxori
121
+ - source: Αμοργός
122
+ expected: ɑmorɣos
123
+ - source: Άμφισσα
124
+ expected: ɑmfisɑ
125
+ - source: Αράχωβα
126
+ expected: ɑrɑxovɑ
127
+ - source: Άργος
128
+ expected: ɑrɣos
129
+ - source: Αρκαδία
130
+ expected: ɑrkɑðiɑ
131
+ - source: Άρτα
132
+ expected: ɑrtɑ
133
+ - source: Βελούχι
134
+ expected: vɛluxi
135
+ - source: Βέροια
136
+ expected: vɛriɑ
137
+ - source: Βοιωτία
138
+ expected: viotiɑ
139
+ - source: Βόλος
140
+ expected: volos
141
+ - source: Βόνιτσα
142
+ expected: vonitsɑ
143
+ - source: Γαλαξίδι
144
+ expected: ɣɑlɑksiði
145
+ - source: Γαλάτσι
146
+ expected: ɣɑlɑtsi
147
+ - source: Γιαννιτσά
148
+ expected: ɣiɑnitsɑ
149
+ - source: Γλυφάδα
150
+ expected: ɣlifɑðɑ
151
+ - source: Γρανίτσα
152
+ expected: ɣrɑnitsɑ
153
+ - source: Γρεβενά
154
+ expected: ɣrɛvɛnɑ
155
+ - source: Γύθειο
156
+ expected: ɣiθio
157
+ - source: Διόνυσος
158
+ expected: ðionisos
159
+ - source: Δίστομο
160
+ expected: ðistomo
161
+ - source: Δολιανά
162
+ expected: ðoliɑnɑ
163
+ - source: Δράμα
164
+ expected: ðrɑmɑ
165
+ - source: Δωδεκάνησα
166
+ expected: ðoðɛkɑnisɑ
167
+ - source: Έδεσσα
168
+ expected: ɛðɛsɑ
169
+ - source: Ελευσίνα
170
+ expected: ɛlɛfsinɑ
171
+ - source: Επίδαυρος
172
+ expected: ɛpiðɑvros
173
+ - source: Επτάνησα
174
+ expected: ɛptɑnisɑ
175
+ - source: Ερμούπολη
176
+ expected: ɛrmupoli
177
+ - source: Εύβοια
178
+ expected: ɛviɑ
179
+ - source: Ζάκυνθος
180
+ expected: zɑkinθos
181
+ - source: Ήπειρος
182
+ expected: ipiros
183
+ - source: Ηράκλειο
184
+ expected: irɑklio
185
+ - source: Θάσος
186
+ expected: θɑsos
187
+ - source: Θεσσαλονίκη
188
+ expected: θɛsɑloniki
189
+ - source: Θεσσαλία
190
+ expected: θɛsɑliɑ
191
+ - source: Θεσπρωτία
192
+ expected: θɛsprotiɑ
193
+ - source: Θήβα
194
+ expected: θivɑ
195
+ - source: Θράκη
196
+ expected: θrɑki
197
+ - source: Ιθάκη
198
+ expected: iθɑki
199
+ - source: Ίος
200
+ expected: ios
201
+ - source: Ιωάννινα
202
+ expected: ioɑninɑ
203
+ - source: Καβάλα
204
+ expected: kɑvɑlɑ
205
+ - source: Καλάβρυτα
206
+ expected: kɑlɑvritɑ
207
+ - source: Καλαμάτα
208
+ expected: kɑlɑmɑtɑ
209
+ - source: Καλαμπάκα
210
+ expected: kɑlɑmbɑkɑ
211
+ - source: Καλύβια
212
+ expected: kɑliviɑ
213
+ - source: Κάλυμνος
214
+ expected: kɑlimnos
215
+ - source: Καρδίτσα
216
+ expected: kɑrðitsɑ
217
+ - source: Καρπενήσι
218
+ expected: kɑrpɛnisi
219
+ - source: Κάρυστος
220
+ expected: kɑristos
221
+ - source: Καστελλόριζο
222
+ expected: kɑstɛlorizo
223
+ - source: Καστοριά
224
+ expected: kɑstoriɑ
225
+ - source: Κατερίνη
226
+ expected: kɑtɛrini
227
+ - source: Κάτω Αχαΐα
228
+ expected: kɑto ɑxɑiɑ
229
+ - source: Κερατέα
230
+ expected: kɛrɑtɛɑ
231
+ - source: Κέρκυρα
232
+ expected: kɛrkirɑ
233
+ - source: Κεφαλλονιά
234
+ expected: kɛfɑloniɑ
235
+ - source: Κηφισιά
236
+ expected: kifisiɑ
237
+ - source: Κιλκίς
238
+ expected: kilkis
239
+ - source: Κοζάνη
240
+ expected: kozɑni
241
+ - source: Κολωνός
242
+ expected: kolonos
243
+ - source: Κομοτηνή
244
+ expected: komotini
245
+ - source: Κόρινθος
246
+ expected: korinθos
247
+ - source: Κορώνη
248
+ expected: koroni
249
+ - source: Κρανίδι
250
+ expected: krɑniði
251
+ - source: Κρέστενα
252
+ expected: krɛstɛnɑ
253
+ - source: Κρήτη
254
+ expected: kriti
255
+ - source: Κύθηρα
256
+ expected: kiθirɑ
257
+ - source: Κυκλάδες
258
+ expected: kiklɑðɛs
259
+ - source: Κύμη
260
+ expected: kimi
261
+ - source: Κυψέλη
262
+ expected: kipsɛli
263
+ - source: Κως
264
+ expected: kos
265
+ - source: Λαγκαδάς
266
+ expected: lɑŋɡɑðɑs
267
+ - source: Λαμία
268
+ expected: lɑmiɑ
269
+ - source: Λάρισα
270
+ expected: lɑrisɑ
271
+ - source: Λαύριο
272
+ expected: lɑvrio
273
+ - source: Λέρος
274
+ expected: lɛros
275
+ - source: Λέσβος
276
+ expected: lɛzvos
277
+ - source: Λευκάδα
278
+ expected: lɛfkɑðɑ
279
+ - source: Λήμνος
280
+ expected: limnos
281
+ - source: Λιβαδειά
282
+ expected: livɑðiɑ
283
+ - source: Μακεδονία
284
+ expected: mɑkɛðoniɑ
285
+ - source: Μάνη
286
+ expected: mɑni
287
+ - source: Μαραθώνας
288
+ expected: mɑrɑθonɑs
289
+ - source: Μαρκόπουλο
290
+ expected: mɑrkopulo
291
+ - source: Μαρούσι
292
+ expected: mɑrusi
293
+ - source: Μέγαρα
294
+ expected: mɛɣɑrɑ
295
+ - source: Μεσολόγγι
296
+ expected: mɛsoloŋɡi
297
+ - source: Μεταξουργείο
298
+ expected: mɛtɑksurɣio
299
+ - source: Μέτσοβο
300
+ expected: mɛtsovo
301
+ - source: Μήλος
302
+ expected: milos
303
+ - source: Μύκονος
304
+ expected: mikonos
305
+ - source: Μυστράς
306
+ expected: mistrɑs
307
+ - source: Μυτιλήνη
308
+ expected: mitilini
309
+ - source: Νάξος
310
+ expected: nɑksos
311
+ - source: Νάουσα
312
+ expected: nɑusɑ
313
+ - source: Ναύπακτος
314
+ expected: nɑfpɑktos
315
+ - source: Ναύπλιο
316
+ expected: nɑfplio
317
+ - source: Νέα Σμύρνη
318
+ expected: nɛɑ zmirni
319
+ - source: Νίσυρος
320
+ expected: nisiros
321
+ - source: Ξάνθη
322
+ expected: ksɑnθi
323
+ - source: Όλυμπος
324
+ expected: olimbos
325
+ - source: Παγκράτι
326
+ expected: pɑŋɡrɑti
327
+ - source: Παπάγου
328
+ expected: pɑpɑɣu
329
+ - source: Πάρος
330
+ expected: pɑros
331
+ - source: Πασαλιμάνι
332
+ expected: pɑsɑlimɑni
333
+ - source: Πατήσια
334
+ expected: pɑtisiɑ
335
+ - source: Πάτμος
336
+ expected: pɑtmos
337
+ - source: Πάτρα
338
+ expected: pɑtrɑ
339
+ - source: Πειραιάς
340
+ expected: pirɛɑs
341
+ - source: Πελοπόννησος
342
+ expected: pɛloponisos
343
+ - source: Περιστέρι
344
+ expected: pɛristɛri
345
+ - source: Πεύκη
346
+ expected: pɛfki
347
+ - source: Πήλιο
348
+ expected: pilio
349
+ - source: Πολύγυρος
350
+ expected: poliɣiros
351
+ - source: Πόρος
352
+ expected: poros
353
+ - source: Πρέβεζα
354
+ expected: prɛvɛzɑ
355
+ - source: Πτολεμαΐδα
356
+ expected: ptolɛmɑiðɑ
357
+ - source: Πύλος
358
+ expected: pilos
359
+ - source: Πύργος
360
+ expected: pirɣos
361
+ - source: Ρέθυμνο
362
+ expected: rɛθimno
363
+ - source: Ρόδος
364
+ expected: roðos
365
+ - source: Ρούμελη
366
+ expected: rumɛli
367
+ - source: Σαλαμίνα
368
+ expected: sɑlɑminɑ
369
+ - source: Σαμοθράκη
370
+ expected: sɑmoθrɑki
371
+ - source: Σάμος
372
+ expected: sɑmos
373
+ - source: Σαντορίνη
374
+ expected: sɑndorini
375
+ - source: Σέρρες
376
+ expected: sɛrɛs
377
+ - source: Σίκινος
378
+ expected: sikinos
379
+ - source: Σίφνος
380
+ expected: sifnos
381
+ - source: Σκιάθος
382
+ expected: skiɑθos
383
+ - source: Σκόπελος
384
+ expected: skopɛlos
385
+ - source: Σούλι
386
+ expected: suli
387
+ - source: Σπάρτη
388
+ expected: spɑrti
389
+ - source: Στερεά Ελλάδα
390
+ expected: stɛrɛɑ ɛlɑðɑ
391
+ - source: Στύρα
392
+ expected: stirɑ
393
+ - source: Σύμη
394
+ expected: simi
395
+ - source: Σύρος
396
+ expected: siros
397
+ - source: Σφακιά
398
+ expected: sfɑkiɑ
399
+ - source: Τήλος
400
+ expected: tilos
401
+ - source: Τήνος
402
+ expected: tinos
403
+ - source: Τρίκαλα
404
+ expected: trikɑlɑ
405
+ - source: Τρίπολη
406
+ expected: tripoli
407
+ - source: Τσακωνιά
408
+ expected: tsɑkoniɑ
409
+ - source: Ύδρα
410
+ expected: iðrɑ
411
+ - source: Φάληρο
412
+ expected: fɑliro
413
+ - source: Φλώρινα
414
+ expected: florinɑ
415
+ - source: Φολέγανδρος
416
+ expected: folɛɣɑnðros
417
+ - source: Χάλκη
418
+ expected: xɑlki
419
+ - source: Χαλκίδα
420
+ expected: xɑlkiðɑ
421
+ - source: Χαλάνδρι
422
+ expected: xɑlɑnðri
423
+ - source: Χαλκιδική
424
+ expected: xɑlkiðiki
425
+ - source: Χανιά
426
+ expected: xɑniɑ
427
+ - source: Χίος
428
+ expected: xios
429
+ - source: Ψαρά
430
+ expected: psɑrɑ
431
+ - source: Αβάνα
432
+ expected: ɑvɑnɑ
433
+ - source: Αγγλία
434
+ expected: ɑŋɡliɑ
435
+ - source: Αϊβαλί
436
+ expected: ɑivɑli
437
+ - source: Αλεξάνδρεια
438
+ expected: ɑlɛksɑnðriɑ
439
+ - source: Άμστερνταμ
440
+ expected: ɑmstɛrndɑm
441
+ - source: Βαυαρία
442
+ expected: vɑvɑriɑ
443
+ - source: Βενετία
444
+ expected: vɛnɛtiɑ
445
+ - source: Βερολίνο
446
+ expected: vɛrolino
447
+ - source: Βερόνα
448
+ expected: vɛronɑ
449
+ - source: Βιέννη
450
+ expected: viɛni
451
+ - source: Γένοβα
452
+ expected: ɣɛnovɑ
453
+ - source: Δουβλίνο
454
+ expected: ðuvlino
455
+ - source: Καλαβρία
456
+ expected: kɑlɑvriɑ
457
+ - source: Καλιφόρνια
458
+ expected: kɑliforniɑ
459
+ - source: Καύκασος
460
+ expected: kɑfkɑsos
461
+ - source: Κονγκό
462
+ expected: konŋɡo
463
+ - source: Κορσική
464
+ expected: korsiki
465
+ - source: Κουρδιστάν
466
+ expected: kurðistɑn
467
+ - source: Κωνσταντινούπολη
468
+ expected: konstɑndinupoli
469
+ - source: Κατεχόμενη Κύπρος
470
+ expected: kɑtɛxomɛni kipros
471
+ - source: Λαπωνία
472
+ expected: lɑponiɑ
473
+ - source: Λευκωσία
474
+ expected: lɛfkosiɑ
475
+ - source: Λιβόρνο
476
+ expected: livorno
477
+ - source: Λονδίνο
478
+ expected: lonðino
479
+ - source: Λυών
480
+ expected: lion
481
+ - source: Μάλαγα
482
+ expected: mɑlɑɣɑ
483
+ - source: Μασσαλία
484
+ expected: mɑsɑliɑ
485
+ - source: Μικρονησία
486
+ expected: mikronisiɑ
487
+ - source: Μιλάνο
488
+ expected: milɑno
489
+ - source: Μόσχα
490
+ expected: mosxɑ
491
+ - source: Μπολόνια
492
+ expected: boloniɑ
493
+ - source: Νάπολη
494
+ expected: nɑpoli
495
+ - source: Νταγκεστάν
496
+ expected: dɑŋɡɛstɑn
497
+ - source: Νέα Υόρκη
498
+ expected: nɛɑ iorki
499
+ - source: Οξφόρδη
500
+ expected: oksforði
501
+ - source: Ουαλία
502
+ expected: uɑliɑ
503
+ - source: Παρίσι
504
+ expected: pɑrisi
505
+ - source: Πάφος
506
+ expected: pɑfos
507
+ - source: Πολυνησία
508
+ expected: polinisiɑ
509
+ - source: Ρώμη
510
+ expected: romi
511
+ - source: Σαμάρεια
512
+ expected: sɑmɑriɑ
513
+ - source: Σικελία
514
+ expected: sikɛliɑ
515
+ - source: Σκανδιναβία
516
+ expected: skɑnðinɑviɑ
517
+ - source: Σκόπια
518
+ expected: skopiɑ
519
+ - source: Σκωτία
520
+ expected: skotiɑ
521
+ - source: Σμύρνη
522
+ expected: zmirni
523
+ - source: Ταϊτή
524
+ expected: tɑiti
525
+ - source: Ταταρστάν
526
+ expected: tɑtɑrstɑn
527
+ - source: Τζαμάικα
528
+ expected: dzɑmɑikɑ
529
+ - source: Τηλλυρία
530
+ expected: tiliriɑ
531
+ - source: Τιρόλο
532
+ expected: tirolo
533
+ - source: Τορίνο
534
+ expected: torino
535
+ - source: Φανάρι
536
+ expected: fɑnɑri
537
+ - source: Φλωρεντία
538
+ expected: florɛndiɑ
539
+ - source: Χαβάη
540
+ expected: xɑvɑi
541
+ - source: Χονγκ Κονγκ
542
+ expected: xonŋɡ konŋɡ
543
+
544
+ map:
545
+ downcase: True
546
+
547
+ # https://en.wikipedia.org/wiki/Romanization_of_Greek
548
+ rules:
549
+ - pattern: (?<=[ΑαΕεΗη])\u03A5(?=[θΘκΚξΞπΠσΣςτΤφΦχΧψΨ]) # Υ (after Α, Ε, Η)
550
+ result: f
551
+ - pattern: (?<=[ΑαΕεΗη])\u03A5(?=\b) # Υ (after Α, Ε, Η)
552
+ result: f
553
+ - pattern: (?<=[ΑαΕεΗη])\u03C5(?=[θΘκΚξΞπΠσΣςτΤφΦχΧψΨ]) # υ (after Α, Ε, Η)
554
+ result: f
555
+ - pattern: (?<=[ΑαΕεΗη])\u03C5(?=\b) # υ (after Α, Ε, Η)
556
+ result: f
557
+ - pattern: (?<=[ΑαΕεΗη])\u03A5 # Υ (after Α, Ε, Η)
558
+ result: v
559
+ - pattern: (?<=[ΑαΕεΗη])\u03C5 # υ (after Α, Ε, Η)
560
+ result: v
561
+ - pattern: \u0391\u03CD(?=[θΘκΚξΞπΠσΣςτΤφΦχΧψΨ]) # Αύ
562
+ result: ɑf
563
+ - pattern: \u0391\u03CD(?=\b) # Αύ
564
+ result: ɑf
565
+ - pattern: \u0391\u03CD # Αύ
566
+ result: ɑv
567
+ - pattern: \u03B1\u03CD(?=[θΘκΚξΞπΠσΣςτΤφΦχΧψΨ]) # αύ
568
+ result: ɑf
569
+ - pattern: \u03B1\u03CD(?=\b) # αύ
570
+ result: ɑf
571
+ - pattern: \u03B1\u03CD # αύ
572
+ result: ɑv
573
+ - pattern: \u0395\u03CD(?=[θΘκΚξΞπΠσΣςτΤφΦχΧψΨ]) # Εύ
574
+ result: ɛf
575
+ - pattern: \u0395\u03CD(?=\b) # Εύ
576
+ result: ɛf
577
+ - pattern: \u0395\u03CD # Εύ
578
+ result: ɛv
579
+ - pattern: \u03B5\u03CD(?=[θΘκΚξΞπΠσΣςτΤφΦχΧψΨ]) # εύ
580
+ result: ɛf
581
+ - pattern: \u03B5\u03CD(?=\b) # εύ
582
+ result: ɛf
583
+ - pattern: \u03B5\u03CD # εύ
584
+ result: ɛv
585
+ - pattern: \u0397\u03CD(?=[θΘκΚξΞπΠσΣςτΤφΦχΧψΨ]) # Ηύ
586
+ result: if
587
+ - pattern: \u0397\u03CD(?=\b) # Ηύ
588
+ result: if
589
+ - pattern: \u0397\u03CD # Ηύ
590
+ result: iv
591
+ - pattern: \u03B7\u03CD(?=[θΘκΚξΞπΠσΣςτΤφΦχΧψΨ]) # ηύ
592
+ result: if
593
+ - pattern: \u03B7\u03CD(?=\b) # ηύ
594
+ result: if
595
+ - pattern: \u03B7\u03CD # ηύ
596
+ result: iv
597
+ - pattern: \u039F[υΥύ] # Υ (after Ο)
598
+ result: u
599
+ - pattern: \u03BF[υΥύ] # υ (after Ο)
600
+ result: u
601
+ - pattern: (?<=[ΆάΈέΉήΌό])\u03A5 # Άυ, Έυ, Ήυ, Όυ
602
+ result: i
603
+ - pattern: (?<=[ΆάΈέΉήΌό])\u03C5 # Άυ, Έυ, Ήυ, Όυ
604
+ result: i
605
+ - pattern: \u0393(?=[ξΞχΧ]) # Γ (before Ξ, Χ)
606
+ result: n
607
+ - pattern: \u03B3(?=[ξΞχΧ]) # γ (before Ξ, Χ)
608
+ result: n
609
+ - pattern: (?<!\b)\u0393[Γγ] # Γ (before Γ)
610
+ result: ŋɡ
611
+ - pattern: (?<!\b)\u03B3\u03B3 # γ (before Γ)
612
+ result: ŋɡ
613
+ - pattern: (?<!\b)\u0393[Κκ] # Γ (before Κ)
614
+ result: ŋɡ
615
+ - pattern: (?<!\b)\u03B3[Κκ] # γ (before Κ)
616
+ result: ŋɡ
617
+ - pattern: (?<=\b)\u0393[Γγ] # Γ (before Γ)
618
+ result: ɡ
619
+ - pattern: (?<=\b)\u03B3\u03B3 # γ (before Γ)
620
+ result: ɡ
621
+ - pattern: (?<=\b)\u0393[Κκ] # Γ (before Κ)
622
+ result: ɡ
623
+ - pattern: (?<=\b)\u03B3[Κκ] # γ (before Κ)
624
+ result: ɡ
625
+ - pattern: (?<=\b)\u039C[πΠ] # ΜΠ (initially)
626
+ result: b
627
+ - pattern: (?<=\b)\u03BC[πΠ] # μπ (initially)
628
+ result: b
629
+ - pattern: \u039C[πΠ](?=\b) # ΜΠ (finally)
630
+ result: b
631
+ - pattern: \u03BC[πΠ](?=\b) # μπ (finally)
632
+ result: b
633
+ - pattern: (?<!\b)\u039C[πΠ](?!\b) # ΜΠ (medially)
634
+ result: mb
635
+ - pattern: (?<!\b)\u03BC[πΠ](?!\b) # μπ (medially)
636
+ result: mb
637
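+ - pattern: (?<=\b)[Νν][ττ] # Ντ (initial) -- NOTE(review): [ττ] lists lowercase τ twice; [Ττ] was probably intended, confirm
638
+ result: d
639
+ - pattern: (?<!\b)[Νν][ττ] # Ντ (medial, final) -- NOTE(review): [ττ] lists lowercase τ twice; [Ττ] was probably intended, confirm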
640
+ result: nd
641
+ - pattern: \u03A3(?=[ΒβΓγΔδΖζΛλΜμΝνΡρ]) # Σ (before voiced consonants)
642
+ result: z
643
+ - pattern: \u03C3(?=[ΒβΓγΔδΖζΛλΜμΝνΡρ]) # σ (before voiced consonants)
644
+ result: z
645
+ - pattern: \u0391[Ιιί] # Αι
646
+ result: ɛ
647
+ - pattern: \u03B1[Ιιί] # αι
648
+ result: ɛ
649
+ - pattern: \u03A5[Ιιί] # Υι
650
+ result: i
651
+ - pattern: \u03C5[Ιιί] # υι
652
+ result: i
653
+ - pattern: \u0395[Ιιί] # Ει
654
+ result: i
655
+ - pattern: \u03B5[Ιιί] # ει
656
+ result: i
657
+ - pattern: \u039F[Ιιί] # Οι
658
+ result: i
659
+ - pattern: \u03BF[Ιιί] # οι
660
+ result: i
661
+ - pattern: \u03A4[ζΖ] # Τζ
662
+ result: dz
663
+ - pattern: \u03c4[ζΖ] # τζ
664
+ result: dz
665
+ - pattern: \u037E # ;
666
+ result: "?"
667
+ - pattern: \u003B # ;
668
+ result: "?"
669
+
670
+ characters:
671
+ "\u0027": ""
672
+ "\u0386": "ɑ" # Ά
673
+ "\u0391": "ɑ" # Α
674
+ "\u0392": "v" # Β
675
+ "\u0393": "ɣ" # Γ
676
+ "\u0394": "ð" # Δ
677
+ "\u0395": "ɛ" # Ε
678
+ "\u0395\u03AA": "ɛi" # ΕΪ
679
+ "\u0395\u03CA": "ɛi" # Εϊ
680
+ "\u0388\u03CA": "ɛi" # Έϊ
681
+ "\u0396": "z" # Ζ
682
+ "\u0397": "i" # Η
683
+ "\u0398": "θ" # Θ
684
+ "\u0399": "i" # Ι
685
+ "\u039A": "k" # Κ
686
+ "\u039B": "l" # Λ
687
+ "\u039C": "m" # Μ
688
+ "\u039D": "n" # Ν
689
+ "\u039E": "ks" # Ξ
690
+ "\u039F": "o" # Ο
691
+ "\u03A0": "p" # Π
692
+ "\u03A1": "r" # Ρ
693
+ "\u03A3": "s" # Σ
694
+ "\u03A4": "t" # Τ
695
+ "\u03A5": "i" # Υ
696
+ "\u03A6": "f" # Φ
697
+ "\u03A7": "x" # Χ
698
+ "\u03A8": "ps" # Ψ
699
+ "\u03A9": "o" # Ω
700
+ "\u0388": "ɛ" # Έ
701
+ "\u0389": "i" # Ή
702
+ "\u038A": "i" # Ί
703
+ "\u038C": "o" # Ό
704
+ "\u038E": "i" # Ύ
705
+ "\u038F": "o" # Ώ
706
+ "\u03AA": "i" # Ϊ
707
+ "\u03AB": "i" # Ϋ
708
+
709
+ "\u03AC": "ɑ" # ά
710
+ "\u03B1": "ɑ" # α
711
+ "\u03B2": "v" # β
712
+ "\u03B3": "ɣ" # γ
713
+ "\u03B4": "ð" # δ
714
+ "\u03B5": "ɛ" # ε
715
+ "\u03B5\u03CA": "ɛi" # εϊ
716
+ "\u03AD\u03CA": "ɛi" # έϊ
717
+ "\u03AD\u03B9": "ɛi" # έι
718
+ "\u03B6": "z" # ζ
719
+ "\u03B7": "i" # η
720
+ #"\u03B8": "θ" # θ
721
+ "\u03B9": "i" # ι
722
+ "\u03BA": "k" # κ
723
+ "\u03BB": "l" # λ
724
+ "\u03BC": "m" # μ
725
+ "\u03BD": "n" # ν
726
+ "\u03BE": "ks" # ξ
727
+ "\u03BF": "o" # ο
728
+ "\u03C0": "p" # π
729
+ "\u03C1": "r" # ρ
730
+ "\u03C3": "s" # σ
731
+ "\u03C2": "s" # ς
732
+ "\u03C4": "t" # τ
733
+ "\u03C5": "i" # υ
734
+ "\u03C6": "f" # φ
735
+ "\u03C7": "x" # χ
736
+ "\u03C8": "ps" # ψ
737
+ "\u03C9": "o" # ω
738
+ "\u03AD": "ɛ" # έ
739
+ "\u03AE": "i" # ή
740
+ "\u03AF": "i" # ί
741
+ "\u03CC": "o" # ό
742
+ "\u03CD": "i" # ύ
743
+ "\u03CE": "o" # ώ
744
+ "\u03CA": "i" # ϊ
745
+ "\u03CB": "i" # ϋ
746
+ "\u0390": "i" # ΐ
747
+ "\u03B0": "i" # ΰ
748
+
749
+ "\u0387": ";" # ·
750
+ "\u00B7": ";" # ·
751
+
752
+ postrules:
753
+ - pattern: vv
754
+ result: "v"
755
+ - pattern: ðð
756
+ result: "ð"
757
+ - pattern: zz
758
+ result: "z"
759
+ - pattern: θθ
760
+ result: "θ"
761
+ - pattern: kk
762
+ result: "k"
763
+ - pattern: ll
764
+ result: "l"
765
+ - pattern: mm
766
+ result: "m"
767
+ - pattern: nn
768
+ result: "n"
769
+ - pattern: pp
770
+ result: "p"
771
+ - pattern: rr
772
+ result: "r"
773
+ - pattern: ss
774
+ result: "s"
775
+ - pattern: tt
776
+ result: "t"
777
+ - pattern: ff
778
+ result: "f"
779
+ - pattern: xx
780
+ result: "x"