interscript 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +4 -4
  2. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  3. data/lib/interscript.rb +5 -1
  4. data/lib/interscript/fs.rb +3 -1
  5. data/lib/interscript/mapping.rb +2 -2
  6. data/lib/interscript/opal.rb +5 -1
  7. data/lib/interscript/opal/maps.js.erb +7 -4
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  14. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
  15. data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
  18. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  19. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
  21. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  22. data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
  23. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  24. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
  25. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
  26. data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
  27. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
  28. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
  29. data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
  30. data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
  31. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  32. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  33. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
  34. data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
  35. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
  36. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
  37. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
  38. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
  39. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
  40. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
  41. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  42. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  43. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
  44. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
  45. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  46. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  47. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  48. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  49. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  50. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  51. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  52. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  53. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  54. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
  57. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
  59. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
  60. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
  61. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
  62. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
  63. data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
  64. data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
  65. data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
  68. data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
  69. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
  70. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
  71. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  72. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
  73. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
  74. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
  75. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  76. data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
  77. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
  78. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
  79. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
  80. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
  81. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
  82. data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
  83. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  84. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  85. data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
  86. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  87. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
  88. data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
  89. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  90. data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
  91. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  92. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
  93. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
  94. data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
  95. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
  96. data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
  97. data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
  98. data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
  99. data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
  100. data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
  101. data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
  102. data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
  103. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
  104. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
  105. data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
  106. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  107. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  108. data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
  109. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  110. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  111. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  112. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
  113. data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
  114. data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
  115. data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
  116. metadata +41 -15
@@ -94,25 +94,33 @@ tests:
94
94
  # https://unstats.un.org/unsd/geoinfo/geonames/
95
95
 
96
96
  - source: مِصر
97
- expected: mişr
97
+ expected: Mişr
98
98
 
99
99
  - source: قَطَر
100
- expected: qaţar
100
+ expected: Qaţar
101
101
 
102
102
  - source: الجُمهُورِيَّة العِراقِيَّة
103
- expected: al jumhūrīyah al ‘irāqīyah
103
+ expected: Al Jumhūrīyah al ‘Irāqīyah
104
104
 
105
105
  - source: جُمهُورِيَّة مِصر العَرَبِيَّة
106
- expected: jumhūrīyat mişr al ‘arabīyah
106
+ expected: Jumhūrīyat Mişr al ‘Arabīyah
107
107
 
108
108
  - source: الرِيَاض
109
- expected: ar riyāḑ
109
+ expected: Ar Riyāḑ
110
110
 
111
111
  - source: الشارِقة
112
- expected: ash shāriqah
112
+ expected: Ash Shāriqah
113
113
 
114
114
  map:
115
115
  inherit: "un-ara-Arab-Latn-2017"
116
+ postrules:
117
+ - pattern : ' Aş Ş' # الص
118
+ result: ' aş Ş'
119
+ - pattern : ' Aḑ Ḑ' # الض
120
+ result: ' aḑ Ḑ'
121
+ - pattern : ' Aţ Ţ' # الط
122
+ result: ' aţ Ţ'
123
+
116
124
  characters:
117
125
 
118
126
  '\b\u0627\u0644\u0635' : 'aş ş' # الص
@@ -149,4 +157,3 @@ map:
149
157
  '\ufec7' : 'z̧' # ﻇ
150
158
  '\ufec8' : 'z̧' # ﻈ
151
159
  '\ufec6' : 'z̧' # ﻆ
152
-
@@ -71,6 +71,9 @@ notes:
71
71
  middle dot (·) may be used: سهيلة S·haylah (cf. شيلة Shaylah), دهيب
72
72
  D·hayb (cf. ذيب Dhayb), أدهم Ad·ham (cf. أذم Adham).
73
73
  - |
74
+ ta' marboota should be transliterated to 'ah' if it's in
75
+ a word with a definite article, or at the end of the sentence;
76
+ otherwise it should be transliterated to 'at'.
74
77
  to handle words starting with AL and ending with ta' marboota
75
78
  which is pronounced as "ah" not "at" divided into multiple
76
79
  regex because lookbehind in ruby doesn't support variable length
@@ -84,57 +87,92 @@ tests:
84
87
  # https://unstats.un.org/unsd/geoinfo/geonames/
85
88
 
86
89
  - source: مِصر
87
- expected: mis̱r
90
+ expected: Mis̱r
88
91
 
89
92
  - source: قَطَر
90
- expected: qaṯar
93
+ expected: Qaṯar
91
94
 
92
95
  - source: المَغرِب
93
- expected: al maghrib
96
+ expected: Al Maghrib
94
97
 
95
98
  - source: الجُمهُورِيَّة العِراقِيَّة
96
- expected: al jumhūrīyah al ‘irāqīyah
99
+ expected: Al Jumhūrīyah al ‘Irāqīyah
97
100
 
98
101
  - source: جُمهُورِيَّة العِراق
99
- expected: jumhūrīyat al ‘irāq
102
+ expected: Jumhūrīyat al ‘Irāq
100
103
 
101
104
  - source: جُمهُورِيَّة مِصر العَرَبِيَّة
102
- expected: jumhūrīyat mis̱r al ‘arabīyah
105
+ expected: Jumhūrīyat Mis̱r al ‘Arabīyah
103
106
 
104
107
  - source: بَغداد
105
- expected: baghdād
108
+ expected: Baghdād
106
109
 
107
110
  - source: تُونِس
108
- expected: tūnis
111
+ expected: Tūnis
109
112
 
110
113
  - source: السُعُودِيَّة
111
- expected: as su‘ūdīyah
114
+ expected: As Su‘ūdīyah
112
115
 
113
116
  - source: اليَمَن
114
- expected: al yaman
117
+ expected: Al Yaman
115
118
 
116
119
  - source: السُودان
117
- expected: as sūdān
120
+ expected: As Sūdān
118
121
 
119
122
  - source: الجَزائِر
120
- expected: al jazā'ir
123
+ expected: Al Jazā'ir
121
124
 
122
125
  - source: الجُمهُورِيَّة اللُبنانِيَّة
123
- expected: al jumhūrīyah al lubnānīyah
126
+ expected: Al Jumhūrīyah al Lubnānīyah
124
127
 
125
128
  - source: أسمَرة
126
- expected: asmarah
129
+ expected: Asmarah
127
130
 
128
131
  - source: جِدَّة
129
- expected: jiddah
132
+ expected: Jiddah
130
133
 
131
134
  - source: مَكَّة
132
- expected: makkah
135
+ expected: Makkah
133
136
 
134
137
  - source: الرِيَاض
135
- expected: ar riyāḏ
138
+ expected: Ar Riyāḏ
136
139
 
137
140
  map:
141
+ postrules:
142
+ - pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
143
+ result: "upcase"
144
+ # don't capitalize definite article in the middle of a sentence
145
+ - pattern : ' At T' # الت
146
+ result: ' at T'
147
+ - pattern : ' Ath Th' # الث
148
+ result: ' ath th'
149
+ - pattern : ' Ad D' # الد
150
+ result: ' ad D'
151
+ - pattern : ' Adh Dh' # الذ
152
+ result: ' adh Dh'
153
+ - pattern : ' Ar R' # الر
154
+ result: ' ar R'
155
+ - pattern : ' Az Z' # الز
156
+ result: ' az Z'
157
+ - pattern : ' As S' # الس
158
+ result: ' as S'
159
+ - pattern : ' Ash Sh' # الش
160
+ result: ' ash Sh'
161
+ - pattern : ' As̱ S̱' # الص
162
+ result: ' as̱ S̱'
163
+ - pattern : ' Aḏ Ḏ' # الض
164
+ result: ' aḏ Ḏ'
165
+ - pattern : ' Aṯ Ṯ' # الط
166
+ result: ' aṯ Ṯ'
167
+ - pattern : ' Ad͟h D͟h' # الظ
168
+ result: ' ad͟h D͟h'
169
+ - pattern : ' Al L' # الل
170
+ result: ' al L'
171
+ - pattern : ' an n' # الن
172
+ result: ' an N'
173
+ - pattern: " Al " # ال
174
+ result: " al "
175
+
138
176
  characters:
139
177
 
140
178
  # Tool used for Unicode finding:
@@ -143,6 +181,7 @@ map:
143
181
  # pointing
144
182
  '\u064e' : 'a' # َ fatha
145
183
  '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
184
+ '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
146
185
  '\u0650' : 'i' # ِ kasra
147
186
  '\u064f' : 'u' # ُ damma
148
187
  '\u0652' : '' # ْ sokoon, see note A below
@@ -379,5 +418,3 @@ map:
379
418
 
380
419
  # Vowels, diphthongs and diacritical marks
381
420
  # (ـ stands for any consonant)
382
-
383
-
@@ -8,11 +8,11 @@ name: National System of Geographic Names Transmission into Roman Alphabet in Be
8
8
  url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/9th-uncsgn-docs/crp/9th_UNCSGN_e-conf-98-crp-21.pdf
9
9
  creation_date: 2007
10
10
  description: |
11
- RESOLUTION OF THE STATE COMMITTEE
11
+ RESOLUTION OF THE STATE COMMITTEE
12
12
  ON PROPERTY OF THE REPUBLIC OF BELARUS June 11, 2007 No. 38
13
13
 
14
- 8/16668 (06/18/2007) On amendments and additions to the Instructions
15
- for the transliteration of geographical names of the
14
+ 8/16668 (06/18/2007) On amendments and additions to the Instructions
15
+ for the transliteration of geographical names of the
16
16
  Republic of Belarus in letters of the Latin alphabet
17
17
 
18
18
  Based on the Regulation on the State Property Committee of the Republic of Belarus,
@@ -22,11 +22,10 @@ tests:
22
22
 
23
23
  expected: |
24
24
  Éna práma mónon me parakíni̱se ki eména na grápso̱ óti toúti̱n ti̱n patrída ti̱n échomen óloi mazí, kai sofoí ki amatheís kai ploúsioi kai fto̱choí kai politikoí kai stratio̱tikoí kai oi pléon mikróteroi ánthro̱poi; ósoi ago̱nistí̱kamen, analógo̱s o katheís, échomen na zí̱somen edó̱. To loipón doulépsamen óloi mazí, na ti̱n fylámen ki óloi mazí kai na mi̱n légei oúte o dynatós «egó̱» oúte o adýnatos. Xérete póte na légei o katheís «egó̱»? Ótan ago̱nisteí mónos tou kai fkiásei í̱ chalásei, na légei «egó̱»; ótan ómo̱s ago̱nízontai polloí kai fkiánoun, tóte na léne «emeís». Eímaste eis to «emeís» ki óchi eis to «egó̱». Kai eis to exí̱s na máthomen gnó̱si̱, an thélomen na fkiásomen cho̱rión, na zí̱somen óloi mazí.
25
-
25
+
26
26
  Giánni̱s Makrygiánni̱s.
27
27
 
28
28
  map:
29
29
  character_separator: ""
30
30
  word_separator: " "
31
31
  inherit: "elot-ell-Grek-Latn-743-1982-tl"
32
-
@@ -17,4 +17,3 @@ map:
17
17
  character_separator: ""
18
18
  word_separator: " "
19
19
  inherit: "elot-ell-Grek-Latn-743-1982-ts"
20
-
@@ -11,7 +11,7 @@ description: |
11
11
  UNGEGN Romanization table for Greek: Phonetic transcription
12
12
 
13
13
 
14
- note:
14
+ note:
15
15
  - Also included in ISO 843:1997, Annex B, Column 5, and ELOT 743:1982, column 5.
16
16
  - Corrected obvious errors, which occur every time the table has reappeared: χ > x, x > ks, oï > oi.
17
17
  - The vowels are taken from the specification, but some are controversial: /ɑ ɛ/ but /o/.
@@ -23,37 +23,37 @@ note:
23
23
  tests:
24
24
 
25
25
  - source: |
26
- Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί,
27
-
28
- και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι·
29
-
30
- όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ.
31
-
32
- Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος.
33
-
34
- Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»·
35
-
36
- όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ».
37
-
26
+ Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί,
27
+
28
+ και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι·
29
+
30
+ όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ.
31
+
32
+ Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος.
33
+
34
+ Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»·
35
+
36
+ όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ».
37
+
38
38
  Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
39
39
 
40
40
  Γιάννης Μακρυγιάννης.
41
41
 
42
42
  expected: |
43
- ɛnɑ prɑmɑ monon mɛ pɑrɑkinisɛ ki ɛmɛnɑ nɑ ɣrɑpso oti tutin tin pɑtriðɑ tin ɛxomɛn oli mɑzi,
44
-
45
- kɛ sofi ki ɑmɑθis kɛ plusii kɛ ftoxi kɛ politiki kɛ strɑtiotiki kɛ i plɛon mikrotɛri ɑnθropi;
46
-
47
- osi ɑɣonistikɑmɛn, ɑnɑloɣos o kɑθis, ɛxomɛn nɑ zisomɛn ɛðo.
48
-
49
- to lipon ðulɛpsɑmɛn oli mɑzi, nɑ tin filɑmɛn ki oli mɑzi kɛ nɑ min lɛɣi utɛ o ðinɑtos «ɛɣo» utɛ o ɑðinɑtos.
50
-
51
- ksɛrɛtɛ potɛ nɑ lɛɣi o kɑθis «ɛɣo»? otɑn ɑɣonisti monos tu kɛ fkiɑsi i xɑlɑsi, nɑ lɛɣi «ɛɣo»;
52
-
53
- otɑn omos ɑɣonizondɛ poli kɛ fkiɑnun, totɛ nɑ lɛnɛ «ɛmis». imɑstɛ is to «ɛmis» ki oxi is to «ɛɣo».
54
-
43
+ ɛnɑ prɑmɑ monon mɛ pɑrɑkinisɛ ki ɛmɛnɑ nɑ ɣrɑpso oti tutin tin pɑtriðɑ tin ɛxomɛn oli mɑzi,
44
+
45
+ kɛ sofi ki ɑmɑθis kɛ plusii kɛ ftoxi kɛ politiki kɛ strɑtiotiki kɛ i plɛon mikrotɛri ɑnθropi;
46
+
47
+ osi ɑɣonistikɑmɛn, ɑnɑloɣos o kɑθis, ɛxomɛn nɑ zisomɛn ɛðo.
48
+
49
+ to lipon ðulɛpsɑmɛn oli mɑzi, nɑ tin filɑmɛn ki oli mɑzi kɛ nɑ min lɛɣi utɛ o ðinɑtos «ɛɣo» utɛ o ɑðinɑtos.
50
+
51
+ ksɛrɛtɛ potɛ nɑ lɛɣi o kɑθis «ɛɣo»? otɑn ɑɣonisti monos tu kɛ fkiɑsi i xɑlɑsi, nɑ lɛɣi «ɛɣo»;
52
+
53
+ otɑn omos ɑɣonizondɛ poli kɛ fkiɑnun, totɛ nɑ lɛnɛ «ɛmis». imɑstɛ is to «ɛmis» ki oxi is to «ɛɣo».
54
+
55
55
  kɛ is to ɛksis nɑ mɑθomɛn ɣnosi, ɑn θɛlomɛn nɑ fkiɑsomɛn xorion, nɑ zisomɛn oli mɑzi.
56
-
56
+
57
57
  ɣiɑnis mɑkriɣiɑnis.
58
58
 
59
59
 
@@ -101,7 +101,7 @@ tests:
101
101
  expected: tɑiɣɛtos
102
102
  - source: σπρέυ
103
103
  expected: sprɛi
104
-
104
+
105
105
  - source: Αθήνα
106
106
  expected: ɑθinɑ
107
107
  - source: Άγιον Όρος
@@ -748,33 +748,33 @@ map:
748
748
 
749
749
  "\u0387": ";" # ·
750
750
  "\u00B7": ";" # ·
751
-
751
+
752
752
  postrules:
753
753
  - pattern: vv
754
- result: "v"
754
+ result: "v"
755
755
  - pattern: ðð
756
- result: "ð"
756
+ result: "ð"
757
757
  - pattern: zz
758
- result: "z"
758
+ result: "z"
759
759
  - pattern: θθ
760
- result: "θ"
760
+ result: "θ"
761
761
  - pattern: kk
762
- result: "k"
762
+ result: "k"
763
763
  - pattern: ll
764
- result: "l"
764
+ result: "l"
765
765
  - pattern: mm
766
- result: "m"
766
+ result: "m"
767
767
  - pattern: nn
768
- result: "n"
768
+ result: "n"
769
769
  - pattern: pp
770
- result: "p"
770
+ result: "p"
771
771
  - pattern: rr
772
- result: "r"
772
+ result: "r"
773
773
  - pattern: ss
774
- result: "s"
774
+ result: "s"
775
775
  - pattern: tt
776
- result: "t"
776
+ result: "t"
777
777
  - pattern: ff
778
- result: "f"
778
+ result: "f"
779
779
  - pattern: xx
780
- result: "x"
780
+ result: "x"
@@ -43,6 +43,12 @@ tests:
43
43
  - source: "ᠬᠥᠬᠡᠬᠣᠲᠠ"
44
44
  expected: "kökeqota"
45
45
  map:
46
+ rules:
47
+ - pattern: \u182c(\u1821|\u1825|\u1826)
48
+ result: "k\\1"
49
+ - pattern: \u182d(\u1821|\u1825|\u1826)
50
+ result: "g\\1"
51
+
46
52
  characters:
47
53
  "ᠠ": "a"
48
54
  "ᠪ": "b"
@@ -51,9 +57,9 @@ map:
51
57
  "ᠳ": "d"
52
58
  "ᠡ": "e"
53
59
  "ᠹ": "f"
54
- "ᠭ": "g"
60
+ "ᠭ": "ġ"
55
61
  "ᠺ": "g"
56
- "ᠬ": "h"
62
+ "ᠬ": "q"
57
63
  "ᠾ": "h"
58
64
  "ᠢ": "i"
59
65
  "ᠵ": "j"
@@ -61,7 +67,7 @@ map:
61
67
  "ᠯ": "l"
62
68
  "ᠮ": "m"
63
69
  "ᠨ": "n"
64
- "ᠥ": "o"
70
+ "ᠥ": "ö"
65
71
  "ᠫ": "p"
66
72
  "ᠴ": "q"
67
73
  "ᠷ": "r"
@@ -0,0 +1,163 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 1972
4
+ language: nep
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Nepali Romanization, 1972
8
+ url: https://www.eki.ee/wgrs/v3_0/rom1_ne.pdf
9
+ creation_date: 1972
10
+ confirmation_date: 2010
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11), based on a report
13
+ prepared by D. N. Sharma. The note on the system was published in volume II of the
14
+ conference report.
15
+
16
+ There is no evidence of the use of the system either in Nepal or in international cartographic
17
+ products. The resolution IV/17 (1982) recommended association, inter alia, with Nepal in
18
+ carrying out further studies on the system. In Nepal a system of romanization is employed by
19
+ the Nepal Survey Department (NSD). This system has been applied to names appearing on
20
+ national mapping, and it is also used in the Geographic Information Infrastructure Programme
21
+ (NGIIP).
22
+
23
+ Nepali (Nepālī) uses the alphasyllabic script Devanāgarī.
24
+
25
+ notes:
26
+
27
+ - In normal spelling ṙ and ṙh are not marked, instead dotless ड ḍa, ढ ḍha are used to denote these sounds.
28
+ - |
29
+ Ligatures may be formed with r as the first component: ~ह rha.
30
+
31
+ tests:
32
+ - source: "लेखन"
33
+ expected: "laekhana"
34
+ - source: "मुद्रा"
35
+ expected: "maudaaraā"
36
+ - source: "प्रशंसा"
37
+ expected: "paarashaṁsaā"
38
+ - source: "अंक"
39
+ expected: "aṁka"
40
+ - source: "नेकपाले स्थगित स्थायी कमिटीको बैठक भदौ गते बोलाउने भएको"
41
+ expected: "naekapaālae saathagaita saathaāyaī kamaiṭaīkao baaiṭhaka bhadaau gatae baolaāunae bhaekao"
42
+ - source: "न घर रह्यो, न परिवार"
43
+ expected: "na ghara rahaayao, na paraivaāra"
44
+ - source: "ढोरपाटनमा भुजीखोला बाढीपहिरोले अभिभावक गुमाएका बालबालिकाको बिचल्ली"
45
+ expected: "ḍhaorapaāṭanamaā bhaujaīkhaolaā baāḍhaīpahairaolae abhaibhaāvaka gaumaāekaā baālabaālaikaākao baichalaalaī"
46
+ - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
47
+ expected: "sausaamaitaākaā kaākaā haemabahaādaura ra kaākaīlaāī panai pahairaolae bagaāyao"
48
+ - source: "संविधान जारी भएसँगै सार्वजनिक प्रशासनमा नयाँ उत्साह आउने अपेक्षा थियो"
49
+ expected: "saṁvaidhaāna jaāraī bhaesam̐gaai saāraavajanaika paarashaāsanamaā nayaām̐ utaasaāha āunae apaekaaṣhaā thaiyao"
50
+ - source: "देशमा कोरोना संक्रमित र मृतकको संख्या हरेक दिन बढ्दो छ"
51
+ expected: "daeshamaā kaoraonaā saṁkaaramaita ra maṛtakakao saṁkhaayaā haraeka daina baḍhaadao chha"
52
+ - source: "गाउँपालिकाका अध्यक्ष टिका गुरुङका अनुसार विष्णुदासलाई राजुले सुत्नका लागि बेलुका साथी लगेका थिए"
53
+ expected: "gaāum̐paālaikaākaā adhaayakaaṣha ṭaikaā gaurauṅakaā anausaāra vaiṣhaaṇaudaāsalaāī raājaulae sautaanakaā laāgai baelaukaā saāthaī lagaekaā thaie"
54
+ - source: "यो आयोजना गाउँपालिकाको केन्द्र तेल्लोकमा पर्छ"
55
+ expected: "yao āyaojanaā gaāum̐paālaikaākao kaenaadaara taelaalaokamaā paraachha"
56
+ - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
57
+ expected: "sausaamaitaākaā kaākaā haemabahaādaura ra kaākaīlaāī panai pahairaolae bagaāyao"
58
+ - source: "चैत पहिलो साता घर आएका उनी लकडाउन भएपछि यतै रोकिए"
59
+ expected: "chaaita pahailao saātaā ghara āekaā unaī lakaḍaāuna bhaepachhai yataai raokaie"
60
+ - source: "काम गर्न जानेको हकमा रोजगारदाता कम्पनीको पत्रसँगै वडा र जिल्ला प्रशासनको सिफारिस अनिवार्य गरिएको छ"
61
+ expected: "kaāma garaana jaānaekao hakamaā raojagaāradaātaā kamaapanaīkao pataarasam̐gaai vaḍaā ra jailaalaā paarashaāsanakao saiphaāraisa anaivaāraaya garaiekao chha"
62
+
63
+ map:
64
+
65
+ characters:
66
+
67
+ # Vowels and Diphthongs
68
+ 'अ': 'a'
69
+ 'आ': 'ā'
70
+ 'इ': 'i'
71
+ 'ई': 'ī'
72
+ 'उ': 'u'
73
+ 'ऊ': 'ū'
74
+ 'ऋ': 'ṛ'
75
+ 'ॠ': 'ṝ'
76
+ 'ऌ': 'l̤'
77
+ 'ए': 'e'
78
+ 'ऐ': 'ai'
79
+ 'ओ': 'o'
80
+ 'औ': 'au'
81
+
82
+ # Medials # Needed for connecting consonants
83
+
84
+ 'ा': "ā"
85
+ 'ि': "i"
86
+ 'ी': "ī"
87
+ 'ु': "u"
88
+ 'ू': "ū"
89
+ 'ृ': "ṛ"
90
+ 'े': "e"
91
+ 'ै': "ai"
92
+ 'ो': "o"
93
+ 'ौ': "au"
94
+
95
+ # Consonants (see Note 1)
96
+
97
+ # Gutturals
98
+ 'क': 'ka'
99
+ 'ख': 'kha'
100
+ 'ग': 'ga'
101
+ 'घ': 'gha'
102
+ 'ङ': 'ṅa'
103
+
104
+ # Palatals
105
+ 'च': 'cha'
106
+ 'छ': 'chha'
107
+ 'ज': 'ja'
108
+ 'झ': 'jha'
109
+ 'ञ': 'ña'
110
+
111
+ # Cerebrals
112
+ 'ट': 'ṭa'
113
+ 'ठ': 'ṭha'
114
+ 'ड': 'ḍa'
115
+ 'ढ': 'ḍha'
116
+ 'ण': 'ṇa'
117
+
118
+ # Dentals
119
+ 'त': 'ta'
120
+ 'थ': 'tha'
121
+ 'द': 'da'
122
+ 'ध': 'dha'
123
+ 'न': 'na'
124
+
125
+ # Labials
126
+ 'प': 'pa'
127
+ 'फ': 'pha'
128
+ 'ब': 'ba'
129
+ 'भ': 'bha'
130
+ 'म': 'ma'
131
+
132
+ # Semivowels
133
+ 'य': 'ya'
134
+ 'र': 'ra'
135
+ 'ल': 'la'
136
+ 'व': 'va' # or wa [Note#3]
137
+
138
+ # Sibilants
139
+ 'श': 'sha'
140
+ 'ष': 'ṣha'
141
+ 'स': 'sa'
142
+
143
+ # Dotted variants
144
+ 'क़': qa
145
+ 'ख़': ḳha
146
+ 'ग़': ga
147
+ 'ज़': za
148
+ 'ड़': ṙa
149
+ 'ढ़': ṙha
150
+ 'फ़': fa
151
+
152
+
153
+ # Aspirate
154
+ 'ह': 'ha'
155
+
156
+ # Anusvāra
157
+ 'ं': 'ṁ'
158
+
159
+ # Anunāsika
160
+ 'ँ': 'm̐'
161
+
162
+ # halanta
163
+ '्': 'a'