interscript-maps 2.1.0a9 → 2.1.0b1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/interscript-maps.gemspec +1 -1
- data/maps/hk-yue-Hani-Latn-1888.imp +11 -11
- data/maps/iso-ara-Arab-Latn-233-1984.imp +1 -5
- data/maps/mvd-bel-Cyrl-Latn-2008.imp +8 -8
- data/maps/mvd-bel-Cyrl-Latn-2010.imp +3 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b43dc37aa94446eb83bfececb00291e1fe7ebf17c11e7f0a11969196d33f179
|
4
|
+
data.tar.gz: c919d5084e1ac1e97985e0357bacdf282611d8717ae72175e18ed7f2e74d3582
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4df63fb4c1c30c63cf11c5ea61b727001d14840f856073de1c65eaaa59153e5ba9c3918ec9ba234a3dcc4e945331b3247d8667545b0fe75c9854b75472b9f62
|
7
|
+
data.tar.gz: db7750b60fbab7dcef42d4e05fd3320a41f06c2c2ed8639aae6ddb52be547e73d0e8c7e6e59b2f7134c3e2274c695226bea148bff71989db47fd31a7c1f32da2
|
data/interscript-maps.gemspec
CHANGED
@@ -7,15 +7,16 @@ metadata {
|
|
7
7
|
name: Hong Kong Government Cantonese Romanisation
|
8
8
|
url: http://caes.hku.hk/hkjalonline/issues/download_the_file.php?f=2008_v11_1_kataoka__n__lee.pdf
|
9
9
|
creation_date: 2020-01
|
10
|
-
description:
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
10
|
+
description: |
|
11
|
+
This system is commonly used for the transliteration of place names or
|
12
|
+
personal names in Hong Kong, as pronounced in Cantonese. There will be more
|
13
|
+
than one legitimate transliteration for the same syllable, or sometimes even
|
14
|
+
for the same character. For example, the character 仔 can be transcribed as
|
15
|
+
Chai or Tsai in this system. Some of these choices are context-dependent (e.g.
|
16
|
+
the same character in the place name 灣仔 is almost always Chai, but more
|
17
|
+
likely to be Tsai elsewhere). There will be more variations and
|
18
|
+
unpredictable conversions in personal names, and these conventions need to
|
19
|
+
be hard-coded.
|
19
20
|
|
20
21
|
notes:
|
21
22
|
- Tone is not represented in this system.
|
@@ -27,10 +28,9 @@ metadata {
|
|
27
28
|
distinctions were represented by ’ or diacritics (e.g. á vs. a), but were
|
28
29
|
removed in printed / typeset documents.
|
29
30
|
|
30
|
-
|
31
|
-
#A longer test list will be generated from GeoNames database
|
32
31
|
}
|
33
32
|
|
33
|
+
# TODO: A longer test list will be generated from GeoNames database
|
34
34
|
tests {
|
35
35
|
test "煎魚灣", "Tsin Yue Wan"
|
36
36
|
test "分流廟灣", "Fan Lau Miu Wan"
|
@@ -38,7 +38,7 @@ metadata {
|
|
38
38
|
غ is ġ instead of gh
|
39
39
|
ة is ẗ instead of h/t
|
40
40
|
ى is ỳ
|
41
|
-
ـِي is iy instead of
|
41
|
+
ـِي is iy instead of ī
|
42
42
|
ـُو is uw instead of ū
|
43
43
|
ـَا is a’ instead of ā
|
44
44
|
ـَى is aỳ instead of á
|
@@ -55,7 +55,6 @@ tests {
|
|
55
55
|
|
56
56
|
stage {
|
57
57
|
|
58
|
-
|
59
58
|
# CHARACTERS
|
60
59
|
parallel {
|
61
60
|
|
@@ -105,7 +104,6 @@ stage {
|
|
105
104
|
|
106
105
|
# Shadda
|
107
106
|
|
108
|
-
|
109
107
|
sub "\u0628\u0651", "bb" # ب
|
110
108
|
sub "\u062a\u0651", "tt" # ت
|
111
109
|
sub "\u062b\u0651", "ṯṯ" # ث
|
@@ -133,7 +131,6 @@ stage {
|
|
133
131
|
sub "\u0648\u0651", "ww" # و
|
134
132
|
sub "\u064a\u0651", "yy" # ي
|
135
133
|
|
136
|
-
|
137
134
|
sub "\u0622", "’â" # آ
|
138
135
|
|
139
136
|
sub "\u0627", "â" # ا
|
@@ -142,7 +139,6 @@ stage {
|
|
142
139
|
|
143
140
|
sub "\u0626", "'" # ئ
|
144
141
|
|
145
|
-
|
146
142
|
sub "\u0621", maybe("’") # ء# see note A
|
147
143
|
|
148
144
|
sub "\u0623", "a" # أ
|
@@ -86,14 +86,14 @@ tests {
|
|
86
86
|
stage {
|
87
87
|
|
88
88
|
# RULES
|
89
|
-
sub
|
90
|
-
sub
|
91
|
-
sub
|
92
|
-
sub
|
93
|
-
sub
|
94
|
-
sub
|
95
|
-
sub
|
96
|
-
sub
|
89
|
+
sub "\u2019\u0415", "Je" # Е
|
90
|
+
sub "\u2019\u0435", "je" # е
|
91
|
+
sub "\u2019\u0401", "Jo" # Ё
|
92
|
+
sub "\u2019\u0451", "jo" # ё
|
93
|
+
sub "\u2019\u042E", "Ju" # Ю
|
94
|
+
sub "\u2019\u044E", "ju" # ю
|
95
|
+
sub "\u2019\u042F", "Ja" # Я
|
96
|
+
sub "\u2019\u044F", "ja" # я
|
97
97
|
|
98
98
|
sub "\u0415", "Je", before: any("АаЕеЁёИиОоУуЭэЮюЯяЬьЎў") # Е after vowels
|
99
99
|
sub "\u0435", "je", before: any("АаЕеЁёИиОоУуЭэЮюЯяЬьЎў") # е after vowels
|
@@ -43,8 +43,9 @@ stage {
|
|
43
43
|
|
44
44
|
# RULES
|
45
45
|
# note[5]
|
46
|
-
|
47
|
-
sub "\
|
46
|
+
# These two rules are nonsensical and harm reversibility
|
47
|
+
# sub "\u044C", ref( 1 ), before: any("ЗзЛлНнСсЦц") # ь after consonants
|
48
|
+
# sub "\u02B9", ref( 1 ), before: any("ЗзЛлНнСсЦц") # Ь after consonants
|
48
49
|
# Й at end
|
49
50
|
sub "\u0419" + line_end, "", before: any("ЕеЁёЫыЮюЯя") # Й after vowels
|
50
51
|
sub "\u0439" + line_end, "", before: any("ЕеЁёЫыЮюЯя") # й after vowels
|