interscript-maps 2.1.0a9 → 2.1.0b1
This diff shows the changes between two publicly released versions of this package, as published in one of the supported registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/interscript-maps.gemspec +1 -1
- data/maps/hk-yue-Hani-Latn-1888.imp +11 -11
- data/maps/iso-ara-Arab-Latn-233-1984.imp +1 -5
- data/maps/mvd-bel-Cyrl-Latn-2008.imp +8 -8
- data/maps/mvd-bel-Cyrl-Latn-2010.imp +3 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8b43dc37aa94446eb83bfececb00291e1fe7ebf17c11e7f0a11969196d33f179
|
|
4
|
+
data.tar.gz: c919d5084e1ac1e97985e0357bacdf282611d8717ae72175e18ed7f2e74d3582
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c4df63fb4c1c30c63cf11c5ea61b727001d14840f856073de1c65eaaa59153e5ba9c3918ec9ba234a3dcc4e945331b3247d8667545b0fe75c9854b75472b9f62
|
|
7
|
+
data.tar.gz: db7750b60fbab7dcef42d4e05fd3320a41f06c2c2ed8639aae6ddb52be547e73d0e8c7e6e59b2f7134c3e2274c695226bea148bff71989db47fd31a7c1f32da2
|
data/interscript-maps.gemspec
CHANGED
|
data/maps/hk-yue-Hani-Latn-1888.imp
CHANGED
|
@@ -7,15 +7,16 @@ metadata {
|
|
|
7
7
|
name: Hong Kong Government Cantonese Romanisation
|
|
8
8
|
url: http://caes.hku.hk/hkjalonline/issues/download_the_file.php?f=2008_v11_1_kataoka__n__lee.pdf
|
|
9
9
|
creation_date: 2020-01
|
|
10
|
-
description:
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
10
|
+
description: |
|
|
11
|
+
This system is commonly used for the transliteration of place names or
|
|
12
|
+
person's names in Hong Kong, as pronounced in Cantonese. There will be more
|
|
13
|
+
than one legitimate transliteration for the same syllable, or sometimes even
|
|
14
|
+
for the same character. For example, the character 仔 can be transcribed as
|
|
15
|
+
Chai or Tsai in this system. Some of the choice is context-dependent (e.g.
|
|
16
|
+
the same character in the place name 灣仔 is almost always Chai, but more
|
|
17
|
+
likely to be Tsai elsewhere). There will be more variations and
|
|
18
|
+
unpredictable conversions in person's names, and these conventions need to
|
|
19
|
+
be hard-coded.
|
|
19
20
|
|
|
20
21
|
notes:
|
|
21
22
|
- Tone is not represented in this system.
|
|
@@ -27,10 +28,9 @@ metadata {
|
|
|
27
28
|
distinctions were represented by ’ or diacritics (e.g. á vs. a), but were
|
|
28
29
|
removed in printed / typeset documents.
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
#A longer test list will be generated from GeoNames database
|
|
32
31
|
}
|
|
33
32
|
|
|
33
|
+
# TODO: A longer test list will be generated from GeoNames database
|
|
34
34
|
tests {
|
|
35
35
|
test "煎魚灣", "Tsin Yue Wan"
|
|
36
36
|
test "分流廟灣", "Fan Lau Miu Wan"
|
|
data/maps/iso-ara-Arab-Latn-233-1984.imp
CHANGED
|
@@ -38,7 +38,7 @@ metadata {
|
|
|
38
38
|
غ is ġ instead of gh
|
|
39
39
|
ة is ẗ instead of h/t
|
|
40
40
|
ى is ỳ
|
|
41
|
-
ـِي is iy instead of
|
|
41
|
+
ـِي is iy instead of ī
|
|
42
42
|
ـُو is uw instead of ū
|
|
43
43
|
ـَا is a’ instead of ā
|
|
44
44
|
ـَى is aỳ instead of á
|
|
@@ -55,7 +55,6 @@ tests {
|
|
|
55
55
|
|
|
56
56
|
stage {
|
|
57
57
|
|
|
58
|
-
|
|
59
58
|
# CHARACTERS
|
|
60
59
|
parallel {
|
|
61
60
|
|
|
@@ -105,7 +104,6 @@ stage {
|
|
|
105
104
|
|
|
106
105
|
# Shadda
|
|
107
106
|
|
|
108
|
-
|
|
109
107
|
sub "\u0628\u0651", "bb" # ب
|
|
110
108
|
sub "\u062a\u0651", "tt" # ت
|
|
111
109
|
sub "\u062b\u0651", "ṯṯ" # ث
|
|
@@ -133,7 +131,6 @@ stage {
|
|
|
133
131
|
sub "\u0648\u0651", "ww" # و
|
|
134
132
|
sub "\u064a\u0651", "yy" # ي
|
|
135
133
|
|
|
136
|
-
|
|
137
134
|
sub "\u0622", "’â" # آ
|
|
138
135
|
|
|
139
136
|
sub "\u0627", "â" # ا
|
|
@@ -142,7 +139,6 @@ stage {
|
|
|
142
139
|
|
|
143
140
|
sub "\u0626", "'" # ئ
|
|
144
141
|
|
|
145
|
-
|
|
146
142
|
sub "\u0621", maybe("’") # ء# see note A
|
|
147
143
|
|
|
148
144
|
sub "\u0623", "a" # أ
|
|
data/maps/mvd-bel-Cyrl-Latn-2008.imp
CHANGED
|
@@ -86,14 +86,14 @@ tests {
|
|
|
86
86
|
stage {
|
|
87
87
|
|
|
88
88
|
# RULES
|
|
89
|
-
sub
|
|
90
|
-
sub
|
|
91
|
-
sub
|
|
92
|
-
sub
|
|
93
|
-
sub
|
|
94
|
-
sub
|
|
95
|
-
sub
|
|
96
|
-
sub
|
|
89
|
+
sub "\u2019\u0415", "Je" # Е
|
|
90
|
+
sub "\u2019\u0435", "je" # е
|
|
91
|
+
sub "\u2019\u0401", "Jo" # Ë
|
|
92
|
+
sub "\u2019\u0451", "jo" # ё
|
|
93
|
+
sub "\u2019\u042E", "Ju" # Ю
|
|
94
|
+
sub "\u2019\u044E", "ju" # ю
|
|
95
|
+
sub "\u2019\u042F", "Ja" # Я
|
|
96
|
+
sub "\u2019\u044F", "ja" # я
|
|
97
97
|
|
|
98
98
|
sub "\u0415", "Je", before: any("АаЕеЁёИиОоУуЭэЮюЯяЬьЎў") # Е after vowels
|
|
99
99
|
sub "\u0435", "je", before: any("АаЕеЁёИиОоУуЭэЮюЯяЬьЎў") # е after vowels
|
|
data/maps/mvd-bel-Cyrl-Latn-2010.imp
CHANGED
|
@@ -43,8 +43,9 @@ stage {
|
|
|
43
43
|
|
|
44
44
|
# RULES
|
|
45
45
|
# note[5]
|
|
46
|
-
|
|
47
|
-
sub "\
|
|
46
|
+
# Those two are nonsense and harmful for reversibility
|
|
47
|
+
# sub "\u044C", ref( 1 ), before: any("ЗзЛлНнСсЦц") # ь after consonants
|
|
48
|
+
# sub "\u02B9", ref( 1 ), before: any("ЗзЛлНнСсЦц") # Ь after consonants
|
|
48
49
|
# Й at end
|
|
49
50
|
sub "\u0419" + line_end, "", before: any("ЕеЁёЫыЮюЯя") # Я after vowels
|
|
50
51
|
sub "\u0439" + line_end, "", before: any("ЕеЁёЫыЮюЯя") # я after vowels
|