mongolian 0.1.0 → 0.1.1
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- checksums.yaml +4 -4
- data/lib/mongolian/latin.rb +94 -94
- data/lib/mongolian/spellchecker.rb +6 -5
- data/lib/mongolian/stemmify.rb +1 -1
- data/lib/mongolian/tokenizer.rb +2 -2
- data/lib/mongolian/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e382f30019fbe63999fb05f4bf37f5f7e8a49d5de142947b88b68c8fd733d7a
|
4
|
+
data.tar.gz: 6837360f2162d1ea58f9bd2ab719767a47a705bee1e17c9867d6905863e7ceb2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4471e74185ab6c9c5c67c0dd3f2aa6080345a8fbffe5474459977561f285f11236bfc1b639ece74e0e8d68e611671459128f4cc64286717c3add23bb7e6149c3
|
7
|
+
data.tar.gz: 1927874996597a16f4f664ee09a0473b71498b21ef2a61e9631bb6c65261e2ee302ce4bcf4b9330efeb96ea03d836024d61196db2a1a60b7f273eb33fa381dea
|
data/lib/mongolian/latin.rb
CHANGED
@@ -6,102 +6,102 @@ module Mongolian
|
|
6
6
|
# 3. 根据 Unicode 定义,转写不分阴阳形式;
|
7
7
|
|
8
8
|
# 转换传统蒙古文为拉丁转写
|
9
|
-
def
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
return
|
9
|
+
def mon_m2l
|
10
|
+
str = self.dup.to_str
|
11
|
+
str = str.gsub(/ᠠ/, "a")
|
12
|
+
str = str.gsub(/ᠡ/, "e")
|
13
|
+
str = str.gsub(/ᠢ/, "i")
|
14
|
+
str = str.gsub(/ᠣ/, "o")
|
15
|
+
str = str.gsub(/ᠤ/, "u")
|
16
|
+
str = str.gsub(/ᠥ/, "ö")
|
17
|
+
str = str.gsub(/ᠦ/, "ü")
|
18
|
+
str = str.gsub(/ᠧ/, "ë")
|
19
|
+
str = str.gsub(/ᠨ/, "n")
|
20
|
+
str = str.gsub(/ᠩ/, "ng")
|
21
|
+
str = str.gsub(/ᠪ/, "b")
|
22
|
+
str = str.gsub(/ᠫ/, "p")
|
23
|
+
str = str.gsub(/ᠬ/, "x")
|
24
|
+
str = str.gsub(/ᠭ/, "g")
|
25
|
+
str = str.gsub(/ᠮ/, "m")
|
26
|
+
str = str.gsub(/ᠯ/, "l")
|
27
|
+
str = str.gsub(/ᠰ/, "s")
|
28
|
+
str = str.gsub(/ᠱ/, "š")
|
29
|
+
str = str.gsub(/ᠲ/, "t")
|
30
|
+
str = str.gsub(/ᠳ/, "d")
|
31
|
+
str = str.gsub(/ᠴ/, "č")
|
32
|
+
str = str.gsub(/ᠵ/, "ǰ")
|
33
|
+
str = str.gsub(/ᠶ/, "y")
|
34
|
+
str = str.gsub(/ᠷ/, "r")
|
35
|
+
str = str.gsub(/ᠸ/, "w")
|
36
|
+
str = str.gsub(/ᠹ/, "f")
|
37
|
+
str = str.gsub(/ᠺ/, "k")
|
38
|
+
str = str.gsub(/ᠻ/, "ḳ")
|
39
|
+
str = str.gsub(/ᠼ/, "c")
|
40
|
+
str = str.gsub(/ᠽ/, "z")
|
41
|
+
str = str.gsub(/ᠾ/, "h")
|
42
|
+
str = str.gsub(/ᠿ/, "ž")
|
43
|
+
str = str.gsub(/ᡀ/, "lh")
|
44
|
+
str = str.gsub(/ᡁ/, "ẑ")
|
45
|
+
str = str.gsub(/ᡂ/, "ĉ")
|
46
|
+
str = str.gsub(/᠀/, "&")
|
47
|
+
str = str.gsub(/᠁/, "…")
|
48
|
+
str = str.gsub(/᠂/, ",")
|
49
|
+
str = str.gsub(/᠃/, ".")
|
50
|
+
str = str.gsub(/᠇/, ":")
|
51
|
+
str = str.gsub(/᠈/, "#")
|
52
|
+
str = str.gsub(/᠊/, "‐")
|
53
|
+
str = str.gsub(/᠋/, "") #fvs1
|
54
|
+
str = str.gsub(/᠌/, "") #fvs2
|
55
|
+
str = str.gsub(/᠍/, "") #fvs3
|
56
|
+
str = str.gsub(//, "_")
|
57
|
+
str = str.gsub(/᠐/, "'0")
|
58
|
+
str = str.gsub(/᠑/, "'1")
|
59
|
+
str = str.gsub(/᠒/, "'2")
|
60
|
+
str = str.gsub(/᠓/, "'3")
|
61
|
+
str = str.gsub(/᠔/, "'4")
|
62
|
+
str = str.gsub(/᠕/, "'5")
|
63
|
+
str = str.gsub(/᠖/, "'6")
|
64
|
+
str = str.gsub(/᠗/, "'7")
|
65
|
+
str = str.gsub(/᠘/, "'8")
|
66
|
+
str = str.gsub(/᠙/, "'9")
|
67
|
+
str = str.gsub(/ᡛ/, "ń")
|
68
|
+
str = str.gsub(/ᢀ/, "ṃ")
|
69
|
+
str = str.gsub(/ᢁ/, "ḥ")
|
70
|
+
str = str.gsub(/ᢂ/, "â")
|
71
|
+
str = str.gsub(/ᢃ/, "ŏ")
|
72
|
+
str = str.gsub(/ᢄ/, "ô")
|
73
|
+
str = str.gsub(/ᢅ/, "ˑ")
|
74
|
+
str = str.gsub(/ᢆ/, "ːˑ")
|
75
|
+
str = str.gsub(/ᢇ/, "ā")
|
76
|
+
str = str.gsub(/ᢈ/, "ī")
|
77
|
+
str = str.gsub(/ᢉ/, "ḵ")
|
78
|
+
str = str.gsub(/ᢊ/, "ṉ")
|
79
|
+
str = str.gsub(/ᢋ/, "ƈ")
|
80
|
+
str = str.gsub(/ᢌ/, "ť")
|
81
|
+
str = str.gsub(/ᢍ/, "ţ")
|
82
|
+
str = str.gsub(/ᢎ/, "ḏ")
|
83
|
+
str = str.gsub(/ᢏ/, "ņ")
|
84
|
+
str = str.gsub(/ᢐ/, "ṯ")
|
85
|
+
str = str.gsub(/ᢑ/, "ḓ")
|
86
|
+
str = str.gsub(/ᢒ/, "p̄")
|
87
|
+
str = str.gsub(/ᢓ/, "ṕh")
|
88
|
+
str = str.gsub(/ᢔ/, "ş")
|
89
|
+
str = str.gsub(/ᢕ/, "ẖ")
|
90
|
+
str = str.gsub(/ᢖ/, "ẕ")
|
91
|
+
str = str.gsub(/ᢗ/, "ʒ̄")
|
92
|
+
str = str.gsub(/ᢦ/, "ŭ")
|
93
|
+
str = str.gsub(/ᢧ/, "ÿ")
|
94
|
+
str = str.gsub(/ᢩ/, "̲")
|
95
|
+
str = str.gsub(//, "^")
|
96
|
+
str = str.gsub(//, "*")
|
97
|
+
str = str.gsub(/ /, "-")
|
98
|
+
return str
|
99
99
|
end
|
100
100
|
|
101
101
|
# 对拉丁转写蒙古文恢复为蒙古文
|
102
|
-
def
|
103
|
-
|
104
|
-
|
105
|
-
return
|
102
|
+
def mon_l2m
|
103
|
+
str = self.dup.to_str
|
104
|
+
# 将拉丁转写转换为对应的蒙古文
|
105
|
+
return str
|
106
106
|
end
|
107
107
|
end
|
@@ -7,7 +7,7 @@ module Mongolian
|
|
7
7
|
# 返回值 10:阳性词,但有错误,第一音节出现第五元音,却在后面出现了第四元音
|
8
8
|
# 返回值 2:正确,阴性词
|
9
9
|
# 返回值 20:阴性词,但有错误,第一音节出现第七元音,却在后面出现了第六元音
|
10
|
-
def
|
10
|
+
def mon_vowel_harmony
|
11
11
|
mongolian_str = self.dup.to_str
|
12
12
|
#vowel = ["ᠠ", "ᠡ", "ᠢ", "ᠣ", "ᠤ", "ᠥ", "ᠦ"]
|
13
13
|
if mongolian_str =~ /[ᠠᠣᠤ]/ and mongolian_str =~ /[ᠡᠥᠦ]/
|
@@ -15,14 +15,14 @@ module Mongolian
|
|
15
15
|
else
|
16
16
|
if mongolian_str =~ /[ᠠᠣᠤ]/
|
17
17
|
#如果第一音节阳性元音为第 5 元音,却在后面出现第 4 元音,则错误
|
18
|
-
if mongolian_str.
|
18
|
+
if mongolian_str.mon_syllable_classify[0] =~ /ᠤ/ and mongolian_str[mongolian_str.mon_syllable_classify[0].size..-1] =~ /ᠣ/
|
19
19
|
return 10
|
20
20
|
else
|
21
21
|
return 1
|
22
22
|
end
|
23
23
|
else
|
24
24
|
#如果第一音节阳性元音为第 7 元音,却在后面出现第 6 元音,则错误
|
25
|
-
if mongolian_str.
|
25
|
+
if mongolian_str.mon_syllable_classify[0] =~ /ᠦ/ and mongolian_str[mongolian_str.mon_syllable_classify[0].size..-1] =~ /ᠥ/
|
26
26
|
return 20
|
27
27
|
else
|
28
28
|
return 2
|
@@ -31,8 +31,9 @@ module Mongolian
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
#
|
35
|
-
|
34
|
+
# 对单词划分音节:每个元音前最多保留一个辅音,在该辅音之前即可划分音节
|
35
|
+
# 返回值是音节数组
|
36
|
+
def mon_syllable_classify
|
36
37
|
mongolian_str = self.dup.to_str
|
37
38
|
mlist = []
|
38
39
|
s = ""
|
data/lib/mongolian/stemmify.rb
CHANGED
data/lib/mongolian/tokenizer.rb
CHANGED
data/lib/mongolian/version.rb
CHANGED