mongolian 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b5ae047f3b8d5d37be63b5646bae637537a46f1a8402d9cb6bed1d95ea9f3135
4
- data.tar.gz: 7e00183b681aa0a29119a5d3eadcb687546761bd3b9c78f8dc670f1dc869e7de
3
+ metadata.gz: 2e382f30019fbe63999fb05f4bf37f5f7e8a49d5de142947b88b68c8fd733d7a
4
+ data.tar.gz: 6837360f2162d1ea58f9bd2ab719767a47a705bee1e17c9867d6905863e7ceb2
5
5
  SHA512:
6
- metadata.gz: b075ed85bfd720da5169e92b2e5c5ce6ce0a279ee7fb5fa8b137137bc6bcb6debd257efdac73751f63e1ccbb28bff533f2e764d71c427ae36c099173c595ca4e
7
- data.tar.gz: 9ed88cc1be818a65e58a134dafca486faa1eeeed3325c460a86c3aac60990ebcd18848802164afd27e3ac79b2c611e73e14470ea2547f38521ffca60634f6042
6
+ metadata.gz: 4471e74185ab6c9c5c67c0dd3f2aa6080345a8fbffe5474459977561f285f11236bfc1b639ece74e0e8d68e611671459128f4cc64286717c3add23bb7e6149c3
7
+ data.tar.gz: 1927874996597a16f4f664ee09a0473b71498b21ef2a61e9631bb6c65261e2ee302ce4bcf4b9330efeb96ea03d836024d61196db2a1a60b7f273eb33fa381dea
@@ -6,102 +6,102 @@ module Mongolian
6
6
  # 3. 根据 Unicode 定义,转写不分阴阳形式;
7
7
 
8
8
  # 转换传统蒙古文为拉丁转写
9
- def m2l
10
- mongolian_str = self.dup.to_str
11
- mongolian_str = mongolian_str.gsub(/ᠠ/, "a")
12
- mongolian_str = mongolian_str.gsub(/ᠡ/, "e")
13
- mongolian_str = mongolian_str.gsub(/ᠢ/, "i")
14
- mongolian_str = mongolian_str.gsub(/ᠣ/, "o")
15
- mongolian_str = mongolian_str.gsub(/ᠤ/, "u")
16
- mongolian_str = mongolian_str.gsub(/ᠥ/, "ö")
17
- mongolian_str = mongolian_str.gsub(/ᠦ/, "ü")
18
- mongolian_str = mongolian_str.gsub(/ᠧ/, "ë")
19
- mongolian_str = mongolian_str.gsub(/ᠨ/, "n")
20
- mongolian_str = mongolian_str.gsub(/ᠩ/, "ŋ")
21
- mongolian_str = mongolian_str.gsub(/ᠪ/, "b")
22
- mongolian_str = mongolian_str.gsub(/ᠫ/, "p")
23
- mongolian_str = mongolian_str.gsub(/ᠬ/, "x")
24
- mongolian_str = mongolian_str.gsub(/ᠭ/, "g")
25
- mongolian_str = mongolian_str.gsub(/ᠮ/, "m")
26
- mongolian_str = mongolian_str.gsub(/ᠯ/, "l")
27
- mongolian_str = mongolian_str.gsub(/ᠰ/, "s")
28
- mongolian_str = mongolian_str.gsub(/ᠱ/, "š")
29
- mongolian_str = mongolian_str.gsub(/ᠲ/, "t")
30
- mongolian_str = mongolian_str.gsub(/ᠳ/, "d")
31
- mongolian_str = mongolian_str.gsub(/ᠴ/, "č")
32
- mongolian_str = mongolian_str.gsub(/ᠵ/, "ǰ")
33
- mongolian_str = mongolian_str.gsub(/ᠶ/, "y")
34
- mongolian_str = mongolian_str.gsub(/ᠷ/, "r")
35
- mongolian_str = mongolian_str.gsub(/ᠸ/, "w")
36
- mongolian_str = mongolian_str.gsub(/ᠹ/, "f")
37
- mongolian_str = mongolian_str.gsub(/ᠺ/, "k")
38
- mongolian_str = mongolian_str.gsub(/ᠻ/, "ḳ")
39
- mongolian_str = mongolian_str.gsub(/ᠼ/, "c")
40
- mongolian_str = mongolian_str.gsub(/ᠽ/, "z")
41
- mongolian_str = mongolian_str.gsub(/ᠾ/, "h")
42
- mongolian_str = mongolian_str.gsub(/ᠿ/, "ž")
43
- mongolian_str = mongolian_str.gsub(/ᡀ/, "lh")
44
- mongolian_str = mongolian_str.gsub(/ᡁ/, "ẑ")
45
- mongolian_str = mongolian_str.gsub(/ᡂ/, "ĉ")
46
- mongolian_str = mongolian_str.gsub(/᠀/, "&")
47
- mongolian_str = mongolian_str.gsub(/᠁/, "…")
48
- mongolian_str = mongolian_str.gsub(/᠂/, ",")
49
- mongolian_str = mongolian_str.gsub(/᠃/, ".")
50
- mongolian_str = mongolian_str.gsub(/᠇/, ":")
51
- mongolian_str = mongolian_str.gsub(/᠈/, "#")
52
- mongolian_str = mongolian_str.gsub(/᠊/, "‐")
53
- mongolian_str = mongolian_str.gsub(/᠋/, "") #fvs1
54
- mongolian_str = mongolian_str.gsub(/᠌/, "") #fvs2
55
- mongolian_str = mongolian_str.gsub(/᠍/, "") #fvs3
56
- mongolian_str = mongolian_str.gsub(/᠎/, "_")
57
- mongolian_str = mongolian_str.gsub(/᠐/, "'0")
58
- mongolian_str = mongolian_str.gsub(/᠑/, "'1")
59
- mongolian_str = mongolian_str.gsub(/᠒/, "'2")
60
- mongolian_str = mongolian_str.gsub(/᠓/, "'3")
61
- mongolian_str = mongolian_str.gsub(/᠔/, "'4")
62
- mongolian_str = mongolian_str.gsub(/᠕/, "'5")
63
- mongolian_str = mongolian_str.gsub(/᠖/, "'6")
64
- mongolian_str = mongolian_str.gsub(/᠗/, "'7")
65
- mongolian_str = mongolian_str.gsub(/᠘/, "'8")
66
- mongolian_str = mongolian_str.gsub(/᠙/, "'9")
67
- mongolian_str = mongolian_str.gsub(/ᡛ/, "ń")
68
- mongolian_str = mongolian_str.gsub(/ᢀ/, "ṃ")
69
- mongolian_str = mongolian_str.gsub(/ᢁ/, "ḥ")
70
- mongolian_str = mongolian_str.gsub(/ᢂ/, "â")
71
- mongolian_str = mongolian_str.gsub(/ᢃ/, "ŏ")
72
- mongolian_str = mongolian_str.gsub(/ᢄ/, "ô")
73
- mongolian_str = mongolian_str.gsub(/ᢅ/, "ˑ")
74
- mongolian_str = mongolian_str.gsub(/ᢆ/, "ːˑ")
75
- mongolian_str = mongolian_str.gsub(/ᢇ/, "ā")
76
- mongolian_str = mongolian_str.gsub(/ᢈ/, "ī")
77
- mongolian_str = mongolian_str.gsub(/ᢉ/, "ḵ")
78
- mongolian_str = mongolian_str.gsub(/ᢊ/, "ṉ")
79
- mongolian_str = mongolian_str.gsub(/ᢋ/, "ƈ")
80
- mongolian_str = mongolian_str.gsub(/ᢌ/, "ť")
81
- mongolian_str = mongolian_str.gsub(/ᢍ/, "ţ")
82
- mongolian_str = mongolian_str.gsub(/ᢎ/, "ḏ")
83
- mongolian_str = mongolian_str.gsub(/ᢏ/, "ņ")
84
- mongolian_str = mongolian_str.gsub(/ᢐ/, "ṯ")
85
- mongolian_str = mongolian_str.gsub(/ᢑ/, "ḓ")
86
- mongolian_str = mongolian_str.gsub(/ᢒ/, "p̄")
87
- mongolian_str = mongolian_str.gsub(/ᢓ/, "ṕh")
88
- mongolian_str = mongolian_str.gsub(/ᢔ/, "ş")
89
- mongolian_str = mongolian_str.gsub(/ᢕ/, "ẖ")
90
- mongolian_str = mongolian_str.gsub(/ᢖ/, "ẕ")
91
- mongolian_str = mongolian_str.gsub(/ᢗ/, "ʒ̄")
92
- mongolian_str = mongolian_str.gsub(/ᢦ/, "ŭ")
93
- mongolian_str = mongolian_str.gsub(/ᢧ/, "ÿ")
94
- mongolian_str = mongolian_str.gsub(/ᢩ/, "̲")
95
- mongolian_str = mongolian_str.gsub(/‌/, "^")
96
- mongolian_str = mongolian_str.gsub(/‍/, "*")
97
- mongolian_str = mongolian_str.gsub(/ /, "-")
98
- return mongolian_str
9
+ def mon_m2l
10
+ str = self.dup.to_str
11
+ str = str.gsub(/ᠠ/, "a")
12
+ str = str.gsub(/ᠡ/, "e")
13
+ str = str.gsub(/ᠢ/, "i")
14
+ str = str.gsub(/ᠣ/, "o")
15
+ str = str.gsub(/ᠤ/, "u")
16
+ str = str.gsub(/ᠥ/, "ö")
17
+ str = str.gsub(/ᠦ/, "ü")
18
+ str = str.gsub(/ᠧ/, "ë")
19
+ str = str.gsub(/ᠨ/, "n")
20
+ str = str.gsub(/ᠩ/, "ng")
21
+ str = str.gsub(/ᠪ/, "b")
22
+ str = str.gsub(/ᠫ/, "p")
23
+ str = str.gsub(/ᠬ/, "x")
24
+ str = str.gsub(/ᠭ/, "g")
25
+ str = str.gsub(/ᠮ/, "m")
26
+ str = str.gsub(/ᠯ/, "l")
27
+ str = str.gsub(/ᠰ/, "s")
28
+ str = str.gsub(/ᠱ/, "š")
29
+ str = str.gsub(/ᠲ/, "t")
30
+ str = str.gsub(/ᠳ/, "d")
31
+ str = str.gsub(/ᠴ/, "č")
32
+ str = str.gsub(/ᠵ/, "ǰ")
33
+ str = str.gsub(/ᠶ/, "y")
34
+ str = str.gsub(/ᠷ/, "r")
35
+ str = str.gsub(/ᠸ/, "w")
36
+ str = str.gsub(/ᠹ/, "f")
37
+ str = str.gsub(/ᠺ/, "k")
38
+ str = str.gsub(/ᠻ/, "ḳ")
39
+ str = str.gsub(/ᠼ/, "c")
40
+ str = str.gsub(/ᠽ/, "z")
41
+ str = str.gsub(/ᠾ/, "h")
42
+ str = str.gsub(/ᠿ/, "ž")
43
+ str = str.gsub(/ᡀ/, "lh")
44
+ str = str.gsub(/ᡁ/, "ẑ")
45
+ str = str.gsub(/ᡂ/, "ĉ")
46
+ str = str.gsub(/᠀/, "&")
47
+ str = str.gsub(/᠁/, "…")
48
+ str = str.gsub(/᠂/, ",")
49
+ str = str.gsub(/᠃/, ".")
50
+ str = str.gsub(/᠇/, ":")
51
+ str = str.gsub(/᠈/, "#")
52
+ str = str.gsub(/᠊/, "‐")
53
+ str = str.gsub(/᠋/, "") #fvs1
54
+ str = str.gsub(/᠌/, "") #fvs2
55
+ str = str.gsub(/᠍/, "") #fvs3
56
+ str = str.gsub(/᠎/, "_")
57
+ str = str.gsub(/᠐/, "'0")
58
+ str = str.gsub(/᠑/, "'1")
59
+ str = str.gsub(/᠒/, "'2")
60
+ str = str.gsub(/᠓/, "'3")
61
+ str = str.gsub(/᠔/, "'4")
62
+ str = str.gsub(/᠕/, "'5")
63
+ str = str.gsub(/᠖/, "'6")
64
+ str = str.gsub(/᠗/, "'7")
65
+ str = str.gsub(/᠘/, "'8")
66
+ str = str.gsub(/᠙/, "'9")
67
+ str = str.gsub(/ᡛ/, "ń")
68
+ str = str.gsub(/ᢀ/, "ṃ")
69
+ str = str.gsub(/ᢁ/, "ḥ")
70
+ str = str.gsub(/ᢂ/, "â")
71
+ str = str.gsub(/ᢃ/, "ŏ")
72
+ str = str.gsub(/ᢄ/, "ô")
73
+ str = str.gsub(/ᢅ/, "ˑ")
74
+ str = str.gsub(/ᢆ/, "ːˑ")
75
+ str = str.gsub(/ᢇ/, "ā")
76
+ str = str.gsub(/ᢈ/, "ī")
77
+ str = str.gsub(/ᢉ/, "ḵ")
78
+ str = str.gsub(/ᢊ/, "ṉ")
79
+ str = str.gsub(/ᢋ/, "ƈ")
80
+ str = str.gsub(/ᢌ/, "ť")
81
+ str = str.gsub(/ᢍ/, "ţ")
82
+ str = str.gsub(/ᢎ/, "ḏ")
83
+ str = str.gsub(/ᢏ/, "ņ")
84
+ str = str.gsub(/ᢐ/, "ṯ")
85
+ str = str.gsub(/ᢑ/, "ḓ")
86
+ str = str.gsub(/ᢒ/, "p̄")
87
+ str = str.gsub(/ᢓ/, "ṕh")
88
+ str = str.gsub(/ᢔ/, "ş")
89
+ str = str.gsub(/ᢕ/, "ẖ")
90
+ str = str.gsub(/ᢖ/, "ẕ")
91
+ str = str.gsub(/ᢗ/, "ʒ̄")
92
+ str = str.gsub(/ᢦ/, "ŭ")
93
+ str = str.gsub(/ᢧ/, "ÿ")
94
+ str = str.gsub(/ᢩ/, "̲")
95
+ str = str.gsub(/‌/, "^")
96
+ str = str.gsub(/‍/, "*")
97
+ str = str.gsub(/ /, "-")
98
+ return str
99
99
  end
100
100
 
101
101
  # 对拉丁转写蒙古文恢复为蒙古文
102
- def l2m
103
- latin_str = self.dup.to_str
104
- mongolian_str = latin_str
105
- return mongolian_str
102
+ def mon_l2m
103
+ str = self.dup.to_str
104
+ # 将拉丁文转换文对应的蒙古文
105
+ return str
106
106
  end
107
107
  end
@@ -7,7 +7,7 @@ module Mongolian
7
7
  # 返回值 10:阳性词,但有错误,第一音节出现第五元音,却在后面出现了第四元音
8
8
  # 返回值 2:正确,阴性词
9
9
  # 返回值 20:阴性词,但有错误,第一音节出现第七元音,却在后面出现了第六元音
10
- def vowel_harmony
10
+ def mon_vowel_harmony
11
11
  mongolian_str = self.dup.to_str
12
12
  #vowel = ["ᠠ", "ᠡ", "ᠢ", "ᠣ", "ᠤ", "ᠥ", "ᠦ"]
13
13
  if mongolian_str =~ /[ᠠᠣᠤ]/ and mongolian_str =~ /[ᠡᠥᠦ]/
@@ -15,14 +15,14 @@ module Mongolian
15
15
  else
16
16
  if mongolian_str =~ /[ᠠᠣᠤ]/
17
17
  #如果第一音节阳性元音为第 5 元音,却在后面出现第 4 元音,则错误
18
- if mongolian_str.syllable[0] =~ /ᠤ/ and mongolian_str[mongolian_str.syllable[0].size..-1] =~ /ᠣ/
18
+ if mongolian_str.mon_syllable_classify[0] =~ /ᠤ/ and mongolian_str[mongolian_str.mon_syllable_classify[0].size..-1] =~ /ᠣ/
19
19
  return 10
20
20
  else
21
21
  return 1
22
22
  end
23
23
  else
24
24
  #如果第一音节阳性元音为第 7 元音,却在后面出现第 6 元音,则错误
25
- if mongolian_str.syllable[0] =~ /ᠦ/ and mongolian_str[mongolian_str.syllable[0].size..-1] =~ /ᠥ/
25
+ if mongolian_str.mon_syllable_classify[0] =~ /ᠦ/ and mongolian_str[mongolian_str.mon_syllable_classify[0].size..-1] =~ /ᠥ/
26
26
  return 20
27
27
  else
28
28
  return 2
@@ -31,8 +31,9 @@ module Mongolian
31
31
  end
32
32
  end
33
33
 
34
- # 对单词划分音节,返回音节数组
35
- def syllable
34
+ # 对单词划分音节:每个元音前最多一个辅音前面既可划分音节
35
+ # 返回值是音节数组
36
+ def mon_syllable_classify
36
37
  mongolian_str = self.dup.to_str
37
38
  mlist = []
38
39
  s = ""
@@ -1,6 +1,6 @@
1
1
  module Mongolian
2
2
  ## 对做好分词的蒙古文词进行词干提取。
3
- def mongolian_stemmify
3
+ def mon_stemmify
4
4
  w = self.dup.to_str
5
5
  return w if w.length < 4
6
6
 
@@ -8,8 +8,8 @@ module Mongolian
8
8
 
9
9
  ## 对于混合有多个文种的一段文字,删除其中的非蒙古文字符和蒙古文标点符号后,
10
10
  # 仅对蒙古文分词。
11
- # 函数返回仅包含蒙古文的分词数组。
12
- def mongolian_tokenize
11
+ # 返回值为蒙古文分词数组。
12
+ def mon_tokenize
13
13
  w = self.dup.to_str
14
14
  # 去除标点符号,用空格替换标点符号
15
15
  w = w.gsub(/[!᠄?·᠃᠂⁈⁉᠁—;《》]/, " ")
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Mongolian
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mongolian
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Huang Fei-Long