tyccl 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/README.md +14 -9
- data/Rakefile +10 -0
- data/doc/Object.html +154 -0
- data/doc/README_md.html +182 -0
- data/doc/Tyccl.html +1020 -0
- data/doc/Tyccl/Containers.html +130 -0
- data/doc/Tyccl/Logger.html +130 -0
- data/doc/Tyccl/YAML.html +130 -0
- data/doc/created.rid +3 -0
- data/doc/images/add.png +0 -0
- data/doc/images/arrow_up.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +166 -0
- data/doc/js/darkfish.js +155 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/rdoc.css +595 -0
- data/doc/table_of_contents.html +111 -0
- data/lib/Inverted.yaml +1 -1
- data/lib/tyccl.rb +63 -66
- data/lib/tyccl/version.rb +2 -2
- data/test/test_tyccl.rb +35 -35
- metadata +43 -2
@@ -0,0 +1,111 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
|
6
|
+
|
7
|
+
<title>Table of Contents - RDoc Documentation</title>
|
8
|
+
|
9
|
+
<link type="text/css" media="screen" href="./rdoc.css" rel="stylesheet">
|
10
|
+
|
11
|
+
<script type="text/javascript">
|
12
|
+
var rdoc_rel_prefix = "./";
|
13
|
+
</script>
|
14
|
+
|
15
|
+
<script type="text/javascript" charset="utf-8" src="./js/jquery.js"></script>
|
16
|
+
<script type="text/javascript" charset="utf-8" src="./js/navigation.js"></script>
|
17
|
+
<script type="text/javascript" charset="utf-8" src="./js/search_index.js"></script>
|
18
|
+
<script type="text/javascript" charset="utf-8" src="./js/search.js"></script>
|
19
|
+
<script type="text/javascript" charset="utf-8" src="./js/searcher.js"></script>
|
20
|
+
<script type="text/javascript" charset="utf-8" src="./js/darkfish.js"></script>
|
21
|
+
|
22
|
+
|
23
|
+
<body class="indexpage">
|
24
|
+
<h1>Table of Contents - RDoc Documentation</h1>
|
25
|
+
|
26
|
+
<h2>Pages</h2>
|
27
|
+
<ul>
|
28
|
+
<li class="file">
|
29
|
+
<a href="README_md.html">README</a>
|
30
|
+
|
31
|
+
<img class="toc-toggle" src="images/transparent.png" alt="" title="toggle headings">
|
32
|
+
<ul class="initially-hidden">
|
33
|
+
<li><a href="README_md.html#label-Tyccl">Tyccl</a>
|
34
|
+
<li><a href="README_md.html#label-Installation">Installation</a>
|
35
|
+
<li><a href="README_md.html#label-Usage">Usage</a>
|
36
|
+
<li><a href="README_md.html#label-Contributing">Contributing</a>
|
37
|
+
</ul>
|
38
|
+
</li>
|
39
|
+
|
40
|
+
</ul>
|
41
|
+
|
42
|
+
<h2 id="classes">Classes/Modules</h2>
|
43
|
+
<ul>
|
44
|
+
<li class="class">
|
45
|
+
<a href="Object.html">Object</a>
|
46
|
+
</li>
|
47
|
+
<li class="class">
|
48
|
+
<a href="Tyccl.html">Tyccl</a>
|
49
|
+
</li>
|
50
|
+
<li class="module">
|
51
|
+
<a href="Tyccl/Containers.html">Tyccl::Containers</a>
|
52
|
+
</li>
|
53
|
+
<li class="module">
|
54
|
+
<a href="Tyccl/Logger.html">Tyccl::Logger</a>
|
55
|
+
</li>
|
56
|
+
<li class="module">
|
57
|
+
<a href="Tyccl/YAML.html">Tyccl::YAML</a>
|
58
|
+
</li>
|
59
|
+
|
60
|
+
</ul>
|
61
|
+
|
62
|
+
<h2 id="methods">Methods</h2>
|
63
|
+
<ul>
|
64
|
+
|
65
|
+
<li class="method"><a href="Tyccl.html#method-c-combine_id">::combine_id — Tyccl</a>
|
66
|
+
|
67
|
+
<li class="method"><a href="Tyccl.html#method-c-compare_id">::compare_id — Tyccl</a>
|
68
|
+
|
69
|
+
<li class="method"><a href="Tyccl.html#method-c-cut_id">::cut_id — Tyccl</a>
|
70
|
+
|
71
|
+
<li class="method"><a href="Tyccl.html#method-c-dist">::dist — Tyccl</a>
|
72
|
+
|
73
|
+
<li class="method"><a href="Tyccl.html#method-c-gen_findstring">::gen_findstring — Tyccl</a>
|
74
|
+
|
75
|
+
<li class="method"><a href="Tyccl.html#method-c-get_dist_by_id">::get_dist_by_id — Tyccl</a>
|
76
|
+
|
77
|
+
<li class="method"><a href="Tyccl.html#method-c-get_equal">::get_equal — Tyccl</a>
|
78
|
+
|
79
|
+
<li class="method"><a href="Tyccl.html#method-c-get_id_sum">::get_id_sum — Tyccl</a>
|
80
|
+
|
81
|
+
<li class="method"><a href="Tyccl.html#method-c-get_ids_by_wildcard">::get_ids_by_wildcard — Tyccl</a>
|
82
|
+
|
83
|
+
<li class="method"><a href="Tyccl.html#method-c-get_ids_by_word">::get_ids_by_word — Tyccl</a>
|
84
|
+
|
85
|
+
<li class="method"><a href="Tyccl.html#method-c-get_index_sum">::get_index_sum — Tyccl</a>
|
86
|
+
|
87
|
+
<li class="method"><a href="Tyccl.html#method-c-get_same">::get_same — Tyccl</a>
|
88
|
+
|
89
|
+
<li class="method"><a href="Tyccl.html#method-c-get_sim_by_id">::get_sim_by_id — Tyccl</a>
|
90
|
+
|
91
|
+
<li class="method"><a href="Tyccl.html#method-c-get_similar">::get_similar — Tyccl</a>
|
92
|
+
|
93
|
+
<li class="method"><a href="Tyccl.html#method-c-get_words_by_id">::get_words_by_id — Tyccl</a>
|
94
|
+
|
95
|
+
<li class="method"><a href="Tyccl.html#method-c-has_equal-3F">::has_equal? — Tyccl</a>
|
96
|
+
|
97
|
+
<li class="method"><a href="Tyccl.html#method-c-has_same-3F">::has_same? — Tyccl</a>
|
98
|
+
|
99
|
+
<li class="method"><a href="Tyccl.html#method-c-has_single-3F">::has_single? — Tyccl</a>
|
100
|
+
|
101
|
+
<li class="method"><a href="Tyccl.html#method-c-sim">::sim — Tyccl</a>
|
102
|
+
|
103
|
+
</ul>
|
104
|
+
|
105
|
+
|
106
|
+
<footer id="validator-badges">
|
107
|
+
<p><a href="http://validator.w3.org/check/referer">[Validate]</a>
|
108
|
+
<p>Generated by <a href="https://github.com/rdoc/rdoc">RDoc</a> 4.0.0.
|
109
|
+
<p>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish Rdoc Generator</a> 3.
|
110
|
+
</footer>
|
111
|
+
|
data/lib/Inverted.yaml
CHANGED
data/lib/tyccl.rb
CHANGED
@@ -2,19 +2,17 @@
|
|
2
2
|
|
3
3
|
# = this gem is a tool for analysing similarity
|
4
4
|
# = between Chinese words. It is based on <em>HIT Tongyici Cilin (Extended)</em>(同义词词林())
|
5
|
-
# this gem only has one singleton class, instance once and use it always.
|
6
5
|
#
|
7
|
-
# learn more about Tongyici Cilin(同义词词林) http://vdisk.weibo.com/s/qGrIviGdExvx
|
6
|
+
# * learn more about Tongyici Cilin(同义词词林) http://vdisk.weibo.com/s/qGrIviGdExvx
|
8
7
|
#
|
9
|
-
# Author:: Joe Woo (https://github.com/JoeWoo)
|
10
|
-
# License:: MIT
|
8
|
+
# * Author:: Joe Woo (https://github.com/JoeWoo)
|
9
|
+
# * License:: MIT
|
11
10
|
#
|
12
11
|
|
13
|
-
require File.expand_path("../tyccl/version", __FILE__)
|
14
|
-
require "algorithms"
|
15
|
-
require "yaml"
|
16
|
-
require "
|
17
|
-
require "logger"
|
12
|
+
require File.expand_path("../tyccl/version", __FILE__)#:nodoc:all
|
13
|
+
require "algorithms"#:nodoc:all
|
14
|
+
require "yaml"#:nodoc:all
|
15
|
+
require "logger"#:nodoc:all
|
18
16
|
|
19
17
|
|
20
18
|
# this struct is used to return analysing result
|
@@ -26,69 +24,68 @@ Result_t = Struct.new(:value,:x_id,:y_id)
|
|
26
24
|
# to keep Tyccl object just only one.
|
27
25
|
class Tyccl
|
28
26
|
|
29
|
-
|
30
|
-
|
27
|
+
#--
|
31
28
|
# Read the Cilin file to memory.
|
32
29
|
# Format the data structure \#@IDsTrie.
|
33
30
|
# Index the hash \#@IDsIndex.
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
}
|
55
|
-
items << word
|
56
|
-
}
|
57
|
-
#--
|
58
|
-
#init Trie of cilin.txt
|
59
|
-
#++
|
60
|
-
@IDsTrie = Containers::Trie.new
|
61
|
-
i=0
|
62
|
-
codes.each{ |key|
|
63
|
-
@IDsTrie[key]=items[i]
|
64
|
-
i+=1
|
31
|
+
#++
|
32
|
+
#--
|
33
|
+
#read the cilin.txt to ids[] and items[]
|
34
|
+
#++
|
35
|
+
@logger = Logger.new(STDOUT)
|
36
|
+
@logger.level = Logger::WARN
|
37
|
+
codes=[]
|
38
|
+
items=[]
|
39
|
+
@IDsIndex = Hash.new
|
40
|
+
f = File.new(File.expand_path("../cilin.txt", __FILE__))
|
41
|
+
i=0
|
42
|
+
f.each { |line|
|
43
|
+
line.force_encoding('utf-8')
|
44
|
+
m=line.split(" ")
|
45
|
+
codes << m[0]
|
46
|
+
@IDsIndex[m[0]] = i
|
47
|
+
i += 1
|
48
|
+
word = Array.new
|
49
|
+
m[1..-1].each{ |term|
|
50
|
+
word << term
|
65
51
|
}
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
52
|
+
items << word
|
53
|
+
}
|
54
|
+
#--
|
55
|
+
#init Trie of cilin.txt
|
56
|
+
#++
|
57
|
+
@IDsTrie = Containers::Trie.new
|
58
|
+
i=0
|
59
|
+
codes.each{ |key|
|
60
|
+
@IDsTrie[key]=items[i]
|
61
|
+
i+=1
|
62
|
+
}
|
63
|
+
#--
|
64
|
+
#init index of cilin.txt
|
65
|
+
#++
|
66
|
+
@index = YAML::load(File.open(File.expand_path("../Inverted.yaml", __FILE__)))
|
67
|
+
|
71
68
|
|
72
69
|
|
73
70
|
|
74
71
|
# Given id(string) such as:"Aa01A01=" "Aa01A03#"
|
75
72
|
# Returns an array containing words(string) that match this id
|
76
73
|
# If no match is found, nil is returned.
|
77
|
-
def get_words_by_id(id)
|
74
|
+
def self.get_words_by_id(id)
|
78
75
|
@IDsTrie[id]
|
79
76
|
end
|
80
77
|
|
81
78
|
# Returns a sorted array containing IDs(string) that match the parameter Wildcard(string).
|
82
79
|
# The wildcard characters that match any character are ‘*’ and ‘.’ such as "Aa01A..=","Aa**A..."
|
83
80
|
# If no match is found, an empty array is returned.
|
84
|
-
def get_ids_by_wildcard(wildcard)
|
81
|
+
def self.get_ids_by_wildcard(wildcard)
|
85
82
|
@IDsTrie.wildcard(wildcard)
|
86
83
|
end
|
87
84
|
|
88
85
|
# Returns an array containing IDs(string) that the parameter Word(string) matches.
|
89
86
|
#
|
90
87
|
# tips: the same word may have a few semantic meanings, so a word can match many IDs.
|
91
|
-
def get_ids_by_word(word)
|
88
|
+
def self.get_ids_by_word(word)
|
92
89
|
m = @index[word]
|
93
90
|
if(m==nil)
|
94
91
|
@logger.error(word+" is an unlisted word!")
|
@@ -101,7 +98,7 @@ class Tyccl
|
|
101
98
|
# Given a word(string).
|
102
99
|
# Test to see if the parameter Word has any synonym.
|
103
100
|
# Returns true or false.
|
104
|
-
def has_same?(word)
|
101
|
+
def self.has_same?(word)
|
105
102
|
ids = get_ids_by_word(word)
|
106
103
|
i=0
|
107
104
|
flag=false
|
@@ -119,7 +116,7 @@ class Tyccl
|
|
119
116
|
# Given a word(string).
|
120
117
|
# Test to see if the parameter Word has any equivalent word.
|
121
118
|
# Returns true or false.
|
122
|
-
def has_equal?(word)
|
119
|
+
def self.has_equal?(word)
|
123
120
|
ids = get_ids_by_word(word)
|
124
121
|
i=0
|
125
122
|
flag=false
|
@@ -138,7 +135,7 @@ class Tyccl
|
|
138
135
|
# Test to see if the parameter Word has any ID whose corresponding
|
139
136
|
# words list just has only one element.
|
140
137
|
# Returns true or false.
|
141
|
-
def has_single?(word)
|
138
|
+
def self.has_single?(word)
|
142
139
|
ids = get_ids_by_word(word)
|
143
140
|
i=0
|
144
141
|
flag=false
|
@@ -157,7 +154,7 @@ class Tyccl
|
|
157
154
|
# Returns a two dimensional array that contains the parameter Word`s
|
158
155
|
# synonyms, grouped by the different IDs that the word matches.
|
159
156
|
# If the word has no synonym, nil is returned.
|
160
|
-
def get_same(word)
|
157
|
+
def self.get_same(word)
|
161
158
|
if has_same?(word)
|
162
159
|
same_words=[]
|
163
160
|
ids = get_ids_by_word(word)
|
@@ -175,7 +172,7 @@ class Tyccl
|
|
175
172
|
# Returns a two dimensional array that contains the parameter Word`s
|
176
173
|
# equivalent words, grouped by the different IDs that the word matches.
|
177
174
|
# If the word has no synonym, nil is returned.
|
178
|
-
def get_equal(word)
|
175
|
+
def self.get_equal(word)
|
179
176
|
if has_equal?(word)
|
180
177
|
equal_words=[]
|
181
178
|
ids = get_ids_by_word(word)
|
@@ -198,7 +195,7 @@ class Tyccl
|
|
198
195
|
#
|
199
196
|
# tips: level 0,1,2,3,4 correspond Cilin(同义词词林) ID`s different
|
200
197
|
# segment: A,a,01,A,01=.
|
201
|
-
def get_similar(word, level=4)
|
198
|
+
def self.get_similar(word, level=4)
|
202
199
|
ids = get_ids_by_word(word)
|
203
200
|
similar=[]
|
204
201
|
ids.each{ |code|
|
@@ -221,7 +218,7 @@ class Tyccl
|
|
221
218
|
|
222
219
|
# Given idA(string) and idB(string).
|
223
220
|
# Returns semantic distance(int) between idA and idB, values in [0,10].
|
224
|
-
def get_dist_by_id(idA, idB)
|
221
|
+
def self.get_dist_by_id(idA, idB)
|
225
222
|
alpha=10.0/5
|
226
223
|
n = compare_id(idA,idB)
|
227
224
|
(alpha*(5-n)).round
|
@@ -229,7 +226,7 @@ class Tyccl
|
|
229
226
|
|
230
227
|
# Given idA(string) and idB(string).
|
231
228
|
# Returns similarity(float) between idA and idB, values in [0,1].
|
232
|
-
def get_sim_by_id(idA, idB)
|
229
|
+
def self.get_sim_by_id(idA, idB)
|
233
230
|
n = compare_id(idA,idB)
|
234
231
|
str = idA.clone
|
235
232
|
if n==0
|
@@ -254,7 +251,7 @@ class Tyccl
|
|
254
251
|
# Given wordA(string) and wordB(string).
|
255
252
|
# Returns a Struct Result_t which contains idA, idB, and shortest
|
256
253
|
# semantic distance(int) between wordA and wordB.
|
257
|
-
def dist(wordA, wordB)
|
254
|
+
def self.dist(wordA, wordB)
|
258
255
|
alpha=10.0/5
|
259
256
|
shortest_Pair = Result_t.new(100,"","")
|
260
257
|
idAs = get_ids_by_word(wordA)
|
@@ -277,7 +274,7 @@ class Tyccl
|
|
277
274
|
# Given wordA(string) and wordB(string).
|
278
275
|
# Returns a Struct Result_t which contains the most similar Pairs
|
279
276
|
# wordA`s ID and wordB`s ID, and similarity(float) between idA and idB.
|
280
|
-
def sim(wordA, wordB)
|
277
|
+
def self.sim(wordA, wordB)
|
281
278
|
factor=[0.02,0.65,0.8,0.9,0.96,1,0.5]#0,1,2,3,4,5各层参数
|
282
279
|
longest_Pair = Result_t.new(-1,"","")
|
283
280
|
idAs = get_ids_by_word(wordA)
|
@@ -320,7 +317,7 @@ class Tyccl
|
|
320
317
|
# segment: A,a,01,A,01=.
|
321
318
|
# Returns a string in which '.' replaces every char from
|
322
319
|
# the start_index to the string`s end.
|
323
|
-
def gen_findstring(code, start_index)
|
320
|
+
def self.gen_findstring(code, start_index)
|
324
321
|
frame = cut_id(code)
|
325
322
|
(start_index).upto(4){|i|
|
326
323
|
0.upto(frame[i].size-1){ |j|
|
@@ -333,13 +330,13 @@ class Tyccl
|
|
333
330
|
# Given a id(string).
|
334
331
|
# Returns an array that contains 5 strings which are ID`s
|
335
332
|
# different segments, like: A,a,01,A,01= .
|
336
|
-
def cut_id(id)
|
333
|
+
def self.cut_id(id)
|
337
334
|
frame=[id[0],id[1],id[2..3],id[4],id[5..7]]
|
338
335
|
return frame
|
339
336
|
end
|
340
337
|
|
341
338
|
# the method #cut_id`s inverse process.
|
342
|
-
def combine_id(frame)
|
339
|
+
def self.combine_id(frame)
|
343
340
|
m=""
|
344
341
|
frame.each{|seg|
|
345
342
|
m << seg
|
@@ -350,7 +347,7 @@ class Tyccl
|
|
350
347
|
# Given idA(string) and idB(string).
|
351
348
|
# Returns the first place where their segments differ; place values are in [0,4].
|
352
349
|
# If they are the same, returns 5.
|
353
|
-
def compare_id(idA, idB)
|
350
|
+
def self.compare_id(idA, idB)
|
354
351
|
frameA=cut_id(idA)
|
355
352
|
frameB=cut_id(idB)
|
356
353
|
0.upto(frameA.length-1){ |i|
|
@@ -362,12 +359,12 @@ class Tyccl
|
|
362
359
|
end
|
363
360
|
|
364
361
|
# Returns the total number of different IDs in Cilin.
|
365
|
-
def get_id_sum
|
362
|
+
def self.get_id_sum
|
366
363
|
@IDsIndex.size
|
367
364
|
end
|
368
365
|
|
369
366
|
# Returns the total number of different words in Cilin.
|
370
|
-
def get_index_sum
|
367
|
+
def self.get_index_sum
|
371
368
|
@index.size
|
372
369
|
end
|
373
370
|
|
data/lib/tyccl/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
class Tyccl
|
2
|
-
VERSION = "0.0.
|
1
|
+
class Tyccl #:nodoc:all
|
2
|
+
VERSION = "0.0.2"
|
3
3
|
end
|
data/test/test_tyccl.rb
CHANGED
@@ -5,75 +5,75 @@ require 'test/unit'
|
|
5
5
|
require File.expand_path('../../lib/tyccl', __FILE__)
|
6
6
|
|
7
7
|
|
8
|
-
$tyc=Tyccl.instance
|
9
8
|
|
10
|
-
|
9
|
+
|
10
|
+
class TycclTest < Test::Unit::TestCase #:nodoc:all
|
11
11
|
|
12
12
|
def test_instance
|
13
13
|
assert_equal 17809,
|
14
|
-
|
14
|
+
Tyccl.get_id_sum
|
15
15
|
assert_equal 77457,
|
16
|
-
|
16
|
+
Tyccl.get_index_sum
|
17
17
|
end
|
18
18
|
|
19
19
|
def test_get_words_by_id
|
20
20
|
assert_equal ["人","士","人物","人士","人氏","人选"],
|
21
|
-
|
21
|
+
Tyccl.get_words_by_id("Aa01A01=")
|
22
22
|
assert_equal nil,
|
23
|
-
|
23
|
+
Tyccl.get_words_by_id("dfdf")
|
24
24
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def test_get_ids_by_wildcard
|
28
28
|
assert_equal 9,
|
29
|
-
|
29
|
+
Tyccl.get_ids_by_wildcard("Aa01A...").size
|
30
30
|
assert_equal 32,
|
31
|
-
|
31
|
+
Tyccl.get_ids_by_wildcard("Aa**A...").size
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_get_ids_by_word
|
35
35
|
assert_equal nil,
|
36
|
-
|
36
|
+
Tyccl.get_ids_by_word("屌丝")
|
37
37
|
assert_equal 1,
|
38
|
-
|
38
|
+
Tyccl.get_ids_by_word("桅顶").size
|
39
39
|
assert_equal 7,
|
40
|
-
|
40
|
+
Tyccl.get_ids_by_word("底").size
|
41
41
|
end
|
42
42
|
|
43
43
|
def test_has_same
|
44
44
|
assert_equal true,
|
45
|
-
|
45
|
+
Tyccl.has_same?("人")
|
46
46
|
assert_equal false,
|
47
|
-
|
47
|
+
Tyccl.has_same?("顺民")
|
48
48
|
assert_equal false,
|
49
|
-
|
49
|
+
Tyccl.has_same?("众学生")
|
50
50
|
end
|
51
51
|
|
52
52
|
def test_has_equal
|
53
53
|
assert_equal true,
|
54
|
-
|
54
|
+
Tyccl.has_equal?("良民")
|
55
55
|
assert_equal false,
|
56
|
-
|
56
|
+
Tyccl.has_equal?("众学生")
|
57
57
|
assert_equal false,
|
58
|
-
|
58
|
+
Tyccl.has_equal?("人")
|
59
59
|
end
|
60
60
|
|
61
61
|
def test_has_single
|
62
62
|
assert_equal false,
|
63
|
-
|
63
|
+
Tyccl.has_single?("良民")
|
64
64
|
assert_equal true,
|
65
|
-
|
65
|
+
Tyccl.has_single?("众学生")
|
66
66
|
assert_equal false,
|
67
|
-
|
67
|
+
Tyccl.has_single?("人")
|
68
68
|
end
|
69
69
|
|
70
70
|
def test_get_same
|
71
|
-
m
|
71
|
+
m=Tyccl.get_same("人")
|
72
72
|
|
73
73
|
assert_equal nil,
|
74
|
-
|
74
|
+
Tyccl.get_same("顺民")
|
75
75
|
assert_equal nil,
|
76
|
-
|
76
|
+
Tyccl.get_same("众学生")
|
77
77
|
assert_equal 5,
|
78
78
|
m.size
|
79
79
|
assert_equal 6,
|
@@ -91,13 +91,13 @@ class TycclTest < Test::Unit::TestCase
|
|
91
91
|
|
92
92
|
def test_get_equal
|
93
93
|
assert_equal nil,
|
94
|
-
|
94
|
+
Tyccl.get_equal("人")
|
95
95
|
assert_equal nil,
|
96
|
-
|
96
|
+
Tyccl.get_equal("众学生")
|
97
97
|
assert_equal 1,
|
98
|
-
|
98
|
+
Tyccl.get_equal("流民").size
|
99
99
|
assert_equal 9,
|
100
|
-
|
100
|
+
Tyccl.get_equal("流民")[0].size
|
101
101
|
end
|
102
102
|
|
103
103
|
def test_get_similar
|
@@ -106,24 +106,24 @@ class TycclTest < Test::Unit::TestCase
|
|
106
106
|
["身体", "人"],
|
107
107
|
["人格", "人品", "人头", "人", "品质", "质地", "格调", "灵魂", "为人"],
|
108
108
|
["人数", "人头", "人口", "人", "口", "丁", "家口", "食指", "总人口"] ],
|
109
|
-
|
109
|
+
Tyccl.get_similar("人")
|
110
110
|
end
|
111
111
|
|
112
112
|
# dist ranges [0,10];
|
113
113
|
# if dist<7 then we believe that the two words are related
|
114
114
|
def test_dist
|
115
115
|
assert_equal Result_t.new(0,"Aa01A01=","Aa01A01="),
|
116
|
-
|
116
|
+
Tyccl.dist("人","士")
|
117
117
|
assert_equal Result_t.new(2,"Bh06A32=","Bh06A34="),
|
118
|
-
|
118
|
+
Tyccl.dist("西红柿","黄瓜")
|
119
119
|
assert_equal Result_t.new(4,"Aa01A05=","Aa01B03#"),
|
120
|
-
|
120
|
+
Tyccl.dist("匹夫","良民")
|
121
121
|
assert_equal Result_t.new(6,"Bh07A14=","Bh06A32="),
|
122
|
-
|
122
|
+
Tyccl.dist("苹果","西红柿")
|
123
123
|
assert_equal Result_t.new(8,"Aa01B02=","Ab01B10="),
|
124
|
-
|
124
|
+
Tyccl.dist("群众","村姑")
|
125
125
|
assert_equal Result_t.new(10,"Aa01A01=","Kd04C01="),
|
126
|
-
|
126
|
+
Tyccl.dist("人","哟")
|
127
127
|
end
|
128
128
|
|
129
129
|
def test_sim
|
@@ -142,7 +142,7 @@ class TycclTest < Test::Unit::TestCase
|
|
142
142
|
i=0
|
143
143
|
words.each{ |word|
|
144
144
|
assert_equal result[i],
|
145
|
-
|
145
|
+
Tyccl.sim("人民",word)
|
146
146
|
i+=1
|
147
147
|
}
|
148
148
|
end
|