tyccl 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/README.md +14 -9
- data/Rakefile +10 -0
- data/doc/Object.html +154 -0
- data/doc/README_md.html +182 -0
- data/doc/Tyccl.html +1020 -0
- data/doc/Tyccl/Containers.html +130 -0
- data/doc/Tyccl/Logger.html +130 -0
- data/doc/Tyccl/YAML.html +130 -0
- data/doc/created.rid +3 -0
- data/doc/images/add.png +0 -0
- data/doc/images/arrow_up.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +166 -0
- data/doc/js/darkfish.js +155 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/rdoc.css +595 -0
- data/doc/table_of_contents.html +111 -0
- data/lib/Inverted.yaml +1 -1
- data/lib/tyccl.rb +63 -66
- data/lib/tyccl/version.rb +2 -2
- data/test/test_tyccl.rb +35 -35
- metadata +43 -2
@@ -0,0 +1,111 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
|
6
|
+
|
7
|
+
<title>Table of Contents - RDoc Documentation</title>
|
8
|
+
|
9
|
+
<link type="text/css" media="screen" href="./rdoc.css" rel="stylesheet">
|
10
|
+
|
11
|
+
<script type="text/javascript">
|
12
|
+
var rdoc_rel_prefix = "./";
|
13
|
+
</script>
|
14
|
+
|
15
|
+
<script type="text/javascript" charset="utf-8" src="./js/jquery.js"></script>
|
16
|
+
<script type="text/javascript" charset="utf-8" src="./js/navigation.js"></script>
|
17
|
+
<script type="text/javascript" charset="utf-8" src="./js/search_index.js"></script>
|
18
|
+
<script type="text/javascript" charset="utf-8" src="./js/search.js"></script>
|
19
|
+
<script type="text/javascript" charset="utf-8" src="./js/searcher.js"></script>
|
20
|
+
<script type="text/javascript" charset="utf-8" src="./js/darkfish.js"></script>
|
21
|
+
|
22
|
+
|
23
|
+
<body class="indexpage">
|
24
|
+
<h1>Table of Contents - RDoc Documentation</h1>
|
25
|
+
|
26
|
+
<h2>Pages</h2>
|
27
|
+
<ul>
|
28
|
+
<li class="file">
|
29
|
+
<a href="README_md.html">README</a>
|
30
|
+
|
31
|
+
<img class="toc-toggle" src="images/transparent.png" alt="" title="toggle headings">
|
32
|
+
<ul class="initially-hidden">
|
33
|
+
<li><a href="README_md.html#label-Tyccl">Tyccl</a>
|
34
|
+
<li><a href="README_md.html#label-Installation">Installation</a>
|
35
|
+
<li><a href="README_md.html#label-Usage">Usage</a>
|
36
|
+
<li><a href="README_md.html#label-Contributing">Contributing</a>
|
37
|
+
</ul>
|
38
|
+
</li>
|
39
|
+
|
40
|
+
</ul>
|
41
|
+
|
42
|
+
<h2 id="classes">Classes/Modules</h2>
|
43
|
+
<ul>
|
44
|
+
<li class="class">
|
45
|
+
<a href="Object.html">Object</a>
|
46
|
+
</li>
|
47
|
+
<li class="class">
|
48
|
+
<a href="Tyccl.html">Tyccl</a>
|
49
|
+
</li>
|
50
|
+
<li class="module">
|
51
|
+
<a href="Tyccl/Containers.html">Tyccl::Containers</a>
|
52
|
+
</li>
|
53
|
+
<li class="module">
|
54
|
+
<a href="Tyccl/Logger.html">Tyccl::Logger</a>
|
55
|
+
</li>
|
56
|
+
<li class="module">
|
57
|
+
<a href="Tyccl/YAML.html">Tyccl::YAML</a>
|
58
|
+
</li>
|
59
|
+
|
60
|
+
</ul>
|
61
|
+
|
62
|
+
<h2 id="methods">Methods</h2>
|
63
|
+
<ul>
|
64
|
+
|
65
|
+
<li class="method"><a href="Tyccl.html#method-c-combine_id">::combine_id — Tyccl</a>
|
66
|
+
|
67
|
+
<li class="method"><a href="Tyccl.html#method-c-compare_id">::compare_id — Tyccl</a>
|
68
|
+
|
69
|
+
<li class="method"><a href="Tyccl.html#method-c-cut_id">::cut_id — Tyccl</a>
|
70
|
+
|
71
|
+
<li class="method"><a href="Tyccl.html#method-c-dist">::dist — Tyccl</a>
|
72
|
+
|
73
|
+
<li class="method"><a href="Tyccl.html#method-c-gen_findstring">::gen_findstring — Tyccl</a>
|
74
|
+
|
75
|
+
<li class="method"><a href="Tyccl.html#method-c-get_dist_by_id">::get_dist_by_id — Tyccl</a>
|
76
|
+
|
77
|
+
<li class="method"><a href="Tyccl.html#method-c-get_equal">::get_equal — Tyccl</a>
|
78
|
+
|
79
|
+
<li class="method"><a href="Tyccl.html#method-c-get_id_sum">::get_id_sum — Tyccl</a>
|
80
|
+
|
81
|
+
<li class="method"><a href="Tyccl.html#method-c-get_ids_by_wildcard">::get_ids_by_wildcard — Tyccl</a>
|
82
|
+
|
83
|
+
<li class="method"><a href="Tyccl.html#method-c-get_ids_by_word">::get_ids_by_word — Tyccl</a>
|
84
|
+
|
85
|
+
<li class="method"><a href="Tyccl.html#method-c-get_index_sum">::get_index_sum — Tyccl</a>
|
86
|
+
|
87
|
+
<li class="method"><a href="Tyccl.html#method-c-get_same">::get_same — Tyccl</a>
|
88
|
+
|
89
|
+
<li class="method"><a href="Tyccl.html#method-c-get_sim_by_id">::get_sim_by_id — Tyccl</a>
|
90
|
+
|
91
|
+
<li class="method"><a href="Tyccl.html#method-c-get_similar">::get_similar — Tyccl</a>
|
92
|
+
|
93
|
+
<li class="method"><a href="Tyccl.html#method-c-get_words_by_id">::get_words_by_id — Tyccl</a>
|
94
|
+
|
95
|
+
<li class="method"><a href="Tyccl.html#method-c-has_equal-3F">::has_equal? — Tyccl</a>
|
96
|
+
|
97
|
+
<li class="method"><a href="Tyccl.html#method-c-has_same-3F">::has_same? — Tyccl</a>
|
98
|
+
|
99
|
+
<li class="method"><a href="Tyccl.html#method-c-has_single-3F">::has_single? — Tyccl</a>
|
100
|
+
|
101
|
+
<li class="method"><a href="Tyccl.html#method-c-sim">::sim — Tyccl</a>
|
102
|
+
|
103
|
+
</ul>
|
104
|
+
|
105
|
+
|
106
|
+
<footer id="validator-badges">
|
107
|
+
<p><a href="http://validator.w3.org/check/referer">[Validate]</a>
|
108
|
+
<p>Generated by <a href="https://github.com/rdoc/rdoc">RDoc</a> 4.0.0.
|
109
|
+
<p>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish Rdoc Generator</a> 3.
|
110
|
+
</footer>
|
111
|
+
|
data/lib/Inverted.yaml
CHANGED
data/lib/tyccl.rb
CHANGED
@@ -2,19 +2,17 @@
|
|
2
2
|
|
3
3
|
# = this gem is a tool for analysing similarity
|
4
4
|
# = between Chinese words. It is based on <em>HIT Tongyici Cilin (Extended)</em>(同义词词林())
|
5
|
-
# this gem only has one singleton class, instance once and use it always.
|
6
5
|
#
|
7
|
-
# learn more about Tongyici Cilin(同义词词林) http://vdisk.weibo.com/s/qGrIviGdExvx
|
6
|
+
# * learn more about Tongyici Cilin(同义词词林) http://vdisk.weibo.com/s/qGrIviGdExvx
|
8
7
|
#
|
9
|
-
# Author:: Joe Woo (https://github.com/JoeWoo)
|
10
|
-
# License:: MIT
|
8
|
+
# * Author:: Joe Woo (https://github.com/JoeWoo)
|
9
|
+
# * License:: MIT
|
11
10
|
#
|
12
11
|
|
13
|
-
require File.expand_path("../tyccl/version", __FILE__)
|
14
|
-
require "algorithms"
|
15
|
-
require "yaml"
|
16
|
-
require "
|
17
|
-
require "logger"
|
12
|
+
require File.expand_path("../tyccl/version", __FILE__)#:nodoc:all
|
13
|
+
require "algorithms"#:nodoc:all
|
14
|
+
require "yaml"#:nodoc:all
|
15
|
+
require "logger"#:nodoc:all
|
18
16
|
|
19
17
|
|
20
18
|
# this struct is used to return analysing result
|
@@ -26,69 +24,68 @@ Result_t = Struct.new(:value,:x_id,:y_id)
|
|
26
24
|
# to keep Tyccl object just only one.
|
27
25
|
class Tyccl
|
28
26
|
|
29
|
-
|
30
|
-
|
27
|
+
#--
|
31
28
|
# Read the Cilin file to memory.
|
32
29
|
# Format the data structure \#@IDsTrie.
|
33
30
|
# Index the hash \#@IDsIndex.
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
}
|
55
|
-
items << word
|
56
|
-
}
|
57
|
-
#--
|
58
|
-
#init Trie of cilin.txt
|
59
|
-
#++
|
60
|
-
@IDsTrie = Containers::Trie.new
|
61
|
-
i=0
|
62
|
-
codes.each{ |key|
|
63
|
-
@IDsTrie[key]=items[i]
|
64
|
-
i+=1
|
31
|
+
#++
|
32
|
+
#--
|
33
|
+
#read the cilin.txt to ids[] and items[]
|
34
|
+
#++
|
35
|
+
@logger = Logger.new(STDOUT)
|
36
|
+
@logger.level = Logger::WARN
|
37
|
+
codes=[]
|
38
|
+
items=[]
|
39
|
+
@IDsIndex = Hash.new
|
40
|
+
f = File.new(File.expand_path("../cilin.txt", __FILE__))
|
41
|
+
i=0
|
42
|
+
f.each { |line|
|
43
|
+
line.force_encoding('utf-8')
|
44
|
+
m=line.split(" ")
|
45
|
+
codes << m[0]
|
46
|
+
@IDsIndex[m[0]] = i
|
47
|
+
i += 1
|
48
|
+
word = Array.new
|
49
|
+
m[1..-1].each{ |term|
|
50
|
+
word << term
|
65
51
|
}
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
52
|
+
items << word
|
53
|
+
}
|
54
|
+
#--
|
55
|
+
#init Trie of cilin.txt
|
56
|
+
#++
|
57
|
+
@IDsTrie = Containers::Trie.new
|
58
|
+
i=0
|
59
|
+
codes.each{ |key|
|
60
|
+
@IDsTrie[key]=items[i]
|
61
|
+
i+=1
|
62
|
+
}
|
63
|
+
#--
|
64
|
+
#init index of cilin.txt
|
65
|
+
#++
|
66
|
+
@index = YAML::load(File.open(File.expand_path("../Inverted.yaml", __FILE__)))
|
67
|
+
|
71
68
|
|
72
69
|
|
73
70
|
|
74
71
|
# Given id(string) such as:"Aa01A01=" "Aa01A03#"
|
75
72
|
# Returns an array containing words(string) that match this id
|
76
73
|
# If no match is found, nil is returned.
|
77
|
-
def get_words_by_id(id)
|
74
|
+
def self.get_words_by_id(id)
|
78
75
|
@IDsTrie[id]
|
79
76
|
end
|
80
77
|
|
81
78
|
# Returns a sorted array containing IDs(string) that match the parameter Wildcard(string).
|
82
79
|
# The wildcard characters that match any character are ‘*’ and ‘.’ such as "Aa01A..=","Aa**A..."
|
83
80
|
# If no match is found, an empty array is returned.
|
84
|
-
def get_ids_by_wildcard(wildcard)
|
81
|
+
def self.get_ids_by_wildcard(wildcard)
|
85
82
|
@IDsTrie.wildcard(wildcard)
|
86
83
|
end
|
87
84
|
|
88
85
|
# Returns an array containing IDs(string) that the parameter Word(string) matches.
|
89
86
|
#
|
90
87
|
# tips: the same word may have a few semantic meanings, so a word can match many IDs.
|
91
|
-
def get_ids_by_word(word)
|
88
|
+
def self.get_ids_by_word(word)
|
92
89
|
m = @index[word]
|
93
90
|
if(m==nil)
|
94
91
|
@logger.error(word+" is an unlisted word!")
|
@@ -101,7 +98,7 @@ class Tyccl
|
|
101
98
|
# Given a word(string).
|
102
99
|
# Test to see if the parameter Word has any synonym.
|
103
100
|
# Returns true or false.
|
104
|
-
def has_same?(word)
|
101
|
+
def self.has_same?(word)
|
105
102
|
ids = get_ids_by_word(word)
|
106
103
|
i=0
|
107
104
|
flag=false
|
@@ -119,7 +116,7 @@ class Tyccl
|
|
119
116
|
# Given a word(string).
|
120
117
|
# Test to see if the parameter Word has any equivalent word.
|
121
118
|
# Returns true or false.
|
122
|
-
def has_equal?(word)
|
119
|
+
def self.has_equal?(word)
|
123
120
|
ids = get_ids_by_word(word)
|
124
121
|
i=0
|
125
122
|
flag=false
|
@@ -138,7 +135,7 @@ class Tyccl
|
|
138
135
|
# Test to see if the parameter Word has any ID whose corresponding
|
139
136
|
# words list just has only one element.
|
140
137
|
# Returns true or false.
|
141
|
-
def has_single?(word)
|
138
|
+
def self.has_single?(word)
|
142
139
|
ids = get_ids_by_word(word)
|
143
140
|
i=0
|
144
141
|
flag=false
|
@@ -157,7 +154,7 @@ class Tyccl
|
|
157
154
|
# Returns a two dimensional array that contains the parameter Word`s
|
158
155
|
# synonyms, grouped by each different ID that the word matches.
|
159
156
|
# If the word has no synonym, nil is returned.
|
160
|
-
def get_same(word)
|
157
|
+
def self.get_same(word)
|
161
158
|
if has_same?(word)
|
162
159
|
same_words=[]
|
163
160
|
ids = get_ids_by_word(word)
|
@@ -175,7 +172,7 @@ class Tyccl
|
|
175
172
|
# Returns a two dimensional array that contains the parameter Word`s
|
176
173
|
# equivalent words, grouped by each different ID that the word matches.
|
177
174
|
# If the word has no equivalent word, nil is returned.
|
178
|
-
def get_equal(word)
|
175
|
+
def self.get_equal(word)
|
179
176
|
if has_equal?(word)
|
180
177
|
equal_words=[]
|
181
178
|
ids = get_ids_by_word(word)
|
@@ -198,7 +195,7 @@ class Tyccl
|
|
198
195
|
#
|
199
196
|
# tips: level 0,1,2,3,4 correspond Cilin(同义词词林) ID`s different
|
200
197
|
# segment: A,a,01,A,01=.
|
201
|
-
def get_similar(word, level=4)
|
198
|
+
def self.get_similar(word, level=4)
|
202
199
|
ids = get_ids_by_word(word)
|
203
200
|
similar=[]
|
204
201
|
ids.each{ |code|
|
@@ -221,7 +218,7 @@ class Tyccl
|
|
221
218
|
|
222
219
|
# Given idA(string) and idB(string).
|
223
220
|
# Returns semantic distance(int) between idA and idB, values in [0,10].
|
224
|
-
def get_dist_by_id(idA, idB)
|
221
|
+
def self.get_dist_by_id(idA, idB)
|
225
222
|
alpha=10.0/5
|
226
223
|
n = compare_id(idA,idB)
|
227
224
|
(alpha*(5-n)).round
|
@@ -229,7 +226,7 @@ class Tyccl
|
|
229
226
|
|
230
227
|
# Given idA(string) and idB(string).
|
231
228
|
# Returns similarity(float) between idA and idB, values in [0,1].
|
232
|
-
def get_sim_by_id(idA, idB)
|
229
|
+
def self.get_sim_by_id(idA, idB)
|
233
230
|
n = compare_id(idA,idB)
|
234
231
|
str = idA.clone
|
235
232
|
if n==0
|
@@ -254,7 +251,7 @@ class Tyccl
|
|
254
251
|
# Given wordA(string) and wordB(string).
|
255
252
|
# Returns a Struct Result_t which contains idA, idB, and shortest
|
256
253
|
# semantic distance(int) between wordA and wordB.
|
257
|
-
def dist(wordA, wordB)
|
254
|
+
def self.dist(wordA, wordB)
|
258
255
|
alpha=10.0/5
|
259
256
|
shortest_Pair = Result_t.new(100,"","")
|
260
257
|
idAs = get_ids_by_word(wordA)
|
@@ -277,7 +274,7 @@ class Tyccl
|
|
277
274
|
# Given wordA(string) and wordB(string).
|
278
275
|
# Returns a Struct Result_t which contains the most similar Pairs
|
279
276
|
# wordA`s ID and wordB`s ID, and similarity(float) between idA and idB.
|
280
|
-
def sim(wordA, wordB)
|
277
|
+
def self.sim(wordA, wordB)
|
281
278
|
factor=[0.02,0.65,0.8,0.9,0.96,1,0.5]#0,1,2,3,4,5各层参数
|
282
279
|
longest_Pair = Result_t.new(-1,"","")
|
283
280
|
idAs = get_ids_by_word(wordA)
|
@@ -320,7 +317,7 @@ class Tyccl
|
|
320
317
|
# segment: A,a,01,A,01=.
|
321
318
|
# Returns a string in which '.' replaces every char from
|
322
319
|
# the start_index to the string`s end.
|
323
|
-
def gen_findstring(code, start_index)
|
320
|
+
def self.gen_findstring(code, start_index)
|
324
321
|
frame = cut_id(code)
|
325
322
|
(start_index).upto(4){|i|
|
326
323
|
0.upto(frame[i].size-1){ |j|
|
@@ -333,13 +330,13 @@ class Tyccl
|
|
333
330
|
# Given a id(string).
|
334
331
|
# Returns an array that contains 5 strings which are ID`s
|
335
332
|
# different segments, like: A,a,01,A,01= .
|
336
|
-
def cut_id(id)
|
333
|
+
def self.cut_id(id)
|
337
334
|
frame=[id[0],id[1],id[2..3],id[4],id[5..7]]
|
338
335
|
return frame
|
339
336
|
end
|
340
337
|
|
341
338
|
# the method #cut_id`s inverse process.
|
342
|
-
def combine_id(frame)
|
339
|
+
def self.combine_id(frame)
|
343
340
|
m=""
|
344
341
|
frame.each{|seg|
|
345
342
|
m << seg
|
@@ -350,7 +347,7 @@ class Tyccl
|
|
350
347
|
# Given idA(string) and idB(string).
|
351
348
|
# Returns the first place where their segments differ; place values are in [0,4].
|
352
349
|
# if they are the same , returns 5.
|
353
|
-
def compare_id(idA, idB)
|
350
|
+
def self.compare_id(idA, idB)
|
354
351
|
frameA=cut_id(idA)
|
355
352
|
frameB=cut_id(idB)
|
356
353
|
0.upto(frameA.length-1){ |i|
|
@@ -362,12 +359,12 @@ class Tyccl
|
|
362
359
|
end
|
363
360
|
|
364
361
|
# Returns the total number of different ID in Cilin.
|
365
|
-
def get_id_sum
|
362
|
+
def self.get_id_sum
|
366
363
|
@IDsIndex.size
|
367
364
|
end
|
368
365
|
|
369
366
|
# Returns the total number of different words in Cilin.
|
370
|
-
def get_index_sum
|
367
|
+
def self.get_index_sum
|
371
368
|
@index.size
|
372
369
|
end
|
373
370
|
|
data/lib/tyccl/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
class Tyccl
|
2
|
-
VERSION = "0.0.
|
1
|
+
class Tyccl #:nodoc:all
|
2
|
+
VERSION = "0.0.2"
|
3
3
|
end
|
data/test/test_tyccl.rb
CHANGED
@@ -5,75 +5,75 @@ require 'test/unit'
|
|
5
5
|
require File.expand_path('../../lib/tyccl', __FILE__)
|
6
6
|
|
7
7
|
|
8
|
-
$tyc=Tyccl.instance
|
9
8
|
|
10
|
-
|
9
|
+
|
10
|
+
class TycclTest < Test::Unit::TestCase #:nodoc:all
|
11
11
|
|
12
12
|
def test_instance
|
13
13
|
assert_equal 17809,
|
14
|
-
|
14
|
+
Tyccl.get_id_sum
|
15
15
|
assert_equal 77457,
|
16
|
-
|
16
|
+
Tyccl.get_index_sum
|
17
17
|
end
|
18
18
|
|
19
19
|
def test_get_words_by_id
|
20
20
|
assert_equal ["人","士","人物","人士","人氏","人选"],
|
21
|
-
|
21
|
+
Tyccl.get_words_by_id("Aa01A01=")
|
22
22
|
assert_equal nil,
|
23
|
-
|
23
|
+
Tyccl.get_words_by_id("dfdf")
|
24
24
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def test_get_ids_by_wildcard
|
28
28
|
assert_equal 9,
|
29
|
-
|
29
|
+
Tyccl.get_ids_by_wildcard("Aa01A...").size
|
30
30
|
assert_equal 32,
|
31
|
-
|
31
|
+
Tyccl.get_ids_by_wildcard("Aa**A...").size
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_get_ids_by_word
|
35
35
|
assert_equal nil,
|
36
|
-
|
36
|
+
Tyccl.get_ids_by_word("屌丝")
|
37
37
|
assert_equal 1,
|
38
|
-
|
38
|
+
Tyccl.get_ids_by_word("桅顶").size
|
39
39
|
assert_equal 7,
|
40
|
-
|
40
|
+
Tyccl.get_ids_by_word("底").size
|
41
41
|
end
|
42
42
|
|
43
43
|
def test_has_same
|
44
44
|
assert_equal true,
|
45
|
-
|
45
|
+
Tyccl.has_same?("人")
|
46
46
|
assert_equal false,
|
47
|
-
|
47
|
+
Tyccl.has_same?("顺民")
|
48
48
|
assert_equal false,
|
49
|
-
|
49
|
+
Tyccl.has_same?("众学生")
|
50
50
|
end
|
51
51
|
|
52
52
|
def test_has_equal
|
53
53
|
assert_equal true,
|
54
|
-
|
54
|
+
Tyccl.has_equal?("良民")
|
55
55
|
assert_equal false,
|
56
|
-
|
56
|
+
Tyccl.has_equal?("众学生")
|
57
57
|
assert_equal false,
|
58
|
-
|
58
|
+
Tyccl.has_equal?("人")
|
59
59
|
end
|
60
60
|
|
61
61
|
def test_has_single
|
62
62
|
assert_equal false,
|
63
|
-
|
63
|
+
Tyccl.has_single?("良民")
|
64
64
|
assert_equal true,
|
65
|
-
|
65
|
+
Tyccl.has_single?("众学生")
|
66
66
|
assert_equal false,
|
67
|
-
|
67
|
+
Tyccl.has_single?("人")
|
68
68
|
end
|
69
69
|
|
70
70
|
def test_get_same
|
71
|
-
m
|
71
|
+
m=Tyccl.get_same("人")
|
72
72
|
|
73
73
|
assert_equal nil,
|
74
|
-
|
74
|
+
Tyccl.get_same("顺民")
|
75
75
|
assert_equal nil,
|
76
|
-
|
76
|
+
Tyccl.get_same("众学生")
|
77
77
|
assert_equal 5,
|
78
78
|
m.size
|
79
79
|
assert_equal 6,
|
@@ -91,13 +91,13 @@ class TycclTest < Test::Unit::TestCase
|
|
91
91
|
|
92
92
|
def test_get_equal
|
93
93
|
assert_equal nil,
|
94
|
-
|
94
|
+
Tyccl.get_equal("人")
|
95
95
|
assert_equal nil,
|
96
|
-
|
96
|
+
Tyccl.get_equal("众学生")
|
97
97
|
assert_equal 1,
|
98
|
-
|
98
|
+
Tyccl.get_equal("流民").size
|
99
99
|
assert_equal 9,
|
100
|
-
|
100
|
+
Tyccl.get_equal("流民")[0].size
|
101
101
|
end
|
102
102
|
|
103
103
|
def test_get_similar
|
@@ -106,24 +106,24 @@ class TycclTest < Test::Unit::TestCase
|
|
106
106
|
["身体", "人"],
|
107
107
|
["人格", "人品", "人头", "人", "品质", "质地", "格调", "灵魂", "为人"],
|
108
108
|
["人数", "人头", "人口", "人", "口", "丁", "家口", "食指", "总人口"] ],
|
109
|
-
|
109
|
+
Tyccl.get_similar("人")
|
110
110
|
end
|
111
111
|
|
112
112
|
# dist ranges [0,10];
|
113
113
|
# if dist<7 then we believe that the two words are related
|
114
114
|
def test_dist
|
115
115
|
assert_equal Result_t.new(0,"Aa01A01=","Aa01A01="),
|
116
|
-
|
116
|
+
Tyccl.dist("人","士")
|
117
117
|
assert_equal Result_t.new(2,"Bh06A32=","Bh06A34="),
|
118
|
-
|
118
|
+
Tyccl.dist("西红柿","黄瓜")
|
119
119
|
assert_equal Result_t.new(4,"Aa01A05=","Aa01B03#"),
|
120
|
-
|
120
|
+
Tyccl.dist("匹夫","良民")
|
121
121
|
assert_equal Result_t.new(6,"Bh07A14=","Bh06A32="),
|
122
|
-
|
122
|
+
Tyccl.dist("苹果","西红柿")
|
123
123
|
assert_equal Result_t.new(8,"Aa01B02=","Ab01B10="),
|
124
|
-
|
124
|
+
Tyccl.dist("群众","村姑")
|
125
125
|
assert_equal Result_t.new(10,"Aa01A01=","Kd04C01="),
|
126
|
-
|
126
|
+
Tyccl.dist("人","哟")
|
127
127
|
end
|
128
128
|
|
129
129
|
def test_sim
|
@@ -142,7 +142,7 @@ class TycclTest < Test::Unit::TestCase
|
|
142
142
|
i=0
|
143
143
|
words.each{ |word|
|
144
144
|
assert_equal result[i],
|
145
|
-
|
145
|
+
Tyccl.sim("人民",word)
|
146
146
|
i+=1
|
147
147
|
}
|
148
148
|
end
|