rmmseg 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/README.txt +1 -1
- data/TODO.txt +1 -0
- data/lib/rmmseg/ferret.rb +5 -1
- data/lib/rmmseg/token.rb +0 -19
- data/lib/rmmseg.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.txt
CHANGED
@@ -10,7 +10,7 @@ algorithms. Two algorithms are available for using:
|
|
10
10
|
|
11
11
|
* simple algorithm that uses only forward maximum matching.
|
12
12
|
* complex algorithm that uses three-word chunk maximum matching and 3
|
13
|
-
|
13
|
+
additonal rules to solve ambiguities.
|
14
14
|
|
15
15
|
For more information about the algorithm, please refer to the
|
16
16
|
following essays:
|
data/TODO.txt
CHANGED
data/lib/rmmseg/ferret.rb
CHANGED
data/lib/rmmseg/token.rb
CHANGED
@@ -18,9 +18,6 @@ module RMMSeg
|
|
18
18
|
# token. This is *byte* index instead of character.
|
19
19
|
attr_accessor :end
|
20
20
|
|
21
|
-
# See Ferret document for Token.
|
22
|
-
attr_accessor :pos_inc
|
23
|
-
|
24
21
|
# +text+ is the ref to the whole text. In other words:
|
25
22
|
# +text[start_pos...end_pos]+ should be the string held by this
|
26
23
|
# token.
|
@@ -28,23 +25,7 @@ module RMMSeg
|
|
28
25
|
@text = text
|
29
26
|
@start = start_pos
|
30
27
|
@end = end_pos
|
31
|
-
@pos_inc = 1
|
32
|
-
end
|
33
|
-
|
34
|
-
def <=> other
|
35
|
-
if @start > other.start
|
36
|
-
return 1
|
37
|
-
elsif @start < other.start
|
38
|
-
return -1
|
39
|
-
elsif @end > other.end
|
40
|
-
return 1
|
41
|
-
elsif @end < other.end
|
42
|
-
return -1
|
43
|
-
else
|
44
|
-
return @text <=> other.text
|
45
|
-
end
|
46
28
|
end
|
47
|
-
include Comparable
|
48
29
|
|
49
30
|
def to_s
|
50
31
|
@text.dup
|
data/lib/rmmseg.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rmmseg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pluskid
|
@@ -9,11 +9,11 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-03-
|
12
|
+
date: 2008-03-04 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description: "RMMSeg is an implementation of MMSEG Chinese word segmentation algorithm. It is based on two variants of maximum matching algorithms. Two algorithms are available for using: * simple algorithm that uses only forward maximum matching. * complex algorithm that uses three-word chunk maximum matching and 3
|
16
|
+
description: "RMMSeg is an implementation of MMSEG Chinese word segmentation algorithm. It is based on two variants of maximum matching algorithms. Two algorithms are available for using: * simple algorithm that uses only forward maximum matching. * complex algorithm that uses three-word chunk maximum matching and 3 additonal rules to solve ambiguities. For more information about the algorithm, please refer to the following essays: * http://technology.chtsai.org/mmseg/ * http://pluskid.lifegoo.com/?p=261"
|
17
17
|
email: pluskid@gmail.com
|
18
18
|
executables:
|
19
19
|
- rmmseg
|