rmmseg 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/README.txt +1 -1
- data/TODO.txt +1 -0
- data/lib/rmmseg/ferret.rb +5 -1
- data/lib/rmmseg/token.rb +0 -19
- data/lib/rmmseg.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.txt
CHANGED
@@ -10,7 +10,7 @@ algorithms. Two algorithms are available for using:
|
|
10
10
|
|
11
11
|
* simple algorithm that uses only forward maximum matching.
|
12
12
|
* complex algorithm that uses three-word chunk maximum matching and 3
|
13
|
-
|
13
|
+
additonal rules to solve ambiguities.
|
14
14
|
|
15
15
|
For more information about the algorithm, please refer to the
|
16
16
|
following essays:
|
data/TODO.txt
CHANGED
data/lib/rmmseg/ferret.rb
CHANGED
data/lib/rmmseg/token.rb
CHANGED
@@ -18,9 +18,6 @@ module RMMSeg
|
|
18
18
|
# token. This is *byte* index instead of character.
|
19
19
|
attr_accessor :end
|
20
20
|
|
21
|
-
# See Ferret document for Token.
|
22
|
-
attr_accessor :pos_inc
|
23
|
-
|
24
21
|
# +text+ is the ref to the whole text. In other words:
|
25
22
|
# +text[start_pos...end_pos]+ should be the string held by this
|
26
23
|
# token.
|
@@ -28,23 +25,7 @@ module RMMSeg
|
|
28
25
|
@text = text
|
29
26
|
@start = start_pos
|
30
27
|
@end = end_pos
|
31
|
-
@pos_inc = 1
|
32
|
-
end
|
33
|
-
|
34
|
-
def <=> other
|
35
|
-
if @start > other.start
|
36
|
-
return 1
|
37
|
-
elsif @start < other.start
|
38
|
-
return -1
|
39
|
-
elsif @end > other.end
|
40
|
-
return 1
|
41
|
-
elsif @end < other.end
|
42
|
-
return -1
|
43
|
-
else
|
44
|
-
return @text <=> other.text
|
45
|
-
end
|
46
28
|
end
|
47
|
-
include Comparable
|
48
29
|
|
49
30
|
def to_s
|
50
31
|
@text.dup
|
data/lib/rmmseg.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rmmseg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pluskid
|
@@ -9,11 +9,11 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-03-
|
12
|
+
date: 2008-03-04 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description: "RMMSeg is an implementation of MMSEG Chinese word segmentation algorithm. It is based on two variants of maximum matching algorithms. Two algorithms are available for using: * simple algorithm that uses only forward maximum matching. * complex algorithm that uses three-word chunk maximum matching and 3
|
16
|
+
description: "RMMSeg is an implementation of MMSEG Chinese word segmentation algorithm. It is based on two variants of maximum matching algorithms. Two algorithms are available for using: * simple algorithm that uses only forward maximum matching. * complex algorithm that uses three-word chunk maximum matching and 3 additonal rules to solve ambiguities. For more information about the algorithm, please refer to the following essays: * http://technology.chtsai.org/mmseg/ * http://pluskid.lifegoo.com/?p=261"
|
17
17
|
email: pluskid@gmail.com
|
18
18
|
executables:
|
19
19
|
- rmmseg
|