wordcut 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -2
- data/data/tha/mixed-tdict.txt +24124 -0
- data/wordcut/dict.rb +17 -2
- data/wordcut/edge.rb +3 -1
- data/wordcut/edge_builder.rb +1 -1
- data/wordcut/pointer.rb +4 -0
- metadata +3 -2
data/wordcut/dict.rb
CHANGED
@@ -1,12 +1,21 @@
|
|
1
1
|
require_relative "dict_seek"
|
2
2
|
module Wordcut
|
3
3
|
class WordItem
|
4
|
-
attr_reader :headword
|
4
|
+
attr_reader :headword, :payload
|
5
5
|
def initialize(headword)
|
6
6
|
@headword = headword
|
7
|
-
|
7
|
+
@payload = nil
|
8
|
+
end
|
8
9
|
end
|
9
10
|
|
11
|
+
class WordItemWithPayload
|
12
|
+
attr_reader :headword, :payload
|
13
|
+
def initialize(headword, payload)
|
14
|
+
@headword = headword
|
15
|
+
@payload = payload
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
10
19
|
module DictInfo
|
11
20
|
def l
|
12
21
|
0
|
@@ -34,7 +43,9 @@ module Wordcut
|
|
34
43
|
self.concat(open(path).each_line
|
35
44
|
.map(&:strip)
|
36
45
|
.reject(&:empty?)
|
46
|
+
.sort
|
37
47
|
.map{|w| WordItem.new w})
|
48
|
+
|
38
49
|
end
|
39
50
|
end
|
40
51
|
|
@@ -49,4 +60,8 @@ module Wordcut
|
|
49
60
|
return dict
|
50
61
|
end
|
51
62
|
end
|
63
|
+
|
64
|
+
class DictWithPayload < Array
|
65
|
+
include DictSeeker
|
66
|
+
end
|
52
67
|
end
|
data/wordcut/edge.rb
CHANGED
@@ -3,7 +3,9 @@ module Wordcut
|
|
3
3
|
class Edge
|
4
4
|
attr_reader :unk, :chunk, :s, :payload, :etype
|
5
5
|
|
6
|
-
CMP_FUNCS = [lambda {|e| e.unk},
|
6
|
+
CMP_FUNCS = [lambda {|e| e.unk},
|
7
|
+
lambda {|e| e.chunk},
|
8
|
+
lambda {|e| e.payload ? 0 : 1}]
|
7
9
|
|
8
10
|
def initialize(args = {})
|
9
11
|
@unk = args[:unk] || 0
|
data/wordcut/edge_builder.rb
CHANGED
data/wordcut/pointer.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wordcut
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vee Satayamas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Word segmentation tools for ASEAN languages written in Ruby
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- LICENSE
|
21
21
|
- README.md
|
22
|
+
- data/tha/mixed-tdict.txt
|
22
23
|
- data/tha/tdict-acronyms.txt
|
23
24
|
- data/tha/tdict-city.txt
|
24
25
|
- data/tha/tdict-collection.txt
|