wordcut 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -2
- data/data/tha/mixed-tdict.txt +24124 -0
- data/wordcut/dict.rb +17 -2
- data/wordcut/edge.rb +3 -1
- data/wordcut/edge_builder.rb +1 -1
- data/wordcut/pointer.rb +4 -0
- metadata +3 -2
data/wordcut/dict.rb
CHANGED
@@ -1,12 +1,21 @@
|
|
1
1
|
require_relative "dict_seek"
|
2
2
|
module Wordcut
|
3
3
|
class WordItem
|
4
|
-
attr_reader :headword
|
4
|
+
attr_reader :headword, :payload
|
5
5
|
def initialize(headword)
|
6
6
|
@headword = headword
|
7
|
-
|
7
|
+
@payload = nil
|
8
|
+
end
|
8
9
|
end
|
9
10
|
|
11
|
+
class WordItemWithPayload
|
12
|
+
attr_reader :headword, :payload
|
13
|
+
def initialize(headword, payload)
|
14
|
+
@headword = headword
|
15
|
+
@payload = payload
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
10
19
|
module DictInfo
|
11
20
|
def l
|
12
21
|
0
|
@@ -34,7 +43,9 @@ module Wordcut
|
|
34
43
|
self.concat(open(path).each_line
|
35
44
|
.map(&:strip)
|
36
45
|
.reject(&:empty?)
|
46
|
+
.sort
|
37
47
|
.map{|w| WordItem.new w})
|
48
|
+
|
38
49
|
end
|
39
50
|
end
|
40
51
|
|
@@ -49,4 +60,8 @@ module Wordcut
|
|
49
60
|
return dict
|
50
61
|
end
|
51
62
|
end
|
63
|
+
|
64
|
+
class DictWithPayload < Array
|
65
|
+
include DictSeeker
|
66
|
+
end
|
52
67
|
end
|
data/wordcut/edge.rb
CHANGED
@@ -3,7 +3,9 @@ module Wordcut
|
|
3
3
|
class Edge
|
4
4
|
attr_reader :unk, :chunk, :s, :payload, :etype
|
5
5
|
|
6
|
-
CMP_FUNCS = [lambda {|e| e.unk},
|
6
|
+
CMP_FUNCS = [lambda {|e| e.unk},
|
7
|
+
lambda {|e| e.chunk},
|
8
|
+
lambda {|e| e.payload ? 0 : 1}]
|
7
9
|
|
8
10
|
def initialize(args = {})
|
9
11
|
@unk = args[:unk] || 0
|
data/wordcut/edge_builder.rb
CHANGED
data/wordcut/pointer.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wordcut
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vee Satayamas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Word segmentation tools for ASEAN languages written in Ruby
|
14
14
|
email:
|
@@ -19,6 +19,7 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- LICENSE
|
21
21
|
- README.md
|
22
|
+
- data/tha/mixed-tdict.txt
|
22
23
|
- data/tha/tdict-acronyms.txt
|
23
24
|
- data/tha/tdict-city.txt
|
24
25
|
- data/tha/tdict-collection.txt
|