wordcut 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/wordcut/dict.rb CHANGED
@@ -1,12 +1,21 @@
1
1
  require_relative "dict_seek"
2
2
  module Wordcut
3
3
  class WordItem
4
- attr_reader :headword
4
+ attr_reader :headword, :payload
5
5
  def initialize(headword)
6
6
  @headword = headword
7
- end
7
+ @payload = nil
8
+ end
8
9
  end
9
10
 
11
+ class WordItemWithPayload
12
+ attr_reader :headword, :payload
13
+ def initialize(headword, payload)
14
+ @headword = headword
15
+ @payload = payload
16
+ end
17
+ end
18
+
10
19
  module DictInfo
11
20
  def l
12
21
  0
@@ -34,7 +43,9 @@ module Wordcut
34
43
  self.concat(open(path).each_line
35
44
  .map(&:strip)
36
45
  .reject(&:empty?)
46
+ .sort
37
47
  .map{|w| WordItem.new w})
48
+
38
49
  end
39
50
  end
40
51
 
@@ -49,4 +60,8 @@ module Wordcut
49
60
  return dict
50
61
  end
51
62
  end
63
+
64
+ class DictWithPayload < Array
65
+ include DictSeeker
66
+ end
52
67
  end
data/wordcut/edge.rb CHANGED
@@ -3,7 +3,9 @@ module Wordcut
3
3
  class Edge
4
4
  attr_reader :unk, :chunk, :s, :payload, :etype
5
5
 
6
- CMP_FUNCS = [lambda {|e| e.unk}, lambda {|e| e.chunk}]
6
+ CMP_FUNCS = [lambda {|e| e.unk},
7
+ lambda {|e| e.chunk},
8
+ lambda {|e| e.payload ? 0 : 1}]
7
9
 
8
10
  def initialize(args = {})
9
11
  @unk = args[:unk] || 0
@@ -13,7 +13,7 @@ module Wordcut
13
13
  :unk => src.unk,
14
14
  :chunk => src.chunk + 1,
15
15
  :etype => :DICT,
16
- :payload => nil)
16
+ :payload => pointer.payload)
17
17
  end
18
18
  end
19
19
  end
data/wordcut/pointer.rb CHANGED
@@ -18,6 +18,10 @@ module Wordcut
18
18
  final = (@dict[l].headword.length == @offset + 1)
19
19
  self.class.new(@s, l, r, @offset + 1, @dict, final)
20
20
  end
21
+
22
+ def payload
23
+ @dict[@l].payload
24
+ end
21
25
  end
22
26
 
23
27
  module PointersManipulator
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordcut
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vee Satayamas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-03 00:00:00.000000000 Z
11
+ date: 2016-05-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Word segmentation tools for ASEAN languages written in Ruby
14
14
  email:
@@ -19,6 +19,7 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - LICENSE
21
21
  - README.md
22
+ - data/tha/mixed-tdict.txt
22
23
  - data/tha/tdict-acronyms.txt
23
24
  - data/tha/tdict-city.txt
24
25
  - data/tha/tdict-collection.txt