wordcut 0.0.4 → 0.0.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/wordcut/dict.rb CHANGED
@@ -1,12 +1,21 @@
1
1
  require_relative "dict_seek"
2
2
  module Wordcut
3
3
  class WordItem
4
- attr_reader :headword
4
+ attr_reader :headword, :payload
5
5
  def initialize(headword)
6
6
  @headword = headword
7
- end
7
+ @payload = nil
8
+ end
8
9
  end
9
10
 
11
+ class WordItemWithPayload
12
+ attr_reader :headword, :payload
13
+ def initialize(headword, payload)
14
+ @headword = headword
15
+ @payload = payload
16
+ end
17
+ end
18
+
10
19
  module DictInfo
11
20
  def l
12
21
  0
@@ -34,7 +43,9 @@ module Wordcut
34
43
  self.concat(open(path).each_line
35
44
  .map(&:strip)
36
45
  .reject(&:empty?)
46
+ .sort
37
47
  .map{|w| WordItem.new w})
48
+
38
49
  end
39
50
  end
40
51
 
@@ -49,4 +60,8 @@ module Wordcut
49
60
  return dict
50
61
  end
51
62
  end
63
+
64
+ class DictWithPayload < Array
65
+ include DictSeeker
66
+ end
52
67
  end
data/wordcut/edge.rb CHANGED
@@ -3,7 +3,9 @@ module Wordcut
3
3
  class Edge
4
4
  attr_reader :unk, :chunk, :s, :payload, :etype
5
5
 
6
- CMP_FUNCS = [lambda {|e| e.unk}, lambda {|e| e.chunk}]
6
+ CMP_FUNCS = [lambda {|e| e.unk},
7
+ lambda {|e| e.chunk},
8
+ lambda {|e| e.payload ? 0 : 1}]
7
9
 
8
10
  def initialize(args = {})
9
11
  @unk = args[:unk] || 0
@@ -13,7 +13,7 @@ module Wordcut
13
13
  :unk => src.unk,
14
14
  :chunk => src.chunk + 1,
15
15
  :etype => :DICT,
16
- :payload => nil)
16
+ :payload => pointer.payload)
17
17
  end
18
18
  end
19
19
  end
data/wordcut/pointer.rb CHANGED
@@ -18,6 +18,10 @@ module Wordcut
18
18
  final = (@dict[l].headword.length == @offset + 1)
19
19
  self.class.new(@s, l, r, @offset + 1, @dict, final)
20
20
  end
21
+
22
+ def payload
23
+ @dict[@l].payload
24
+ end
21
25
  end
22
26
 
23
27
  module PointersManipulator
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordcut
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vee Satayamas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-03 00:00:00.000000000 Z
11
+ date: 2016-05-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Word segmentation tools for ASEAN languages written in Ruby
14
14
  email:
@@ -19,6 +19,7 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - LICENSE
21
21
  - README.md
22
+ - data/tha/mixed-tdict.txt
22
23
  - data/tha/tdict-acronyms.txt
23
24
  - data/tha/tdict-city.txt
24
25
  - data/tha/tdict-collection.txt