twitter_cldr 5.2.0 → 5.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -4
- data/Rakefile +19 -8
- data/lib/twitter_cldr/normalization.rb +18 -5
- data/lib/twitter_cldr/resources.rb +3 -1
- data/lib/twitter_cldr/resources/import_resolver.rb +11 -3
- data/lib/twitter_cldr/resources/loader.rb +22 -1
- data/lib/twitter_cldr/resources/locales_resources_importer.rb +0 -9
- data/lib/twitter_cldr/resources/postal_codes_importer.rb +19 -23
- data/lib/twitter_cldr/resources/segment_dictionaries_importer.rb +75 -0
- data/lib/twitter_cldr/resources/segment_tests_importer.rb +130 -13
- data/lib/twitter_cldr/segmentation.rb +25 -10
- data/lib/twitter_cldr/segmentation/brahmic_break_engine.rb +200 -0
- data/lib/twitter_cldr/segmentation/break_iterator.rb +22 -22
- data/lib/twitter_cldr/segmentation/burmese_break_engine.rb +83 -0
- data/lib/twitter_cldr/segmentation/category_table.rb +5 -1
- data/lib/twitter_cldr/segmentation/cj_break_engine.rb +163 -0
- data/lib/twitter_cldr/segmentation/cursor.rb +1 -1
- data/lib/twitter_cldr/segmentation/dictionary.rb +84 -0
- data/lib/twitter_cldr/segmentation/dictionary_break_engine.rb +34 -0
- data/lib/twitter_cldr/segmentation/khmer_break_engine.rb +83 -0
- data/lib/twitter_cldr/segmentation/korean_break_engine.rb +30 -0
- data/lib/twitter_cldr/segmentation/lao_break_engine.rb +85 -0
- data/lib/twitter_cldr/segmentation/line_iterator.rb +23 -0
- data/lib/twitter_cldr/segmentation/possible_word.rb +74 -0
- data/lib/twitter_cldr/segmentation/possible_word_list.rb +23 -0
- data/lib/twitter_cldr/segmentation/rule_set.rb +3 -12
- data/lib/twitter_cldr/segmentation/segment_iterator.rb +40 -0
- data/lib/twitter_cldr/segmentation/state_machine.rb +2 -8
- data/lib/twitter_cldr/segmentation/thai_break_engine.rb +141 -0
- data/lib/twitter_cldr/segmentation/unhandled_break_engine.rb +21 -0
- data/lib/twitter_cldr/segmentation/word_iterator.rb +170 -0
- data/lib/twitter_cldr/shared.rb +1 -0
- data/lib/twitter_cldr/shared/caser.rb +3 -3
- data/lib/twitter_cldr/shared/unicode_set.rb +77 -0
- data/lib/twitter_cldr/utils/range_set.rb +10 -1
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tailoring/km.yml +82 -0
- data/resources/collation/tailoring/lo.yml +4 -0
- data/resources/collation/tailoring/my.yml +940 -0
- data/resources/collation/tries/km.dump +0 -0
- data/resources/collation/tries/lo.dump +0 -0
- data/resources/collation/tries/my.dump +0 -0
- data/resources/locales/km/calendars.yml +373 -0
- data/resources/locales/km/currencies.yml +654 -0
- data/resources/locales/km/day_periods.yml +96 -0
- data/resources/locales/km/fields.yml +495 -0
- data/resources/locales/km/languages.yml +397 -0
- data/resources/locales/km/layout.yml +5 -0
- data/resources/locales/km/lists.yml +37 -0
- data/resources/locales/km/numbers.yml +402 -0
- data/resources/locales/km/plural_rules.yml +6 -0
- data/resources/locales/km/plurals.yml +12 -0
- data/resources/locales/km/rbnf.yml +131 -0
- data/resources/locales/km/territories.yml +267 -0
- data/resources/locales/km/timezones.yml +1471 -0
- data/resources/locales/km/units.yml +721 -0
- data/resources/locales/lo/calendars.yml +368 -0
- data/resources/locales/lo/currencies.yml +918 -0
- data/resources/locales/lo/day_periods.yml +96 -0
- data/resources/locales/lo/fields.yml +437 -0
- data/resources/locales/lo/languages.yml +529 -0
- data/resources/locales/lo/layout.yml +5 -0
- data/resources/locales/lo/lists.yml +42 -0
- data/resources/locales/lo/numbers.yml +476 -0
- data/resources/locales/lo/plural_rules.yml +7 -0
- data/resources/locales/lo/plurals.yml +14 -0
- data/resources/locales/lo/rbnf.yml +119 -0
- data/resources/locales/lo/territories.yml +265 -0
- data/resources/locales/lo/timezones.yml +1513 -0
- data/resources/locales/lo/units.yml +750 -0
- data/resources/locales/my/calendars.yml +374 -0
- data/resources/locales/my/currencies.yml +697 -0
- data/resources/locales/my/day_periods.yml +96 -0
- data/resources/locales/my/fields.yml +459 -0
- data/resources/locales/my/languages.yml +420 -0
- data/resources/locales/my/layout.yml +5 -0
- data/resources/locales/my/lists.yml +43 -0
- data/resources/locales/my/numbers.yml +417 -0
- data/resources/locales/my/plural_rules.yml +6 -0
- data/resources/locales/my/plurals.yml +12 -0
- data/resources/locales/my/rbnf.yml +145 -0
- data/resources/locales/my/territories.yml +265 -0
- data/resources/locales/my/timezones.yml +1479 -0
- data/resources/locales/my/units.yml +759 -0
- data/resources/locales/th/plurals.yml +1 -1
- data/resources/shared/segments/dictionaries/burmesedict.dump +0 -0
- data/resources/shared/segments/dictionaries/cjdict.dump +0 -0
- data/resources/shared/segments/dictionaries/khmerdict.dump +0 -0
- data/resources/shared/segments/dictionaries/laodict.dump +0 -0
- data/resources/shared/segments/dictionaries/thaidict.dump +0 -0
- data/resources/shared/segments/tests/dictionary_tests/combined.yml +1253 -0
- data/resources/shared/segments/tests/dictionary_tests/km.yml +204 -0
- data/resources/shared/segments/tests/dictionary_tests/ko.yml +171 -0
- data/resources/shared/segments/tests/dictionary_tests/lo.yml +236 -0
- data/resources/shared/segments/tests/dictionary_tests/my.yml +249 -0
- data/resources/shared/segments/tests/dictionary_tests/th.yml +201 -0
- data/resources/shared/segments/tests/dictionary_tests/zh.yml +206 -0
- data/resources/shared/segments/tests/line_break_test.yml +68 -68
- data/resources/shared/segments/tests/sentence_break_test.yml +52 -52
- data/resources/supported_locales.yml +3 -0
- data/spec/formatters/numbers/rbnf/locales/km/rbnf_test.yml +706 -0
- data/spec/formatters/numbers/rbnf/locales/lo/rbnf_test.yml +706 -0
- data/spec/formatters/numbers/rbnf/locales/my/rbnf_test.yml +706 -0
- data/spec/segmentation/dictionary_break_spec.rb +42 -0
- data/spec/segmentation/rule_set_spec.rb +3 -1
- data/spec/timezones/tests/km.yml +12475 -0
- data/spec/timezones/tests/lo.yml +12475 -0
- data/spec/timezones/tests/my.yml +12475 -0
- metadata +87 -3
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
  module Segmentation
    # Wraps a trie of dictionary words and reports which words start at a
    # cursor's current position. Named dictionaries are loaded from the
    # shared/segments/dictionaries resources and memoized per name.
    class Dictionary

      class << self
        def burmese
          get('burmese')
        end

        def cj
          get('cj')
        end

        def khmer
          get('khmer')
        end

        def lao
          get('lao')
        end

        def thai
          get('thai')
        end

        # Returns the Dictionary for +name+, loading the "#{name}dict.dump"
        # resource the first time it is requested and reusing the same
        # instance afterwards.
        def get(name)
          dictionary_cache[name] ||= begin
            resource = TwitterCldr.get_resource(
              'shared', 'segments', 'dictionaries', "#{name}dict.dump"
            )

            new(resource)
          end
        end

        private

        def dictionary_cache
          @dictionary_cache ||= {}
        end
      end

      attr_reader :trie

      # trie - an object that responds to #root; its nodes must respond to
      #        #child(codepoint), #has_value? and #value.
      def initialize(trie)
        @trie = trie
      end

      # Walks the trie along the code points that start at the cursor's
      # current position and collects every word found on the way.
      #
      # cursor            - responds to #length, #position and #codepoint
      # max_search_length - maximum number of code points to examine
      # limit             - maximum number of words to record
      #
      # Always returns a four-element array:
      #
      #   [count, values, lengths, num_chars]
      #
      # count     - number of words recorded (never more than limit)
      # values    - the trie values of those words
      # lengths   - the length, in code points, of each recorded word
      # num_chars - number of code points examined, including the one that
      #             ended the walk when no matching child was found
      #
      # An empty cursor yields [0, [], [], 0] so callers can destructure the
      # result unconditionally.
      def matches(cursor, max_search_length, limit)
        return [0, [], [], 0] if cursor.length == 0

        count = 0
        num_chars = 1
        current = trie.root.child(cursor.codepoint)
        values = []
        lengths = []

        until current.nil?
          # Keep walking after the limit is reached so num_chars still
          # reflects how much of the text was examined.
          if current.has_value? && count < limit
            values << current.value
            lengths << num_chars
            count += 1
          end

          break if num_chars >= max_search_length

          current = current.child(
            cursor.codepoint(cursor.position + num_chars)
          )

          num_chars += 1
        end

        [count, values, lengths, num_chars]
      end

    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
  module Segmentation
    # Abstract base for break engines that split a run of text using a
    # dictionary. Derived classes must provide #word_set and
    # #divide_up_dictionary_range.
    class DictionaryBreakEngine

      # Finds the end of the run of code points, starting at the cursor's
      # current position, that all belong to #word_set, then hands that range
      # to #divide_up_dictionary_range together with the given block.
      #
      # Returns an Enumerator when no block is given.
      def each_boundary(cursor, &block)
        return to_enum(__method__, cursor) unless block_given?

        stop = cursor.position

        unless cursor.eos?
          # Neither the code point list nor the word set changes while
          # scanning, so look them up once rather than on every iteration.
          codepoints = cursor.codepoints
          words = word_set

          # Bound the scan explicitly instead of relying on an out-of-range
          # index returning nil.
          stop += 1 while stop < codepoints.length && words.include?(codepoints[stop])
        end

        divide_up_dictionary_range(cursor, stop, &block)
      end

      # The set of code points this engine handles; must respond to #include?.
      def word_set(*args)
        raise NotImplementedError, "#{__method__} must be defined in derived classes"
      end

      private

      # Called with (cursor, stop) and the caller's block; derived classes
      # implement the actual segmentation here.
      def divide_up_dictionary_range(*args)
        raise NotImplementedError, "#{__method__} must be defined in derived classes"
      end

    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'singleton'
|
7
|
+
require 'forwardable'
|
8
|
+
|
9
|
+
module TwitterCldr
  module Segmentation

    # Singleton break engine for Khmer text. Segmentation itself is performed
    # by a BrahmicBreakEngine configured with Khmer-specific sets and limits.
    #
    # https://github.com/unicode-org/icu/blob/release-65-1/icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java
    class KhmerBreakEngine

      include Singleton
      extend Forwardable

      def_delegators :engine, :each_boundary

      # Memoized set of the code points matched by the Khmer / Line_Break=SA
      # pattern.
      def self.word_set
        @word_set ||= TwitterCldr::Shared::UnicodeSet.new.tap do |khmer|
          khmer.apply_pattern('[[:Khmer:]&[:Line_Break=SA:]]')
        end.to_set
      end

      private

      # Khmer shares its break logic with the other Brahmic scripts, so this
      # class composes a BrahmicBreakEngine rather than re-implementing it.
      def engine
        @engine ||= BrahmicBreakEngine.new(
          # Number of consecutive words considered "good enough"
          lookahead: 3,

          # A non-word is not combined with a preceding dictionary word longer
          # than this
          root_combine_threshold: 3,

          # A non-word sharing at least this much prefix with a dictionary word
          # is not combined with a preceding word
          prefix_combine_threshold: 3,

          # Minimum word size
          # NOTE(review): the ICU KhmerBreakEngine linked above is believed to
          # use a minimum word size of 2 (and a span of 4) - confirm that 4 is
          # intentional here.
          min_word: 4,

          # Minimum number of characters for two words (same as min_word for Khmer)
          min_word_span: 4,

          word_set: self.class.word_set,
          mark_set: mark_set,
          end_word_set: end_word_set,
          begin_word_set: begin_word_set,
          dictionary: Dictionary.khmer,

          # Suffix handling is not applicable to Khmer
          advance_past_suffix: ->(*) { 0 }
        )
      end

      # Khmer marks plus U+0020.
      def mark_set
        @mark_set ||= begin
          marks = TwitterCldr::Shared::UnicodeSet.new
          marks.apply_pattern('[[:Khmer:]&[:Line_Break=SA:]&[:M:]]')
          marks.add(0x0020)
          marks
        end
      end

      # Every word-set code point except KHMER SIGN COENG (U+17D2), which
      # combines some characters.
      def end_word_set
        @end_word_set ||= begin
          enders = TwitterCldr::Shared::UnicodeSet.new
          enders.add_list(self.class.word_set)
          enders.subtract(0x17D2)
          enders
        end
      end

      # Code points U+1780..U+17B3.
      def begin_word_set
        @begin_word_set ||= begin
          starters = TwitterCldr::Shared::UnicodeSet.new
          starters.add_range(0x1780..0x17B3)
          starters
        end
      end

    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'singleton'
|
7
|
+
|
8
|
+
module TwitterCldr
  module Segmentation
    # Singleton break engine for Korean text. All behaviour is inherited from
    # CjBreakEngine; this class only supplies the word set.
    class KoreanBreakEngine < CjBreakEngine

      include Singleton

      # Memoized set of the code points U+AC00..U+D7A3.
      def self.word_set
        @word_set ||= TwitterCldr::Shared::UnicodeSet.new.tap do |hangul|
          hangul.add_range(0xAC00..0xD7A3)
        end.to_set
      end

      private

      # Instance-level access to the class-level word set.
      def word_set
        self.class.word_set
      end

    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'singleton'
|
7
|
+
require 'forwardable'
|
8
|
+
|
9
|
+
module TwitterCldr
  module Segmentation

    # Singleton break engine for Lao text. Segmentation itself is performed by
    # a BrahmicBreakEngine configured with Lao-specific sets and limits.
    #
    # See: https://github.com/unicode-org/icu/blob/release-65-1/icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java
    class LaoBreakEngine

      include Singleton
      extend Forwardable

      def_delegators :engine, :each_boundary

      # Memoized set of the code points matched by the Lao / Line_Break=SA
      # pattern.
      def self.word_set
        @word_set ||= TwitterCldr::Shared::UnicodeSet.new.tap do |lao|
          lao.apply_pattern('[[:Laoo:]&[:Line_Break=SA:]]')
        end.to_set
      end

      private

      # Lao shares its break logic with the other Brahmic scripts, so this
      # class composes a BrahmicBreakEngine rather than re-implementing it.
      def engine
        @engine ||= BrahmicBreakEngine.new(
          # Number of consecutive words considered "good enough"
          lookahead: 3,

          # A non-word is not combined with a preceding dictionary word longer
          # than this
          root_combine_threshold: 3,

          # A non-word sharing at least this much prefix with a dictionary word
          # is not combined with a preceding word
          prefix_combine_threshold: 3,

          # Minimum word size
          min_word: 2,

          # Minimum number of characters for two words (same as min_word for Lao)
          # NOTE(review): the ICU LaoBreakEngine linked above is believed to
          # use twice the minimum word size (4) for this span - confirm that 2
          # is intentional here.
          min_word_span: 2,

          word_set: self.class.word_set,
          mark_set: mark_set,
          end_word_set: end_word_set,
          begin_word_set: begin_word_set,
          dictionary: Dictionary.lao,

          # Suffix handling is not applicable to Lao
          advance_past_suffix: ->(*) { 0 }
        )
      end

      # Lao marks plus U+0020.
      def mark_set
        @mark_set ||= begin
          marks = TwitterCldr::Shared::UnicodeSet.new
          marks.apply_pattern('[[:Laoo:]&[:Line_Break=SA:]&[:M:]]')
          marks.add(0x0020)
          marks
        end
      end

      # Every word-set code point except the prefix vowels.
      def end_word_set
        @end_word_set ||= begin
          enders = TwitterCldr::Shared::UnicodeSet.new
          enders.add_list(self.class.word_set)
          enders.subtract_range(0x0EC0..0x0EC4) # prefix vowels
          enders
        end
      end

      # Consonants and prefix vowels.
      def begin_word_set
        @begin_word_set ||= begin
          starters = TwitterCldr::Shared::UnicodeSet.new
          starters.add_range(0x0E81..0x0EAE) # basic consonants (including holes for corresponding Thai characters)
          starters.add_range(0x0EDC..0x0EDD) # digraph consonants (no Thai equivalent)
          starters.add_range(0x0EC0..0x0EC4) # prefix vowels
          starters
        end
      end

    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
  module Segmentation
    # Segment iterator for line-break boundaries.
    class LineIterator < SegmentIterator
      # Yields each boundary the rule set finds in +str+, or returns an
      # Enumerator when no block is given.
      #
      # Position 0 is deliberately not yielded up front: for the line
      # boundary type the state machine is left to find the first boundary
      # itself. The first boundary is normally the implicit start-of-text
      # boundary, but doing it this way passes nearly all of the Unicode
      # segmentation tests, which suggests that may not hold for the line
      # rules.
      def each_boundary(str, &block)
        if block_given?
          line_cursor = create_cursor(str)
          rule_set.each_boundary(line_cursor, &block)
        else
          to_enum(__method__, str)
        end
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
  module Segmentation
    # Tracks the dictionary words that could start at one text position and
    # lets a break engine step through them from longest to shortest.
    class PossibleWord
      # list size, limited by the maximum number of words in the dictionary
      # that form a nested sequence.
      POSSIBLE_WORD_LIST_MAX = 20

      def initialize
        # Initialise every piece of state up front so that no reader ever
        # sees nil before #candidates has run.
        @lengths = []
        @count = 0
        @offset = -1
        @prefix = 0
        @current = -1
        @mark = -1
      end

      # fill the list of candidates if needed, select the longest, and return the number found
      #
      # cursor     - responds to #position and #position=
      # dictionary - responds to #matches(cursor, max_length, limit)
      # end_pos    - position at which the dictionary search must stop
      def candidates(cursor, dictionary, end_pos)
        start = cursor.position

        if start != @offset
          @offset = start

          count, _, lengths, prefix = dictionary.matches(
            cursor, end_pos - start, POSSIBLE_WORD_LIST_MAX
          )

          # A dictionary may answer with a bare 0 instead of a full result
          # when there is nothing to search; normalise that so the lengths
          # and the prefix are never nil.
          @count = count || 0
          @lengths = lengths || []
          @prefix = prefix || 0

          # dictionary leaves text after longest prefix, not longest word, so back up.
          cursor.position = start if @count <= 0
        end

        # The last recorded length belongs to the longest candidate.
        cursor.position = start + @lengths[@count - 1] if @count > 0

        @current = @count - 1
        @mark = @current

        @count
      end

      # select the currently marked candidate, point after it in the text, and invalidate self
      #
      # Returns the length of the marked candidate.
      def accept_marked(cursor)
        cursor.position = @offset + @lengths[@mark]
        @lengths[@mark]
      end

      # back up from the current candidate to the next shorter one; return true if that exists
      # and point the text after it
      def back_up(cursor)
        return false unless @current > 0

        @current -= 1
        cursor.position = @offset + @lengths[@current]
        true
      end

      # return the longest prefix this candidate location shares with a dictionary word
      def longest_prefix
        @prefix
      end

      # mark the current candidate as the one we like
      def mark_current
        @mark = @current
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
  module Segmentation
    # Fixed-size list of PossibleWord instances whose index wraps around, so
    # any integer index maps onto one of the slots.
    class PossibleWordList

      attr_reader :length, :items

      # length - number of PossibleWord slots to allocate
      def initialize(length)
        @length = length
        @items = Array.new(length) { PossibleWord.new }
      end

      # Returns the slot for +idx+, taken modulo the list length.
      def [](idx)
        slot = idx % length
        items[slot]
      end

    end
  end
end
|