langusta 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,8 +8,8 @@ class FalsifiedTest < Test::Unit::TestCase
8
8
  profiles = Dir[File.join(PROFILES_PATH, '*')].map do |filename|
9
9
  LangProfile.load_from_file(filename)
10
10
  end
11
- profiles.each_with_index do |profile, index|
12
- factory.add_profile(profile, index, profiles.length)
11
+ profiles.each do |profile|
12
+ factory.add_profile(profile)
13
13
  end
14
14
 
15
15
  incorrect_guesses = 0.0
@@ -18,7 +18,7 @@ class FalsifiedTest < Test::Unit::TestCase
18
18
  Dir['test/test_data/*'].each do |filename|
19
19
  language = filename.split(/\//).last
20
20
 
21
- ucs2_content = UCS2String.from_utf8(File.open(filename).read)
21
+ ucs2_content = Langusta.utf82cp(File.read(filename)) # File.read closes the file; File.open(...).read left the handle open
22
22
  detector = factory.create
23
23
  detector.append(ucs2_content)
24
24
 
data/test/test_command.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class CommandTest < Test::Unit::TestCase
@@ -1,58 +1,59 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class DetectorTest < Test::Unit::TestCase
4
- TRAINING_EN = "\x00a \x00a \x00a \x00b \x00b \x00c \x00c \x00d \x00e"
5
- TRAINING_FR = "\x00a \x00b \x00b \x00c \x00c \x00c \x00d \x00d \x00d"
6
- TRAINING_JP = "\x30\x42 \x30\x42 \x30\x42 \x30\x44 \x30\x46 \x30\x48 \x30\x48"
5
+ TRAINING_EN = [0x0061, 0x0061, 0x0061, 0x0062, 0x0062, 0x0063, 0x0063, 0x0064, 0x0065]
6
+ TRAINING_FR = [0x0061, 0x0062, 0x0062, 0x0063, 0x0063, 0x0063, 0x0064, 0x0064, 0x0064] # "a b b c c c d d d", matching the removed string form
7
+ TRAINING_JP = [0x3042, 0x3042, 0x3042, 0x3044, 0x3046, 0x3048, 0x3048]
7
8
 
8
9
  def setup
9
10
  @factory = DetectorFactory.new
10
11
  profile_en = LangProfile.new("en")
11
- TRAINING_EN.split(/ /).each do |w|
12
- profile_en.add(UCS2String.new(w))
12
+ TRAINING_EN.each do |w|
13
+ profile_en.add([w])
13
14
  end
14
- @factory.add_profile(profile_en, 0, 3)
15
+ @factory.add_profile(profile_en)
15
16
 
16
17
  profile_fr = LangProfile.new("fr")
17
- TRAINING_FR.split(/ /).each do |w|
18
- profile_fr.add(UCS2String.new(w))
18
+ TRAINING_FR.each do |w|
19
+ profile_fr.add([w])
19
20
  end
20
- @factory.add_profile(profile_fr, 1, 3)
21
+ @factory.add_profile(profile_fr)
21
22
 
22
23
  profile_jp = LangProfile.new("jp")
23
- TRAINING_JP.split(/ /).each do |w|
24
- profile_jp.add(UCS2String.new(w))
24
+ TRAINING_JP.each do |w|
25
+ profile_jp.add([w])
25
26
  end
26
- @factory.add_profile(profile_jp, 2, 3)
27
+ @factory.add_profile(profile_jp)
27
28
  end
28
29
 
29
30
  def test_detector1
30
31
  detector = @factory.create()
31
- detector.append(UCS2String.new("\x00a"))
32
+ detector.append([0x0061]) # "a"
32
33
  assert_equal("en", detector.detect())
33
34
  end
34
35
 
35
36
  def test_detector2
36
37
  detector = @factory.create()
37
- detector.append(UCS2String.new("\x00b\x00\x20\x00d"))
38
+ detector.append([0x0062, 0x0020, 0x0064]) # "b d"
38
39
  assert_equal("fr", detector.detect())
39
40
  end
40
41
 
41
42
  def test_detector3
42
43
  detector = @factory.create()
43
- detector.append(UCS2String.new("\x00d\x00 \x00e"))
44
+ detector.append([0x0064, 0x0020, 0x0065]) # "d e"
44
45
  assert_equal("en", detector.detect())
45
46
  end
46
47
 
47
48
  def test_detector4
48
49
  detector = @factory.create()
49
- detector.append(UCS2String.new("\x30\x42\x30\x42\x30\x42\x30\x42\x00a"))
50
+ detector.append([0x3042, 0x3042, 0x3042, 0x3042, 0x0061])
50
51
  assert_equal("jp", detector.detect())
51
52
  end
52
53
 
53
54
  def test_exceptions
54
55
  detector = @factory.create()
55
- detector.append(UCS2String.new(''))
56
+ detector.append([])
56
57
  assert_raises(NoFeaturesInTextError) do
57
58
  detector.detect()
58
59
  end
@@ -1,28 +1,40 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class DetectorFactoryTest < Test::Unit::TestCase
4
5
  def test_add_profile
5
- profile = LangProfile.new
6
+ profile = LangProfile.new('sample')
6
7
  factory = DetectorFactory.new
7
8
 
8
- factory.add_profile(profile, 0, 1)
9
+ factory.add_profile(profile)
9
10
 
10
11
  detector = factory.create(0.123)
11
12
  assert_equal(0.123, detector.alpha)
12
13
  end
13
14
 
14
15
  def test_exceptions
15
- profile = LangProfile.new
16
+ profile = LangProfile.new('sample')
16
17
  factory = DetectorFactory.new
17
18
 
18
19
  assert_raises(NoProfilesLoadedError) do
19
20
  factory.create()
20
21
  end
21
22
 
22
- factory.add_profile(profile, 0, 2)
23
+ factory.add_profile(profile)
23
24
 
24
25
  assert_raises(DuplicateProfilesError) do
25
- factory.add_profile(profile, 1, 2)
26
+ factory.add_profile(profile)
26
27
  end
27
28
  end
29
+
30
+ def test_inspect
31
+ profile = LangProfile.new('sample')
32
+ factory = DetectorFactory.new
33
+
34
+ factory.add_profile(profile)
35
+
36
+ assert_match(Regexp.new(factory.object_ptr), factory.inspect)
37
+ assert_match(/1 profile\(s\)/, factory.inspect)
38
+ assert_match(Regexp.new(factory.class.name), factory.inspect)
39
+ end
28
40
  end
@@ -1,8 +1,9 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class JavaPropertyReaderTest < Test::Unit::TestCase
4
5
  def test_parse
5
6
  jpr = JavaPropertyReader.new(MESSAGES_PROPERTIES)
6
- assert_equal("\x4f\x7c\x69\x34", jpr["NGram.KANJI_1_0"])
7
+ assert_equal([0x4f7c, 0x6934], jpr["NGram.KANJI_1_0"])
7
8
  end
8
9
  end
@@ -3,8 +3,12 @@ require 'test/helper'
3
3
 
4
4
  class LangProfileTest < Test::Unit::TestCase
5
5
  def test_lang_profile
6
- profile = LangProfile.new
7
- assert_nil(profile.name)
6
+ assert_raises(ArgumentError) do
7
+ LangProfile.new
8
+ end
9
+ assert_raises(TypeError) do
10
+ LangProfile.new(nil)
11
+ end
8
12
  end
9
13
 
10
14
  def test_lang_profile_string_int
@@ -14,52 +18,54 @@ class LangProfileTest < Test::Unit::TestCase
14
18
 
15
19
  def test_add
16
20
  profile = LangProfile.new('en')
17
- profile.add(UCS2String.from_utf8("a"))
18
- assert_equal(1, profile.freq[UCS2String.from_utf8("a")])
19
- profile.add(UCS2String.from_utf8("a"))
20
- assert_equal(2, profile.freq[UCS2String.from_utf8("a")])
21
+ profile.add(utf82cp("a"))
22
+ assert_equal(1, profile.freq[utf82cp("a")])
23
+ profile.add(utf82cp("a"))
24
+ assert_equal(2, profile.freq[utf82cp("a")])
21
25
  profile.omit_less_freq()
22
26
  end
23
27
 
24
28
  def test_add_illegally_1
25
- profile = LangProfile.new
26
- profile.add(UCS2String.from_utf8("a"))
27
- assert_nil(profile.freq[UCS2String.from_utf8("a")])
29
+ profile = LangProfile.new('sample')
30
+ profile.add(utf82cp("a"))
31
+ assert_equal(1, profile.freq[utf82cp("a")])
28
32
  end
29
33
 
30
34
  def test_add_illegally_2
31
35
  profile = LangProfile.new('en')
32
- profile.add(UCS2String.from_utf8("a"))
33
- profile.add(UCS2String.from_utf8(""))
34
- profile.add(UCS2String.from_utf8("abcd"))
35
- assert_equal(1, profile.freq[UCS2String.from_utf8("a")])
36
- assert_nil(profile.freq[UCS2String.from_utf8("")])
37
- assert_nil(profile.freq[UCS2String.from_utf8("abcd")])
36
+ profile.add(utf82cp("a"))
37
+ profile.add(utf82cp(""))
38
+ profile.add(utf82cp("abcd"))
39
+ assert_equal(1, profile.freq[utf82cp("a")])
40
+ assert_nil(profile.freq[utf82cp("")])
41
+ assert_nil(profile.freq[utf82cp("abcd")])
38
42
  end
39
43
 
40
44
  def test_omit_less_freq
41
45
  profile = LangProfile.new('en')
42
- grams = "\x00a \x00b \x00c \x30\x42 \x30\x44 \x30\x46 \x30\x48 \x30\x4a \x30\x4b \x30\x4c \x30\x4d \x30\x4e \x30\x4f".split(/ /)
46
+ grams = [0x0061, 0x0062, 0x0063, 0x3042, 0x3044, 0x3046, 0x3048,
47
+ 0x304a, 0x304b, 0x304c, 0x304d, 0x304e, 0x304f]
43
48
  5.times do
44
49
  grams.each do |gram|
45
- profile.add(UCS2String.new(gram))
50
+ profile.add([gram])
46
51
  end
47
52
  end
48
- profile.add(UCS2String.new("\x30\x50"))
53
+ profile.add([0x3050])
49
54
 
50
- assert_equal(5, profile.freq[UCS2String.from_utf8("a")])
51
- assert_equal(5, profile.freq[UCS2String.new("\x30\x42")])
52
- assert_equal(1, profile.freq[UCS2String.new("\x30\x50")])
55
+ assert_equal(5, profile.freq[utf82cp("a")])
56
+ assert_equal(5, profile.freq[[0x3042]])
57
+ assert_equal(1, profile.freq[[0x3050]])
53
58
 
54
59
  profile.omit_less_freq()
55
- assert_nil(profile.freq[UCS2String.from_utf8("a")])
56
- assert_equal(5, profile.freq[UCS2String.new("\x30\x42")])
57
- assert_nil(profile.freq[UCS2String.new("\x30\x50")])
60
+
61
+ assert_nil(profile.freq[utf82cp("a")])
62
+ assert_equal(5, profile.freq[[0x3042]])
63
+ assert_nil(profile.freq[[0x3050]])
58
64
  end
59
65
 
60
66
  def test_omit_less_freq_illegally
61
- profile = LangProfile.new
62
- profile.omit_less_freq()
67
+ profile = LangProfile.new('sample')
68
+ assert_nil(profile.omit_less_freq())
63
69
  end
64
70
 
65
71
  def test_load_from_file
@@ -67,11 +73,11 @@ class LangProfileTest < Test::Unit::TestCase
67
73
  profile = LangProfile.load_from_file(filename)
68
74
  assert_equal(filename.split(/\//).last, profile.name)
69
75
  has_content = [
70
- profile.freq[UCS2String.from_utf8(" A")], # Latin
71
- profile.freq[UCS2String.new("\x06\x0c")], # Arabic
72
- profile.freq[UCS2String.new("\x0a\x85")], # Gujarati
73
- profile.freq[UCS2String.new("\x09\x05")], # Hindi
74
- profile.freq[UCS2String.new("\x30\x01")], # Japanese
76
+ profile.freq[utf82cp(" A")], # Latin
77
+ profile.freq[[0x060c]], # Arabic
78
+ profile.freq[[0x0a85]], # Gujarati
79
+ profile.freq[[0x0905]], # Hindi
80
+ profile.freq[[0x3001]], # Japanese
75
81
  ].any?
76
82
  assert(has_content, profile.inspect)
77
83
  end
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class LanguageTest < Test::Unit::TestCase
@@ -4,6 +4,6 @@ require 'test/helper'
4
4
  class LanguageDetectionFacadeTest < Test::Unit::TestCase
5
5
  def test_initialize_and_detect
6
6
  facade = LanguageDetectionFacade.new
7
- assert_equal("pl", facade.detect(UCS2String.from_utf8("Ich dalekopis fałszuje, gdy próby XQV nie wytrzymuje")))
7
+ assert_equal("pl", facade.detect(Langusta.utf82cp("Ich dalekopis fałszuje, gdy próby XQV nie wytrzymuje")))
8
8
  end
9
9
  end
@@ -1,13 +1,13 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class LangustaTest < Test::Unit::TestCase
4
5
 
5
6
  FACTORY = DetectorFactory.new
6
- profiles = Dir[File.join(PROFILES_PATH, '*')].map do |filename|
7
- LangProfile.load_from_file(filename)
8
- end
9
- profiles.each_with_index do |profile, index|
10
- FACTORY.add_profile(profile, index, profiles.length)
7
+
8
+ Dir[File.join(PROFILES_PATH, '*')].each do |filename|
9
+ profile = LangProfile.load_from_file(filename)
10
+ FACTORY.add_profile(profile)
11
11
  end
12
12
 
13
13
  Dir['test/test_data/*'].each do |filename|
@@ -15,7 +15,7 @@ class LangustaTest < Test::Unit::TestCase
15
15
  define_method(("test_%s_language" % [language]).to_sym) do
16
16
  detector = FACTORY.create
17
17
 
18
- ucs2_content = UCS2String.from_utf8(File.open(filename).read)
18
+ ucs2_content = Langusta.utf82cp(File.read(filename)) # File.read closes the file; File.open(...).read left the handle open
19
19
  detector = FACTORY.create
20
20
  detector.append(ucs2_content)
21
21
 
data/test/test_n_gram.rb CHANGED
@@ -7,50 +7,50 @@ class NGramTest < Test::Unit::TestCase
7
7
  end
8
8
 
9
9
  def test_normalize_with_latin
10
- assert_equal("\x00 ", NGram.normalize("\x00\x00")) # \0
11
- assert_equal("\x00 ", NGram.normalize("\x00\x09")) # <control>
12
- assert_equal("\x00 ", NGram.normalize("\x00\x20")) # space
13
- assert_equal("\x00 ", NGram.normalize("\x00\x30")) # 0
14
- assert_equal("\x00 ", NGram.normalize("\x00\x40")) # @
15
- assert_equal("\x00\x41", NGram.normalize("\x00\x41")) # A
16
- assert_equal("\x00\x5a", NGram.normalize("\x00\x5a")) # Z
17
- assert_equal("\x00 ", NGram.normalize("\x00\x5b")) # [
18
- assert_equal("\x00 ", NGram.normalize("\x00\x60")) # `
19
- assert_equal("\x00\x61", NGram.normalize("\x00\x61")) # a
20
- assert_equal("\x00\x7a", NGram.normalize("\x00\x7a")) # z
21
- assert_equal("\x00 ", NGram.normalize("\x00\x7b")) # {
22
- assert_equal("\x00 ", NGram.normalize("\x00\x7f")) # <control>
23
- assert_equal("\x00\x80", NGram.normalize("\x00\x80")) # <control>
24
- assert_equal("\x00 ", NGram.normalize("\x00\xa0")) # <control>
25
- assert_equal("\x00\xa1", NGram.normalize("\x00\xa1")) # <control>
10
+ assert_equal(0x20, NGram.normalize(0x00)) # \0
11
+ assert_equal(0x20, NGram.normalize(0x09)) # <control>
12
+ assert_equal(0x20, NGram.normalize(0x20)) # space
13
+ assert_equal(0x20, NGram.normalize(0x30)) # 0
14
+ assert_equal(0x20, NGram.normalize(0x40)) # @
15
+ assert_equal(0x41, NGram.normalize(0x41)) # A
16
+ assert_equal(0x5a, NGram.normalize(0x5a)) # Z
17
+ assert_equal(0x20, NGram.normalize(0x5b)) # [
18
+ assert_equal(0x20, NGram.normalize(0x60)) # `
19
+ assert_equal(0x61, NGram.normalize(0x61)) # a
20
+ assert_equal(0x7a, NGram.normalize(0x7a)) # z
21
+ assert_equal(0x20, NGram.normalize(0x7b)) # {
22
+ assert_equal(0x20, NGram.normalize(0x7f)) # <control>
23
+ assert_equal(0x80, NGram.normalize(0x80)) # <control>
24
+ assert_equal(0x20, NGram.normalize(0xa0)) # <control>
25
+ assert_equal(0xa1, NGram.normalize(0xa1)) # <control>
26
26
  end
27
27
 
28
28
  def test_normalize_with_cjk_kanji
29
- assert_equal("\x4e\x00", NGram.normalize("\x4e\x00"))
30
- assert_equal("\x4e\x01", NGram.normalize("\x4e\x01"))
31
- assert_equal("\x4e\x02", NGram.normalize("\x4e\x02"))
32
- assert_equal("\x4e\x01", NGram.normalize("\x4e\x03"))
33
- assert_equal("\x4e\x04", NGram.normalize("\x4e\x04"))
34
- assert_equal("\x4e\x05", NGram.normalize("\x4e\x05"))
35
- assert_equal("\x4e\x06", NGram.normalize("\x4e\x06"))
36
- assert_equal("\x4e\x07", NGram.normalize("\x4e\x07"))
37
- assert_equal("\x4e\x08", NGram.normalize("\x4e\x08"))
38
- assert_equal("\x4e\x09", NGram.normalize("\x4e\x09"))
39
- assert_equal("\x4e\x10", NGram.normalize("\x4e\x10"))
40
- assert_equal("\x4e\x11", NGram.normalize("\x4e\x11"))
41
- assert_equal("\x4e\x12", NGram.normalize("\x4e\x12"))
42
- assert_equal("\x4e\x13", NGram.normalize("\x4e\x13"))
43
- assert_equal("\x4e\x14", NGram.normalize("\x4e\x14"))
44
- assert_equal("\x4e\x15", NGram.normalize("\x4e\x15"))
45
- assert_equal("\x4e\x1e", NGram.normalize("\x4e\x1e"))
46
- assert_equal("\x4e\x1f", NGram.normalize("\x4e\x1f"))
47
- assert_equal("\x4e\x20", NGram.normalize("\x4e\x20"))
48
- assert_equal("\x4e\x21", NGram.normalize("\x4e\x21"))
49
- assert_equal("\x4e\x22", NGram.normalize("\x4e\x22"))
50
- assert_equal("\x4e\x23", NGram.normalize("\x4e\x23"))
51
- assert_equal("\x4e\x13", NGram.normalize("\x4e\x24"))
52
- assert_equal("\x4e\x13", NGram.normalize("\x4e\x25"))
53
- assert_equal("\x4e\x30", NGram.normalize("\x4e\x30"))
29
+ assert_equal(0x4e00, NGram.normalize(0x4e00))
30
+ assert_equal(0x4e01, NGram.normalize(0x4e01))
31
+ assert_equal(0x4e02, NGram.normalize(0x4e02))
32
+ assert_equal(0x4e01, NGram.normalize(0x4e03))
33
+ assert_equal(0x4e04, NGram.normalize(0x4e04))
34
+ assert_equal(0x4e05, NGram.normalize(0x4e05))
35
+ assert_equal(0x4e06, NGram.normalize(0x4e06))
36
+ assert_equal(0x4e07, NGram.normalize(0x4e07))
37
+ assert_equal(0x4e08, NGram.normalize(0x4e08))
38
+ assert_equal(0x4e09, NGram.normalize(0x4e09))
39
+ assert_equal(0x4e10, NGram.normalize(0x4e10))
40
+ assert_equal(0x4e11, NGram.normalize(0x4e11))
41
+ assert_equal(0x4e12, NGram.normalize(0x4e12))
42
+ assert_equal(0x4e13, NGram.normalize(0x4e13))
43
+ assert_equal(0x4e14, NGram.normalize(0x4e14))
44
+ assert_equal(0x4e15, NGram.normalize(0x4e15))
45
+ assert_equal(0x4e1e, NGram.normalize(0x4e1e))
46
+ assert_equal(0x4e1f, NGram.normalize(0x4e1f))
47
+ assert_equal(0x4e20, NGram.normalize(0x4e20))
48
+ assert_equal(0x4e21, NGram.normalize(0x4e21))
49
+ assert_equal(0x4e22, NGram.normalize(0x4e22))
50
+ assert_equal(0x4e23, NGram.normalize(0x4e23))
51
+ assert_equal(0x4e13, NGram.normalize(0x4e24))
52
+ assert_equal(0x4e13, NGram.normalize(0x4e25))
53
+ assert_equal(0x4e30, NGram.normalize(0x4e30))
54
54
  end
55
55
 
56
56
  def test_ngram
@@ -58,46 +58,58 @@ class NGramTest < Test::Unit::TestCase
58
58
  (0..4).each do |n|
59
59
  assert_nil(ngram.get(n))
60
60
  end
61
- ngram.add_char("\x00 ")
61
+ ngram.add_char(0x20)
62
62
  (1..3).each do |n|
63
63
  assert_nil(ngram.get(n))
64
64
  end
65
- ngram.add_char("\x00A")
66
- assert_equal(UCS2String.new("\x00A"), ngram.get(1))
67
- assert_equal(UCS2String.new("\x00 \x00A"), ngram.get(2))
65
+
66
+ ngram.add_char(0x0041)
67
+ assert_equal([0x0041], ngram.get(1))
68
+ assert_equal([0x0020, 0x0041], ngram.get(2))
68
69
  assert_nil(ngram.get(3))
69
- ngram.add_char("\x06\xcc")
70
- assert_equal(UCS2String.new("\x06\x4a"), ngram.get(1))
71
- assert_equal(UCS2String.new("\x00A\x06\x4a"), ngram.get(2))
72
- assert_equal(UCS2String.new("\x00 \x00A\x06\x4a"), ngram.get(3))
73
- ngram.add_char("\x1e\xa0")
74
- assert_equal(UCS2String.new("\x1e\xc3"), ngram.get(1))
75
- assert_equal(UCS2String.new("\x06\x4a\x1e\xc3"), ngram.get(2))
76
- assert_equal(UCS2String.new("\x00A\x06\x4a\x1e\xc3"), ngram.get(3))
77
- ngram.add_char("\x30\x44")
78
- assert_equal(UCS2String.new("\x30\x42"), ngram.get(1))
79
- assert_equal(UCS2String.new("\x1e\xc3\x30\x42"), ngram.get(2))
80
- assert_equal(UCS2String.new("\x06\x4a\x1e\xc3\x30\x42"), ngram.get(3))
81
70
 
82
- ngram.add_char("\x30\xa4")
83
- assert_equal(UCS2String.new("\x30\xa2"), ngram.get(1))
84
- assert_equal(UCS2String.new("\x30\x42\x30\xa2"), ngram.get(2))
85
- assert_equal(UCS2String.new("\x1e\xc3\x30\x42\x30\xa2"), ngram.get(3))
86
- ngram.add_char("\x31\x06")
87
- assert_equal(UCS2String.new("\x31\x05"), ngram.get(1))
88
- assert_equal(UCS2String.new("\x30\xa2\x31\x05"), ngram.get(2))
89
- assert_equal(UCS2String.new("\x30\x42\x30\xa2\x31\x05"), ngram.get(3))
90
- ngram.add_char("\xac\x01")
91
- assert_equal(UCS2String.new("\xac\x00"), ngram.get(1))
92
- assert_equal(UCS2String.new("\x31\x05\xac\x00"), ngram.get(2))
93
- assert_equal(UCS2String.new("\x30\xa2\x31\x05\xac\x00"), ngram.get(3))
94
- ngram.add_char("\x20\x10")
71
+ ngram.add_char(0x06cc)
72
+ assert_equal([0x064a], ngram.get(1))
73
+ assert_equal([0x0041, 0x064a], ngram.get(2))
74
+ assert_equal([0x0020, 0x0041, 0x064a], ngram.get(3))
75
+
76
+ ngram.add_char(0x1ea0)
77
+ assert_equal([0x1ec3], ngram.get(1))
78
+ assert_equal([0x064a, 0x1ec3], ngram.get(2))
79
+ assert_equal([0x0041, 0x064a, 0x1ec3], ngram.get(3))
80
+
81
+ ngram.add_char(0x3044)
82
+ assert_equal([0x3042], ngram.get(1))
83
+ assert_equal([0x1ec3, 0x3042], ngram.get(2))
84
+ assert_equal([0x064a, 0x1ec3, 0x3042], ngram.get(3))
85
+
86
+ ngram.add_char(0x30a4)
87
+ assert_equal([0x30a2], ngram.get(1))
88
+ assert_equal([0x3042, 0x30a2], ngram.get(2))
89
+ assert_equal([0x1ec3, 0x3042, 0x30a2], ngram.get(3))
90
+
91
+ ngram.add_char(0x3106)
92
+ assert_equal([0x3105], ngram.get(1))
93
+ assert_equal([0x30a2, 0x3105], ngram.get(2))
94
+ assert_equal([0x3042, 0x30a2, 0x3105], ngram.get(3))
95
+
96
+ ngram.add_char(0xac01)
97
+ assert_equal([0xac00], ngram.get(1))
98
+ assert_equal([0x3105, 0xac00], ngram.get(2))
99
+ assert_equal([0x30a2, 0x3105, 0xac00], ngram.get(3))
100
+
101
+ ngram.add_char(0x2010)
95
102
  assert_nil(ngram.get(1))
96
- assert_equal(UCS2String.new("\xac\x00\x00 "), ngram.get(2))
97
- assert_equal(UCS2String.new("\x31\x05\xac\x00\x00 "), ngram.get(3))
98
- ngram.add_char("\x00a")
99
- assert_equal(UCS2String.new("\x00a"), ngram.get(1))
100
- assert_equal(UCS2String.new("\x00 \x00a"), ngram.get(2))
103
+ assert_equal([0xac00, 0x0020], ngram.get(2))
104
+ assert_equal([0x3105, 0xac00, 0x0020], ngram.get(3))
105
+
106
+ ngram.add_char(0x0061) # "a" (lowercase, as in the removed string-based version of this step)
107
+ assert_equal([0x0061], ngram.get(1))
108
+ assert_equal([0x0020, 0x0061], ngram.get(2))
101
109
  assert_nil(ngram.get(3))
102
110
  end
111
+
112
+ def array_of_codepoints
113
+ array_of_codepoints.pack('n*')
114
+ end
103
115
  end