langusta 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,8 +8,8 @@ class FalsifiedTest < Test::Unit::TestCase
8
8
  profiles = Dir[File.join(PROFILES_PATH, '*')].map do |filename|
9
9
  LangProfile.load_from_file(filename)
10
10
  end
11
- profiles.each_with_index do |profile, index|
12
- factory.add_profile(profile, index, profiles.length)
11
+ profiles.each do |profile|
12
+ factory.add_profile(profile)
13
13
  end
14
14
 
15
15
  incorrect_guesses = 0.0
@@ -18,7 +18,7 @@ class FalsifiedTest < Test::Unit::TestCase
18
18
  Dir['test/test_data/*'].each do |filename|
19
19
  language = filename.split(/\//).last
20
20
 
21
- ucs2_content = UCS2String.from_utf8(File.open(filename).read)
21
+ ucs2_content = Langusta.utf82cp(File.open(filename).read)
22
22
  detector = factory.create
23
23
  detector.append(ucs2_content)
24
24
 
data/test/test_command.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class CommandTest < Test::Unit::TestCase
@@ -1,58 +1,59 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class DetectorTest < Test::Unit::TestCase
4
- TRAINING_EN = "\x00a \x00a \x00a \x00b \x00b \x00c \x00c \x00d \x00e"
5
- TRAINING_FR = "\x00a \x00b \x00b \x00c \x00c \x00c \x00d \x00d \x00d"
6
- TRAINING_JP = "\x30\x42 \x30\x42 \x30\x42 \x30\x44 \x30\x46 \x30\x48 \x30\x48"
5
+ TRAINING_EN = [0x0061, 0x0061, 0x0061, 0x0062, 0x0062, 0x0063, 0x0063, 0x0064, 0x0065]
6
+ TRAINING_FR = [0x0061, 0x0062, 0x0062, 0x0063, 0x0063, 0x0063, 0x0063, 0x0064, 0x0064]
7
+ TRAINING_JP = [0x3042, 0x3042, 0x3042, 0x3044, 0x3046, 0x3048, 0x3048]
7
8
 
8
9
  def setup
9
10
  @factory = DetectorFactory.new
10
11
  profile_en = LangProfile.new("en")
11
- TRAINING_EN.split(/ /).each do |w|
12
- profile_en.add(UCS2String.new(w))
12
+ TRAINING_EN.each do |w|
13
+ profile_en.add([w])
13
14
  end
14
- @factory.add_profile(profile_en, 0, 3)
15
+ @factory.add_profile(profile_en)
15
16
 
16
17
  profile_fr = LangProfile.new("fr")
17
- TRAINING_FR.split(/ /).each do |w|
18
- profile_fr.add(UCS2String.new(w))
18
+ TRAINING_FR.each do |w|
19
+ profile_fr.add([w])
19
20
  end
20
- @factory.add_profile(profile_fr, 1, 3)
21
+ @factory.add_profile(profile_fr)
21
22
 
22
23
  profile_jp = LangProfile.new("jp")
23
- TRAINING_JP.split(/ /).each do |w|
24
- profile_jp.add(UCS2String.new(w))
24
+ TRAINING_JP.each do |w|
25
+ profile_jp.add([w])
25
26
  end
26
- @factory.add_profile(profile_jp, 2, 3)
27
+ @factory.add_profile(profile_jp)
27
28
  end
28
29
 
29
30
  def test_detector1
30
31
  detector = @factory.create()
31
- detector.append(UCS2String.new("\x00a"))
32
+ detector.append([0x0061]) # "a"
32
33
  assert_equal("en", detector.detect())
33
34
  end
34
35
 
35
36
  def test_detector2
36
37
  detector = @factory.create()
37
- detector.append(UCS2String.new("\x00b\x00\x20\x00d"))
38
+ detector.append([0x0062, 0x0020, 0x0064]) # "b d"
38
39
  assert_equal("fr", detector.detect())
39
40
  end
40
41
 
41
42
  def test_detector3
42
43
  detector = @factory.create()
43
- detector.append(UCS2String.new("\x00d\x00 \x00e"))
44
+ detector.append([0x0064, 0x0020, 0x0065]) # "d e"
44
45
  assert_equal("en", detector.detect())
45
46
  end
46
47
 
47
48
  def test_detector4
48
49
  detector = @factory.create()
49
- detector.append(UCS2String.new("\x30\x42\x30\x42\x30\x42\x30\x42\x00a"))
50
+ detector.append([0x3042, 0x3042, 0x3042, 0x3042, 0x0061])
50
51
  assert_equal("jp", detector.detect())
51
52
  end
52
53
 
53
54
  def test_exceptions
54
55
  detector = @factory.create()
55
- detector.append(UCS2String.new(''))
56
+ detector.append([])
56
57
  assert_raises(NoFeaturesInTextError) do
57
58
  detector.detect()
58
59
  end
@@ -1,28 +1,40 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class DetectorFactoryTest < Test::Unit::TestCase
4
5
  def test_add_profile
5
- profile = LangProfile.new
6
+ profile = LangProfile.new('sample')
6
7
  factory = DetectorFactory.new
7
8
 
8
- factory.add_profile(profile, 0, 1)
9
+ factory.add_profile(profile)
9
10
 
10
11
  detector = factory.create(0.123)
11
12
  assert_equal(0.123, detector.alpha)
12
13
  end
13
14
 
14
15
  def test_exceptions
15
- profile = LangProfile.new
16
+ profile = LangProfile.new('sample')
16
17
  factory = DetectorFactory.new
17
18
 
18
19
  assert_raises(NoProfilesLoadedError) do
19
20
  factory.create()
20
21
  end
21
22
 
22
- factory.add_profile(profile, 0, 2)
23
+ factory.add_profile(profile)
23
24
 
24
25
  assert_raises(DuplicateProfilesError) do
25
- factory.add_profile(profile, 1, 2)
26
+ factory.add_profile(profile)
26
27
  end
27
28
  end
29
+
30
+ def test_inspect
31
+ profile = LangProfile.new('sample')
32
+ factory = DetectorFactory.new
33
+
34
+ factory.add_profile(profile)
35
+
36
+ assert_match(Regexp.new(factory.object_ptr), factory.inspect)
37
+ assert_match(/1 profile\(s\)/, factory.inspect)
38
+ assert_match(Regexp.new(factory.class.name), factory.inspect)
39
+ end
28
40
  end
@@ -1,8 +1,9 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class JavaPropertyReaderTest < Test::Unit::TestCase
4
5
  def test_parse
5
6
  jpr = JavaPropertyReader.new(MESSAGES_PROPERTIES)
6
- assert_equal("\x4f\x7c\x69\x34", jpr["NGram.KANJI_1_0"])
7
+ assert_equal([0x4f7c, 0x6934], jpr["NGram.KANJI_1_0"])
7
8
  end
8
9
  end
@@ -3,8 +3,12 @@ require 'test/helper'
3
3
 
4
4
  class LangProfileTest < Test::Unit::TestCase
5
5
  def test_lang_profile
6
- profile = LangProfile.new
7
- assert_nil(profile.name)
6
+ assert_raises(ArgumentError) do
7
+ LangProfile.new
8
+ end
9
+ assert_raises(TypeError) do
10
+ LangProfile.new(nil)
11
+ end
8
12
  end
9
13
 
10
14
  def test_lang_profile_string_int
@@ -14,52 +18,54 @@ class LangProfileTest < Test::Unit::TestCase
14
18
 
15
19
  def test_add
16
20
  profile = LangProfile.new('en')
17
- profile.add(UCS2String.from_utf8("a"))
18
- assert_equal(1, profile.freq[UCS2String.from_utf8("a")])
19
- profile.add(UCS2String.from_utf8("a"))
20
- assert_equal(2, profile.freq[UCS2String.from_utf8("a")])
21
+ profile.add(utf82cp("a"))
22
+ assert_equal(1, profile.freq[utf82cp("a")])
23
+ profile.add(utf82cp("a"))
24
+ assert_equal(2, profile.freq[utf82cp("a")])
21
25
  profile.omit_less_freq()
22
26
  end
23
27
 
24
28
  def test_add_illegally_1
25
- profile = LangProfile.new
26
- profile.add(UCS2String.from_utf8("a"))
27
- assert_nil(profile.freq[UCS2String.from_utf8("a")])
29
+ profile = LangProfile.new('sample')
30
+ profile.add(utf82cp("a"))
31
+ assert_equal(1, profile.freq[utf82cp("a")])
28
32
  end
29
33
 
30
34
  def test_add_illegally_2
31
35
  profile = LangProfile.new('en')
32
- profile.add(UCS2String.from_utf8("a"))
33
- profile.add(UCS2String.from_utf8(""))
34
- profile.add(UCS2String.from_utf8("abcd"))
35
- assert_equal(1, profile.freq[UCS2String.from_utf8("a")])
36
- assert_nil(profile.freq[UCS2String.from_utf8("")])
37
- assert_nil(profile.freq[UCS2String.from_utf8("abcd")])
36
+ profile.add(utf82cp("a"))
37
+ profile.add(utf82cp(""))
38
+ profile.add(utf82cp("abcd"))
39
+ assert_equal(1, profile.freq[utf82cp("a")])
40
+ assert_nil(profile.freq[utf82cp("")])
41
+ assert_nil(profile.freq[utf82cp("abcd")])
38
42
  end
39
43
 
40
44
  def test_omit_less_freq
41
45
  profile = LangProfile.new('en')
42
- grams = "\x00a \x00b \x00c \x30\x42 \x30\x44 \x30\x46 \x30\x48 \x30\x4a \x30\x4b \x30\x4c \x30\x4d \x30\x4e \x30\x4f".split(/ /)
46
+ grams = [0x0061, 0x0062, 0x0063, 0x3042, 0x3044, 0x3046, 0x3048,
47
+ 0x304a, 0x304b, 0x304c, 0x304d, 0x304e, 0x304f]
43
48
  5.times do
44
49
  grams.each do |gram|
45
- profile.add(UCS2String.new(gram))
50
+ profile.add([gram])
46
51
  end
47
52
  end
48
- profile.add(UCS2String.new("\x30\x50"))
53
+ profile.add([0x3050])
49
54
 
50
- assert_equal(5, profile.freq[UCS2String.from_utf8("a")])
51
- assert_equal(5, profile.freq[UCS2String.new("\x30\x42")])
52
- assert_equal(1, profile.freq[UCS2String.new("\x30\x50")])
55
+ assert_equal(5, profile.freq[utf82cp("a")])
56
+ assert_equal(5, profile.freq[[0x3042]])
57
+ assert_equal(1, profile.freq[[0x3050]])
53
58
 
54
59
  profile.omit_less_freq()
55
- assert_nil(profile.freq[UCS2String.from_utf8("a")])
56
- assert_equal(5, profile.freq[UCS2String.new("\x30\x42")])
57
- assert_nil(profile.freq[UCS2String.new("\x30\x50")])
60
+
61
+ assert_nil(profile.freq[utf82cp("a")])
62
+ assert_equal(5, profile.freq[[0x3042]])
63
+ assert_nil(profile.freq[[0x3050]])
58
64
  end
59
65
 
60
66
  def test_omit_less_freq_illegally
61
- profile = LangProfile.new
62
- profile.omit_less_freq()
67
+ profile = LangProfile.new('sample')
68
+ assert_nil(profile.omit_less_freq())
63
69
  end
64
70
 
65
71
  def test_load_from_file
@@ -67,11 +73,11 @@ class LangProfileTest < Test::Unit::TestCase
67
73
  profile = LangProfile.load_from_file(filename)
68
74
  assert_equal(filename.split(/\//).last, profile.name)
69
75
  has_content = [
70
- profile.freq[UCS2String.from_utf8(" A")], # Latin
71
- profile.freq[UCS2String.new("\x06\x0c")], # Arabic
72
- profile.freq[UCS2String.new("\x0a\x85")], # Gujarati
73
- profile.freq[UCS2String.new("\x09\x05")], # Hindi
74
- profile.freq[UCS2String.new("\x30\x01")], # Japanese
76
+ profile.freq[utf82cp(" A")], # Latin
77
+ profile.freq[[0x060c]], # Arabic
78
+ profile.freq[[0x0a85]], # Gujarati
79
+ profile.freq[[0x0905]], # Hindi
80
+ profile.freq[[0x3001]], # Japanese
75
81
  ].any?
76
82
  assert(has_content, profile.inspect)
77
83
  end
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class LanguageTest < Test::Unit::TestCase
@@ -4,6 +4,6 @@ require 'test/helper'
4
4
  class LanguageDetectionFacadeTest < Test::Unit::TestCase
5
5
  def test_initialize_and_detect
6
6
  facade = LanguageDetectionFacade.new
7
- assert_equal("pl", facade.detect(UCS2String.from_utf8("Ich dalekopis fałszuje, gdy próby XQV nie wytrzymuje")))
7
+ assert_equal("pl", facade.detect(Langusta.utf82cp("Ich dalekopis fałszuje, gdy próby XQV nie wytrzymuje")))
8
8
  end
9
9
  end
@@ -1,13 +1,13 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'test/helper'
2
3
 
3
4
  class LangustaTest < Test::Unit::TestCase
4
5
 
5
6
  FACTORY = DetectorFactory.new
6
- profiles = Dir[File.join(PROFILES_PATH, '*')].map do |filename|
7
- LangProfile.load_from_file(filename)
8
- end
9
- profiles.each_with_index do |profile, index|
10
- FACTORY.add_profile(profile, index, profiles.length)
7
+
8
+ Dir[File.join(PROFILES_PATH, '*')].each do |filename|
9
+ profile = LangProfile.load_from_file(filename)
10
+ FACTORY.add_profile(profile)
11
11
  end
12
12
 
13
13
  Dir['test/test_data/*'].each do |filename|
@@ -15,7 +15,7 @@ class LangustaTest < Test::Unit::TestCase
15
15
  define_method(("test_%s_language" % [language]).to_sym) do
16
16
  detector = FACTORY.create
17
17
 
18
- ucs2_content = UCS2String.from_utf8(File.open(filename).read)
18
+ ucs2_content = Langusta.utf82cp(File.open(filename).read)
19
19
  detector = FACTORY.create
20
20
  detector.append(ucs2_content)
21
21
 
data/test/test_n_gram.rb CHANGED
@@ -7,50 +7,50 @@ class NGramTest < Test::Unit::TestCase
7
7
  end
8
8
 
9
9
  def test_normalize_with_latin
10
- assert_equal("\x00 ", NGram.normalize("\x00\x00")) # \0
11
- assert_equal("\x00 ", NGram.normalize("\x00\x09")) # <control>
12
- assert_equal("\x00 ", NGram.normalize("\x00\x20")) # space
13
- assert_equal("\x00 ", NGram.normalize("\x00\x30")) # 0
14
- assert_equal("\x00 ", NGram.normalize("\x00\x40")) # @
15
- assert_equal("\x00\x41", NGram.normalize("\x00\x41")) # A
16
- assert_equal("\x00\x5a", NGram.normalize("\x00\x5a")) # Z
17
- assert_equal("\x00 ", NGram.normalize("\x00\x5b")) # [
18
- assert_equal("\x00 ", NGram.normalize("\x00\x60")) # `
19
- assert_equal("\x00\x61", NGram.normalize("\x00\x61")) # a
20
- assert_equal("\x00\x7a", NGram.normalize("\x00\x7a")) # z
21
- assert_equal("\x00 ", NGram.normalize("\x00\x7b")) # {
22
- assert_equal("\x00 ", NGram.normalize("\x00\x7f")) # <control>
23
- assert_equal("\x00\x80", NGram.normalize("\x00\x80")) # <control>
24
- assert_equal("\x00 ", NGram.normalize("\x00\xa0")) # <control>
25
- assert_equal("\x00\xa1", NGram.normalize("\x00\xa1")) # <control>
10
+ assert_equal(0x20, NGram.normalize(0x00)) # \0
11
+ assert_equal(0x20, NGram.normalize(0x09)) # <control>
12
+ assert_equal(0x20, NGram.normalize(0x20)) # space
13
+ assert_equal(0x20, NGram.normalize(0x30)) # 0
14
+ assert_equal(0x20, NGram.normalize(0x40)) # @
15
+ assert_equal(0x41, NGram.normalize(0x41)) # A
16
+ assert_equal(0x5a, NGram.normalize(0x5a)) # Z
17
+ assert_equal(0x20, NGram.normalize(0x5b)) # [
18
+ assert_equal(0x20, NGram.normalize(0x60)) # `
19
+ assert_equal(0x61, NGram.normalize(0x61)) # a
20
+ assert_equal(0x7a, NGram.normalize(0x7a)) # z
21
+ assert_equal(0x20, NGram.normalize(0x7b)) # {
22
+ assert_equal(0x20, NGram.normalize(0x7f)) # <control>
23
+ assert_equal(0x80, NGram.normalize(0x80)) # <control>
24
+ assert_equal(0x20, NGram.normalize(0xa0)) # <control>
25
+ assert_equal(0xa1, NGram.normalize(0xa1)) # <control>
26
26
  end
27
27
 
28
28
  def test_normalize_with_cjk_kanji
29
- assert_equal("\x4e\x00", NGram.normalize("\x4e\x00"))
30
- assert_equal("\x4e\x01", NGram.normalize("\x4e\x01"))
31
- assert_equal("\x4e\x02", NGram.normalize("\x4e\x02"))
32
- assert_equal("\x4e\x01", NGram.normalize("\x4e\x03"))
33
- assert_equal("\x4e\x04", NGram.normalize("\x4e\x04"))
34
- assert_equal("\x4e\x05", NGram.normalize("\x4e\x05"))
35
- assert_equal("\x4e\x06", NGram.normalize("\x4e\x06"))
36
- assert_equal("\x4e\x07", NGram.normalize("\x4e\x07"))
37
- assert_equal("\x4e\x08", NGram.normalize("\x4e\x08"))
38
- assert_equal("\x4e\x09", NGram.normalize("\x4e\x09"))
39
- assert_equal("\x4e\x10", NGram.normalize("\x4e\x10"))
40
- assert_equal("\x4e\x11", NGram.normalize("\x4e\x11"))
41
- assert_equal("\x4e\x12", NGram.normalize("\x4e\x12"))
42
- assert_equal("\x4e\x13", NGram.normalize("\x4e\x13"))
43
- assert_equal("\x4e\x14", NGram.normalize("\x4e\x14"))
44
- assert_equal("\x4e\x15", NGram.normalize("\x4e\x15"))
45
- assert_equal("\x4e\x1e", NGram.normalize("\x4e\x1e"))
46
- assert_equal("\x4e\x1f", NGram.normalize("\x4e\x1f"))
47
- assert_equal("\x4e\x20", NGram.normalize("\x4e\x20"))
48
- assert_equal("\x4e\x21", NGram.normalize("\x4e\x21"))
49
- assert_equal("\x4e\x22", NGram.normalize("\x4e\x22"))
50
- assert_equal("\x4e\x23", NGram.normalize("\x4e\x23"))
51
- assert_equal("\x4e\x13", NGram.normalize("\x4e\x24"))
52
- assert_equal("\x4e\x13", NGram.normalize("\x4e\x25"))
53
- assert_equal("\x4e\x30", NGram.normalize("\x4e\x30"))
29
+ assert_equal(0x4e00, NGram.normalize(0x4e00))
30
+ assert_equal(0x4e01, NGram.normalize(0x4e01))
31
+ assert_equal(0x4e02, NGram.normalize(0x4e02))
32
+ assert_equal(0x4e01, NGram.normalize(0x4e03))
33
+ assert_equal(0x4e04, NGram.normalize(0x4e04))
34
+ assert_equal(0x4e05, NGram.normalize(0x4e05))
35
+ assert_equal(0x4e06, NGram.normalize(0x4e06))
36
+ assert_equal(0x4e07, NGram.normalize(0x4e07))
37
+ assert_equal(0x4e08, NGram.normalize(0x4e08))
38
+ assert_equal(0x4e09, NGram.normalize(0x4e09))
39
+ assert_equal(0x4e10, NGram.normalize(0x4e10))
40
+ assert_equal(0x4e11, NGram.normalize(0x4e11))
41
+ assert_equal(0x4e12, NGram.normalize(0x4e12))
42
+ assert_equal(0x4e13, NGram.normalize(0x4e13))
43
+ assert_equal(0x4e14, NGram.normalize(0x4e14))
44
+ assert_equal(0x4e15, NGram.normalize(0x4e15))
45
+ assert_equal(0x4e1e, NGram.normalize(0x4e1e))
46
+ assert_equal(0x4e1f, NGram.normalize(0x4e1f))
47
+ assert_equal(0x4e20, NGram.normalize(0x4e20))
48
+ assert_equal(0x4e21, NGram.normalize(0x4e21))
49
+ assert_equal(0x4e22, NGram.normalize(0x4e22))
50
+ assert_equal(0x4e23, NGram.normalize(0x4e23))
51
+ assert_equal(0x4e13, NGram.normalize(0x4e24))
52
+ assert_equal(0x4e13, NGram.normalize(0x4e25))
53
+ assert_equal(0x4e30, NGram.normalize(0x4e30))
54
54
  end
55
55
 
56
56
  def test_ngram
@@ -58,46 +58,58 @@ class NGramTest < Test::Unit::TestCase
58
58
  (0..4).each do |n|
59
59
  assert_nil(ngram.get(n))
60
60
  end
61
- ngram.add_char("\x00 ")
61
+ ngram.add_char(0x20)
62
62
  (1..3).each do |n|
63
63
  assert_nil(ngram.get(n))
64
64
  end
65
- ngram.add_char("\x00A")
66
- assert_equal(UCS2String.new("\x00A"), ngram.get(1))
67
- assert_equal(UCS2String.new("\x00 \x00A"), ngram.get(2))
65
+
66
+ ngram.add_char(0x0041)
67
+ assert_equal([0x0041], ngram.get(1))
68
+ assert_equal([0x0020, 0x0041], ngram.get(2))
68
69
  assert_nil(ngram.get(3))
69
- ngram.add_char("\x06\xcc")
70
- assert_equal(UCS2String.new("\x06\x4a"), ngram.get(1))
71
- assert_equal(UCS2String.new("\x00A\x06\x4a"), ngram.get(2))
72
- assert_equal(UCS2String.new("\x00 \x00A\x06\x4a"), ngram.get(3))
73
- ngram.add_char("\x1e\xa0")
74
- assert_equal(UCS2String.new("\x1e\xc3"), ngram.get(1))
75
- assert_equal(UCS2String.new("\x06\x4a\x1e\xc3"), ngram.get(2))
76
- assert_equal(UCS2String.new("\x00A\x06\x4a\x1e\xc3"), ngram.get(3))
77
- ngram.add_char("\x30\x44")
78
- assert_equal(UCS2String.new("\x30\x42"), ngram.get(1))
79
- assert_equal(UCS2String.new("\x1e\xc3\x30\x42"), ngram.get(2))
80
- assert_equal(UCS2String.new("\x06\x4a\x1e\xc3\x30\x42"), ngram.get(3))
81
70
 
82
- ngram.add_char("\x30\xa4")
83
- assert_equal(UCS2String.new("\x30\xa2"), ngram.get(1))
84
- assert_equal(UCS2String.new("\x30\x42\x30\xa2"), ngram.get(2))
85
- assert_equal(UCS2String.new("\x1e\xc3\x30\x42\x30\xa2"), ngram.get(3))
86
- ngram.add_char("\x31\x06")
87
- assert_equal(UCS2String.new("\x31\x05"), ngram.get(1))
88
- assert_equal(UCS2String.new("\x30\xa2\x31\x05"), ngram.get(2))
89
- assert_equal(UCS2String.new("\x30\x42\x30\xa2\x31\x05"), ngram.get(3))
90
- ngram.add_char("\xac\x01")
91
- assert_equal(UCS2String.new("\xac\x00"), ngram.get(1))
92
- assert_equal(UCS2String.new("\x31\x05\xac\x00"), ngram.get(2))
93
- assert_equal(UCS2String.new("\x30\xa2\x31\x05\xac\x00"), ngram.get(3))
94
- ngram.add_char("\x20\x10")
71
+ ngram.add_char(0x06cc)
72
+ assert_equal([0x064a], ngram.get(1))
73
+ assert_equal([0x0041, 0x64a], ngram.get(2))
74
+ assert_equal([0x0020, 0x0041, 0x064a], ngram.get(3))
75
+
76
+ ngram.add_char(0x1ea0)
77
+ assert_equal([0x1ec3], ngram.get(1))
78
+ assert_equal([0x064a, 0x1ec3], ngram.get(2))
79
+ assert_equal([0x0041, 0x064a, 0x1ec3], ngram.get(3))
80
+
81
+ ngram.add_char(0x3044)
82
+ assert_equal([0x3042], ngram.get(1))
83
+ assert_equal([0x1ec3, 0x3042], ngram.get(2))
84
+ assert_equal([0x064a, 0x1ec3, 0x3042], ngram.get(3))
85
+
86
+ ngram.add_char(0x30a4)
87
+ assert_equal([0x30a2], ngram.get(1))
88
+ assert_equal([0x3042, 0x30a2], ngram.get(2))
89
+ assert_equal([0x1ec3, 0x3042, 0x30a2], ngram.get(3))
90
+
91
+ ngram.add_char(0x3106)
92
+ assert_equal([0x3105], ngram.get(1))
93
+ assert_equal([0x30a2, 0x3105], ngram.get(2))
94
+ assert_equal([0x3042, 0x30a2, 0x3105], ngram.get(3))
95
+
96
+ ngram.add_char(0xac01)
97
+ assert_equal([0xac00], ngram.get(1))
98
+ assert_equal([0x3105, 0xac00], ngram.get(2))
99
+ assert_equal([0x30a2, 0x3105, 0xac00], ngram.get(3))
100
+
101
+ ngram.add_char(0x2010)
95
102
  assert_nil(ngram.get(1))
96
- assert_equal(UCS2String.new("\xac\x00\x00 "), ngram.get(2))
97
- assert_equal(UCS2String.new("\x31\x05\xac\x00\x00 "), ngram.get(3))
98
- ngram.add_char("\x00a")
99
- assert_equal(UCS2String.new("\x00a"), ngram.get(1))
100
- assert_equal(UCS2String.new("\x00 \x00a"), ngram.get(2))
103
+ assert_equal([0xac00, 0x0020], ngram.get(2))
104
+ assert_equal([0x3105, 0xac00, 0x0020], ngram.get(3))
105
+
106
+ ngram.add_char(0x0041)
107
+ assert_equal([0x0041], ngram.get(1))
108
+ assert_equal([0x0020, 0x0041], ngram.get(2))
101
109
  assert_nil(ngram.get(3))
102
110
  end
111
+
112
+ def array_of_codepoints
113
+ array_of_codepoints.pack('n*')
114
+ end
103
115
  end