langusta 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +7 -0
- data/Gemfile +10 -7
- data/Gemfile.lock +12 -16
- data/{README.rdoc → README.md} +27 -10
- data/Rakefile +3 -10
- data/VERSION +1 -1
- data/langusta.gemspec +23 -47
- data/lib/langusta.rb +36 -10
- data/lib/langusta/codepoints.rb +19 -0
- data/lib/langusta/command.rb +3 -3
- data/lib/langusta/detector.rb +16 -13
- data/lib/langusta/detector_factory.rb +11 -5
- data/lib/langusta/guard.rb +22 -0
- data/lib/langusta/inspector.rb +7 -0
- data/lib/langusta/java_property_reader.rb +2 -3
- data/lib/langusta/lang_profile.rb +12 -18
- data/lib/langusta/language_detection_facade.rb +2 -2
- data/lib/langusta/n_gram.rb +20 -25
- data/lib/langusta/regex_helper.rb +15 -10
- data/lib/langusta/tag_extractor.rb +5 -5
- data/lib/langusta/unicode_block.rb +34 -34
- data/test/helper.rb +12 -3
- data/test/quality/test_falsified.rb +3 -3
- data/test/test_command.rb +1 -0
- data/test/test_detector.rb +18 -17
- data/test/test_detector_factory.rb +17 -5
- data/test/test_java_property_reader.rb +2 -1
- data/test/test_lang_profile.rb +37 -31
- data/test/test_language.rb +1 -0
- data/test/test_language_detection_facade.rb +1 -1
- data/test/test_langusta.rb +6 -6
- data/test/test_n_gram.rb +87 -75
- data/test/test_tag_extractor.rb +19 -18
- data/test/test_unicode_block.rb +2 -1
- metadata +54 -156
- data/lib/langusta/ucs2_string.rb +0 -70
- data/test/test_ucs2_string.rb +0 -9
data/lib/langusta/detector_factory.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module Langusta
|
2
2
|
class DetectorFactory
|
3
|
+
include Inspector
|
4
|
+
|
3
5
|
attr_reader :word_lang_prob_map, :lang_list
|
4
6
|
|
5
7
|
def initialize
|
@@ -11,15 +13,15 @@ module Langusta
|
|
11
13
|
# @param [LangProfile] language profile to be added.
|
12
14
|
# @param [Fixnum] index at which the language profile is to be added.
|
13
15
|
# @param [Fixnum] counts how many language profiles are to be added to this factory in total.
|
14
|
-
def add_profile(profile
|
16
|
+
def add_profile(profile)
|
15
17
|
raise DuplicateProfilesError.new(profile.name) if @lang_list.include?(profile.name)
|
16
18
|
@lang_list << profile.name
|
19
|
+
last_lang_index = @lang_list.size - 1
|
20
|
+
|
17
21
|
profile.freq.keys.each do |word|
|
18
|
-
|
19
|
-
@word_lang_prob_map[word] = Array.new(langsize, 0.0)
|
20
|
-
end
|
22
|
+
@word_lang_prob_map[word] ||= []
|
21
23
|
prob = 1.0 * profile.freq[word] / profile.n_words[word.length - 1]
|
22
|
-
@word_lang_prob_map[word][
|
24
|
+
@word_lang_prob_map[word][last_lang_index] = prob
|
23
25
|
end
|
24
26
|
end
|
25
27
|
|
@@ -35,6 +37,10 @@ module Langusta
|
|
35
37
|
end
|
36
38
|
end
|
37
39
|
|
40
|
+
def inspect
|
41
|
+
"#<#{self.class.name}:0x#{object_ptr} (#{@lang_list.size} profile(s))"
|
42
|
+
end
|
43
|
+
|
38
44
|
private
|
39
45
|
def create_detector
|
40
46
|
raise NoProfilesLoadedError if @lang_list.empty?
|
data/lib/langusta/guard.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Langusta
|
2
|
+
module Guard
|
3
|
+
|
4
|
+
def self.klass(argument, klass, _method)
|
5
|
+
return unless $debug
|
6
|
+
raise TypeError.new("#{_method}: expected #{klass} got: #{argument.class}") unless argument.is_a?(klass)
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.codepoint(codepoint, _method)
|
10
|
+
return unless $debug
|
11
|
+
raise ArgumentError.new([_method, ':', codepoint.to_s(16)].join) unless (0x0000..0xffff).include?(codepoint)
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.codepoint_array(array, _method)
|
15
|
+
return unless $debug
|
16
|
+
raise TypeError.new("#{_method}: expected Array, got: #{array.class}") unless array.is_a?(Array)
|
17
|
+
cp = array.find do |cp|
|
18
|
+
! (0x0000..0xffff).include?(cp)
|
19
|
+
end && (raise ArgumentError.new("#{_method}: bad codepoint: #{cp}"))
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/langusta/java_property_reader.rb
CHANGED
@@ -3,7 +3,7 @@ module Langusta
|
|
3
3
|
# This is a minimal implementation, don't expect this to actually work.
|
4
4
|
|
5
5
|
def initialize(filename)
|
6
|
-
@lines = File.open(filename).
|
6
|
+
@lines = File.open(filename).readlines
|
7
7
|
parse()
|
8
8
|
end
|
9
9
|
|
@@ -28,8 +28,7 @@ module Langusta
|
|
28
28
|
codepoints = value.scan(/([0-9A-F]{4})/)
|
29
29
|
codepoints.map do |cp|
|
30
30
|
int_cp = cp.first.to_i(16)
|
31
|
-
|
32
|
-
end.join
|
31
|
+
end
|
33
32
|
end
|
34
33
|
end
|
35
34
|
end
|
data/lib/langusta/lang_profile.rb
CHANGED
@@ -11,34 +11,29 @@ module Langusta
|
|
11
11
|
# @return [LangProfile]
|
12
12
|
def self.load_from_file(filename)
|
13
13
|
json = Yajl::Parser.parse(File.new(filename))
|
14
|
-
profile = self.new
|
15
14
|
|
16
|
-
name = json['name']
|
17
|
-
n_words = json['n_words']
|
18
15
|
freq = json['freq'].inject({}) do |acc, kv|
|
19
16
|
key, value = kv
|
20
|
-
acc[
|
17
|
+
acc[Langusta.utf82cp(key)] = value
|
21
18
|
acc
|
22
19
|
end
|
23
|
-
profile.populate_json(name, freq, n_words)
|
24
|
-
profile
|
25
|
-
end
|
26
20
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
@n_words = Array.new(NGram::N_GRAM, 0)
|
21
|
+
self.new(json['name'] || (raise CorruptProfileError.new("Missing profile name")),
|
22
|
+
freq,
|
23
|
+
json['n_words'] || (raise CorruptProfileError.new("Missing number of words value")))
|
31
24
|
end
|
32
25
|
|
33
|
-
def
|
26
|
+
def initialize(name, freq={}, n_words = Array.new(NGram::N_GRAM, 0))
|
27
|
+
Guard.klass(name, String, __method__)
|
34
28
|
@name, @freq, @n_words = name, freq, n_words
|
35
29
|
end
|
36
30
|
|
37
31
|
# Adds a given NGram to this language profile. This operation is expected to be invoked multiple times for the same arguments.
|
38
|
-
# @param gram [
|
32
|
+
# @param gram [Array<Fixnum>]
|
39
33
|
def add(gram)
|
40
|
-
|
41
|
-
|
34
|
+
return if gram.nil?
|
35
|
+
Guard.klass(gram, Array, __method__)
|
36
|
+
|
42
37
|
length = gram.size
|
43
38
|
return if length < 1 or length > NGram::N_GRAM
|
44
39
|
@n_words[length - 1] += 1
|
@@ -47,7 +42,6 @@ module Langusta
|
|
47
42
|
end
|
48
43
|
|
49
44
|
def omit_less_freq
|
50
|
-
return if @name.nil?
|
51
45
|
threshold = @n_words[0] / LESS_FREQ_RATIO
|
52
46
|
threshold = MINIMUM_FREQ if threshold < MINIMUM_FREQ
|
53
47
|
keys = Set.new(@freq.keys)
|
@@ -59,7 +53,7 @@ module Langusta
|
|
59
53
|
@freq.delete(key)
|
60
54
|
else
|
61
55
|
# temp workaround
|
62
|
-
if RegexHelper::ROMAN_REGEX.match(key
|
56
|
+
if RegexHelper::ROMAN_REGEX.match(Langusta.cp2utf8(key))
|
63
57
|
roman += count
|
64
58
|
end
|
65
59
|
end
|
@@ -69,7 +63,7 @@ module Langusta
|
|
69
63
|
keys2 = Set.new(@freq.keys)
|
70
64
|
keys2.each do |key|
|
71
65
|
# temp workaround
|
72
|
-
if RegexHelper::INCL_ROMAN_REGEX.match(key
|
66
|
+
if RegexHelper::INCL_ROMAN_REGEX.match(Langusta.cp2utf8(key))
|
73
67
|
@n_words[key.size - 1] -= @freq[key]
|
74
68
|
@freq.delete(key)
|
75
69
|
end
|
data/lib/langusta/language_detection_facade.rb
CHANGED
@@ -3,8 +3,8 @@ module Langusta
|
|
3
3
|
def initialize
|
4
4
|
@factory = DetectorFactory.new
|
5
5
|
profiles = load_profiles()
|
6
|
-
profiles.
|
7
|
-
@factory.add_profile(profile
|
6
|
+
profiles.each do |profile|
|
7
|
+
@factory.add_profile(profile)
|
8
8
|
end
|
9
9
|
end
|
10
10
|
|
data/lib/langusta/n_gram.rb
CHANGED
@@ -3,17 +3,10 @@ module Langusta
|
|
3
3
|
# constructed on a character by character basis.
|
4
4
|
class NGram
|
5
5
|
N_GRAM = 3
|
6
|
-
UCS2_SPACE =
|
6
|
+
UCS2_SPACE = 0x0020
|
7
7
|
|
8
8
|
def self.calculate_latin1_excluded
|
9
|
-
|
10
|
-
_, value = internal_hash.find do |k, v|
|
11
|
-
k == "NGram.LATIN1_EXCLUDE"
|
12
|
-
end
|
13
|
-
|
14
|
-
(0..(value.length - 2)).step(2).map do |index|
|
15
|
-
value[index, 2]
|
16
|
-
end
|
9
|
+
JavaPropertyReader.new(MESSAGES_PROPERTIES)["NGram.LATIN1_EXCLUDE"]
|
17
10
|
end
|
18
11
|
|
19
12
|
LATIN1_EXCLUDED = self.calculate_latin1_excluded()
|
@@ -28,10 +21,9 @@ module Langusta
|
|
28
21
|
internal_hash.select do |key, _|
|
29
22
|
/KANJI_[0-9]{1}/ =~ key
|
30
23
|
end.each do |_, chars|
|
31
|
-
key = chars
|
32
|
-
|
33
|
-
|
34
|
-
m[chars[n, 2]] = key
|
24
|
+
key = chars.first
|
25
|
+
chars.each do |cp|
|
26
|
+
m[cp] = key
|
35
27
|
end
|
36
28
|
end
|
37
29
|
m
|
@@ -44,27 +36,27 @@ module Langusta
|
|
44
36
|
block = UnicodeBlock.of(ch)
|
45
37
|
case block
|
46
38
|
when UnicodeBlock::BASIC_LATIN
|
47
|
-
(ch <
|
39
|
+
(ch < 0x0041 || (ch < 0x0061 && ch > 0x005a) || ch > 0x007a) ? UCS2_SPACE : ch
|
48
40
|
when UnicodeBlock::LATIN_1_SUPPLEMENT
|
49
41
|
LATIN1_EXCLUDED.include?(ch) ? UCS2_SPACE : ch
|
50
42
|
when UnicodeBlock::GENERAL_PUNCTUATION
|
51
43
|
UCS2_SPACE
|
52
44
|
when UnicodeBlock::ARABIC
|
53
|
-
(ch ==
|
45
|
+
(ch == 0x06cc) ? 0x064a : ch
|
54
46
|
when UnicodeBlock::LATIN_EXTENDED_ADDITIONAL
|
55
|
-
(ch >=
|
47
|
+
(ch >= 0x1ea0) ? 0x1ec3 : ch
|
56
48
|
when UnicodeBlock::HIRAGANA
|
57
|
-
|
49
|
+
0x3042
|
58
50
|
when UnicodeBlock::KATAKANA
|
59
|
-
|
51
|
+
0x30a2
|
60
52
|
when UnicodeBlock::BOPOMOFO
|
61
|
-
|
53
|
+
0x3105
|
62
54
|
when UnicodeBlock::BOPOMOFO_EXTENDED
|
63
|
-
|
55
|
+
0x3105
|
64
56
|
when UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS
|
65
57
|
cjk_map.has_key?(ch) ? cjk_map[ch] : ch
|
66
58
|
when UnicodeBlock::HANGUL_SYLLABES
|
67
|
-
|
59
|
+
0xac00
|
68
60
|
else
|
69
61
|
ch
|
70
62
|
end
|
@@ -77,22 +69,25 @@ module Langusta
|
|
77
69
|
|
78
70
|
# Retrieves an n-sized NGram from the current sequence.
|
79
71
|
# @param n [Integer] length of NGram.
|
80
|
-
# @return [
|
72
|
+
# @return [Array<Integer>] n-sized NGram.
|
81
73
|
def get(n)
|
82
74
|
return nil if @capitalword
|
83
75
|
len = @grams.length
|
84
76
|
return nil if n < 1 || n > 3 || len < n
|
85
77
|
if n == 1
|
86
78
|
ch = @grams[len - 1]
|
87
|
-
return (ch == UCS2_SPACE) ? nil :
|
79
|
+
return (ch == UCS2_SPACE) ? nil : [ch]
|
88
80
|
else
|
89
|
-
return
|
81
|
+
return @grams[len - n, len]
|
90
82
|
end
|
91
83
|
end
|
92
84
|
|
93
85
|
# Adds a single character to an NGram sequence.
|
94
|
-
# @param character [
|
86
|
+
# @param character [Fixnum] Two-byte Unicode codepoint.
|
95
87
|
def add_char(character)
|
88
|
+
Guard.klass(character, Fixnum, __method__)
|
89
|
+
Guard.codepoint(character, __method__)
|
90
|
+
|
96
91
|
character = NGram.normalize(character)
|
97
92
|
lastchar = @grams[-1]
|
98
93
|
if lastchar == UCS2_SPACE
|
data/lib/langusta/regex_helper.rb
CHANGED
@@ -1,15 +1,20 @@
|
|
1
1
|
module Langusta
|
2
2
|
module RegexHelper
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
if RUBY_VERSION < "1.9"
|
4
|
+
include Oniguruma
|
5
|
+
|
6
|
+
ROMAN_REGEX = ORegexp.new("^[a-z]$", :options => OPTION_IGNORECASE)
|
7
|
+
INCL_ROMAN_REGEX = ORegexp.new(".*[a-z].*", :options => OPTION_IGNORECASE)
|
8
|
+
URL_REGEX = ORegexp.new("https?://[-_.?&~;+=/#0-9a-z]+", :options => OPTION_IGNORECASE)
|
9
|
+
MAIL_REGEX = ORegexp.new("[-_.0-9a-z]+@[-_0-9a-z]+[-_.0-9a-z]+", :options => OPTION_IGNORECASE)
|
10
|
+
SPACE_REGEX = ORegexp.new(" +")
|
11
|
+
else
|
12
|
+
# /ui stands for UTF-8 case-insensitive regexp.
|
13
|
+
ROMAN_REGEX = /^[a-z]$/ui
|
14
|
+
INCL_ROMAN_REGEX = /.*[a-z].*/ui
|
15
|
+
URL_REGEX = Regexp.new("https?://[-_.?&~;+=/#a-z0-9]+")
|
16
|
+
MAIL_REGEX = /[-_.a-z0-9]+@[-_a-z0-9]+[-_.a-z0-9]+/ui
|
17
|
+
SPACE_REGEX = / +/
|
7
18
|
end
|
8
|
-
|
9
|
-
ROMAN_REGEX = ORegexp.new(_u16("^[A-Za-z]$"), "", "UTF16_BE", "java")
|
10
|
-
INCL_ROMAN_REGEX = ORegexp.new(_u16(".*[A-Za-z].*"), "", "UTF16_BE", "java")
|
11
|
-
URL_REGEX = ORegexp.new(_u16("https?://[-_.?&~;+=/#0-9A-Za-z]+"), "", "UTF16_BE", "java")
|
12
|
-
MAIL_REGEX = ORegexp.new(_u16("[-_.0-9A-Za-z]+@[-_0-9A-Za-z]+[-_.0-9A-Za-z]+"), "", "UTF_16BE", "java")
|
13
|
-
SPACE_REGEX = ORegexp.new(_u16(" +"), "", "UTF16_BE", "java")
|
14
19
|
end
|
15
20
|
end
|
data/lib/langusta/tag_extractor.rb
CHANGED
@@ -7,26 +7,26 @@ module Langusta
|
|
7
7
|
@target = tag
|
8
8
|
@threshold = threshold
|
9
9
|
@count = 0
|
10
|
-
@buffer =
|
10
|
+
@buffer = []
|
11
11
|
@tag = nil
|
12
12
|
end
|
13
13
|
|
14
14
|
def add(line)
|
15
15
|
if @target == @tag && line
|
16
|
-
@buffer
|
16
|
+
@buffer += line
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
20
|
def clear
|
21
21
|
@tag = nil
|
22
|
-
@buffer =
|
22
|
+
@buffer = []
|
23
23
|
end
|
24
24
|
|
25
25
|
def close_tag(profile)
|
26
26
|
if profile && @tag == @target && @buffer.length > @threshold
|
27
27
|
gram = NGram.new
|
28
|
-
@buffer.
|
29
|
-
gram.add_char(
|
28
|
+
@buffer.each do |codepoint|
|
29
|
+
gram.add_char(codepoint)
|
30
30
|
(1..NGram::N_GRAM).each do |n|
|
31
31
|
profile.add(gram.get(n))
|
32
32
|
end
|
data/lib/langusta/unicode_block.rb
CHANGED
@@ -2,44 +2,44 @@ module Langusta
|
|
2
2
|
module UnicodeBlock
|
3
3
|
# Half-baked implementation of Java's UnicodeBlock.
|
4
4
|
|
5
|
-
OTHER
|
6
|
-
BASIC_LATIN
|
7
|
-
LATIN_1_SUPPLEMENT
|
8
|
-
GENERAL_PUNCTUATION
|
9
|
-
ARABIC
|
10
|
-
LATIN_EXTENDED_ADDITIONAL
|
11
|
-
HIRAGANA
|
12
|
-
KATAKANA
|
13
|
-
BOPOMOFO
|
14
|
-
BOPOMOFO_EXTENDED
|
15
|
-
CJK_UNIFIED_IDEOGRAPHS
|
16
|
-
HANGUL_SYLLABES
|
5
|
+
OTHER = 0
|
6
|
+
BASIC_LATIN = 1
|
7
|
+
LATIN_1_SUPPLEMENT = 2
|
8
|
+
GENERAL_PUNCTUATION = 3
|
9
|
+
ARABIC = 4
|
10
|
+
LATIN_EXTENDED_ADDITIONAL = 5
|
11
|
+
HIRAGANA = 6
|
12
|
+
KATAKANA = 7
|
13
|
+
BOPOMOFO = 8
|
14
|
+
BOPOMOFO_EXTENDED = 9
|
15
|
+
CJK_UNIFIED_IDEOGRAPHS = 10
|
16
|
+
HANGUL_SYLLABES = 11
|
17
17
|
|
18
|
-
BASIC_LATIN_RANGE
|
19
|
-
LATIN_1_SUPPLEMENT_RANGE
|
20
|
-
GENERAL_PUNCTUATION_RANGE
|
21
|
-
ARABIC_RANGE
|
22
|
-
LATIN_EXTENDED_ADDITIONAL_RANGE =
|
23
|
-
HIRAGANA_RANGE
|
24
|
-
KATAKANA_RANGE
|
25
|
-
BOPOMOFO_RANGE
|
26
|
-
BOPOMOFO_EXTENDED_RANGE
|
27
|
-
CJK_UNIFIED_IDEOGRAPHS_RANGE
|
28
|
-
HANGUL_SYLLABES_RANGE
|
18
|
+
BASIC_LATIN_RANGE = 0x0000..0x007f
|
19
|
+
LATIN_1_SUPPLEMENT_RANGE = 0x0080..0x00ff
|
20
|
+
GENERAL_PUNCTUATION_RANGE = 0x2000..0x206f
|
21
|
+
ARABIC_RANGE = 0x0600..0x06ff
|
22
|
+
LATIN_EXTENDED_ADDITIONAL_RANGE = 0x1e00..0x1eff
|
23
|
+
HIRAGANA_RANGE = 0x3040..0x309f
|
24
|
+
KATAKANA_RANGE = 0x30a0..0x30ff
|
25
|
+
BOPOMOFO_RANGE = 0x3100..0x31bf
|
26
|
+
BOPOMOFO_EXTENDED_RANGE = 0x31a0..0x31bf
|
27
|
+
CJK_UNIFIED_IDEOGRAPHS_RANGE = 0x4e00..0x9fff
|
28
|
+
HANGUL_SYLLABES_RANGE = 0xac00..0xd7af
|
29
29
|
|
30
30
|
def self.of(character)
|
31
31
|
case character
|
32
|
-
when BASIC_LATIN_RANGE
|
33
|
-
when LATIN_1_SUPPLEMENT_RANGE
|
34
|
-
when GENERAL_PUNCTUATION_RANGE
|
35
|
-
when ARABIC_RANGE
|
32
|
+
when BASIC_LATIN_RANGE then return BASIC_LATIN
|
33
|
+
when LATIN_1_SUPPLEMENT_RANGE then return LATIN_1_SUPPLEMENT
|
34
|
+
when GENERAL_PUNCTUATION_RANGE then return GENERAL_PUNCTUATION
|
35
|
+
when ARABIC_RANGE then return ARABIC
|
36
36
|
when LATIN_EXTENDED_ADDITIONAL_RANGE then return LATIN_EXTENDED_ADDITIONAL
|
37
|
-
when HIRAGANA_RANGE
|
38
|
-
when KATAKANA_RANGE
|
39
|
-
when BOPOMOFO_RANGE
|
40
|
-
when BOPOMOFO_EXTENDED_RANGE
|
41
|
-
when CJK_UNIFIED_IDEOGRAPHS_RANGE
|
42
|
-
when HANGUL_SYLLABES_RANGE
|
37
|
+
when HIRAGANA_RANGE then return HIRAGANA
|
38
|
+
when KATAKANA_RANGE then return KATAKANA
|
39
|
+
when BOPOMOFO_RANGE then return BOPOMOFO
|
40
|
+
when BOPOMOFO_EXTENDED_RANGE then return BOPOMOFO_EXTENDED
|
41
|
+
when CJK_UNIFIED_IDEOGRAPHS_RANGE then return CJK_UNIFIED_IDEOGRAPHS
|
42
|
+
when HANGUL_SYLLABES_RANGE then return HANGUL_SYLLABES
|
43
43
|
else
|
44
44
|
return OTHER
|
45
45
|
end
|
@@ -50,7 +50,7 @@ module Langusta
|
|
50
50
|
end
|
51
51
|
|
52
52
|
def self.compute_upper_case_table
|
53
|
-
File.open(UPPERCASE_BIN).read
|
53
|
+
File.open(UPPERCASE_BIN).read.unpack('n*')
|
54
54
|
end
|
55
55
|
end
|
56
56
|
end
|
data/test/helper.rb
CHANGED
@@ -1,20 +1,29 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bundler'
|
3
3
|
begin
|
4
|
-
Bundler.setup(:default, :
|
4
|
+
Bundler.setup(:default, :test)
|
5
5
|
rescue Bundler::BundlerError => e
|
6
6
|
$stderr.puts e.message
|
7
7
|
$stderr.puts "Run `bundle install` to install missing gems"
|
8
8
|
exit e.status_code
|
9
9
|
end
|
10
10
|
require 'test/unit'
|
11
|
+
require 'mocha'
|
11
12
|
|
12
13
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
14
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
15
|
require 'langusta'
|
15
|
-
require 'ruby-debug'
|
16
|
-
require 'mocha'
|
17
16
|
|
18
17
|
class Test::Unit::TestCase
|
19
18
|
include Langusta
|
19
|
+
|
20
|
+
def str2cp(ascii_string)
|
21
|
+
Langusta.utf82cp(ascii_string)
|
22
|
+
end
|
23
|
+
|
24
|
+
def utf82cp(utf8_string)
|
25
|
+
Langusta.utf82cp(utf8_string)
|
26
|
+
end
|
20
27
|
end
|
28
|
+
|
29
|
+
$debug = true
|