langusta 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +7 -0
- data/Gemfile +10 -7
- data/Gemfile.lock +12 -16
- data/{README.rdoc → README.md} +27 -10
- data/Rakefile +3 -10
- data/VERSION +1 -1
- data/langusta.gemspec +23 -47
- data/lib/langusta.rb +36 -10
- data/lib/langusta/codepoints.rb +19 -0
- data/lib/langusta/command.rb +3 -3
- data/lib/langusta/detector.rb +16 -13
- data/lib/langusta/detector_factory.rb +11 -5
- data/lib/langusta/guard.rb +22 -0
- data/lib/langusta/inspector.rb +7 -0
- data/lib/langusta/java_property_reader.rb +2 -3
- data/lib/langusta/lang_profile.rb +12 -18
- data/lib/langusta/language_detection_facade.rb +2 -2
- data/lib/langusta/n_gram.rb +20 -25
- data/lib/langusta/regex_helper.rb +15 -10
- data/lib/langusta/tag_extractor.rb +5 -5
- data/lib/langusta/unicode_block.rb +34 -34
- data/test/helper.rb +12 -3
- data/test/quality/test_falsified.rb +3 -3
- data/test/test_command.rb +1 -0
- data/test/test_detector.rb +18 -17
- data/test/test_detector_factory.rb +17 -5
- data/test/test_java_property_reader.rb +2 -1
- data/test/test_lang_profile.rb +37 -31
- data/test/test_language.rb +1 -0
- data/test/test_language_detection_facade.rb +1 -1
- data/test/test_langusta.rb +6 -6
- data/test/test_n_gram.rb +87 -75
- data/test/test_tag_extractor.rb +19 -18
- data/test/test_unicode_block.rb +2 -1
- metadata +54 -156
- data/lib/langusta/ucs2_string.rb +0 -70
- data/test/test_ucs2_string.rb +0 -9
@@ -1,5 +1,7 @@
|
|
1
1
|
module Langusta
|
2
2
|
class DetectorFactory
|
3
|
+
include Inspector
|
4
|
+
|
3
5
|
attr_reader :word_lang_prob_map, :lang_list
|
4
6
|
|
5
7
|
def initialize
|
@@ -11,15 +13,15 @@ module Langusta
|
|
11
13
|
# Registers a language profile with this factory.
#
# The profile name is appended to the language list. For every n-gram in
# profile.freq, its count divided by the number of n-grams of the same
# length (profile.n_words) is stored in the word/language probability map
# at the index of the newly added language. Entries for languages that do
# not contain a given n-gram are not filled in.
#
# @param profile [LangProfile] language profile to be added.
# @raise [DuplicateProfilesError] if a profile with the same name was already added.
def add_profile(profile)
  raise DuplicateProfilesError.new(profile.name) if @lang_list.include?(profile.name)

  @lang_list << profile.name
  # Index of the language that has just been appended.
  lang_index = @lang_list.size - 1

  profile.freq.keys.each do |word|
    @word_lang_prob_map[word] ||= []
    # 1.0 * forces floating point division.
    prob = 1.0 * profile.freq[word] / profile.n_words[word.length - 1]
    @word_lang_prob_map[word][lang_index] = prob
  end
end
|
25
27
|
|
@@ -35,6 +37,10 @@ module Langusta
|
|
35
37
|
end
|
36
38
|
end
|
37
39
|
|
40
|
+
# Short summary of this object: class name, object pointer and the number
# of loaded profiles.
# NOTE(review): object_ptr is not defined in this view; presumably it comes
# from the included Inspector module - confirm.
# @return [String]
def inspect
  # The closing ">" balances the opening "#<".
  "#<#{self.class.name}:0x#{object_ptr} (#{@lang_list.size} profile(s))>"
end
|
43
|
+
|
38
44
|
private
|
39
45
|
def create_detector
|
40
46
|
raise NoProfilesLoadedError if @lang_list.empty?
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Langusta
  # Debug-time argument checks. Every check returns immediately unless the
  # global $debug flag is set.
  module Guard
    # Inclusive range of codepoints accepted by the codepoint checks.
    CODEPOINT_RANGE = (0x0000..0xffff)

    # Checks that an argument is an instance of the expected class.
    # @param argument [Object] value to check.
    # @param klass [Class] expected class.
    # @param _method [Symbol, String] name of the calling method, used in the error message.
    # @raise [TypeError] if argument is not a klass.
    def self.klass(argument, klass, _method)
      return unless $debug
      return if argument.is_a?(klass)

      raise TypeError, "#{_method}: expected #{klass} got: #{argument.class}"
    end

    # Checks that a codepoint lies within 0x0000..0xffff.
    # @param codepoint [Integer] value to check.
    # @param _method [Symbol, String] name of the calling method, used in the error message.
    # @raise [ArgumentError] if the codepoint is out of range.
    def self.codepoint(codepoint, _method)
      return unless $debug
      return if CODEPOINT_RANGE.include?(codepoint)

      raise ArgumentError, [_method, ':', codepoint.to_s(16)].join
    end

    # Checks that the argument is an Array whose elements all lie within
    # 0x0000..0xffff.
    # @param array [Array<Integer>] codepoints to check.
    # @param _method [Symbol, String] name of the calling method, used in the error message.
    # @raise [TypeError] if array is not an Array.
    # @raise [ArgumentError] if any element is out of range; the message names the first such element.
    def self.codepoint_array(array, _method)
      return unless $debug
      raise TypeError, "#{_method}: expected Array, got: #{array.class}" unless array.is_a?(Array)

      # Search by position rather than by value, so that the offending element
      # is available for the message and nil/false elements are caught too.
      position = array.index { |cp| !CODEPOINT_RANGE.include?(cp) }
      return if position.nil?

      raise ArgumentError, "#{_method}: bad codepoint: #{array[position].inspect}"
    end
  end
end
|
@@ -3,7 +3,7 @@ module Langusta
|
|
3
3
|
# This is a minimal implementation, don't expect this to actually work.
|
4
4
|
|
5
5
|
# Reads all lines of the given file and parses them.
# @param filename [String] path of the file to read.
def initialize(filename)
  # File.readlines opens, reads and closes the file in a single call, so no
  # file handle is left open.
  @lines = File.readlines(filename)
  parse()
end
|
9
9
|
|
@@ -28,8 +28,7 @@ module Langusta
|
|
28
28
|
codepoints = value.scan(/([0-9A-F]{4})/)
|
29
29
|
codepoints.map do |cp|
|
30
30
|
int_cp = cp.first.to_i(16)
|
31
|
-
|
32
|
-
end.join
|
31
|
+
end
|
33
32
|
end
|
34
33
|
end
|
35
34
|
end
|
@@ -11,34 +11,29 @@ module Langusta
|
|
11
11
|
# @return [LangProfile]
|
12
12
|
def self.load_from_file(filename)
  # The block form closes the file as soon as parsing has finished.
  json = File.open(filename) { |io| Yajl::Parser.parse(io) }

  # Report a missing frequency table the same way as the other missing keys.
  raw_freq = json['freq'] || (raise CorruptProfileError.new("Missing frequency table"))

  # Profile keys are UTF-8 strings; they are converted with Langusta.utf82cp
  # before being used as hash keys.
  freq = raw_freq.inject({}) do |acc, (key, value)|
    acc[Langusta.utf82cp(key)] = value
    acc
  end

  name = json['name'] || (raise CorruptProfileError.new("Missing profile name"))
  n_words = json['n_words'] || (raise CorruptProfileError.new("Missing number of words value"))

  self.new(name, freq, n_words)
end
|
32
25
|
|
33
|
-
def
|
26
|
+
def initialize(name, freq={}, n_words = Array.new(NGram::N_GRAM, 0))
|
27
|
+
Guard.klass(name, String, __method__)
|
34
28
|
@name, @freq, @n_words = name, freq, n_words
|
35
29
|
end
|
36
30
|
|
37
31
|
# Adds a given NGram to this language profile. This operation is expected to be invoked multiple times for the same arguments.
|
38
|
-
# @param gram [
|
32
|
+
# @param gram [Array<Fixnum>]
|
39
33
|
def add(gram)
|
40
|
-
|
41
|
-
|
34
|
+
return if gram.nil?
|
35
|
+
Guard.klass(gram, Array, __method__)
|
36
|
+
|
42
37
|
length = gram.size
|
43
38
|
return if length < 1 or length > NGram::N_GRAM
|
44
39
|
@n_words[length - 1] += 1
|
@@ -47,7 +42,6 @@ module Langusta
|
|
47
42
|
end
|
48
43
|
|
49
44
|
def omit_less_freq
|
50
|
-
return if @name.nil?
|
51
45
|
threshold = @n_words[0] / LESS_FREQ_RATIO
|
52
46
|
threshold = MINIMUM_FREQ if threshold < MINIMUM_FREQ
|
53
47
|
keys = Set.new(@freq.keys)
|
@@ -59,7 +53,7 @@ module Langusta
|
|
59
53
|
@freq.delete(key)
|
60
54
|
else
|
61
55
|
# temp workaround
|
62
|
-
if RegexHelper::ROMAN_REGEX.match(key
|
56
|
+
if RegexHelper::ROMAN_REGEX.match(Langusta.cp2utf8(key))
|
63
57
|
roman += count
|
64
58
|
end
|
65
59
|
end
|
@@ -69,7 +63,7 @@ module Langusta
|
|
69
63
|
keys2 = Set.new(@freq.keys)
|
70
64
|
keys2.each do |key|
|
71
65
|
# temp workaround
|
72
|
-
if RegexHelper::INCL_ROMAN_REGEX.match(key
|
66
|
+
if RegexHelper::INCL_ROMAN_REGEX.match(Langusta.cp2utf8(key))
|
73
67
|
@n_words[key.size - 1] -= @freq[key]
|
74
68
|
@freq.delete(key)
|
75
69
|
end
|
@@ -3,8 +3,8 @@ module Langusta
|
|
3
3
|
def initialize
|
4
4
|
@factory = DetectorFactory.new
|
5
5
|
profiles = load_profiles()
|
6
|
-
profiles.
|
7
|
-
@factory.add_profile(profile
|
6
|
+
profiles.each do |profile|
|
7
|
+
@factory.add_profile(profile)
|
8
8
|
end
|
9
9
|
end
|
10
10
|
|
data/lib/langusta/n_gram.rb
CHANGED
@@ -3,17 +3,10 @@ module Langusta
|
|
3
3
|
# constructed on a character by character basis.
|
4
4
|
class NGram
|
5
5
|
N_GRAM = 3
|
6
|
-
UCS2_SPACE =
|
6
|
+
UCS2_SPACE = 0x0020
|
7
7
|
|
8
8
|
def self.calculate_latin1_excluded
|
9
|
-
|
10
|
-
_, value = internal_hash.find do |k, v|
|
11
|
-
k == "NGram.LATIN1_EXCLUDE"
|
12
|
-
end
|
13
|
-
|
14
|
-
(0..(value.length - 2)).step(2).map do |index|
|
15
|
-
value[index, 2]
|
16
|
-
end
|
9
|
+
JavaPropertyReader.new(MESSAGES_PROPERTIES)["NGram.LATIN1_EXCLUDE"]
|
17
10
|
end
|
18
11
|
|
19
12
|
LATIN1_EXCLUDED = self.calculate_latin1_excluded()
|
@@ -28,10 +21,9 @@ module Langusta
|
|
28
21
|
internal_hash.select do |key, _|
|
29
22
|
/KANJI_[0-9]{1}/ =~ key
|
30
23
|
end.each do |_, chars|
|
31
|
-
key = chars
|
32
|
-
|
33
|
-
|
34
|
-
m[chars[n, 2]] = key
|
24
|
+
key = chars.first
|
25
|
+
chars.each do |cp|
|
26
|
+
m[cp] = key
|
35
27
|
end
|
36
28
|
end
|
37
29
|
m
|
@@ -44,27 +36,27 @@ module Langusta
|
|
44
36
|
block = UnicodeBlock.of(ch)
|
45
37
|
case block
|
46
38
|
when UnicodeBlock::BASIC_LATIN
|
47
|
-
(ch <
|
39
|
+
(ch < 0x0041 || (ch < 0x0061 && ch > 0x005a) || ch > 0x007a) ? UCS2_SPACE : ch
|
48
40
|
when UnicodeBlock::LATIN_1_SUPPLEMENT
|
49
41
|
LATIN1_EXCLUDED.include?(ch) ? UCS2_SPACE : ch
|
50
42
|
when UnicodeBlock::GENERAL_PUNCTUATION
|
51
43
|
UCS2_SPACE
|
52
44
|
when UnicodeBlock::ARABIC
|
53
|
-
(ch ==
|
45
|
+
(ch == 0x06cc) ? 0x064a : ch
|
54
46
|
when UnicodeBlock::LATIN_EXTENDED_ADDITIONAL
|
55
|
-
(ch >=
|
47
|
+
(ch >= 0x1ea0) ? 0x1ec3 : ch
|
56
48
|
when UnicodeBlock::HIRAGANA
|
57
|
-
|
49
|
+
0x3042
|
58
50
|
when UnicodeBlock::KATAKANA
|
59
|
-
|
51
|
+
0x30a2
|
60
52
|
when UnicodeBlock::BOPOMOFO
|
61
|
-
|
53
|
+
0x3105
|
62
54
|
when UnicodeBlock::BOPOMOFO_EXTENDED
|
63
|
-
|
55
|
+
0x3105
|
64
56
|
when UnicodeBlock::CJK_UNIFIED_IDEOGRAPHS
|
65
57
|
cjk_map.has_key?(ch) ? cjk_map[ch] : ch
|
66
58
|
when UnicodeBlock::HANGUL_SYLLABES
|
67
|
-
|
59
|
+
0xac00
|
68
60
|
else
|
69
61
|
ch
|
70
62
|
end
|
@@ -77,22 +69,25 @@ module Langusta
|
|
77
69
|
|
78
70
|
# Retrieves an n-sized NGram from the current sequence.
# @param n [Integer] length of NGram.
# @return [Array<Integer>, nil] the last n elements of the current sequence;
#   nil when the capital-word flag is set, when n is outside 1..N_GRAM, when
#   fewer than n elements are available, or when n is 1 and the last element
#   is UCS2_SPACE.
def get(n)
  return nil if @capitalword

  len = @grams.length
  return nil if n < 1 || n > N_GRAM || len < n

  if n == 1
    ch = @grams[len - 1]
    ch == UCS2_SPACE ? nil : [ch]
  else
    # Slice exactly n elements, counted from the end.
    @grams[len - n, n]
  end
end
|
92
84
|
|
93
85
|
# Adds a single character to an NGram sequence.
|
94
|
-
# @param character [
|
86
|
+
# @param character [Fixnum] Two-byte Unicode codepoint.
|
95
87
|
def add_char(character)
|
88
|
+
Guard.klass(character, Fixnum, __method__)
|
89
|
+
Guard.codepoint(character, __method__)
|
90
|
+
|
96
91
|
character = NGram.normalize(character)
|
97
92
|
lastchar = @grams[-1]
|
98
93
|
if lastchar == UCS2_SPACE
|
@@ -1,15 +1,20 @@
|
|
1
1
|
module Langusta
  # Regular expressions shared across the library. On Ruby older than 1.9
  # they are built with Oniguruma's ORegexp, otherwise with native Regexp
  # literals.
  module RegexHelper
    if RUBY_VERSION < "1.9"
      include Oniguruma

      ROMAN_REGEX = ORegexp.new("^[a-z]$", :options => OPTION_IGNORECASE)
      INCL_ROMAN_REGEX = ORegexp.new(".*[a-z].*", :options => OPTION_IGNORECASE)
      URL_REGEX = ORegexp.new("https?://[-_.?&~;+=/#0-9a-z]+", :options => OPTION_IGNORECASE)
      MAIL_REGEX = ORegexp.new("[-_.0-9a-z]+@[-_0-9a-z]+[-_.0-9a-z]+", :options => OPTION_IGNORECASE)
      SPACE_REGEX = ORegexp.new(" +")
    else
      # /ui stands for UTF-8 case-insensitive regexp.
      ROMAN_REGEX = /^[a-z]$/ui
      INCL_ROMAN_REGEX = /.*[a-z].*/ui
      # Case-insensitive like the other patterns and like the Oniguruma
      # variant above; without the flag upper-case URLs are not matched.
      URL_REGEX = %r{https?://[-_.?&~;+=/#a-z0-9]+}ui
      MAIL_REGEX = /[-_.a-z0-9]+@[-_a-z0-9]+[-_.a-z0-9]+/ui
      SPACE_REGEX = / +/
    end
  end
end
|
@@ -7,26 +7,26 @@ module Langusta
|
|
7
7
|
@target = tag
|
8
8
|
@threshold = threshold
|
9
9
|
@count = 0
|
10
|
-
@buffer =
|
10
|
+
@buffer = []
|
11
11
|
@tag = nil
|
12
12
|
end
|
13
13
|
|
14
14
|
def add(line)
|
15
15
|
if @target == @tag && line
|
16
|
-
@buffer
|
16
|
+
@buffer += line
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
20
|
def clear
|
21
21
|
@tag = nil
|
22
|
-
@buffer =
|
22
|
+
@buffer = []
|
23
23
|
end
|
24
24
|
|
25
25
|
def close_tag(profile)
|
26
26
|
if profile && @tag == @target && @buffer.length > @threshold
|
27
27
|
gram = NGram.new
|
28
|
-
@buffer.
|
29
|
-
gram.add_char(
|
28
|
+
@buffer.each do |codepoint|
|
29
|
+
gram.add_char(codepoint)
|
30
30
|
(1..NGram::N_GRAM).each do |n|
|
31
31
|
profile.add(gram.get(n))
|
32
32
|
end
|
@@ -2,44 +2,44 @@ module Langusta
|
|
2
2
|
module UnicodeBlock
|
3
3
|
# Half-baked implementation of Java's UnicodeBlock.
|
4
4
|
|
5
|
-
OTHER
|
6
|
-
BASIC_LATIN
|
7
|
-
LATIN_1_SUPPLEMENT
|
8
|
-
GENERAL_PUNCTUATION
|
9
|
-
ARABIC
|
10
|
-
LATIN_EXTENDED_ADDITIONAL
|
11
|
-
HIRAGANA
|
12
|
-
KATAKANA
|
13
|
-
BOPOMOFO
|
14
|
-
BOPOMOFO_EXTENDED
|
15
|
-
CJK_UNIFIED_IDEOGRAPHS
|
16
|
-
HANGUL_SYLLABES
|
5
|
+
OTHER = 0
|
6
|
+
BASIC_LATIN = 1
|
7
|
+
LATIN_1_SUPPLEMENT = 2
|
8
|
+
GENERAL_PUNCTUATION = 3
|
9
|
+
ARABIC = 4
|
10
|
+
LATIN_EXTENDED_ADDITIONAL = 5
|
11
|
+
HIRAGANA = 6
|
12
|
+
KATAKANA = 7
|
13
|
+
BOPOMOFO = 8
|
14
|
+
BOPOMOFO_EXTENDED = 9
|
15
|
+
CJK_UNIFIED_IDEOGRAPHS = 10
|
16
|
+
HANGUL_SYLLABES = 11
|
17
17
|
|
18
|
-
BASIC_LATIN_RANGE
|
19
|
-
LATIN_1_SUPPLEMENT_RANGE
|
20
|
-
GENERAL_PUNCTUATION_RANGE
|
21
|
-
ARABIC_RANGE
|
22
|
-
LATIN_EXTENDED_ADDITIONAL_RANGE =
|
23
|
-
HIRAGANA_RANGE
|
24
|
-
KATAKANA_RANGE
|
25
|
-
BOPOMOFO_RANGE
|
26
|
-
BOPOMOFO_EXTENDED_RANGE
|
27
|
-
CJK_UNIFIED_IDEOGRAPHS_RANGE
|
28
|
-
HANGUL_SYLLABES_RANGE
|
18
|
+
# Codepoint ranges of the supported Unicode blocks.
BASIC_LATIN_RANGE = 0x0000..0x007f
LATIN_1_SUPPLEMENT_RANGE = 0x0080..0x00ff
GENERAL_PUNCTUATION_RANGE = 0x2000..0x206f
ARABIC_RANGE = 0x0600..0x06ff
LATIN_EXTENDED_ADDITIONAL_RANGE = 0x1e00..0x1eff
HIRAGANA_RANGE = 0x3040..0x309f
KATAKANA_RANGE = 0x30a0..0x30ff
# The Bopomofo block ends at U+312F. A wider range would swallow
# U+3130..U+319F and the whole Bopomofo Extended block below.
BOPOMOFO_RANGE = 0x3100..0x312f
BOPOMOFO_EXTENDED_RANGE = 0x31a0..0x31bf
CJK_UNIFIED_IDEOGRAPHS_RANGE = 0x4e00..0x9fff
HANGUL_SYLLABES_RANGE = 0xac00..0xd7af
|
29
29
|
|
30
30
|
def self.of(character)
|
31
31
|
case character
|
32
|
-
when BASIC_LATIN_RANGE
|
33
|
-
when LATIN_1_SUPPLEMENT_RANGE
|
34
|
-
when GENERAL_PUNCTUATION_RANGE
|
35
|
-
when ARABIC_RANGE
|
32
|
+
when BASIC_LATIN_RANGE then return BASIC_LATIN
|
33
|
+
when LATIN_1_SUPPLEMENT_RANGE then return LATIN_1_SUPPLEMENT
|
34
|
+
when GENERAL_PUNCTUATION_RANGE then return GENERAL_PUNCTUATION
|
35
|
+
when ARABIC_RANGE then return ARABIC
|
36
36
|
when LATIN_EXTENDED_ADDITIONAL_RANGE then return LATIN_EXTENDED_ADDITIONAL
|
37
|
-
when HIRAGANA_RANGE
|
38
|
-
when KATAKANA_RANGE
|
39
|
-
when BOPOMOFO_RANGE
|
40
|
-
when BOPOMOFO_EXTENDED_RANGE
|
41
|
-
when CJK_UNIFIED_IDEOGRAPHS_RANGE
|
42
|
-
when HANGUL_SYLLABES_RANGE
|
37
|
+
when HIRAGANA_RANGE then return HIRAGANA
|
38
|
+
when KATAKANA_RANGE then return KATAKANA
|
39
|
+
when BOPOMOFO_RANGE then return BOPOMOFO
|
40
|
+
when BOPOMOFO_EXTENDED_RANGE then return BOPOMOFO_EXTENDED
|
41
|
+
when CJK_UNIFIED_IDEOGRAPHS_RANGE then return CJK_UNIFIED_IDEOGRAPHS
|
42
|
+
when HANGUL_SYLLABES_RANGE then return HANGUL_SYLLABES
|
43
43
|
else
|
44
44
|
return OTHER
|
45
45
|
end
|
@@ -50,7 +50,7 @@ module Langusta
|
|
50
50
|
end
|
51
51
|
|
52
52
|
# Loads the table stored in the UPPERCASE_BIN file.
# The file content is decoded as a sequence of 16-bit big-endian unsigned
# integers.
# @return [Array<Integer>]
def self.compute_upper_case_table
  # Binary mode avoids newline/encoding translation of the raw bytes; the
  # block form closes the file handle.
  File.open(UPPERCASE_BIN, 'rb') { |io| io.read.unpack('n*') }
end
|
55
55
|
end
|
56
56
|
end
|
data/test/helper.rb
CHANGED
@@ -1,20 +1,29 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bundler'
|
3
3
|
begin
|
4
|
-
Bundler.setup(:default, :
|
4
|
+
Bundler.setup(:default, :test)
|
5
5
|
rescue Bundler::BundlerError => e
|
6
6
|
$stderr.puts e.message
|
7
7
|
$stderr.puts "Run `bundle install` to install missing gems"
|
8
8
|
exit e.status_code
|
9
9
|
end
|
10
10
|
require 'test/unit'
|
11
|
+
require 'mocha'
|
11
12
|
|
12
13
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
14
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
15
|
require 'langusta'
|
15
|
-
require 'ruby-debug'
|
16
|
-
require 'mocha'
|
17
16
|
|
18
17
|
class Test::Unit::TestCase
|
19
18
|
include Langusta
|
19
|
+
|
20
|
+
def str2cp(ascii_string)
|
21
|
+
Langusta.utf82cp(ascii_string)
|
22
|
+
end
|
23
|
+
|
24
|
+
def utf82cp(utf8_string)
|
25
|
+
Langusta.utf82cp(utf8_string)
|
26
|
+
end
|
20
27
|
end
|
28
|
+
|
29
|
+
$debug = true
|