unichars 0.3 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/glib/glib.c +4 -0
- data/lib/chars.rb +4 -0
- data/lib/unichars.rb +36 -0
- metadata +3 -3
data/ext/glib/glib.c
CHANGED
@@ -35,6 +35,7 @@ static VALUE utf8_upcase(VALUE self, VALUE string)
|
|
35
35
|
Check_Type(string, T_STRING);
|
36
36
|
temp = g_utf8_strup(StringValuePtr(string), RSTRING(string)->len);
|
37
37
|
result = rb_str_new2(temp);
|
38
|
+
free(temp);
|
38
39
|
|
39
40
|
return result;
|
40
41
|
}
|
@@ -55,6 +56,7 @@ static VALUE utf8_downcase(VALUE self, VALUE string)
|
|
55
56
|
Check_Type(string, T_STRING);
|
56
57
|
temp = g_utf8_strdown(StringValuePtr(string), RSTRING(string)->len);
|
57
58
|
result = rb_str_new2(temp);
|
59
|
+
free(temp);
|
58
60
|
|
59
61
|
return result;
|
60
62
|
}
|
@@ -75,6 +77,7 @@ static VALUE utf8_reverse(VALUE self, VALUE string)
|
|
75
77
|
Check_Type(string, T_STRING);
|
76
78
|
temp = g_utf8_strreverse(StringValuePtr(string), RSTRING(string)->len);
|
77
79
|
result = rb_str_new2(temp);
|
80
|
+
free(temp);
|
78
81
|
|
79
82
|
return result;
|
80
83
|
}
|
@@ -116,6 +119,7 @@ static VALUE utf8_normalize(VALUE self, VALUE string, VALUE form)
|
|
116
119
|
|
117
120
|
temp = g_utf8_normalize(StringValuePtr(string), RSTRING(string)->len, mode);
|
118
121
|
result = rb_str_new2(temp);
|
122
|
+
free(temp);
|
119
123
|
|
120
124
|
return result;
|
121
125
|
}
|
data/lib/chars.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
# Chars is a proxy class for String. It's used by Unichars as a trimmed down version of ActiveSupport::Multibyte::Chars
|
4
|
+
# when ActiveSupport isn't loaded.
|
5
|
+
#
|
6
|
+
# Chars.new('João') #=> #<Chars:0x34c240 @wrapped_string="João">
|
3
7
|
class Chars
|
4
8
|
attr_reader :wrapped_string
|
5
9
|
alias to_s wrapped_string
|
data/lib/unichars.rb
CHANGED
@@ -9,29 +9,65 @@ rescue NameError
|
|
9
9
|
class Unichars < Chars; end
|
10
10
|
end
|
11
11
|
|
12
|
+
# Unichars is a proxy class for String. It wraps a String and implements UTF-8 safe versions of various String operations.
|
13
|
+
# Unimplemented methods are forwarded to the wrapped string.
|
14
|
+
#
|
15
|
+
# Unichars uses ActiveSupport::Multibyte::Chars as its superclass if it's loaded. Otherwise it will use its own Chars class
|
16
|
+
# which is basically a trimmed down version of ActiveSupport's Chars class.
|
17
|
+
#
|
18
|
+
# require 'unichars'
|
19
|
+
# Unichars.superclass #=> Chars
|
20
|
+
#
|
21
|
+
# require 'active_support'
|
22
|
+
# require 'unichars'
|
23
|
+
# Unichars.superclass #=> ActiveSupport::Multibyte::Chars
|
24
|
+
#
|
25
|
+
# Note that all the operations on strings are implemented using Glib2, so the outcome of the methods will be influenced by the
|
26
|
+
# Glib2 version installed on your system.
|
12
27
|
class Unichars
|
28
|
+
# Valid normalization forms
|
13
29
|
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
|
14
30
|
|
15
31
|
class << self
|
16
32
|
attr_accessor :default_normalization_form
|
17
33
|
end
|
18
34
|
|
35
|
+
# Returns the length of the string expressed in codepoints.
|
36
|
+
#
|
37
|
+
# Unichars.new('A ehm…, word.').size #=> 13
|
19
38
|
def size
|
20
39
|
Glib.utf8_size(@wrapped_string)
|
21
40
|
end
|
22
41
|
|
42
|
+
# Returns a Unichars instance with the string in capitals if they are available for the supplied string.
|
43
|
+
#
|
44
|
+
# Unichars.new('Sluß').upcase.to_s #=> SLUSS
|
23
45
|
def upcase
|
24
46
|
self.class.new(Glib.utf8_upcase(@wrapped_string))
|
25
47
|
end
|
26
48
|
|
49
|
+
# Returns a Unichars instance with the string in lowercase characters if they are available for the supplied string.
|
50
|
+
#
|
51
|
+
# Unichars.new('ORGANISÉE').downcase.to_s #=> organisée
|
27
52
|
def downcase
|
28
53
|
self.class.new(Glib.utf8_downcase(@wrapped_string))
|
29
54
|
end
|
30
55
|
|
56
|
+
# Returns a Unichars instance with the string in reverse order.
|
57
|
+
#
|
58
|
+
# Unichars.new('Comment ça va?').reverse.to_s #=> av aç tnemmoC
|
31
59
|
def reverse
|
32
60
|
self.class.new(Glib.utf8_reverse(@wrapped_string))
|
33
61
|
end
|
34
62
|
|
63
|
+
# Returns a Unichars instance with the string in normalized form. See http://www.unicode.org/reports/tr15/tr15-29.html
|
64
|
+
# for more information about normalization.
|
65
|
+
#
|
66
|
+
# <i>form</i> can be one of the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
|
67
|
+
#
|
68
|
+
# decomposed = [101, 769].pack('U*')
|
69
|
+
# composed = Unichars.new(decomposed).normalize(:kc)
|
70
|
+
# composed.to_s.unpack('U*') #=> [233]
|
35
71
|
def normalize(form=Unichars.default_normalization_form)
|
36
72
|
self.class.new(Glib.utf8_normalize(@wrapped_string, form))
|
37
73
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unichars
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.3"
|
4
|
+
version: "0.4"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manfred Stienstra
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-01-06 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements: []
|
54
54
|
|
55
55
|
rubyforge_project:
|
56
|
-
rubygems_version: 1.
|
56
|
+
rubygems_version: 1.3.1
|
57
57
|
signing_key:
|
58
58
|
specification_version: 2
|
59
59
|
summary: Unichars is a wrapper around Glib2 UTF8 functions. It was written to speed up ActiveSupport::Multibyte, but I'm sure people can find other uses for it.
|