unichars 0.3 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/glib/glib.c +4 -0
- data/lib/chars.rb +4 -0
- data/lib/unichars.rb +36 -0
- metadata +3 -3
data/ext/glib/glib.c
CHANGED
@@ -35,6 +35,7 @@ static VALUE utf8_upcase(VALUE self, VALUE string)
|
|
35
35
|
Check_Type(string, T_STRING);
|
36
36
|
temp = g_utf8_strup(StringValuePtr(string), RSTRING(string)->len);
|
37
37
|
result = rb_str_new2(temp);
|
38
|
+
free(temp);
|
38
39
|
|
39
40
|
return result;
|
40
41
|
}
|
@@ -55,6 +56,7 @@ static VALUE utf8_downcase(VALUE self, VALUE string)
|
|
55
56
|
Check_Type(string, T_STRING);
|
56
57
|
temp = g_utf8_strdown(StringValuePtr(string), RSTRING(string)->len);
|
57
58
|
result = rb_str_new2(temp);
|
59
|
+
free(temp);
|
58
60
|
|
59
61
|
return result;
|
60
62
|
}
|
@@ -75,6 +77,7 @@ static VALUE utf8_reverse(VALUE self, VALUE string)
|
|
75
77
|
Check_Type(string, T_STRING);
|
76
78
|
temp = g_utf8_strreverse(StringValuePtr(string), RSTRING(string)->len);
|
77
79
|
result = rb_str_new2(temp);
|
80
|
+
free(temp);
|
78
81
|
|
79
82
|
return result;
|
80
83
|
}
|
@@ -116,6 +119,7 @@ static VALUE utf8_normalize(VALUE self, VALUE string, VALUE form)
|
|
116
119
|
|
117
120
|
temp = g_utf8_normalize(StringValuePtr(string), RSTRING(string)->len, mode);
|
118
121
|
result = rb_str_new2(temp);
|
122
|
+
free(temp);
|
119
123
|
|
120
124
|
return result;
|
121
125
|
}
|
data/lib/chars.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
# Chars is a proxy class for String. It's used by Unichars as a trimmed-down version of ActiveSupport::Multibyte::Chars
|
4
|
+
# when ActiveSupport isn't loaded.
|
5
|
+
#
|
6
|
+
# Chars.new('João') #=> #<Chars:0x34c240 @wrapped_string="João">
|
3
7
|
class Chars
|
4
8
|
attr_reader :wrapped_string
|
5
9
|
alias to_s wrapped_string
|
data/lib/unichars.rb
CHANGED
@@ -9,29 +9,65 @@ rescue NameError
|
|
9
9
|
class Unichars < Chars; end
|
10
10
|
end
|
11
11
|
|
12
|
+
# Unichars is a proxy class for String. It wraps a String and implements UTF-8 safe versions of various String operations.
|
13
|
+
# Unimplemented methods are forwarded to the wrapped string.
|
14
|
+
#
|
15
|
+
# Unichars uses ActiveSupport::Multibyte::Chars as its superclass if it's loaded. Otherwise it will use its own Chars class
|
16
|
+
# which is basically a trimmed down version of ActiveSupport's Chars class.
|
17
|
+
#
|
18
|
+
# require 'unichars'
|
19
|
+
# Unichars.superclass #=> Chars
|
20
|
+
#
|
21
|
+
# require 'active_support'
|
22
|
+
# require 'unichars'
|
23
|
+
# Unichars.superclass #=> ActiveSupport::Multibyte::Chars
|
24
|
+
#
|
25
|
+
# Note that all the operations on strings are implemented using Glib2, so the outcome of the methods will be influenced by the
|
26
|
+
# Glib2 version installed on your system.
|
12
27
|
class Unichars
|
28
|
+
# Valid normalization forms
|
13
29
|
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
|
14
30
|
|
15
31
|
class << self
|
16
32
|
attr_accessor :default_normalization_form
|
17
33
|
end
|
18
34
|
|
35
|
+
# Returns the length of the string expressed in codepoints.
|
36
|
+
#
|
37
|
+
# Unichars.new('A ehm…, word.').size #=> 13
|
19
38
|
def size
|
20
39
|
Glib.utf8_size(@wrapped_string)
|
21
40
|
end
|
22
41
|
|
42
|
+
# Returns a Unichars instance with the string in capitals if they are available for the supplied string.
|
43
|
+
#
|
44
|
+
# Unichars.new('Sluß').upcase.to_s #=> SLUSS
|
23
45
|
def upcase
|
24
46
|
self.class.new(Glib.utf8_upcase(@wrapped_string))
|
25
47
|
end
|
26
48
|
|
49
|
+
# Returns a Unichars instance with the string in lowercase characters if they are available for the supplied string.
|
50
|
+
#
|
51
|
+
# Unichars.new('ORGANISÉE').downcase.to_s #=> organisée
|
27
52
|
def downcase
|
28
53
|
self.class.new(Glib.utf8_downcase(@wrapped_string))
|
29
54
|
end
|
30
55
|
|
56
|
+
# Returns a Unichars instance with the string in reverse order.
|
57
|
+
#
|
58
|
+
# Unichars.new('Comment ça va?').reverse.to_s #=> ?av aç tnemmoC
|
31
59
|
def reverse
|
32
60
|
self.class.new(Glib.utf8_reverse(@wrapped_string))
|
33
61
|
end
|
34
62
|
|
63
|
+
# Returns a Unichars instance with the string in normalized form. See http://www.unicode.org/reports/tr15/tr15-29.html
|
64
|
+
# for more information about normalization.
|
65
|
+
#
|
66
|
+
# <i>form</i> can be one of the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
|
67
|
+
#
|
68
|
+
# decomposed = [101, 769].pack('U*')
|
69
|
+
# composed = Unichars.new(decomposed).normalize(:kc)
|
70
|
+
# composed.to_s.unpack('U*') #=> [233]
|
35
71
|
def normalize(form=Unichars.default_normalization_form)
|
36
72
|
self.class.new(Glib.utf8_normalize(@wrapped_string, form))
|
37
73
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unichars
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.3"
|
4
|
+
version: "0.4"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manfred Stienstra
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-01-06 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements: []
|
54
54
|
|
55
55
|
rubyforge_project:
|
56
|
-
rubygems_version: 1.
|
56
|
+
rubygems_version: 1.3.1
|
57
57
|
signing_key:
|
58
58
|
specification_version: 2
|
59
59
|
summary: Unichars is a wrapper around Glib2 UTF8 functions. It was written to speed up ActiveSupport::Multibyte, but I'm sure people can find other uses for it.
|