unichars 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/ext/glib/glib.c +4 -0
  2. data/lib/chars.rb +4 -0
  3. data/lib/unichars.rb +36 -0
  4. metadata +3 -3
@@ -35,6 +35,7 @@ static VALUE utf8_upcase(VALUE self, VALUE string)
35
35
  Check_Type(string, T_STRING);
36
36
  temp = g_utf8_strup(StringValuePtr(string), RSTRING(string)->len);
37
37
  result = rb_str_new2(temp);
38
+ free(temp);
38
39
 
39
40
  return result;
40
41
  }
@@ -55,6 +56,7 @@ static VALUE utf8_downcase(VALUE self, VALUE string)
55
56
  Check_Type(string, T_STRING);
56
57
  temp = g_utf8_strdown(StringValuePtr(string), RSTRING(string)->len);
57
58
  result = rb_str_new2(temp);
59
+ free(temp);
58
60
 
59
61
  return result;
60
62
  }
@@ -75,6 +77,7 @@ static VALUE utf8_reverse(VALUE self, VALUE string)
75
77
  Check_Type(string, T_STRING);
76
78
  temp = g_utf8_strreverse(StringValuePtr(string), RSTRING(string)->len);
77
79
  result = rb_str_new2(temp);
80
+ free(temp);
78
81
 
79
82
  return result;
80
83
  }
@@ -116,6 +119,7 @@ static VALUE utf8_normalize(VALUE self, VALUE string, VALUE form)
116
119
 
117
120
  temp = g_utf8_normalize(StringValuePtr(string), RSTRING(string)->len, mode);
118
121
  result = rb_str_new2(temp);
122
+ free(temp);
119
123
 
120
124
  return result;
121
125
  }
@@ -1,5 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # Chars is a proxy class for String. It's used by Unichars as a trimmed-down version of ActiveSupport::Multibyte::Chars
4
+ # when ActiveSupport isn't loaded.
5
+ #
6
+ # Chars.new('João') #=> #<Chars:0x34c240 @wrapped_string="João">
3
7
  class Chars
4
8
  attr_reader :wrapped_string
5
9
  alias to_s wrapped_string
@@ -9,29 +9,65 @@ rescue NameError
9
9
  class Unichars < Chars; end
10
10
  end
11
11
 
12
+ # Unichars is a proxy class for String. It wraps a String and implements UTF-8 safe versions of various String operations.
13
+ # Unimplemented methods are forwarded to the wrapped string.
14
+ #
15
+ # Unichars uses ActiveSupport::Multibyte::Chars as its superclass if it's loaded. Otherwise it will use its own Chars class
16
+ # which is basically a trimmed down version of ActiveSupport's Chars class.
17
+ #
18
+ # require 'unichars'
19
+ # Unichars.superclass #=> Chars
20
+ #
21
+ # require 'active_support'
22
+ # require 'unichars'
23
+ # Unichars.superclass #=> ActiveSupport::Multibyte::Chars
24
+ #
25
+ # Note that all the operations on strings are implemented using Glib2, so the outcome of the methods will be influenced by the
26
+ # Glib2 version installed on your system.
12
27
  class Unichars
28
+ # Valid normalization forms
13
29
  NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
14
30
 
15
31
  class << self
16
32
  attr_accessor :default_normalization_form
17
33
  end
18
34
 
35
+ # Returns the length of the string expressed in codepoints.
36
+ #
37
+ # Unichars.new('A ehm…, word.').size #=> 13
19
38
  def size
20
39
  Glib.utf8_size(@wrapped_string)
21
40
  end
22
41
 
42
+ # Returns a Unichars instance with the string in capitals if they are available for the supplied string.
43
+ #
44
+ # Unichars.new('Sluß').upcase.to_s #=> SLUSS
23
45
  def upcase
24
46
  self.class.new(Glib.utf8_upcase(@wrapped_string))
25
47
  end
26
48
 
49
+ # Returns a Unichars instance with the string in lowercase characters if they are available for the supplied string.
50
+ #
51
+ # Unichars.new('ORGANISÉE').downcase.to_s #=> organisée
27
52
  def downcase
28
53
  self.class.new(Glib.utf8_downcase(@wrapped_string))
29
54
  end
30
55
 
56
+ # Returns a Unichars instance with the string in reverse order.
57
+ #
58
+ # Unichars.new('Comment ça va?').reverse.to_s #=> ?av aç tnemmoC
31
59
  def reverse
32
60
  self.class.new(Glib.utf8_reverse(@wrapped_string))
33
61
  end
34
62
 
63
+ # Returns a Unichars instance with the string in normalized form. See http://www.unicode.org/reports/tr15/tr15-29.html
64
+ # for more information about normalization.
65
+ #
66
+ # <i>form</i> can be one of the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
67
+ #
68
+ # decomposed = [101, 769].pack('U*')
69
+ # composed = Unichars.new(decomposed).normalize(:kc)
70
+ # composed.to_s.unpack('U*') #=> [233]
35
71
  def normalize(form=Unichars.default_normalization_form)
36
72
  self.class.new(Glib.utf8_normalize(@wrapped_string, form))
37
73
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unichars
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.3"
4
+ version: "0.4"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manfred Stienstra
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-12-22 00:00:00 +01:00
12
+ date: 2009-01-06 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  requirements: []
54
54
 
55
55
  rubyforge_project:
56
- rubygems_version: 1.2.0
56
+ rubygems_version: 1.3.1
57
57
  signing_key:
58
58
  specification_version: 2
59
59
  summary: Unichars is a wrapper around Glib2 UTF8 functions. It was written to speed up ActiveSupport::Multibyte, but I'm sure people can find other uses for it.