unichars 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/ext/glib/glib.c +4 -0
  2. data/lib/chars.rb +4 -0
  3. data/lib/unichars.rb +36 -0
  4. metadata +3 -3
@@ -35,6 +35,7 @@ static VALUE utf8_upcase(VALUE self, VALUE string)
35
35
  Check_Type(string, T_STRING);
36
36
  temp = g_utf8_strup(StringValuePtr(string), RSTRING(string)->len);
37
37
  result = rb_str_new2(temp);
38
+ free(temp);
38
39
 
39
40
  return result;
40
41
  }
@@ -55,6 +56,7 @@ static VALUE utf8_downcase(VALUE self, VALUE string)
55
56
  Check_Type(string, T_STRING);
56
57
  temp = g_utf8_strdown(StringValuePtr(string), RSTRING(string)->len);
57
58
  result = rb_str_new2(temp);
59
+ free(temp);
58
60
 
59
61
  return result;
60
62
  }
@@ -75,6 +77,7 @@ static VALUE utf8_reverse(VALUE self, VALUE string)
75
77
  Check_Type(string, T_STRING);
76
78
  temp = g_utf8_strreverse(StringValuePtr(string), RSTRING(string)->len);
77
79
  result = rb_str_new2(temp);
80
+ free(temp);
78
81
 
79
82
  return result;
80
83
  }
@@ -116,6 +119,7 @@ static VALUE utf8_normalize(VALUE self, VALUE string, VALUE form)
116
119
 
117
120
  temp = g_utf8_normalize(StringValuePtr(string), RSTRING(string)->len, mode);
118
121
  result = rb_str_new2(temp);
122
+ free(temp);
119
123
 
120
124
  return result;
121
125
  }
@@ -1,5 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # Chars is a proxy class for String. It's used by Unichars as a trimmed-down version of ActiveSupport::Multibyte::Chars
4
+ # when ActiveSupport isn't loaded.
5
+ #
6
+ # Chars.new('João') #=> #<Chars:0x34c240 @wrapped_string="João">
3
7
  class Chars
4
8
  attr_reader :wrapped_string
5
9
  alias to_s wrapped_string
@@ -9,29 +9,65 @@ rescue NameError
9
9
  class Unichars < Chars; end
10
10
  end
11
11
 
12
+ # Unichars is a proxy class for String. It wraps a String and implements UTF-8 safe versions of various String operations.
13
+ # Unimplemented methods are forwarded to the wrapped string.
14
+ #
15
+ # Unichars uses ActiveSupport::Multibyte::Chars as its superclass if it's loaded. Otherwise it will use its own Chars class
16
+ # which is basically a trimmed down version of ActiveSupport's Chars class.
17
+ #
18
+ # require 'unichars'
19
+ # Unichars.superclass #=> Chars
20
+ #
21
+ # require 'active_support'
22
+ # require 'unichars'
23
+ # Unichars.superclass #=> ActiveSupport::Multibyte::Chars
24
+ #
25
+ # Note that all the operations on strings are implemented using Glib2, so the outcome of the methods will be influenced by the
26
+ # Glib2 version installed on your system.
12
27
  class Unichars
28
+ # Valid normalization forms
13
29
  NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
14
30
 
15
31
  class << self
16
32
  attr_accessor :default_normalization_form
17
33
  end
18
34
 
35
+ # Returns the length of the string expressed in codepoints.
36
+ #
37
+ # Unichars.new('A ehm…, word.').size #=> 13
19
38
  def size
20
39
  Glib.utf8_size(@wrapped_string)
21
40
  end
22
41
 
42
+ # Returns a Unichars instance with the string in capitals if they are available for the supplied string.
43
+ #
44
+ # Unichars.new('Sluß').upcase.to_s #=> SLUSS
23
45
  def upcase
24
46
  self.class.new(Glib.utf8_upcase(@wrapped_string))
25
47
  end
26
48
 
49
+ # Returns a Unichars instance with the string in lowercase characters if they are available for the supplied string.
50
+ #
51
+ # Unichars.new('ORGANISÉE').downcase.to_s #=> organisée
27
52
  def downcase
28
53
  self.class.new(Glib.utf8_downcase(@wrapped_string))
29
54
  end
30
55
 
56
+ # Returns a Unichars instance with the string in reverse order.
57
+ #
58
+ # Unichars.new('Comment ça va?').reverse.to_s #=> ?av aç tnemmoC
31
59
  def reverse
32
60
  self.class.new(Glib.utf8_reverse(@wrapped_string))
33
61
  end
34
62
 
63
+ # Returns a Unichars instance with the string in normalized form. See http://www.unicode.org/reports/tr15/tr15-29.html
64
+ # for more information about normalization.
65
+ #
66
+ # <i>form</i> can be one of the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
67
+ #
68
+ # decomposed = [101, 769].pack('U*')
69
+ # composed = Unichars.new(decomposed).normalize(:kc)
70
+ # composed.to_s.unpack('U*') #=> [233]
35
71
  def normalize(form=Unichars.default_normalization_form)
36
72
  self.class.new(Glib.utf8_normalize(@wrapped_string, form))
37
73
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unichars
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.3"
4
+ version: "0.4"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manfred Stienstra
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-12-22 00:00:00 +01:00
12
+ date: 2009-01-06 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  requirements: []
54
54
 
55
55
  rubyforge_project:
56
- rubygems_version: 1.2.0
56
+ rubygems_version: 1.3.1
57
57
  signing_key:
58
58
  specification_version: 2
59
59
  summary: Unichars is a wrapper around Glib2 UTF8 functions. It was written to speed up ActiveSupport::Multibyte, but I'm sure people can find other uses for it.