activesupport-inflector 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,98 @@
1
+ # encoding: utf-8
2
+ require 'active_support/core_ext/string/multibyte'
3
+ require 'active_support/i18n'
4
+
5
module ActiveSupport
  module Inflector

    # Replaces non-ASCII characters with an ASCII approximation, or if none
    # exists, a replacement character which defaults to "?".
    #
    #    transliterate("Ærøskøbing")
    #    # => "AEroskobing"
    #
    # Default approximations are provided for Western/Latin characters,
    # e.g., "ø", "ñ", "é", "ß", etc.
    #
    # This method is I18n aware, so you can set up custom approximations for a
    # locale. This can be useful, for example, to transliterate German's "ü"
    # and "ö" to "ue" and "oe", or to add support for transliterating Russian
    # to ASCII.
    #
    # In order to make your custom transliterations available, you must set
    # them as the <tt>i18n.transliterate.rule</tt> i18n key:
    #
    #   # Store the transliterations in locales/de.yml
    #   i18n:
    #     transliterate:
    #       rule:
    #         ü: "ue"
    #         ö: "oe"
    #
    #   # Or set them using Ruby
    #   I18n.backend.store_translations(:de, :i18n => {
    #     :transliterate => {
    #       :rule => {
    #         "ü" => "ue",
    #         "ö" => "oe"
    #       }
    #     }
    #   })
    #
    # The value for <tt>i18n.transliterate.rule</tt> can be a simple Hash that maps
    # characters to ASCII approximations as shown above, or, for more complex
    # requirements, a Proc:
    #
    #   I18n.backend.store_translations(:de, :i18n => {
    #     :transliterate => {
    #       :rule => lambda {|string| MyTransliterator.transliterate(string)}
    #     }
    #   })
    #
    # Now you can have different transliterations for each locale:
    #
    #   I18n.locale = :en
    #   transliterate("Jürgen")
    #   # => "Jurgen"
    #
    #   I18n.locale = :de
    #   transliterate("Jürgen")
    #   # => "Juergen"
    def transliterate(string, replacement = "?")
      # Repair stray bytes first, then compose (form C) so that I18n sees
      # single precomposed characters rather than base + combining marks.
      tidied     = ActiveSupport::Multibyte::Unicode.tidy_bytes(string)
      normalized = ActiveSupport::Multibyte::Unicode.normalize(tidied, :c)
      I18n.transliterate(normalized, :replacement => replacement)
    end

    # Replaces special characters in a string so that it may be used as part of a 'pretty' URL.
    #
    # +sep+ is the string inserted in place of every run of characters other
    # than ASCII letters, digits, "-" and "_". Passing +nil+ or an empty
    # string removes those characters without inserting anything.
    #
    # ==== Examples
    #
    #   class Person
    #     def to_param
    #       "#{id}-#{name.parameterize}"
    #     end
    #   end
    #
    #   @person = Person.find(1)
    #   # => #<Person id: 1, name: "Donald E. Knuth">
    #
    #   <%= link_to(@person.name, person_path(@person)) %>
    #   # => <a href="/person/1-donald-e-knuth">Donald E. Knuth</a>
    def parameterize(string, sep = '-')
      # Normalise up front: gsub! below needs a String replacement, so a nil
      # separator must be turned into '' before it is used.
      sep = sep.to_s
      # replace accented chars with their ascii equivalents
      parameterized_string = transliterate(string)
      # Turn unwanted chars into the separator
      parameterized_string.gsub!(/[^a-z0-9\-_]+/i, sep)
      unless sep.empty?
        re_sep = Regexp.escape(sep)
        # No more than one of the separator in a row.
        parameterized_string.gsub!(/#{re_sep}{2,}/, sep)
        # Remove leading/trailing separator (anchored to the whole string).
        parameterized_string.gsub!(/\A#{re_sep}|#{re_sep}\z/i, '')
      end
      parameterized_string.downcase
    end

  end
end
@@ -0,0 +1,46 @@
1
+ # lazy_load_hooks allows Rails to lazily load a lot of components, which makes the app boot faster. Because of
2
+ # this feature there is no longer any need to require <tt>ActiveRecord::Base</tt> at boot time purely to apply configuration. Instead
3
+ # a hook is registered that applies the configuration once <tt>ActiveRecord::Base</tt> is loaded. Here <tt>ActiveRecord::Base</tt> is used
4
+ # as an example, but this feature can be applied elsewhere too.
5
+ #
6
+ # Here is an example where +on_load+ method is called to register a hook.
7
+ #
8
+ # initializer "active_record.initialize_timezone" do
9
+ # ActiveSupport.on_load(:active_record) do
10
+ # self.time_zone_aware_attributes = true
11
+ # self.default_timezone = :utc
12
+ # end
13
+ # end
14
+ #
15
+ # When the entirety of +activerecord/lib/active_record/base.rb+ has been evaluated then +run_load_hooks+ is invoked.
16
+ # The very last line of +activerecord/lib/active_record/base.rb+ is:
17
+ #
18
+ # ActiveSupport.run_load_hooks(:active_record, ActiveRecord::Base)
19
+ #
20
module ActiveSupport
  # Hooks registered per name, stored as [block, options] pairs.
  @load_hooks = Hash.new { |hooks, key| hooks[key] = [] }
  # Objects that have already been passed to run_load_hooks, per name.
  @loaded     = Hash.new { |bases, key| bases[key] = [] }

  class << self
    # Registers +block+ as a hook for +name+. The block is executed right
    # away against every base already recorded for +name+, and is stored so
    # that later run_load_hooks calls execute it too.
    #
    # Pass <tt>:yield => true</tt> in +options+ to have the base yielded to
    # the block instead of the block being instance_eval'd on it.
    def on_load(name, options = {}, &block)
      @loaded[name].each { |target| execute_hook(target, options, block) }
      @load_hooks[name].push([block, options])
    end

    # Runs a single hook +block+ against +base+, either by calling it with
    # +base+ as argument (<tt>options[:yield]</tt>) or by evaluating it in
    # the context of +base+.
    def execute_hook(base, options, block)
      return block.call(base) if options[:yield]

      base.instance_eval(&block)
    end

    # Records +base+ as loaded for +name+ and executes every hook that has
    # been registered for +name+ so far.
    def run_load_hooks(name, base = Object)
      @loaded[name].push(base)
      @load_hooks[name].each { |hook, options| execute_hook(base, options, hook) }
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # encoding: utf-8
2
+ require 'active_support/core_ext/module/attribute_accessors'
3
+
4
module ActiveSupport #:nodoc:
  module Multibyte
    autoload :EncodingError, 'active_support/multibyte/exceptions'
    autoload :Chars, 'active_support/multibyte/chars'
    autoload :Unicode, 'active_support/multibyte/unicode'

    class << self
      # Sets the proxy class returned when calling mb_chars. Configure your
      # own proxy class here to support other encodings; see the
      # ActiveSupport::Multibyte::Chars implementation for an example of how
      # to write one.
      #
      # Example:
      #   ActiveSupport::Multibyte.proxy_class = CharsForUTF32
      attr_writer :proxy_class

      # Returns the current proxy class, falling back to
      # ActiveSupport::Multibyte::Chars when none has been configured.
      def proxy_class
        @proxy_class ||= ActiveSupport::Multibyte::Chars
      end
    end

    # Regular expressions that describe valid byte sequences for a character
    VALID_CHARACTER = {
      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
      'UTF-8' => /\A(?:
                  [\x00-\x7f]                                         |
                  [\xc2-\xdf] [\x80-\xbf]                             |
                  \xe0        [\xa0-\xbf] [\x80-\xbf]                 |
                  [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]                 |
                  \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]     |
                  [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]     |
                  \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
      'Shift_JIS' => /\A(?:
                  [\x00-\x7e\xa1-\xdf]                                     |
                  [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
    }
  end
end
43
+
44
+ require 'active_support/multibyte/utils'
@@ -0,0 +1,476 @@
1
+ # encoding: utf-8
2
+ require 'active_support/core_ext/string/access'
3
+ require 'active_support/core_ext/string/behavior'
4
+
5
module ActiveSupport #:nodoc:
  module Multibyte #:nodoc:
    # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
    # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
    # encoding safe manner. All the normal String methods are also implemented on the proxy.
    #
    # String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods
    # which would normally return a String object now return a Chars object so methods can be chained.
    #
    #   "The Perfect String ".mb_chars.downcase.strip.normalize # => "the perfect string"
    #
    # Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made.
    # If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them.
    #
    #   bad.explicit_checking_method "T".mb_chars.downcase.to_s
    #
    # The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
    # encodings you can write your own multibyte string handler and configure it through
    # ActiveSupport::Multibyte.proxy_class.
    #
    #   class CharsForUTF32
    #     def size
    #       @wrapped_string.size / 4
    #     end
    #
    #     def self.accepts?(string)
    #       string.length % 4 == 0
    #     end
    #   end
    #
    #   ActiveSupport::Multibyte.proxy_class = CharsForUTF32
    class Chars
      attr_reader :wrapped_string
      alias to_s wrapped_string
      alias to_str wrapped_string

      if RUBY_VERSION >= "1.9"
        # Creates a new Chars instance by wrapping _string_. Unless the string is frozen it is
        # tagged as UTF-8 in place.
        def initialize(string)
          @wrapped_string = string
          @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
        end
      else
        def initialize(string) #:nodoc:
          @wrapped_string = string
        end
      end

      # Forward all undefined methods to the wrapped string. Bang methods return the proxy itself,
      # other methods have String results wrapped in a new Chars instance.
      def method_missing(method, *args, &block)
        if method.to_s =~ /!$/
          @wrapped_string.__send__(method, *args, &block)
          self
        else
          result = @wrapped_string.__send__(method, *args, &block)
          result.kind_of?(String) ? chars(result) : result
        end
      end

      # Returns +true+ if _obj_ responds to the given method. Private methods are included in the search
      # only if the optional second parameter evaluates to +true+.
      def respond_to?(method, include_private=false)
        super || @wrapped_string.respond_to?(method, include_private)
      end

      # Enable more predictable duck-typing on String-like classes. See Object#acts_like?.
      def acts_like_string?
        true
      end

      # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
      def self.consumes?(string)
        # Unpack is a little bit faster than regular expressions.
        string.unpack('U*')
        true
      rescue ArgumentError
        false
      end

      include Comparable

      # Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before,
      # equal or after the object on the right side of the operation. It accepts any object
      # that implements +to_s+:
      #
      #   'é'.mb_chars <=> 'ü'.mb_chars # => -1
      #
      # See <tt>String#<=></tt> for more details.
      def <=>(other)
        @wrapped_string <=> other.to_s
      end

      if RUBY_VERSION < "1.9"
        # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
        # +false+ otherwise.
        def self.wants?(string)
          $KCODE == 'UTF8' && consumes?(string)
        end

        # Returns a new Chars object containing the _other_ object concatenated to the string.
        #
        # Example:
        #   ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl"
        def +(other)
          chars(@wrapped_string + other)
        end

        # Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
        #
        # Example:
        #   'Café périferôl'.mb_chars =~ /ô/ # => 12
        def =~(other)
          translate_offset(@wrapped_string =~ other)
        end

        # Inserts the passed string at specified codepoint offsets.
        #
        # Raises IndexError when _offset_ lies beyond the end of the string.
        #
        # Example:
        #   'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl"
        def insert(offset, fragment)
          unpacked = Unicode.u_unpack(@wrapped_string)
          raise IndexError, "index #{offset} out of string" if offset > unpacked.length

          @wrapped_string.replace(
            unpacked.insert(offset, *Unicode.u_unpack(fragment)).pack('U*')
          )
          self
        end

        # Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
        #
        # Example:
        #   'Café'.mb_chars.include?('é') # => true
        def include?(other)
          # We have to redefine this method because Enumerable defines it.
          @wrapped_string.include?(other)
        end

        # Returns the position _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
        #
        # Example:
        #   'Café périferôl'.mb_chars.index('ô') # => 12
        #   'Café périferôl'.mb_chars.index(/\w/u) # => 0
        def index(needle, offset=0)
          # Convert the codepoint offset to the offset String#index expects,
          # then convert the match position back to a codepoint count.
          wrapped_offset = first(offset).wrapped_string.length
          index = @wrapped_string.index(needle, wrapped_offset)
          index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
        end

        # Returns the position _needle_ in the string, counting in
        # codepoints, searching backward from _offset_ or the end of the
        # string. Returns +nil+ if _needle_ isn't found.
        #
        # Example:
        #   'Café périferôl'.mb_chars.rindex('é') # => 6
        #   'Café périferôl'.mb_chars.rindex(/\w/u) # => 13
        def rindex(needle, offset=nil)
          offset ||= length
          wrapped_offset = first(offset).wrapped_string.length
          index = @wrapped_string.rindex(needle, wrapped_offset)
          index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
        end

        # Returns the number of codepoints in the string
        def size
          Unicode.u_unpack(@wrapped_string).size
        end
        alias_method :length, :size

        # Strips entire range of Unicode whitespace from the right of the string.
        def rstrip
          chars(@wrapped_string.gsub(Unicode::TRAILERS_PAT, ''))
        end

        # Strips entire range of Unicode whitespace from the left of the string.
        def lstrip
          chars(@wrapped_string.gsub(Unicode::LEADERS_PAT, ''))
        end

        # Strips entire range of Unicode whitespace from the right and left of the string.
        def strip
          rstrip.lstrip
        end

        # Returns the codepoint of the first character in the string.
        #
        # Example:
        #   'こんにちは'.mb_chars.ord # => 12371
        def ord
          Unicode.u_unpack(@wrapped_string)[0]
        end

        # Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.rjust(8).to_s
        #   # => "   ¾ cup"
        #
        #   "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace
        #   # => "   ¾ cup"
        def rjust(integer, padstr=' ')
          justify(integer, :right, padstr)
        end

        # Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.ljust(8).to_s
        #   # => "¾ cup   "
        #
        #   "¾ cup".mb_chars.ljust(8, " ").to_s # Use non-breaking whitespace
        #   # => "¾ cup   "
        def ljust(integer, padstr=' ')
          justify(integer, :left, padstr)
        end

        # Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.center(8).to_s
        #   # => " ¾ cup  "
        #
        #   "¾ cup".mb_chars.center(8, " ").to_s # Use non-breaking whitespace
        #   # => " ¾ cup  "
        def center(integer, padstr=' ')
          justify(integer, :center, padstr)
        end

      else
        def =~(other)
          @wrapped_string =~ other
        end
      end

      # Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
      # instances instead of String. This makes chaining methods easier.
      #
      # Example:
      #   'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
      def split(*args)
        @wrapped_string.split(*args).map { |i| i.mb_chars }
      end

      # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
      #
      # Raises IndexError when an integer index is past the end of the string or when a string
      # needle does not occur in the string, and RangeError when a range starts past the end.
      #
      # Example:
      #
      #   s = "Müller"
      #   s.mb_chars[2] = "e" # Replace character with offset 2
      #   s
      #   # => "Müeler"
      #
      #   s = "Müller"
      #   s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
      #   s
      #   # => "Möler"
      def []=(*args)
        replace_by = args.pop
        # Indexed replace with regular expressions already works
        if args.first.is_a?(Regexp)
          @wrapped_string[*args] = replace_by
        else
          result = Unicode.u_unpack(@wrapped_string)
          case args.first
          when Integer # Integer rather than Fixnum: Fixnum no longer exists on current Rubies
            raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
            min = args[0]
            max = args[1].nil? ? min : (min + args[1] - 1)
            range = Range.new(min, max)
            replace_by = [replace_by].pack('U') if replace_by.is_a?(Integer)
          when Range
            # Range#min is nil for ranges such as 1..-1, so compare the start instead.
            raise RangeError, "#{args[0]} out of range" if args[0].begin >= result.length
            range = args[0]
          else
            needle = args[0].to_s
            min = index(needle)
            # Mirror String#[]=, which raises when the needle is absent.
            raise IndexError, "string not matched" if min.nil?
            max = min + Unicode.u_unpack(needle).length - 1
            range = Range.new(min, max)
          end
          result[range] = Unicode.u_unpack(replace_by)
          @wrapped_string.replace(result.pack('U*'))
        end
      end

      # Reverses all characters in the string.
      #
      # Example:
      #   'Café'.mb_chars.reverse.to_s # => 'éfaC'
      def reverse
        chars(Unicode.g_unpack(@wrapped_string).reverse.flatten.pack('U*'))
      end

      # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
      # character.
      #
      # Example:
      #   'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
      def slice(*args)
        if args.size > 2
          raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" # Do as if we were native
        elsif (args.size == 2 && !(args.first.is_a?(Numeric) || args.first.is_a?(Regexp)))
          raise TypeError, "cannot convert #{args.first.class} into Integer" # Do as if we were native
        elsif (args.size == 2 && !args[1].is_a?(Numeric))
          raise TypeError, "cannot convert #{args[1].class} into Integer" # Do as if we were native
        elsif args[0].kind_of? Range
          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
          result = cps.nil? ? nil : cps.pack('U*')
        elsif args[0].kind_of? Regexp
          result = @wrapped_string.slice(*args)
        elsif args.size == 1 && args[0].kind_of?(Numeric)
          character = Unicode.u_unpack(@wrapped_string)[args[0]]
          result = character && [character].pack('U')
        else
          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
          result = cps && cps.pack('U*')
        end
        result && chars(result)
      end
      alias_method :[], :slice

      # Limit the byte size of the string to a number of bytes without breaking characters. Usable
      # when the storage for a string is limited for some reason.
      #
      # Example:
      #   'こんにちは'.mb_chars.limit(7).to_s # => "こん"
      def limit(limit)
        slice(0...translate_offset(limit))
      end

      # Convert characters in the string to uppercase.
      #
      # Example:
      #   'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
      def upcase
        chars(Unicode.apply_mapping @wrapped_string, :uppercase_mapping)
      end

      # Convert characters in the string to lowercase.
      #
      # Example:
      #   'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
      def downcase
        chars(Unicode.apply_mapping @wrapped_string, :lowercase_mapping)
      end

      # Converts the first character to uppercase and the remainder to lowercase.
      #
      # Example:
      #  'über'.mb_chars.capitalize.to_s # => "Über"
      def capitalize
        (slice(0) || chars('')).upcase + (slice(1..-1) || chars('')).downcase
      end

      # Capitalizes the first letter of every word, when possible.
      #
      # Example:
      #   "ÉL QUE SE ENTERÓ".mb_chars.titleize    # => "Él Que Se Enteró"
      #   "日本語".mb_chars.titleize                 # => "日本語"
      def titleize
        chars(downcase.to_s.gsub(/\b('?[\S])/u) { Unicode.apply_mapping $1, :uppercase_mapping })
      end
      alias_method :titlecase, :titleize

      # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
      # passing strings to databases and validations.
      #
      # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
      #   <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
      #   ActiveSupport::Multibyte::Unicode.default_normalization_form
      def normalize(form = nil)
        chars(Unicode.normalize(@wrapped_string, form))
      end

      # Performs canonical decomposition on all the characters.
      #
      # Example:
      #   'é'.length                         # => 2
      #   'é'.mb_chars.decompose.to_s.length # => 3
      def decompose
        chars(Unicode.decompose_codepoints(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
      end

      # Performs composition on all the characters.
      #
      # Example:
      #   'é'.length                       # => 3
      #   'é'.mb_chars.compose.to_s.length # => 2
      def compose
        chars(Unicode.compose_codepoints(Unicode.u_unpack(@wrapped_string)).pack('U*'))
      end

      # Returns the number of grapheme clusters in the string.
      #
      # Example:
      #   'क्षि'.mb_chars.length   # => 4
      #   'क्षि'.mb_chars.g_length # => 3
      def g_length
        Unicode.g_unpack(@wrapped_string).length
      end

      # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
      #
      # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
      def tidy_bytes(force = false)
        chars(Unicode.tidy_bytes(@wrapped_string, force))
      end

      %w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
        # Only define a corresponding bang method for methods defined in the proxy; On 1.9 the proxy will
        # exclude lstrip!, rstrip! and strip! because they already work as expected on multibyte strings.
        #
        # The generated methods rebind the wrapped string to the result of the non-bang method and
        # return the proxy itself.
        # NOTE(review): the generated slice! therefore keeps only the slice, whereas String#slice!
        # removes the slice and returns it - confirm this is what callers expect.
        if public_method_defined?(method)
          define_method("#{method}!") do |*args|
            @wrapped_string = send(method, *args).to_s
            self
          end
        end
      end

      protected

        # Converts a byte offset into the number of complete codepoints that fit in the first
        # _byte_offset_ bytes of the wrapped string. Returns +nil+ for a +nil+ offset.
        def translate_offset(byte_offset) #:nodoc:
          return nil if byte_offset.nil?
          return 0   if @wrapped_string == ''

          # Slice by bytes on a local binary copy; the wrapped string itself must keep its
          # encoding and its identity with the string handed to the proxy.
          bytes = @wrapped_string
          if bytes.respond_to?(:force_encoding)
            bytes = bytes.dup.force_encoding(Encoding::ASCII_8BIT)
          end

          begin
            bytes[0...byte_offset].unpack('U*').length
          rescue ArgumentError
            # The cut landed inside a multibyte sequence: back off one byte and try again.
            byte_offset -= 1
            retry
          end
        end

        # Pads the wrapped string with _padstr_ up to _integer_ codepoints on the side(s)
        # selected by _way_ (<tt>:right</tt>, <tt>:left</tt> or <tt>:center</tt>).
        def justify(integer, way, padstr=' ') #:nodoc:
          raise ArgumentError, "zero width padding" if padstr.length == 0
          padsize = integer - size
          padsize = padsize > 0 ? padsize : 0
          case way
          when :right
            result = @wrapped_string.dup.insert(0, padding(padsize, padstr))
          when :left
            result = @wrapped_string.dup.insert(-1, padding(padsize, padstr))
          when :center
            lpad = padding((padsize / 2.0).floor, padstr)
            rpad = padding((padsize / 2.0).ceil, padstr)
            result = @wrapped_string.dup.insert(0, lpad).insert(-1, rpad)
          end
          chars(result)
        end

        # Builds a padding of exactly _padsize_ codepoints by repeating _padstr_.
        def padding(padsize, padstr=' ') #:nodoc:
          if padsize != 0
            chars(padstr * ((padsize / Unicode.u_unpack(padstr).size) + 1)).slice(0, padsize)
          else
            ''
          end
        end

        # Wraps _string_ in a new instance of the receiver's class.
        def chars(string) #:nodoc:
          self.class.new(string)
        end
    end
  end
end