activesupport-inflector 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +22 -0
- data/Gemfile +2 -0
- data/README.md +4 -0
- data/Rakefile +14 -0
- data/activesupport-inflector.gemspec +18 -0
- data/lib/active_support/core_ext/array/extract_options.rb +29 -0
- data/lib/active_support/core_ext/module/attribute_accessors.rb +64 -0
- data/lib/active_support/core_ext/string/access.rb +99 -0
- data/lib/active_support/core_ext/string/behavior.rb +6 -0
- data/lib/active_support/core_ext/string/inflections.rb +202 -0
- data/lib/active_support/core_ext/string/multibyte.rb +72 -0
- data/lib/active_support/i18n.rb +9 -0
- data/lib/active_support/inflections.rb +63 -0
- data/lib/active_support/inflector.rb +7 -0
- data/lib/active_support/inflector/inflections.rb +174 -0
- data/lib/active_support/inflector/methods.rb +321 -0
- data/lib/active_support/inflector/transliterate.rb +98 -0
- data/lib/active_support/lazy_load_hooks.rb +46 -0
- data/lib/active_support/multibyte.rb +44 -0
- data/lib/active_support/multibyte/chars.rb +476 -0
- data/lib/active_support/multibyte/exceptions.rb +8 -0
- data/lib/active_support/multibyte/unicode.rb +393 -0
- data/lib/active_support/multibyte/utils.rb +60 -0
- metadata +66 -0
@@ -0,0 +1,98 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'active_support/core_ext/string/multibyte'
|
3
|
+
require 'active_support/i18n'
|
4
|
+
|
5
|
+
module ActiveSupport
|
6
|
+
module Inflector
|
7
|
+
|
8
|
+
# Replaces non-ASCII characters with an ASCII approximation, or if none
|
9
|
+
# exists, a replacement character which defaults to "?".
|
10
|
+
#
|
11
|
+
# transliterate("Ærøskøbing")
|
12
|
+
# # => "AEroskobing"
|
13
|
+
#
|
14
|
+
# Default approximations are provided for Western/Latin characters,
|
15
|
+
# e.g., "ø", "ñ", "é", "ß", etc.
|
16
|
+
#
|
17
|
+
# This method is I18n aware, so you can set up custom approximations for a
|
18
|
+
# locale. This can be useful, for example, to transliterate German's "ü"
|
19
|
+
# and "ö" to "ue" and "oe", or to add support for transliterating Russian
|
20
|
+
# to ASCII.
|
21
|
+
#
|
22
|
+
# In order to make your custom transliterations available, you must set
|
23
|
+
# them as the <tt>i18n.transliterate.rule</tt> i18n key:
|
24
|
+
#
|
25
|
+
# # Store the transliterations in locales/de.yml
|
26
|
+
# i18n:
|
27
|
+
# transliterate:
|
28
|
+
# rule:
|
29
|
+
# ü: "ue"
|
30
|
+
# ö: "oe"
|
31
|
+
#
|
32
|
+
# # Or set them using Ruby
|
33
|
+
# I18n.backend.store_translations(:de, :i18n => {
|
34
|
+
# :transliterate => {
|
35
|
+
# :rule => {
|
36
|
+
# "ü" => "ue",
|
37
|
+
# "ö" => "oe"
|
38
|
+
# }
|
39
|
+
# }
|
40
|
+
# })
|
41
|
+
#
|
42
|
+
# The value for <tt>i18n.transliterate.rule</tt> can be a simple Hash that maps
|
43
|
+
# characters to ASCII approximations as shown above, or, for more complex
|
44
|
+
# requirements, a Proc:
|
45
|
+
#
|
46
|
+
# I18n.backend.store_translations(:de, :i18n => {
|
47
|
+
# :transliterate => {
|
48
|
+
# :rule => lambda {|string| MyTransliterator.transliterate(string)}
|
49
|
+
# }
|
50
|
+
# })
|
51
|
+
#
|
52
|
+
# Now you can have different transliterations for each locale:
|
53
|
+
#
|
54
|
+
# I18n.locale = :en
|
55
|
+
# transliterate("Jürgen")
|
56
|
+
# # => "Jurgen"
|
57
|
+
#
|
58
|
+
# I18n.locale = :de
|
59
|
+
# transliterate("Jürgen")
|
60
|
+
# # => "Juergen"
|
61
|
+
def transliterate(string, replacement = "?")
  unicode = ActiveSupport::Multibyte::Unicode
  # Tidy the bytes first, then normalize to form C before handing off to I18n.
  tidied = unicode.tidy_bytes(string)
  composed = unicode.normalize(tidied, :c)
  I18n.transliterate(composed, :replacement => replacement)
end
|
66
|
+
|
67
|
+
# Replaces special characters in a string so that it may be used as part of a 'pretty' URL.
|
68
|
+
#
|
69
|
+
# ==== Examples
|
70
|
+
#
|
71
|
+
# class Person
|
72
|
+
# def to_param
|
73
|
+
# "#{id}-#{name.parameterize}"
|
74
|
+
# end
|
75
|
+
# end
|
76
|
+
#
|
77
|
+
# @person = Person.find(1)
|
78
|
+
# # => #<Person id: 1, name: "Donald E. Knuth">
|
79
|
+
#
|
80
|
+
# <%= link_to(@person.name, person_path(@person)) %>
|
81
|
+
# # => <a href="/person/1-donald-e-knuth">Donald E. Knuth</a>
|
82
|
+
def parameterize(string, sep = '-')
  # A nil separator is treated like an empty one; previously the nil check
  # below was unreachable because the first gsub! raised TypeError on nil.
  sep = sep.to_s
  # replace accented chars with their ascii equivalents
  parameterized_string = transliterate(string)
  # Turn unwanted chars into the separator
  parameterized_string.gsub!(/[^a-z0-9\-_]+/i, sep)
  unless sep.empty?
    re_sep = Regexp.escape(sep)
    # No more than one of the separator in a row. The separator is grouped so
    # the {2,} quantifier applies to the whole (possibly multi-character)
    # separator rather than only to its last character.
    parameterized_string.gsub!(/(?:#{re_sep}){2,}/, sep)
    # Remove leading/trailing separator.
    parameterized_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '')
  end
  parameterized_string.downcase
end
|
96
|
+
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# lazy_load_hooks allows Rails to lazily load a lot of components, thus making the app boot faster. Because of
|
2
|
+
# this feature now there is no need to require <tt>ActiveRecord::Base</tt> at boot time purely to apply configuration. Instead
|
3
|
+
# a hook is registered that applies configuration once <tt>ActiveRecord::Base</tt> is loaded. Here <tt>ActiveRecord::Base</tt> is used
|
4
|
+
# as an example, but this feature can be applied elsewhere too.
|
5
|
+
#
|
6
|
+
# Here is an example where the +on_load+ method is called to register a hook.
|
7
|
+
#
|
8
|
+
# initializer "active_record.initialize_timezone" do
|
9
|
+
# ActiveSupport.on_load(:active_record) do
|
10
|
+
# self.time_zone_aware_attributes = true
|
11
|
+
# self.default_timezone = :utc
|
12
|
+
# end
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# When the entirety of +activerecord/lib/active_record/base.rb+ has been evaluated then +run_load_hooks+ is invoked.
|
16
|
+
# The very last line of +activerecord/lib/active_record/base.rb+ is:
|
17
|
+
#
|
18
|
+
# ActiveSupport.run_load_hooks(:active_record, ActiveRecord::Base)
|
19
|
+
#
|
20
|
+
module ActiveSupport
  # name => list of [block, options] pairs registered through on_load.
  @load_hooks = Hash.new { |hooks, key| hooks[key] = [] }
  # name => list of bases that run_load_hooks has already been called with.
  @loaded = Hash.new { |bases, key| bases[key] = [] }

  # Registers +block+ under +name+. The block is executed right away against
  # every base already recorded for +name+, and stored for later runs.
  def self.on_load(name, options = {}, &block)
    @loaded[name].each { |base| execute_hook(base, options, block) }
    @load_hooks[name].push([block, options])
  end

  # Runs +block+ for +base+: the base is passed as an argument when
  # <tt>options[:yield]</tt> is set, otherwise the block is instance_eval'd on it.
  def self.execute_hook(base, options, block)
    return block.call(base) if options[:yield]

    base.instance_eval(&block)
  end

  # Records +base+ as loaded for +name+ and runs every hook registered so far.
  def self.run_load_hooks(name, base = Object)
    @loaded[name].push(base)
    @load_hooks[name].each { |hook, options| execute_hook(base, options, hook) }
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'active_support/core_ext/module/attribute_accessors'
|
3
|
+
|
4
|
+
module ActiveSupport #:nodoc:
|
5
|
+
module Multibyte
|
6
|
+
autoload :EncodingError, 'active_support/multibyte/exceptions'
|
7
|
+
autoload :Chars, 'active_support/multibyte/chars'
|
8
|
+
autoload :Unicode, 'active_support/multibyte/unicode'
|
9
|
+
|
10
|
+
# The proxy class returned when calling mb_chars. You can use this accessor to configure your own proxy
|
11
|
+
# class so you can support other encodings. See the ActiveSupport::Multibyte::Chars implementation for
|
12
|
+
# an example of how to do this.
|
13
|
+
#
|
14
|
+
# Example:
|
15
|
+
# ActiveSupport::Multibyte.proxy_class = CharsForUTF32
|
16
|
+
def self.proxy_class=(klass)
|
17
|
+
@proxy_class = klass
|
18
|
+
end
|
19
|
+
|
20
|
+
# Returns the current proxy class
|
21
|
+
def self.proxy_class
|
22
|
+
@proxy_class ||= ActiveSupport::Multibyte::Chars
|
23
|
+
end
|
24
|
+
|
25
|
+
# Regular expressions that describe valid byte sequences for a character
|
26
|
+
VALID_CHARACTER = {
|
27
|
+
# Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
|
28
|
+
'UTF-8' => /\A(?:
|
29
|
+
[\x00-\x7f] |
|
30
|
+
[\xc2-\xdf] [\x80-\xbf] |
|
31
|
+
\xe0 [\xa0-\xbf] [\x80-\xbf] |
|
32
|
+
[\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
|
33
|
+
\xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
|
34
|
+
[\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
|
35
|
+
\xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
|
36
|
+
# Quick check for valid Shift-JIS characters, disregards the odd-even pairing
|
37
|
+
'Shift_JIS' => /\A(?:
|
38
|
+
[\x00-\x7e\xa1-\xdf] |
|
39
|
+
[\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
|
40
|
+
}
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
require 'active_support/multibyte/utils'
|
@@ -0,0 +1,476 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'active_support/core_ext/string/access'
|
3
|
+
require 'active_support/core_ext/string/behavior'
|
4
|
+
|
5
|
+
module ActiveSupport #:nodoc:
|
6
|
+
module Multibyte #:nodoc:
|
7
|
+
# Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
|
8
|
+
# knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
|
9
|
+
# encoding safe manner. All the normal String methods are also implemented on the proxy.
|
10
|
+
#
|
11
|
+
# String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods
|
12
|
+
# which would normally return a String object now return a Chars object so methods can be chained.
|
13
|
+
#
|
14
|
+
# "The Perfect String ".mb_chars.downcase.strip.normalize # => "the perfect string"
|
15
|
+
#
|
16
|
+
# Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made.
|
17
|
+
# If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them.
|
18
|
+
#
|
19
|
+
# bad.explicit_checking_method "T".mb_chars.downcase.to_s
|
20
|
+
#
|
21
|
+
# The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
|
22
|
+
# encodings you can write your own multibyte string handler and configure it through
|
23
|
+
# ActiveSupport::Multibyte.proxy_class.
|
24
|
+
#
|
25
|
+
# class CharsForUTF32
|
26
|
+
# def size
|
27
|
+
# @wrapped_string.size / 4
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# def self.accepts?(string)
|
31
|
+
# string.length % 4 == 0
|
32
|
+
# end
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
# ActiveSupport::Multibyte.proxy_class = CharsForUTF32
|
36
|
+
class Chars
|
37
|
+
attr_reader :wrapped_string
|
38
|
+
alias to_s wrapped_string
|
39
|
+
alias to_str wrapped_string
|
40
|
+
|
41
|
+
if RUBY_VERSION < "1.9"
  def initialize(string) #:nodoc:
    @wrapped_string = string
  end
else
  # Creates a new Chars instance by wrapping _string_. Unless the string is
  # frozen, it is tagged as UTF-8 in place.
  def initialize(string)
    @wrapped_string = string
    string.force_encoding(Encoding::UTF_8) unless string.frozen?
  end
end
|
52
|
+
|
53
|
+
# Forward all undefined methods to the wrapped string.
|
54
|
+
def method_missing(method, *args, &block)
  outcome = @wrapped_string.__send__(method, *args, &block)
  # Bang methods hand back the proxy itself, whatever the String method returned.
  return self if method.to_s =~ /!$/

  # Re-wrap String results so calls can keep chaining on the proxy.
  outcome.kind_of?(String) ? chars(outcome) : outcome
end
|
63
|
+
|
64
|
+
# Returns +true+ if _obj_ responds to the given method. Private methods are included in the search
|
65
|
+
# only if the optional second parameter evaluates to +true+.
|
66
|
+
def respond_to?(method, include_private=false)
|
67
|
+
super || @wrapped_string.respond_to?(method, include_private)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Enable more predictable duck-typing on String-like classes. See Object#acts_like?.
|
71
|
+
def acts_like_string?
|
72
|
+
true
|
73
|
+
end
|
74
|
+
|
75
|
+
# Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
|
76
|
+
def self.consumes?(string)
  begin
    # Unpack is a little bit faster than regular expressions.
    string.unpack('U*')
  rescue ArgumentError
    # unpack refused the bytes, so this proxy cannot handle the string.
    return false
  end
  true
end
|
83
|
+
|
84
|
+
include Comparable
|
85
|
+
|
86
|
+
# Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before,
|
87
|
+
# equal or after the object on the right side of the operation. It accepts any object
|
88
|
+
# that implements +to_s+:
|
89
|
+
#
|
90
|
+
# 'é'.mb_chars <=> 'ü'.mb_chars # => -1
|
91
|
+
#
|
92
|
+
# See <tt>String#<=></tt> for more details.
|
93
|
+
def <=>(other)
|
94
|
+
@wrapped_string <=> other.to_s
|
95
|
+
end
|
96
|
+
|
97
|
+
if RUBY_VERSION < "1.9"
|
98
|
+
# Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
|
99
|
+
# +false+ otherwise.
|
100
|
+
def self.wants?(string)
|
101
|
+
$KCODE == 'UTF8' && consumes?(string)
|
102
|
+
end
|
103
|
+
|
104
|
+
# Returns a new Chars object containing the _other_ object concatenated to the string.
|
105
|
+
#
|
106
|
+
# Example:
|
107
|
+
# ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl"
|
108
|
+
def +(other)
|
109
|
+
chars(@wrapped_string + other)
|
110
|
+
end
|
111
|
+
|
112
|
+
# Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
|
113
|
+
#
|
114
|
+
# Example:
|
115
|
+
# 'Café périferôl'.mb_chars =~ /ô/ # => 12
|
116
|
+
def =~(other)
|
117
|
+
translate_offset(@wrapped_string =~ other)
|
118
|
+
end
|
119
|
+
|
120
|
+
# Inserts the passed string at specified codepoint offsets.
|
121
|
+
#
|
122
|
+
# Example:
|
123
|
+
# 'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl"
|
124
|
+
def insert(offset, fragment)
  # Work on codepoints so +offset+ counts characters rather than bytes.
  unpacked = Unicode.u_unpack(@wrapped_string)
  raise IndexError, "index #{offset} out of string" if offset > unpacked.length

  # Reuse the codepoints unpacked above instead of unpacking a second time.
  unpacked.insert(offset, *Unicode.u_unpack(fragment))
  @wrapped_string.replace(unpacked.pack('U*'))
  self
end
|
135
|
+
|
136
|
+
# Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
|
137
|
+
#
|
138
|
+
# Example:
|
139
|
+
# 'Café'.mb_chars.include?('é') # => true
|
140
|
+
def include?(other)
|
141
|
+
# We have to redefine this method because Enumerable defines it.
|
142
|
+
@wrapped_string.include?(other)
|
143
|
+
end
|
144
|
+
|
145
|
+
# Returns the position _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
|
146
|
+
#
|
147
|
+
# Example:
|
148
|
+
# 'Café périferôl'.mb_chars.index('ô') # => 12
|
149
|
+
# 'Café périferôl'.mb_chars.index(/\w/u) # => 0
|
150
|
+
def index(needle, offset=0)
  # Length of the wrapped string that holds the first +offset+ characters.
  start_at = first(offset).wrapped_string.length
  found_at = @wrapped_string.index(needle, start_at)
  return nil unless found_at

  # Count the codepoints that precede the match.
  Unicode.u_unpack(@wrapped_string.slice(0...found_at)).size
end
|
155
|
+
|
156
|
+
# Returns the position _needle_ in the string, counting in
|
157
|
+
# codepoints, searching backward from _offset_ or the end of the
|
158
|
+
# string. Returns +nil+ if _needle_ isn't found.
|
159
|
+
#
|
160
|
+
# Example:
|
161
|
+
# 'Café périferôl'.mb_chars.rindex('é') # => 6
|
162
|
+
# 'Café périferôl'.mb_chars.rindex(/\w/u) # => 13
|
163
|
+
def rindex(needle, offset=nil)
  # Without an explicit offset, search backward from the end of the string.
  start_chars = offset || length
  start_at = first(start_chars).wrapped_string.length
  found_at = @wrapped_string.rindex(needle, start_at)
  return nil unless found_at

  # Count the codepoints that precede the match.
  Unicode.u_unpack(@wrapped_string.slice(0...found_at)).size
end
|
169
|
+
|
170
|
+
# Returns the number of codepoints in the string
|
171
|
+
def size
|
172
|
+
Unicode.u_unpack(@wrapped_string).size
|
173
|
+
end
|
174
|
+
alias_method :length, :size
|
175
|
+
|
176
|
+
# Strips entire range of Unicode whitespace from the right of the string.
|
177
|
+
def rstrip
|
178
|
+
chars(@wrapped_string.gsub(Unicode::TRAILERS_PAT, ''))
|
179
|
+
end
|
180
|
+
|
181
|
+
# Strips entire range of Unicode whitespace from the left of the string.
|
182
|
+
def lstrip
|
183
|
+
chars(@wrapped_string.gsub(Unicode::LEADERS_PAT, ''))
|
184
|
+
end
|
185
|
+
|
186
|
+
# Strips entire range of Unicode whitespace from the right and left of the string.
|
187
|
+
def strip
|
188
|
+
rstrip.lstrip
|
189
|
+
end
|
190
|
+
|
191
|
+
# Returns the codepoint of the first character in the string.
|
192
|
+
#
|
193
|
+
# Example:
|
194
|
+
# 'こんにちは'.mb_chars.ord # => 12371
|
195
|
+
def ord
|
196
|
+
Unicode.u_unpack(@wrapped_string)[0]
|
197
|
+
end
|
198
|
+
|
199
|
+
# Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
|
200
|
+
#
|
201
|
+
# Example:
|
202
|
+
#
|
203
|
+
# "¾ cup".mb_chars.rjust(8).to_s
|
204
|
+
# # => " ¾ cup"
|
205
|
+
#
|
206
|
+
# "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace
|
207
|
+
# # => " ¾ cup"
|
208
|
+
def rjust(integer, padstr=' ')
|
209
|
+
justify(integer, :right, padstr)
|
210
|
+
end
|
211
|
+
|
212
|
+
# Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
|
213
|
+
#
|
214
|
+
# Example:
|
215
|
+
#
|
216
|
+
# "¾ cup".mb_chars.ljust(8).to_s
|
217
|
+
# # => "¾ cup "
|
218
|
+
#
|
219
|
+
# "¾ cup".mb_chars.ljust(8, " ").to_s # Use non-breaking whitespace
|
220
|
+
# # => "¾ cup "
|
221
|
+
def ljust(integer, padstr=' ')
|
222
|
+
justify(integer, :left, padstr)
|
223
|
+
end
|
224
|
+
|
225
|
+
# Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
|
226
|
+
#
|
227
|
+
# Example:
|
228
|
+
#
|
229
|
+
# "¾ cup".mb_chars.center(8).to_s
|
230
|
+
# # => " ¾ cup "
|
231
|
+
#
|
232
|
+
# "¾ cup".mb_chars.center(8, " ").to_s # Use non-breaking whitespace
|
233
|
+
# # => " ¾ cup "
|
234
|
+
def center(integer, padstr=' ')
|
235
|
+
justify(integer, :center, padstr)
|
236
|
+
end
|
237
|
+
|
238
|
+
else
|
239
|
+
def =~(other)
|
240
|
+
@wrapped_string =~ other
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
# Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
|
245
|
+
# instances instead of String. This makes chaining methods easier.
|
246
|
+
#
|
247
|
+
# Example:
|
248
|
+
# 'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
|
249
|
+
def split(*args)
|
250
|
+
@wrapped_string.split(*args).map { |i| i.mb_chars }
|
251
|
+
end
|
252
|
+
|
253
|
+
# Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
|
254
|
+
#
|
255
|
+
# Example:
|
256
|
+
#
|
257
|
+
# s = "Müller"
|
258
|
+
# s.mb_chars[2] = "e" # Replace character with offset 2
|
259
|
+
# s
|
260
|
+
# # => "Müeler"
|
261
|
+
#
|
262
|
+
# s = "Müller"
|
263
|
+
# s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
|
264
|
+
# s
|
265
|
+
# # => "Möler"
|
266
|
+
def []=(*args)
  # The last argument is the replacement; what remains selects the target.
  replace_by = args.pop
  # Indexed replace with regular expressions already works
  if args.first.is_a?(Regexp)
    @wrapped_string[*args] = replace_by
  else
    result = Unicode.u_unpack(@wrapped_string)
    case args.first
    # Integer rather than Fixnum: Fixnum was removed from recent Rubies and
    # Integer matches the same values on older ones.
    when Integer
      raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
      min = args[0]
      # A second argument is a length in characters; without it one character is replaced.
      max = args[1].nil? ? min : (min + args[1] - 1)
      range = Range.new(min, max)
      # An integer replacement is taken as a single codepoint.
      replace_by = [replace_by].pack('U') if replace_by.is_a?(Integer)
    when Range
      raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
      range = args[0]
    else
      needle = args[0].to_s
      min = index(needle)
      # Mirror String#[]=, which raises IndexError when the substring is absent,
      # instead of failing on nil arithmetic below.
      raise IndexError, "string not matched" if min.nil?
      max = min + Unicode.u_unpack(needle).length - 1
      range = Range.new(min, max)
    end
    # Splice the replacement's codepoints in and write the result back in place.
    result[range] = Unicode.u_unpack(replace_by)
    @wrapped_string.replace(result.pack('U*'))
  end
end
|
293
|
+
|
294
|
+
# Reverses all characters in the string.
|
295
|
+
#
|
296
|
+
# Example:
|
297
|
+
# 'Café'.mb_chars.reverse.to_s # => 'éfaC'
|
298
|
+
def reverse
|
299
|
+
chars(Unicode.g_unpack(@wrapped_string).reverse.flatten.pack('U*'))
|
300
|
+
end
|
301
|
+
|
302
|
+
# Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
|
303
|
+
# character.
|
304
|
+
#
|
305
|
+
# Example:
|
306
|
+
# 'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
|
307
|
+
def slice(*args)
  selector = args[0]

  # Argument validation first. Do as if we were native.
  raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" if args.size > 2
  if args.size == 2
    unless selector.is_a?(Numeric) || selector.is_a?(Regexp)
      raise TypeError, "cannot convert #{args.first.class} into Integer"
    end
    raise TypeError, "cannot convert #{args[1].class} into Integer" unless args[1].is_a?(Numeric)
  end

  if selector.kind_of?(Regexp)
    # Regexp slicing is delegated to the wrapped string unchanged.
    piece = @wrapped_string.slice(*args)
  elsif args.size == 1 && selector.kind_of?(Numeric)
    # A single position yields one character.
    codepoint = Unicode.u_unpack(@wrapped_string)[selector]
    piece = codepoint && [codepoint].pack('U')
  else
    # Ranges and (start, length) pairs are sliced on the codepoint array.
    codepoints = Unicode.u_unpack(@wrapped_string).slice(*args)
    piece = codepoints && codepoints.pack('U*')
  end

  piece && chars(piece)
end
|
328
|
+
alias_method :[], :slice
|
329
|
+
|
330
|
+
# Limit the byte size of the string to a number of bytes without breaking characters. Usable
|
331
|
+
# when the storage for a string is limited for some reason.
|
332
|
+
#
|
333
|
+
# Example:
|
334
|
+
# 'こんにちは'.mb_chars.limit(7).to_s # => "こん"
|
335
|
+
def limit(limit)
|
336
|
+
slice(0...translate_offset(limit))
|
337
|
+
end
|
338
|
+
|
339
|
+
# Convert characters in the string to uppercase.
|
340
|
+
#
|
341
|
+
# Example:
|
342
|
+
# 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
|
343
|
+
def upcase
|
344
|
+
chars(Unicode.apply_mapping @wrapped_string, :uppercase_mapping)
|
345
|
+
end
|
346
|
+
|
347
|
+
# Convert characters in the string to lowercase.
|
348
|
+
#
|
349
|
+
# Example:
|
350
|
+
# 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
|
351
|
+
def downcase
|
352
|
+
chars(Unicode.apply_mapping @wrapped_string, :lowercase_mapping)
|
353
|
+
end
|
354
|
+
|
355
|
+
# Converts the first character to uppercase and the remainder to lowercase.
|
356
|
+
#
|
357
|
+
# Example:
|
358
|
+
# 'über'.mb_chars.capitalize.to_s # => "Über"
|
359
|
+
def capitalize
  # slice returns nil when there is nothing at that position, so fall back to an empty proxy.
  head = slice(0) || chars('')
  tail = slice(1..-1) || chars('')
  head.upcase + tail.downcase
end
|
362
|
+
|
363
|
+
# Capitalizes the first letter of every word, when possible.
|
364
|
+
#
|
365
|
+
# Example:
|
366
|
+
# "ÉL QUE SE ENTERÓ".mb_chars.titleize # => "Él Que Se Enteró"
|
367
|
+
# "日本語".mb_chars.titleize # => "日本語"
|
368
|
+
def titleize
  lowered = downcase.to_s
  # Upcase the first non-space character after each word boundary, keeping an
  # optional leading apostrophe with it.
  titled = lowered.gsub(/\b('?[\S])/u) do
    Unicode.apply_mapping $1, :uppercase_mapping
  end
  chars(titled)
end
|
371
|
+
alias_method :titlecase, :titleize
|
372
|
+
|
373
|
+
# Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
|
374
|
+
# passing strings to databases and validations.
|
375
|
+
#
|
376
|
+
# * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
|
377
|
+
# <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
|
378
|
+
# ActiveSupport::Multibyte::Unicode.default_normalization_form
|
379
|
+
def normalize(form = nil)
|
380
|
+
chars(Unicode.normalize(@wrapped_string, form))
|
381
|
+
end
|
382
|
+
|
383
|
+
# Performs canonical decomposition on all the characters.
|
384
|
+
#
|
385
|
+
# Example:
|
386
|
+
# 'é'.length # => 2
|
387
|
+
# 'é'.mb_chars.decompose.to_s.length # => 3
|
388
|
+
def decompose
|
389
|
+
chars(Unicode.decompose_codepoints(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
|
390
|
+
end
|
391
|
+
|
392
|
+
# Performs composition on all the characters.
|
393
|
+
#
|
394
|
+
# Example:
|
395
|
+
# 'é'.length # => 3
|
396
|
+
# 'é'.mb_chars.compose.to_s.length # => 2
|
397
|
+
def compose
|
398
|
+
chars(Unicode.compose_codepoints(Unicode.u_unpack(@wrapped_string)).pack('U*'))
|
399
|
+
end
|
400
|
+
|
401
|
+
# Returns the number of grapheme clusters in the string.
|
402
|
+
#
|
403
|
+
# Example:
|
404
|
+
# 'क्षि'.mb_chars.length # => 4
|
405
|
+
# 'क्षि'.mb_chars.g_length # => 3
|
406
|
+
def g_length
|
407
|
+
Unicode.g_unpack(@wrapped_string).length
|
408
|
+
end
|
409
|
+
|
410
|
+
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
|
411
|
+
#
|
412
|
+
# Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
|
413
|
+
def tidy_bytes(force = false)
|
414
|
+
chars(Unicode.tidy_bytes(@wrapped_string, force))
|
415
|
+
end
|
416
|
+
|
417
|
+
%w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
  # Only define a corresponding bang method for methods defined in the proxy; on 1.9 the proxy will
  # exclude lstrip!, rstrip! and strip! because they already work as expected on multibyte strings.
  if public_method_defined?(method)
    define_method("#{method}!") do |*args|
      # Run the non-bang variant and point the proxy at its result. The old
      # `args.nil? ? method : method` ternary had identical arms and is dropped.
      @wrapped_string = send(method, *args).to_s
      self
    end
  end
end
|
427
|
+
|
428
|
+
protected
|
429
|
+
|
430
|
+
def translate_offset(byte_offset) #:nodoc:
  return nil if byte_offset.nil?
  return 0 if @wrapped_string == ''

  # Slice by bytes on a local binary copy. Reassigning @wrapped_string here
  # (as before) left the proxy permanently tagged as ASCII-8BIT.
  bytes = @wrapped_string
  if bytes.respond_to?(:force_encoding)
    bytes = bytes.dup.force_encoding(Encoding::ASCII_8BIT)
  end

  begin
    bytes[0...byte_offset].unpack('U*').length
  rescue ArgumentError
    # The cut landed inside a multibyte character: back up one byte and retry.
    byte_offset -= 1
    retry
  end
end
|
445
|
+
|
446
|
+
def justify(integer, way, padstr=' ') #:nodoc:
  raise ArgumentError, "zero width padding" if padstr.length == 0

  # Number of padding characters needed; never negative.
  shortfall = integer - size
  padsize = shortfall > 0 ? shortfall : 0

  result =
    case way
    when :right
      @wrapped_string.dup.insert(0, padding(padsize, padstr))
    when :left
      @wrapped_string.dup.insert(-1, padding(padsize, padstr))
    when :center
      # When the padding is odd, the extra character goes on the right.
      lpad = padding((padsize / 2.0).floor, padstr)
      rpad = padding((padsize / 2.0).ceil, padstr)
      @wrapped_string.dup.insert(0, lpad).insert(-1, rpad)
    end
  chars(result)
end
|
462
|
+
|
463
|
+
def padding(padsize, padstr=' ') #:nodoc:
  return '' if padsize == 0

  # Repeat the pad string one time more than strictly needed, then cut to size.
  repeats = (padsize / Unicode.u_unpack(padstr).size) + 1
  chars(padstr * repeats).slice(0, padsize)
end
|
470
|
+
|
471
|
+
def chars(string) #:nodoc:
|
472
|
+
self.class.new(string)
|
473
|
+
end
|
474
|
+
end
|
475
|
+
end
|
476
|
+
end
|