bioinform 0.1.11 → 0.1.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/TODO.txt +2 -0
- data/bioinform.gemspec +0 -6
- data/lib/bioinform/data_models/motif.rb +1 -1
- data/lib/bioinform/data_models/pm.rb +2 -1
- data/lib/bioinform/data_models/pwm.rb +4 -5
- data/lib/bioinform/support/multiline_squish.rb +1 -1
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +29 -0
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +23 -0
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +54 -0
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +64 -0
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +57 -0
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +99 -0
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +6 -0
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +49 -0
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +72 -0
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +178 -0
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +476 -0
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +8 -0
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +393 -0
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +60 -0
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +44 -0
- data/lib/bioinform/support.rb +2 -2
- data/lib/bioinform/version.rb +1 -1
- data/spec/spec_helper.rb +10 -20
- metadata +21 -77
@@ -0,0 +1,178 @@
|
|
1
|
+
require_relative 'core_ext/hash/keys'
|
2
|
+
|
3
|
+
# This class has dubious semantics and we only have it so that
|
4
|
+
# people can write <tt>params[:key]</tt> instead of <tt>params['key']</tt>
|
5
|
+
# and they get the same value for both keys.
|
6
|
+
|
7
|
+
module ActiveSupport
  # A Hash subclass that treats Symbol and String keys as interchangeable.
  # Symbol keys are converted to Strings when written, and Symbol keys passed
  # to the lookup methods overridden here are converted before the lookup, so
  # <tt>hash[:key]</tt> and <tt>hash['key']</tt> address the same entry.
  class HashWithIndifferentAccess < Hash

    # Always returns true, so that <tt>Array#extract_options!</tt> finds members of this class.
    def extractable_options?
      true
    end

    # Returns a copy of the receiver (it already has indifferent access).
    def with_indifferent_access
      dup
    end

    # Returns the receiver itself; called by +convert_value+ on nested hashes.
    def nested_under_indifferent_access
      self
    end

    # Builds a new hash. When +constructor+ is a Hash its pairs are copied in
    # (with key conversion); any other object is handed to <tt>Hash#initialize</tt>
    # and therefore becomes the default value.
    def initialize(constructor = {})
      if constructor.is_a?(Hash)
        super()
        update(constructor)
      else
        super(constructor)
      end
    end

    # Returns the value stored under the String form of +key+ when +key+ is a
    # Symbol whose String form is present; otherwise defers to <tt>Hash#default</tt>.
    # <tt>Hash#[]</tt> is not overridden in this class, so this hook is what
    # makes <tt>hash[:key]</tt> find the entry stored under <tt>"key"</tt>.
    def default(key = nil)
      # NOTE: +key+ is deliberately reassigned inside the condition; the bare
      # +super+ below then forwards the stringified key.
      if key.is_a?(Symbol) && include?(key = key.to_s)
        self[key]
      else
        super
      end
    end

    # Builds a new instance from +hash+ and copies +hash+'s default value onto it.
    def self.new_from_hash_copying_default(hash)
      new(hash).tap do |new_hash|
        new_hash.default = hash.default
      end
    end

    # Keep handles on the unconverted Hash implementations before overriding them.
    alias_method :regular_writer, :[]= unless method_defined?(:regular_writer)
    alias_method :regular_update, :update unless method_defined?(:regular_update)

    # Assigns a new value to the hash:
    #
    #   hash = HashWithIndifferentAccess.new
    #   hash[:key] = "value"
    #
    def []=(key, value)
      regular_writer(convert_key(key), convert_value(value))
    end

    alias_method :store, :[]=

    # Updates the receiver in place with the pairs from +other_hash+:
    #
    #   hash_1 = HashWithIndifferentAccess.new
    #   hash_1[:key] = "value"
    #
    #   hash_2 = HashWithIndifferentAccess.new
    #   hash_2[:key] = "New Value!"
    #
    #   hash_1.update(hash_2) # => {"key"=>"New Value!"}
    #
    # When a block is given it is called for every key present in both hashes
    # with the (String) key, the current value and the incoming value, and its
    # result is stored -- the same contract as <tt>Hash#update</tt>. Previously
    # the block was only honoured when +other_hash+ was itself a
    # HashWithIndifferentAccess.
    #
    # Returns the receiver.
    def update(other_hash)
      if other_hash.is_a? HashWithIndifferentAccess
        # Keys and values are already converted; the block (if any) is
        # forwarded implicitly by +super+.
        super(other_hash)
      else
        other_hash.each_pair do |key, value|
          string_key = convert_key(key)
          if block_given? && key?(string_key)
            value = yield(string_key, self[string_key], value)
          end
          regular_writer(string_key, convert_value(value))
        end
        self
      end
    end

    alias_method :merge!, :update

    # Checks the hash for a key matching the argument passed in:
    #
    #   hash = HashWithIndifferentAccess.new
    #   hash["key"] = "value"
    #   hash.key? :key  # => true
    #   hash.key? "key" # => true
    #
    def key?(key)
      super(convert_key(key))
    end

    alias_method :include?, :key?
    alias_method :has_key?, :key?
    alias_method :member?, :key?

    # Same as <tt>Hash#fetch</tt> where the key passed as argument can be
    # either a string or a symbol:
    #
    #   counters = HashWithIndifferentAccess.new
    #   counters[:foo] = 1
    #
    #   counters.fetch("foo")          # => 1
    #   counters.fetch(:bar, 0)        # => 0
    #   counters.fetch(:bar) {|key| 0} # => 0
    #   counters.fetch(:zoo)           # => KeyError: key not found: "zoo"
    #
    def fetch(key, *extras)
      super(convert_key(key), *extras)
    end

    # Returns an array of the values at the specified indices:
    #
    #   hash = HashWithIndifferentAccess.new
    #   hash[:a] = "x"
    #   hash[:b] = "y"
    #   hash.values_at("a", "b") # => ["x", "y"]
    #
    def values_at(*indices)
      indices.map { |key| self[convert_key(key)] }
    end

    # Returns a copy of the hash (same class, same pairs, same default value).
    def dup
      self.class.new(self).tap do |new_hash|
        new_hash.default = default
      end
    end

    # Merges the receiver and the specified hash together, giving precedence to the values from the second hash.
    # Does not modify the receiver. An optional block resolves conflicts exactly as in +update+.
    def merge(hash, &block)
      dup.update(hash, &block)
    end

    # Performs the opposite of merge, with the keys and values from the first hash taking precedence over the second.
    # This overloaded definition prevents returning a regular hash, if reverse_merge is called on a
    # <tt>HashWithIndifferentAccess</tt>.
    #
    # NOTE(review): relies on <tt>Hash#reverse_merge</tt> being provided by a core
    # extension that is not visible from this file -- confirm it is loaded.
    def reverse_merge(other_hash)
      super self.class.new_from_hash_copying_default(other_hash)
    end

    # In-place variant of +reverse_merge+.
    def reverse_merge!(other_hash)
      replace(reverse_merge(other_hash))
    end

    # Removes a specified key from the hash.
    def delete(key)
      super(convert_key(key))
    end

    # Keys are already Strings, so these are no-ops / plain copies.
    def stringify_keys!; self end
    def stringify_keys; dup end
    # Symbolizing keys in place makes no sense here. Guarded so that loading
    # this class does not raise NameError when the core extension defining
    # <tt>Hash#symbolize_keys!</tt> has not been loaded.
    undef_method(:symbolize_keys!) if method_defined?(:symbolize_keys!)
    def symbolize_keys; to_hash.symbolize_keys end
    def to_options!; self end

    # Convert to a Hash with String keys.
    def to_hash
      Hash.new(default).merge!(self)
    end

    protected
      # Symbols become Strings; every other key is used unchanged.
      def convert_key(key)
        key.is_a?(Symbol) ? key.to_s : key
      end

      # Hashes are converted through +nested_under_indifferent_access+, Arrays
      # are converted element-wise into a copy; other values pass through.
      def convert_value(value)
        if value.is_a? Hash
          value.nested_under_indifferent_access
        elsif value.is_a?(Array)
          value.dup.replace(value.map { |e| convert_value(e) })
        else
          value
        end
      end
  end
end

HashWithIndifferentAccess = ActiveSupport::HashWithIndifferentAccess
|
@@ -0,0 +1,476 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative '../core_ext/string/access'
|
3
|
+
require_relative '../core_ext/string/behavior'
|
4
|
+
|
5
|
+
module ActiveSupport #:nodoc:
  module Multibyte #:nodoc:
    # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
    # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
    # encoding safe manner. All the normal String methods are also implemented on the proxy.
    #
    # String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods
    # which would normally return a String object now return a Chars object so methods can be chained.
    #
    #   "The Perfect String  ".mb_chars.downcase.strip.normalize # => "the perfect string"
    #
    # Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made.
    # If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them.
    #
    #   bad.explicit_checking_method "T".mb_chars.downcase.to_s
    #
    # The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
    # encodings you can write your own multibyte string handler and configure it through
    # ActiveSupport::Multibyte.proxy_class.
    #
    #   class CharsForUTF32
    #     def size
    #       @wrapped_string.size / 4
    #     end
    #
    #     def self.accepts?(string)
    #       string.length % 4 == 0
    #     end
    #   end
    #
    #   ActiveSupport::Multibyte.proxy_class = CharsForUTF32
    class Chars
      attr_reader :wrapped_string
      alias to_s wrapped_string
      alias to_str wrapped_string

      if RUBY_VERSION >= "1.9"
        # Creates a new Chars instance by wrapping _string_. Unless the string
        # is frozen it is tagged as UTF-8 in place.
        def initialize(string)
          @wrapped_string = string
          @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
        end
      else
        def initialize(string) #:nodoc:
          @wrapped_string = string
        end
      end

      # Forward all undefined methods to the wrapped string. Bang methods
      # return the proxy itself; String results of other methods are wrapped
      # in a new proxy, anything else is returned as is.
      def method_missing(method, *args, &block)
        if method.to_s =~ /!$/
          @wrapped_string.__send__(method, *args, &block)
          self
        else
          result = @wrapped_string.__send__(method, *args, &block)
          result.kind_of?(String) ? chars(result) : result
        end
      end

      # Returns +true+ if _obj_ responds to the given method. Private methods are included in the search
      # only if the optional second parameter evaluates to +true+.
      def respond_to?(method, include_private=false)
        super || @wrapped_string.respond_to?(method, include_private)
      end

      # Enable more predictable duck-typing on String-like classes. See Object#acts_like?.
      def acts_like_string?
        true
      end

      # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
      def self.consumes?(string)
        # Unpack is a little bit faster than regular expressions.
        string.unpack('U*')
        true
      rescue ArgumentError
        false
      end

      include Comparable

      # Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before,
      # equal or after the object on the right side of the operation. It accepts any object
      # that implements +to_s+:
      #
      #   'é'.mb_chars <=> 'ü'.mb_chars # => -1
      #
      # See <tt>String#<=></tt> for more details.
      def <=>(other)
        @wrapped_string <=> other.to_s
      end

      if RUBY_VERSION < "1.9"
        # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
        # +false+ otherwise.
        def self.wants?(string)
          $KCODE == 'UTF8' && consumes?(string)
        end

        # Returns a new Chars object containing the _other_ object concatenated to the string.
        #
        # Example:
        #   ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl"
        def +(other)
          chars(@wrapped_string + other)
        end

        # Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
        #
        # Example:
        #   'Café périferôl'.mb_chars =~ /ô/ # => 12
        def =~(other)
          translate_offset(@wrapped_string =~ other)
        end

        # Inserts the passed string at specified codepoint offsets.
        # Raises IndexError when +offset+ lies beyond the end of the string.
        #
        # Example:
        #   'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl"
        def insert(offset, fragment)
          unpacked = Unicode.u_unpack(@wrapped_string)
          raise IndexError, "index #{offset} out of string" if offset > unpacked.length

          # Reuse the codepoints unpacked above instead of unpacking twice.
          @wrapped_string.replace(
            unpacked.insert(offset, *Unicode.u_unpack(fragment)).pack('U*')
          )
          self
        end

        # Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
        #
        # Example:
        #   'Café'.mb_chars.include?('é') # => true
        def include?(other)
          # We have to redefine this method because Enumerable defines it.
          @wrapped_string.include?(other)
        end

        # Returns the position _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
        #
        # Example:
        #   'Café périferôl'.mb_chars.index('ô')   # => 12
        #   'Café périferôl'.mb_chars.index(/\w/u) # => 0
        def index(needle, offset=0)
          wrapped_offset = first(offset).wrapped_string.length
          index = @wrapped_string.index(needle, wrapped_offset)
          index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
        end

        # Returns the position _needle_ in the string, counting in
        # codepoints, searching backward from _offset_ or the end of the
        # string. Returns +nil+ if _needle_ isn't found.
        #
        # Example:
        #   'Café périferôl'.mb_chars.rindex('é')   # => 6
        #   'Café périferôl'.mb_chars.rindex(/\w/u) # => 13
        def rindex(needle, offset=nil)
          offset ||= length
          wrapped_offset = first(offset).wrapped_string.length
          index = @wrapped_string.rindex(needle, wrapped_offset)
          index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
        end

        # Returns the number of codepoints in the string
        def size
          Unicode.u_unpack(@wrapped_string).size
        end
        alias_method :length, :size

        # Strips entire range of Unicode whitespace from the right of the string.
        def rstrip
          chars(@wrapped_string.gsub(Unicode::TRAILERS_PAT, ''))
        end

        # Strips entire range of Unicode whitespace from the left of the string.
        def lstrip
          chars(@wrapped_string.gsub(Unicode::LEADERS_PAT, ''))
        end

        # Strips entire range of Unicode whitespace from the right and left of the string.
        def strip
          rstrip.lstrip
        end

        # Returns the codepoint of the first character in the string.
        #
        # Example:
        #   'こんにちは'.mb_chars.ord # => 12371
        def ord
          Unicode.u_unpack(@wrapped_string)[0]
        end

        # Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.rjust(8).to_s
        #   # => "   ¾ cup"
        #
        #   "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace
        #   # => "   ¾ cup"
        def rjust(integer, padstr=' ')
          justify(integer, :right, padstr)
        end

        # Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.ljust(8).to_s
        #   # => "¾ cup   "
        #
        #   "¾ cup".mb_chars.ljust(8, " ").to_s # Use non-breaking whitespace
        #   # => "¾ cup   "
        def ljust(integer, padstr=' ')
          justify(integer, :left, padstr)
        end

        # Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.center(8).to_s
        #   # => " ¾ cup  "
        #
        #   "¾ cup".mb_chars.center(8, " ").to_s # Use non-breaking whitespace
        #   # => " ¾ cup  "
        def center(integer, padstr=' ')
          justify(integer, :center, padstr)
        end

      else
        # Plain delegation: on 1.9+ String#=~ already reports character offsets.
        def =~(other)
          @wrapped_string =~ other
        end
      end

      # Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
      # instances instead of String. This makes chaining methods easier.
      #
      # Example:
      #   'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
      def split(*args)
        @wrapped_string.split(*args).map { |i| i.mb_chars }
      end

      # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
      #
      # Example:
      #
      #   s = "Müller"
      #   s.mb_chars[2] = "e" # Replace character with offset 2
      #   s
      #   # => "Müeler"
      #
      #   s = "Müller"
      #   s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
      #   s
      #   # => "Möler"
      #
      # Raises IndexError when an Integer index lies past the end of the string
      # or when a String needle does not occur in it, and RangeError when a
      # Range starts past the end of the string.
      def []=(*args)
        replace_by = args.pop
        # Indexed replace with regular expressions already works
        if args.first.is_a?(Regexp)
          @wrapped_string[*args] = replace_by
        else
          result = Unicode.u_unpack(@wrapped_string)
          case args.first
          # Integer rather than Fixnum: Fixnum no longer exists on Ruby 3.2+,
          # and Integer matches every value Fixnum used to match.
          when Integer
            raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
            min = args[0]
            max = args[1].nil? ? min : (min + args[1] - 1)
            range = Range.new(min, max)
            replace_by = [replace_by].pack('U') if replace_by.is_a?(Integer)
          when Range
            raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
            range = args[0]
          else
            needle = args[0].to_s
            min = index(needle)
            # Mirror String#[]= instead of failing with NoMethodError on nil.
            raise IndexError, "string not matched" if min.nil?
            max = min + Unicode.u_unpack(needle).length - 1
            range = Range.new(min, max)
          end
          result[range] = Unicode.u_unpack(replace_by)
          @wrapped_string.replace(result.pack('U*'))
        end
      end

      # Reverses all characters in the string.
      #
      # Example:
      #   'Café'.mb_chars.reverse.to_s # => 'éfaC'
      def reverse
        chars(Unicode.g_unpack(@wrapped_string).reverse.flatten.pack('U*'))
      end

      # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
      # character.
      #
      # Example:
      #   'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
      def slice(*args)
        if args.size > 2
          raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" # Do as if we were native
        elsif (args.size == 2 && !(args.first.is_a?(Numeric) || args.first.is_a?(Regexp)))
          raise TypeError, "cannot convert #{args.first.class} into Integer" # Do as if we were native
        elsif (args.size == 2 && !args[1].is_a?(Numeric))
          raise TypeError, "cannot convert #{args[1].class} into Integer" # Do as if we were native
        elsif args[0].kind_of? Range
          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
          result = cps.nil? ? nil : cps.pack('U*')
        elsif args[0].kind_of? Regexp
          result = @wrapped_string.slice(*args)
        elsif args.size == 1 && args[0].kind_of?(Numeric)
          character = Unicode.u_unpack(@wrapped_string)[args[0]]
          result = character && [character].pack('U')
        else
          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
          result = cps && cps.pack('U*')
        end
        result && chars(result)
      end
      alias_method :[], :slice

      # Limit the byte size of the string to a number of bytes without breaking characters. Usable
      # when the storage for a string is limited for some reason.
      #
      # Example:
      #   'こんにちは'.mb_chars.limit(7).to_s # => "こん"
      def limit(limit)
        slice(0...translate_offset(limit))
      end

      # Convert characters in the string to uppercase.
      #
      # Example:
      #   'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
      def upcase
        chars(Unicode.apply_mapping @wrapped_string, :uppercase_mapping)
      end

      # Convert characters in the string to lowercase.
      #
      # Example:
      #   'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
      def downcase
        chars(Unicode.apply_mapping @wrapped_string, :lowercase_mapping)
      end

      # Converts the first character to uppercase and the remainder to lowercase.
      #
      # Example:
      #  'über'.mb_chars.capitalize.to_s # => "Über"
      def capitalize
        (slice(0) || chars('')).upcase + (slice(1..-1) || chars('')).downcase
      end

      # Capitalizes the first letter of every word, when possible.
      #
      # Example:
      #   "ÉL QUE SE ENTERÓ".mb_chars.titleize    # => "Él Que Se Enteró"
      #   "日本語".mb_chars.titleize                 # => "日本語"
      def titleize
        chars(downcase.to_s.gsub(/\b('?[\S])/u) { Unicode.apply_mapping $1, :uppercase_mapping })
      end
      alias_method :titlecase, :titleize

      # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
      # passing strings to databases and validations.
      #
      # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
      #   <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
      #   ActiveSupport::Multibyte::Unicode.default_normalization_form
      def normalize(form = nil)
        chars(Unicode.normalize(@wrapped_string, form))
      end

      # Performs canonical decomposition on all the characters.
      #
      # Example:
      #   'é'.length                         # => 2
      #   'é'.mb_chars.decompose.to_s.length # => 3
      def decompose
        chars(Unicode.decompose_codepoints(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
      end

      # Performs composition on all the characters.
      #
      # Example:
      #   'é'.length                       # => 3
      #   'é'.mb_chars.compose.to_s.length # => 2
      def compose
        chars(Unicode.compose_codepoints(Unicode.u_unpack(@wrapped_string)).pack('U*'))
      end

      # Returns the number of grapheme clusters in the string.
      #
      # Example:
      #   'क्षि'.mb_chars.length   # => 4
      #   'क्षि'.mb_chars.g_length # => 3
      def g_length
        Unicode.g_unpack(@wrapped_string).length
      end

      # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
      #
      # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
      def tidy_bytes(force = false)
        chars(Unicode.tidy_bytes(@wrapped_string, force))
      end

      %w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
        # Only define a corresponding bang method for methods defined in the proxy; On 1.9 the proxy will
        # exclude lstrip!, rstrip! and strip! because they already work as expected on multibyte strings.
        if public_method_defined?(method)
          # The generated bang method rebinds the proxy to the result of the
          # non-bang method and returns the proxy.
          define_method("#{method}!") do |*args|
            @wrapped_string = send(method, *args).to_s
            self
          end
        end
      end

      protected

        # Converts a byte offset into the wrapped string to a codepoint count.
        # When the offset falls inside a multibyte character it is moved back
        # until the prefix is valid UTF-8. Returns +nil+ for a +nil+ offset.
        def translate_offset(byte_offset) #:nodoc:
          return nil if byte_offset.nil?
          return 0   if @wrapped_string == ''

          # Work on a local binary view so byte ranges can be taken without
          # re-encoding (or replacing) the string the proxy wraps.
          bytes = @wrapped_string
          bytes = bytes.dup.force_encoding(Encoding::ASCII_8BIT) if bytes.respond_to?(:force_encoding)

          begin
            bytes[0...byte_offset].unpack('U*').length
          rescue ArgumentError
            # The cut landed inside a character: back off one byte and retry.
            byte_offset -= 1
            retry
          end
        end

        # Pads the wrapped string to +integer+ characters on the side selected
        # by +way+ (:right, :left or :center) and returns the result as a proxy.
        def justify(integer, way, padstr=' ') #:nodoc:
          raise ArgumentError, "zero width padding" if padstr.length == 0
          padsize = integer - size
          padsize = padsize > 0 ? padsize : 0
          case way
          when :right
            result = @wrapped_string.dup.insert(0, padding(padsize, padstr))
          when :left
            result = @wrapped_string.dup.insert(-1, padding(padsize, padstr))
          when :center
            lpad = padding((padsize / 2.0).floor, padstr)
            rpad = padding((padsize / 2.0).ceil, padstr)
            result = @wrapped_string.dup.insert(0, lpad).insert(-1, rpad)
          end
          chars(result)
        end

        # Builds +padsize+ characters of padding by repeating +padstr+ and
        # cutting the surplus; returns an empty String when no padding is needed.
        def padding(padsize, padstr=' ') #:nodoc:
          if padsize != 0
            chars(padstr * ((padsize / Unicode.u_unpack(padstr).size) + 1)).slice(0, padsize)
          else
            ''
          end
        end

        # Wraps +string+ in a new proxy of the receiver's class.
        def chars(string) #:nodoc:
          self.class.new(string)
        end
    end
  end
end
|