otherinbox-mail 2.4.4
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +607 -0
- data/CONTRIBUTING.md +45 -0
- data/Dependencies.txt +3 -0
- data/Gemfile +26 -0
- data/Gemfile.lock +44 -0
- data/README.md +663 -0
- data/Rakefile +40 -0
- data/TODO.rdoc +9 -0
- data/lib/VERSION +4 -0
- data/lib/mail.rb +101 -0
- data/lib/mail/attachments_list.rb +104 -0
- data/lib/mail/body.rb +291 -0
- data/lib/mail/configuration.rb +75 -0
- data/lib/mail/core_extensions/nil.rb +17 -0
- data/lib/mail/core_extensions/object.rb +13 -0
- data/lib/mail/core_extensions/shell_escape.rb +56 -0
- data/lib/mail/core_extensions/smtp.rb +25 -0
- data/lib/mail/core_extensions/string.rb +33 -0
- data/lib/mail/core_extensions/string/access.rb +145 -0
- data/lib/mail/core_extensions/string/multibyte.rb +78 -0
- data/lib/mail/elements.rb +14 -0
- data/lib/mail/elements/address.rb +306 -0
- data/lib/mail/elements/address_list.rb +74 -0
- data/lib/mail/elements/content_disposition_element.rb +30 -0
- data/lib/mail/elements/content_location_element.rb +25 -0
- data/lib/mail/elements/content_transfer_encoding_element.rb +24 -0
- data/lib/mail/elements/content_type_element.rb +35 -0
- data/lib/mail/elements/date_time_element.rb +26 -0
- data/lib/mail/elements/envelope_from_element.rb +34 -0
- data/lib/mail/elements/message_ids_element.rb +29 -0
- data/lib/mail/elements/mime_version_element.rb +26 -0
- data/lib/mail/elements/phrase_list.rb +21 -0
- data/lib/mail/elements/received_element.rb +30 -0
- data/lib/mail/encodings.rb +274 -0
- data/lib/mail/encodings/7bit.rb +31 -0
- data/lib/mail/encodings/8bit.rb +31 -0
- data/lib/mail/encodings/base64.rb +33 -0
- data/lib/mail/encodings/binary.rb +31 -0
- data/lib/mail/encodings/quoted_printable.rb +38 -0
- data/lib/mail/encodings/transfer_encoding.rb +58 -0
- data/lib/mail/envelope.rb +35 -0
- data/lib/mail/field.rb +234 -0
- data/lib/mail/field_list.rb +33 -0
- data/lib/mail/fields.rb +35 -0
- data/lib/mail/fields/bcc_field.rb +56 -0
- data/lib/mail/fields/cc_field.rb +55 -0
- data/lib/mail/fields/comments_field.rb +41 -0
- data/lib/mail/fields/common/address_container.rb +16 -0
- data/lib/mail/fields/common/common_address.rb +125 -0
- data/lib/mail/fields/common/common_date.rb +42 -0
- data/lib/mail/fields/common/common_field.rb +51 -0
- data/lib/mail/fields/common/common_message_id.rb +44 -0
- data/lib/mail/fields/common/parameter_hash.rb +58 -0
- data/lib/mail/fields/content_description_field.rb +19 -0
- data/lib/mail/fields/content_disposition_field.rb +69 -0
- data/lib/mail/fields/content_id_field.rb +63 -0
- data/lib/mail/fields/content_location_field.rb +42 -0
- data/lib/mail/fields/content_transfer_encoding_field.rb +50 -0
- data/lib/mail/fields/content_type_field.rb +198 -0
- data/lib/mail/fields/date_field.rb +57 -0
- data/lib/mail/fields/from_field.rb +55 -0
- data/lib/mail/fields/in_reply_to_field.rb +55 -0
- data/lib/mail/fields/keywords_field.rb +44 -0
- data/lib/mail/fields/message_id_field.rb +83 -0
- data/lib/mail/fields/mime_version_field.rb +53 -0
- data/lib/mail/fields/optional_field.rb +13 -0
- data/lib/mail/fields/received_field.rb +75 -0
- data/lib/mail/fields/references_field.rb +55 -0
- data/lib/mail/fields/reply_to_field.rb +55 -0
- data/lib/mail/fields/resent_bcc_field.rb +55 -0
- data/lib/mail/fields/resent_cc_field.rb +55 -0
- data/lib/mail/fields/resent_date_field.rb +35 -0
- data/lib/mail/fields/resent_from_field.rb +55 -0
- data/lib/mail/fields/resent_message_id_field.rb +34 -0
- data/lib/mail/fields/resent_sender_field.rb +62 -0
- data/lib/mail/fields/resent_to_field.rb +55 -0
- data/lib/mail/fields/return_path_field.rb +65 -0
- data/lib/mail/fields/sender_field.rb +67 -0
- data/lib/mail/fields/structured_field.rb +51 -0
- data/lib/mail/fields/subject_field.rb +16 -0
- data/lib/mail/fields/to_field.rb +55 -0
- data/lib/mail/fields/unstructured_field.rb +191 -0
- data/lib/mail/header.rb +265 -0
- data/lib/mail/indifferent_hash.rb +146 -0
- data/lib/mail/mail.rb +255 -0
- data/lib/mail/matchers/has_sent_mail.rb +124 -0
- data/lib/mail/message.rb +2059 -0
- data/lib/mail/multibyte.rb +42 -0
- data/lib/mail/multibyte/chars.rb +474 -0
- data/lib/mail/multibyte/exceptions.rb +8 -0
- data/lib/mail/multibyte/unicode.rb +392 -0
- data/lib/mail/multibyte/utils.rb +60 -0
- data/lib/mail/network.rb +14 -0
- data/lib/mail/network/delivery_methods/exim.rb +53 -0
- data/lib/mail/network/delivery_methods/file_delivery.rb +40 -0
- data/lib/mail/network/delivery_methods/sendmail.rb +62 -0
- data/lib/mail/network/delivery_methods/smtp.rb +153 -0
- data/lib/mail/network/delivery_methods/smtp_connection.rb +74 -0
- data/lib/mail/network/delivery_methods/test_mailer.rb +40 -0
- data/lib/mail/network/retriever_methods/base.rb +63 -0
- data/lib/mail/network/retriever_methods/imap.rb +168 -0
- data/lib/mail/network/retriever_methods/pop3.rb +140 -0
- data/lib/mail/network/retriever_methods/test_retriever.rb +47 -0
- data/lib/mail/parsers/address_lists.rb +64 -0
- data/lib/mail/parsers/address_lists.treetop +19 -0
- data/lib/mail/parsers/content_disposition.rb +535 -0
- data/lib/mail/parsers/content_disposition.treetop +46 -0
- data/lib/mail/parsers/content_location.rb +139 -0
- data/lib/mail/parsers/content_location.treetop +20 -0
- data/lib/mail/parsers/content_transfer_encoding.rb +162 -0
- data/lib/mail/parsers/content_transfer_encoding.treetop +20 -0
- data/lib/mail/parsers/content_type.rb +967 -0
- data/lib/mail/parsers/content_type.treetop +68 -0
- data/lib/mail/parsers/date_time.rb +114 -0
- data/lib/mail/parsers/date_time.treetop +11 -0
- data/lib/mail/parsers/envelope_from.rb +194 -0
- data/lib/mail/parsers/envelope_from.treetop +32 -0
- data/lib/mail/parsers/message_ids.rb +45 -0
- data/lib/mail/parsers/message_ids.treetop +15 -0
- data/lib/mail/parsers/mime_version.rb +144 -0
- data/lib/mail/parsers/mime_version.treetop +19 -0
- data/lib/mail/parsers/phrase_lists.rb +45 -0
- data/lib/mail/parsers/phrase_lists.treetop +15 -0
- data/lib/mail/parsers/received.rb +71 -0
- data/lib/mail/parsers/received.treetop +11 -0
- data/lib/mail/parsers/rfc2045.rb +464 -0
- data/lib/mail/parsers/rfc2045.treetop +36 -0
- data/lib/mail/parsers/rfc2822.rb +5341 -0
- data/lib/mail/parsers/rfc2822.treetop +410 -0
- data/lib/mail/parsers/rfc2822_obsolete.rb +3768 -0
- data/lib/mail/parsers/rfc2822_obsolete.treetop +241 -0
- data/lib/mail/part.rb +116 -0
- data/lib/mail/parts_list.rb +55 -0
- data/lib/mail/patterns.rb +34 -0
- data/lib/mail/utilities.rb +215 -0
- data/lib/mail/version.rb +24 -0
- data/lib/mail/version_specific/ruby_1_8.rb +98 -0
- data/lib/mail/version_specific/ruby_1_9.rb +113 -0
- data/lib/tasks/corpus.rake +125 -0
- data/lib/tasks/treetop.rake +10 -0
- metadata +253 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Mail #:nodoc:
|
3
|
+
module Multibyte
|
4
|
+
require 'mail/multibyte/exceptions'
|
5
|
+
require 'mail/multibyte/chars'
|
6
|
+
require 'mail/multibyte/unicode'
|
7
|
+
|
8
|
+
# The proxy class returned when calling mb_chars. You can use this accessor to configure your own proxy
|
9
|
+
# class so you can support other encodings. See the Mail::Multibyte::Chars implementation for
|
10
|
+
# an example of how to do this.
|
11
|
+
#
|
12
|
+
# Example:
|
13
|
+
# Mail::Multibyte.proxy_class = CharsForUTF32
|
14
|
+
# Stores +klass+ as the proxy class in the receiver's @proxy_class variable.
def self.proxy_class=(klass)
  instance_variable_set(:@proxy_class, klass)
end
|
17
|
+
|
18
|
+
# Returns the current proxy class
|
19
|
+
# Reads the configured proxy class, falling back to (and remembering)
# Mail::Multibyte::Chars when none has been set.
def self.proxy_class
  @proxy_class = Mail::Multibyte::Chars unless @proxy_class
  @proxy_class
end
|
22
|
+
|
23
|
+
# Regular expressions that describe valid byte sequences for a character
|
24
|
+
# Maps an encoding name to an anchored regexp that matches exactly one
# well-formed character of that encoding. Both patterns use the /x and /n
# flags.
VALID_CHARACTER = {
  # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
  'UTF-8' => /\A(?:
    [\x00-\x7f] |
    [\xc2-\xdf] [\x80-\xbf] |
    \xe0 [\xa0-\xbf] [\x80-\xbf] |
    [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
    \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
    [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
    \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
  # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
  'Shift_JIS' => /\A(?:
    [\x00-\x7e\xa1-\xdf] |
    [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
}
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
require 'mail/multibyte/utils'
|
@@ -0,0 +1,474 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Mail #:nodoc:
|
4
|
+
module Multibyte #:nodoc:
|
5
|
+
# Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
|
6
|
+
# knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
|
7
|
+
# encoding safe manner. All the normal String methods are also implemented on the proxy.
|
8
|
+
#
|
9
|
+
# String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods
|
10
|
+
# which would normally return a String object now return a Chars object so methods can be chained.
|
11
|
+
#
|
12
|
+
# "The Perfect String ".mb_chars.downcase.strip.normalize # => "the perfect string"
|
13
|
+
#
|
14
|
+
# Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made.
|
15
|
+
# If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them.
|
16
|
+
#
|
17
|
+
# bad.explicit_checking_method "T".mb_chars.downcase.to_s
|
18
|
+
#
|
19
|
+
# The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
|
20
|
+
# encodings you can write your own multibyte string handler and configure it through
|
21
|
+
# Mail::Multibyte.proxy_class.
|
22
|
+
#
|
23
|
+
# class CharsForUTF32
|
24
|
+
# def size
|
25
|
+
# @wrapped_string.size / 4
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# def self.accepts?(string)
|
29
|
+
# string.length % 4 == 0
|
30
|
+
# end
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# Mail::Multibyte.proxy_class = CharsForUTF32
|
34
|
+
class Chars
|
35
|
+
attr_reader :wrapped_string
|
36
|
+
alias to_s wrapped_string
|
37
|
+
alias to_str wrapped_string
|
38
|
+
|
39
|
+
if RUBY_VERSION < "1.9"
  def initialize(string) #:nodoc:
    @wrapped_string = string
  end
else
  # Wraps _string_ in a new Chars instance. The string is tagged as UTF-8
  # in place unless it is frozen.
  def initialize(string)
    @wrapped_string = string
    string.force_encoding(Encoding::UTF_8) unless string.frozen?
  end
end
|
50
|
+
|
51
|
+
# Forward all undefined methods to the wrapped string.
|
52
|
+
# Delegates any method not defined here to the wrapped string. Bang
# methods return the proxy itself; String results are re-wrapped through
# +chars+; every other result is returned unchanged.
def method_missing(method, *args, &block)
  outcome = @wrapped_string.__send__(method, *args, &block)
  return self if method.to_s =~ /!$/
  outcome.kind_of?(String) ? chars(outcome) : outcome
end
|
61
|
+
|
62
|
+
# Returns +true+ if _obj_ responds to the given method. Private methods are included in the search
|
63
|
+
# only if the optional second parameter evaluates to +true+.
|
64
|
+
def respond_to?(method, include_private=false)
|
65
|
+
super || @wrapped_string.respond_to?(method, include_private) || false
|
66
|
+
end
|
67
|
+
|
68
|
+
# Enable more predictable duck-typing on String-like classes. See Object#acts_like?.
|
69
|
+
# Duck-typing marker: always answers +true+.
def acts_like_string?
  true
end
|
72
|
+
|
73
|
+
# Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
|
74
|
+
# Reports whether _string_ can be decoded with unpack('U*'): +true+ when
# decoding succeeds, +false+ when it raises ArgumentError.
def self.consumes?(string)
  begin
    # Unpack is a little bit faster than regular expressions.
    string.unpack('U*')
  rescue ArgumentError
    return false
  end
  true
end
|
81
|
+
|
82
|
+
include Comparable
|
83
|
+
|
84
|
+
# Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before,
|
85
|
+
# equal or after the object on the right side of the operation. It accepts any object
|
86
|
+
# that implements +to_s+:
|
87
|
+
#
|
88
|
+
# 'é'.mb_chars <=> 'ü'.mb_chars # => -1
|
89
|
+
#
|
90
|
+
# See <tt>String#<=></tt> for more details.
|
91
|
+
# Compares the wrapped string with the +to_s+ form of _other_.
def <=>(other)
  right_hand = other.to_s
  @wrapped_string <=> right_hand
end
|
94
|
+
|
95
|
+
if RUBY_VERSION < "1.9"
|
96
|
+
# Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
|
97
|
+
# +false+ otherwise.
|
98
|
+
# Answers whether this class should proxy _string_: $KCODE must be 'UTF8'
# and the string must pass +consumes?+.
def self.wants?(string)
  return false unless $KCODE == 'UTF8'
  consumes?(string)
end
|
101
|
+
|
102
|
+
# Returns a new Chars object containing the _other_ object concatenated to the string.
|
103
|
+
#
|
104
|
+
# Example:
|
105
|
+
# ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl"
|
106
|
+
# Concatenates _other_ onto the wrapped string and wraps the result.
def +(other)
  combined = @wrapped_string + other
  chars(combined)
end
|
109
|
+
|
110
|
+
# Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
|
111
|
+
#
|
112
|
+
# Example:
|
113
|
+
# 'Café périferôl'.mb_chars =~ /ô/ # => 12
|
114
|
+
# Matches the wrapped string against _other_ and passes the resulting
# offset through +translate_offset+.
def =~(other)
  byte_offset = (@wrapped_string =~ other)
  translate_offset(byte_offset)
end
|
117
|
+
|
118
|
+
# Inserts the passed string at specified codepoint offsets.
|
119
|
+
#
|
120
|
+
# Example:
|
121
|
+
# 'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl"
|
122
|
+
# Inserts _fragment_ into the wrapped string at codepoint position
# _offset_, modifying the wrapped string in place.
#
# Raises IndexError when _offset_ is greater than the number of codepoints.
# Returns the proxy itself.
def insert(offset, fragment)
  # Decode once and reuse the array for both the bounds check and the splice.
  codepoints = Unicode.u_unpack(@wrapped_string)
  raise IndexError, "index #{offset} out of string" if offset > codepoints.length

  codepoints.insert(offset, *Unicode.u_unpack(fragment))
  @wrapped_string.replace(codepoints.pack('U*'))
  self
end
|
133
|
+
|
134
|
+
# Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
|
135
|
+
#
|
136
|
+
# Example:
|
137
|
+
# 'Café'.mb_chars.include?('é') # => true
|
138
|
+
# Asks the wrapped string whether it contains _other_.
def include?(other)
  # We have to redefine this method because Enumerable defines it.
  haystack = @wrapped_string
  haystack.include?(other)
end
|
142
|
+
|
143
|
+
# Returns the position _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
|
144
|
+
#
|
145
|
+
# Example:
|
146
|
+
# 'Café périferôl'.mb_chars.index('ô') # => 12
|
147
|
+
# 'Café périferôl'.mb_chars.index(/\w/u) # => 0
|
148
|
+
# Finds _needle_ starting at codepoint _offset_ and answers its position
# counted in codepoints, or +nil+ when there is no match.
def index(needle, offset=0)
  # Length of the first _offset_ characters as measured on the raw string.
  raw_start = first(offset).wrapped_string.length
  raw_hit = @wrapped_string.index(needle, raw_start)
  return nil unless raw_hit
  Unicode.u_unpack(@wrapped_string.slice(0...raw_hit)).size
end
|
153
|
+
|
154
|
+
# Returns the position _needle_ in the string, counting in
|
155
|
+
# codepoints, searching backward from _offset_ or the end of the
|
156
|
+
# string. Returns +nil+ if _needle_ isn't found.
|
157
|
+
#
|
158
|
+
# Example:
|
159
|
+
# 'Café périferôl'.mb_chars.rindex('é') # => 6
|
160
|
+
# 'Café périferôl'.mb_chars.rindex(/\w/u) # => 13
|
161
|
+
# Searches backward for _needle_, starting at codepoint _offset_ (the
# whole length when omitted), and answers its position in codepoints or
# +nil+ when there is no match.
def rindex(needle, offset=nil)
  offset = length unless offset
  raw_start = first(offset).wrapped_string.length
  raw_hit = @wrapped_string.rindex(needle, raw_start)
  return nil unless raw_hit
  Unicode.u_unpack(@wrapped_string.slice(0...raw_hit)).size
end
|
167
|
+
|
168
|
+
# Returns the number of codepoints in the string
|
169
|
+
# Counts the codepoints of the wrapped string.
def size
  codepoints = Unicode.u_unpack(@wrapped_string)
  codepoints.size
end
|
172
|
+
alias_method :length, :size
|
173
|
+
|
174
|
+
# Strips entire range of Unicode whitespace from the right of the string.
|
175
|
+
# Removes everything matched by Unicode::TRAILERS_PAT and wraps the result.
def rstrip
  trimmed = @wrapped_string.gsub(Unicode::TRAILERS_PAT, '')
  chars(trimmed)
end
|
178
|
+
|
179
|
+
# Strips entire range of Unicode whitespace from the left of the string.
|
180
|
+
# Removes everything matched by Unicode::LEADERS_PAT and wraps the result.
def lstrip
  trimmed = @wrapped_string.gsub(Unicode::LEADERS_PAT, '')
  chars(trimmed)
end
|
183
|
+
|
184
|
+
# Strips entire range of Unicode whitespace from the right and left of the string.
|
185
|
+
# Applies +rstrip+ and then +lstrip+ to that result.
def strip
  right_trimmed = rstrip
  right_trimmed.lstrip
end
|
188
|
+
|
189
|
+
# Returns the codepoint of the first character in the string.
|
190
|
+
#
|
191
|
+
# Example:
|
192
|
+
# 'こんにちは'.mb_chars.ord # => 12371
|
193
|
+
# Answers the first codepoint of the wrapped string (+nil+ when empty).
def ord
  Unicode.u_unpack(@wrapped_string).first
end
|
196
|
+
|
197
|
+
# Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
|
198
|
+
#
|
199
|
+
# Example:
|
200
|
+
#
|
201
|
+
# "¾ cup".mb_chars.rjust(8).to_s
|
202
|
+
# # => " ¾ cup"
|
203
|
+
#
|
204
|
+
# "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace
|
205
|
+
# # => " ¾ cup"
|
206
|
+
# Delegates to +justify+ with the :right direction.
def rjust(integer, padstr=' ')
  direction = :right
  justify(integer, direction, padstr)
end
|
209
|
+
|
210
|
+
# Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
|
211
|
+
#
|
212
|
+
# Example:
|
213
|
+
#
|
214
|
+
# "¾ cup".mb_chars.ljust(8).to_s
|
215
|
+
# # => "¾ cup "
|
216
|
+
#
|
217
|
+
# "¾ cup".mb_chars.ljust(8, " ").to_s # Use non-breaking whitespace
|
218
|
+
# # => "¾ cup "
|
219
|
+
# Delegates to +justify+ with the :left direction.
def ljust(integer, padstr=' ')
  direction = :left
  justify(integer, direction, padstr)
end
|
222
|
+
|
223
|
+
# Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
|
224
|
+
#
|
225
|
+
# Example:
|
226
|
+
#
|
227
|
+
# "¾ cup".mb_chars.center(8).to_s
|
228
|
+
# # => " ¾ cup "
|
229
|
+
#
|
230
|
+
# "¾ cup".mb_chars.center(8, " ").to_s # Use non-breaking whitespace
|
231
|
+
# # => " ¾ cup "
|
232
|
+
# Delegates to +justify+ with the :center direction.
def center(integer, padstr=' ')
  direction = :center
  justify(integer, direction, padstr)
end
|
235
|
+
|
236
|
+
else
|
237
|
+
# Matches the wrapped string against _other_ and answers the offset
# reported by the wrapped string's own =~.
def =~(other)
  match_position = (@wrapped_string =~ other)
  match_position
end
|
240
|
+
end
|
241
|
+
|
242
|
+
# Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
|
243
|
+
# instances instead of String. This makes chaining methods easier.
|
244
|
+
#
|
245
|
+
# Example:
|
246
|
+
# 'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
|
247
|
+
# Splits the wrapped string with the given arguments and converts every
# piece with +mb_chars+.
def split(*args)
  pieces = @wrapped_string.split(*args)
  pieces.map { |piece| piece.mb_chars }
end
|
250
|
+
|
251
|
+
# Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
|
252
|
+
#
|
253
|
+
# Example:
|
254
|
+
#
|
255
|
+
# s = "Müller"
|
256
|
+
# s.mb_chars[2] = "e" # Replace character with offset 2
|
257
|
+
# s
|
258
|
+
# # => "Müeler"
|
259
|
+
#
|
260
|
+
# s = "Müller"
|
261
|
+
# s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
|
262
|
+
# s
|
263
|
+
# # => "Möler"
|
264
|
+
# Character-offset version of String#[]=. The last argument is the
# replacement; the preceding arguments select what is replaced:
#
# * a Regexp (plus optional capture) is delegated to the wrapped string;
# * an Integer index, optionally followed by a length, counts codepoints;
# * a Range of codepoint positions;
# * anything else is converted with +to_s+ and located with +index+.
#
# When the index is an Integer, an Integer replacement is treated as a
# codepoint.
#
# Raises IndexError when an Integer index is at or past the end, or when a
# string needle is not found; raises RangeError when a Range starts at or
# past the end.
def []=(*args)
  replace_by = args.pop
  # Indexed replace with regular expressions already works
  if args.first.is_a?(Regexp)
    @wrapped_string[*args] = replace_by
  else
    result = Unicode.u_unpack(@wrapped_string)
    case args.first
    when Integer # Fixnum no longer exists on Ruby 3.2+
      raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
      min = args[0]
      max = args[1].nil? ? min : (min + args[1] - 1)
      range = Range.new(min, max)
      replace_by = [replace_by].pack('U') if replace_by.is_a?(Integer)
    when Range
      # Range#min is nil for ranges such as 1..-1, so test the start directly.
      raise RangeError, "#{args[0]} out of range" if args[0].begin >= result.length
      range = args[0]
    else
      needle = args[0].to_s
      min = index(needle)
      raise IndexError, "string not matched" if min.nil?
      max = min + Unicode.u_unpack(needle).length - 1
      range = Range.new(min, max)
    end
    result[range] = Unicode.u_unpack(replace_by)
    @wrapped_string.replace(result.pack('U*'))
  end
end
|
290
|
+
|
291
|
+
# Reverses all characters in the string.
|
292
|
+
#
|
293
|
+
# Example:
|
294
|
+
# 'Café'.mb_chars.reverse.to_s # => 'éfaC'
|
295
|
+
# Reverses the sequence of groups produced by Unicode.g_unpack, flattens
# them back into codepoints and wraps the re-packed string.
def reverse
  clusters = Unicode.g_unpack(@wrapped_string)
  codepoints = clusters.reverse.flatten
  chars(codepoints.pack('U*'))
end
|
298
|
+
|
299
|
+
# Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
|
300
|
+
# character.
|
301
|
+
#
|
302
|
+
# Example:
|
303
|
+
# 'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
|
304
|
+
# Codepoint-aware counterpart of String#slice. Accepts a single index, an
# index with a length, a Range, or a Regexp (optionally with a capture
# number). Answers a wrapped string, or +nil+ when nothing is selected.
#
# Raises ArgumentError for more than two arguments and TypeError when a
# two-argument call has an unsupported first or non-numeric second argument.
def slice(*args)
  head, count = args

  # Argument validation; do as if we were native.
  raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" if args.size > 2
  if args.size == 2
    unless head.is_a?(Numeric) || head.is_a?(Regexp)
      raise TypeError, "cannot convert #{head.class} into Integer"
    end
    raise TypeError, "cannot convert #{count.class} into Integer" unless count.is_a?(Numeric)
  end

  piece =
    case head
    when Range
      codepoints = Unicode.u_unpack(@wrapped_string).slice(*args)
      codepoints && codepoints.pack('U*')
    when Regexp
      @wrapped_string.slice(*args)
    else
      if args.size == 1 && head.kind_of?(Numeric)
        codepoint = Unicode.u_unpack(@wrapped_string)[head]
        codepoint && [codepoint].pack('U')
      else
        codepoints = Unicode.u_unpack(@wrapped_string).slice(*args)
        codepoints && codepoints.pack('U*')
      end
    end

  piece && chars(piece)
end
|
325
|
+
alias_method :[], :slice
|
326
|
+
|
327
|
+
# Limit the byte size of the string to a number of bytes without breaking characters. Usable
|
328
|
+
# when the storage for a string is limited for some reason.
|
329
|
+
#
|
330
|
+
# Example:
|
331
|
+
# s = 'こんにちは'
|
332
|
+
# s.mb_chars.limit(7) # => "こん"
|
333
|
+
# Converts the byte budget _limit_ into a codepoint count with
# +translate_offset+ and slices that many characters from the start.
def limit(limit)
  character_count = translate_offset(limit)
  slice(0...character_count)
end
|
336
|
+
|
337
|
+
# Convert characters in the string to uppercase.
|
338
|
+
#
|
339
|
+
# Example:
|
340
|
+
# 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
|
341
|
+
# Converts the wrapped string to uppercase by applying
# Unicode.apply_mapping with :uppercase_mapping, and wraps the result.
def upcase
  # The mapping name belongs to apply_mapping, not to chars.
  chars(Unicode.apply_mapping(@wrapped_string, :uppercase_mapping))
end
|
344
|
+
|
345
|
+
# Convert characters in the string to lowercase.
|
346
|
+
#
|
347
|
+
# Example:
|
348
|
+
# 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
|
349
|
+
# Converts the wrapped string to lowercase by applying
# Unicode.apply_mapping with :lowercase_mapping, and wraps the result.
def downcase
  # The mapping name belongs to apply_mapping, not to chars.
  chars(Unicode.apply_mapping(@wrapped_string, :lowercase_mapping))
end
|
352
|
+
|
353
|
+
# Converts the first character to uppercase and the remainder to lowercase.
|
354
|
+
#
|
355
|
+
# Example:
|
356
|
+
# 'über'.mb_chars.capitalize.to_s # => "Über"
|
357
|
+
# Upcases the first character and downcases the rest; either part falls
# back to an empty wrapped string when +slice+ answers +nil+.
def capitalize
  head = slice(0) || chars('')
  tail = slice(1..-1) || chars('')
  head.upcase + tail.downcase
end
|
360
|
+
|
361
|
+
# Capitalizes the first letter of every word, when possible.
|
362
|
+
#
|
363
|
+
# Example:
|
364
|
+
# "ÉL QUE SE ENTERÓ".mb_chars.titleize # => "Él Que Se Enteró"
|
365
|
+
# "日本語".mb_chars.titleize # => "日本語"
|
366
|
+
# Downcases the string, then upcases (through Unicode.apply_mapping) each
# non-space character that follows a word boundary, optionally preceded
# by an apostrophe.
def titleize
  lowered = downcase.to_s
  titled = lowered.gsub(/\b('?[\S])/u) { Unicode.apply_mapping $1, :uppercase_mapping }
  chars(titled)
end
|
369
|
+
alias_method :titlecase, :titleize
|
370
|
+
|
371
|
+
# Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
|
372
|
+
# passing strings to databases and validations.
|
373
|
+
#
|
374
|
+
# * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
|
375
|
+
# <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
|
376
|
+
# Mail::Multibyte::Unicode.default_normalization_form
|
377
|
+
# Passes the wrapped string and _form_ to Unicode.normalize and wraps the
# outcome.
def normalize(form = nil)
  normalized = Unicode.normalize(@wrapped_string, form)
  chars(normalized)
end
|
380
|
+
|
381
|
+
# Performs canonical decomposition on all the characters.
|
382
|
+
#
|
383
|
+
# Example:
|
384
|
+
# 'é'.length # => 2
|
385
|
+
# 'é'.mb_chars.decompose.to_s.length # => 3
|
386
|
+
# Runs Unicode.decompose_codepoints in :canonical mode over the string's
# codepoints and wraps the re-packed result.
def decompose
  codepoints = Unicode.u_unpack(@wrapped_string)
  decomposed = Unicode.decompose_codepoints(:canonical, codepoints)
  chars(decomposed.pack('U*'))
end
|
389
|
+
|
390
|
+
# Performs composition on all the characters.
|
391
|
+
#
|
392
|
+
# Example:
|
393
|
+
# 'é'.length # => 3
|
394
|
+
# 'é'.mb_chars.compose.to_s.length # => 2
|
395
|
+
# Runs Unicode.compose_codepoints over the string's codepoints and wraps
# the re-packed result.
def compose
  codepoints = Unicode.u_unpack(@wrapped_string)
  composed = Unicode.compose_codepoints(codepoints)
  chars(composed.pack('U*'))
end
|
398
|
+
|
399
|
+
# Returns the number of grapheme clusters in the string.
|
400
|
+
#
|
401
|
+
# Example:
|
402
|
+
# 'क्षि'.mb_chars.length # => 4
|
403
|
+
# 'क्षि'.mb_chars.g_length # => 3
|
404
|
+
# Counts the groups that Unicode.g_unpack produces for the wrapped string.
def g_length
  clusters = Unicode.g_unpack(@wrapped_string)
  clusters.length
end
|
407
|
+
|
408
|
+
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
|
409
|
+
#
|
410
|
+
# Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
|
411
|
+
# Passes the wrapped string and _force_ to Unicode.tidy_bytes and wraps
# the outcome.
def tidy_bytes(force = false)
  tidied = Unicode.tidy_bytes(@wrapped_string, force)
  chars(tidied)
end
|
414
|
+
|
415
|
+
# Generates a bang variant for each listed method: it calls the non-bang
# method with the same arguments, stores the +to_s+ of the result as the
# new wrapped string and answers the proxy itself.
%w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
  # Only define a corresponding bang method for methods defined in the proxy; on 1.9 the proxy will
  # exclude lstrip!, rstrip! and strip! because they already work as expected on multibyte strings.
  if public_method_defined?(method)
    define_method("#{method}!") do |*args|
      # *args is always an Array, so no nil check is needed before forwarding.
      @wrapped_string = send(method, *args).to_s
      self
    end
  end
end
|
425
|
+
|
426
|
+
protected
|
427
|
+
|
428
|
+
# Converts a byte offset into the wrapped string to a codepoint count.
#
# Answers +nil+ for a +nil+ offset and 0 for an empty wrapped string.
# When the offset falls inside a multi-byte sequence, it is moved back one
# byte at a time until the prefix decodes cleanly.
def translate_offset(byte_offset) #:nodoc:
  return nil if byte_offset.nil?
  return 0 if @wrapped_string == ''

  # Work on a local binary copy so the wrapped string keeps its identity
  # and encoding; this method only answers a question.
  bytes = @wrapped_string
  bytes = bytes.dup.force_encoding(Encoding::ASCII_8BIT) if bytes.respond_to?(:force_encoding)

  begin
    bytes[0...byte_offset].unpack('U*').length
  rescue ArgumentError
    # The prefix ended mid-character; shorten it and try again. An empty
    # prefix always decodes, so the retry loop terminates.
    byte_offset -= 1
    retry
  end
end
|
443
|
+
|
444
|
+
# Pads a copy of the wrapped string to _integer_ characters. _way_ picks
# where the padding goes (:right pads on the left, :left pads on the
# right, :center pads both sides with the extra character on the right).
# Raises ArgumentError when _padstr_ is empty.
def justify(integer, way, padstr=' ') #:nodoc:
  raise ArgumentError, "zero width padding" if padstr.length == 0

  padsize = [integer - size, 0].max
  padded = @wrapped_string.dup
  case way
  when :right
    padded.insert(0, padding(padsize, padstr))
  when :left
    padded.insert(-1, padding(padsize, padstr))
  when :center
    left_pad = padding((padsize / 2.0).floor, padstr)
    right_pad = padding((padsize / 2.0).ceil, padstr)
    padded.insert(0, left_pad).insert(-1, right_pad)
  else
    # Unknown direction: nothing is produced.
    padded = nil
  end
  chars(padded)
end
|
460
|
+
|
461
|
+
# Builds padding of exactly _padsize_ characters by repeating _padstr_ and
# cutting off the excess. Answers a plain empty string when _padsize_ is 0.
def padding(padsize, padstr=' ') #:nodoc:
  return '' if padsize == 0

  unit_length = Unicode.u_unpack(padstr).size
  repeats = (padsize / unit_length) + 1
  chars(padstr * repeats).slice(0, padsize)
end
|
468
|
+
|
469
|
+
# Wraps _string_ in a new instance of the receiver's own class.
def chars(string) #:nodoc:
  wrapper_class = self.class
  wrapper_class.new(string)
end
|
472
|
+
end
|
473
|
+
end
|
474
|
+
end
|