otherinbox-mail 2.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG.rdoc +607 -0
  2. data/CONTRIBUTING.md +45 -0
  3. data/Dependencies.txt +3 -0
  4. data/Gemfile +26 -0
  5. data/Gemfile.lock +44 -0
  6. data/README.md +663 -0
  7. data/Rakefile +40 -0
  8. data/TODO.rdoc +9 -0
  9. data/lib/VERSION +4 -0
  10. data/lib/mail.rb +101 -0
  11. data/lib/mail/attachments_list.rb +104 -0
  12. data/lib/mail/body.rb +291 -0
  13. data/lib/mail/configuration.rb +75 -0
  14. data/lib/mail/core_extensions/nil.rb +17 -0
  15. data/lib/mail/core_extensions/object.rb +13 -0
  16. data/lib/mail/core_extensions/shell_escape.rb +56 -0
  17. data/lib/mail/core_extensions/smtp.rb +25 -0
  18. data/lib/mail/core_extensions/string.rb +33 -0
  19. data/lib/mail/core_extensions/string/access.rb +145 -0
  20. data/lib/mail/core_extensions/string/multibyte.rb +78 -0
  21. data/lib/mail/elements.rb +14 -0
  22. data/lib/mail/elements/address.rb +306 -0
  23. data/lib/mail/elements/address_list.rb +74 -0
  24. data/lib/mail/elements/content_disposition_element.rb +30 -0
  25. data/lib/mail/elements/content_location_element.rb +25 -0
  26. data/lib/mail/elements/content_transfer_encoding_element.rb +24 -0
  27. data/lib/mail/elements/content_type_element.rb +35 -0
  28. data/lib/mail/elements/date_time_element.rb +26 -0
  29. data/lib/mail/elements/envelope_from_element.rb +34 -0
  30. data/lib/mail/elements/message_ids_element.rb +29 -0
  31. data/lib/mail/elements/mime_version_element.rb +26 -0
  32. data/lib/mail/elements/phrase_list.rb +21 -0
  33. data/lib/mail/elements/received_element.rb +30 -0
  34. data/lib/mail/encodings.rb +274 -0
  35. data/lib/mail/encodings/7bit.rb +31 -0
  36. data/lib/mail/encodings/8bit.rb +31 -0
  37. data/lib/mail/encodings/base64.rb +33 -0
  38. data/lib/mail/encodings/binary.rb +31 -0
  39. data/lib/mail/encodings/quoted_printable.rb +38 -0
  40. data/lib/mail/encodings/transfer_encoding.rb +58 -0
  41. data/lib/mail/envelope.rb +35 -0
  42. data/lib/mail/field.rb +234 -0
  43. data/lib/mail/field_list.rb +33 -0
  44. data/lib/mail/fields.rb +35 -0
  45. data/lib/mail/fields/bcc_field.rb +56 -0
  46. data/lib/mail/fields/cc_field.rb +55 -0
  47. data/lib/mail/fields/comments_field.rb +41 -0
  48. data/lib/mail/fields/common/address_container.rb +16 -0
  49. data/lib/mail/fields/common/common_address.rb +125 -0
  50. data/lib/mail/fields/common/common_date.rb +42 -0
  51. data/lib/mail/fields/common/common_field.rb +51 -0
  52. data/lib/mail/fields/common/common_message_id.rb +44 -0
  53. data/lib/mail/fields/common/parameter_hash.rb +58 -0
  54. data/lib/mail/fields/content_description_field.rb +19 -0
  55. data/lib/mail/fields/content_disposition_field.rb +69 -0
  56. data/lib/mail/fields/content_id_field.rb +63 -0
  57. data/lib/mail/fields/content_location_field.rb +42 -0
  58. data/lib/mail/fields/content_transfer_encoding_field.rb +50 -0
  59. data/lib/mail/fields/content_type_field.rb +198 -0
  60. data/lib/mail/fields/date_field.rb +57 -0
  61. data/lib/mail/fields/from_field.rb +55 -0
  62. data/lib/mail/fields/in_reply_to_field.rb +55 -0
  63. data/lib/mail/fields/keywords_field.rb +44 -0
  64. data/lib/mail/fields/message_id_field.rb +83 -0
  65. data/lib/mail/fields/mime_version_field.rb +53 -0
  66. data/lib/mail/fields/optional_field.rb +13 -0
  67. data/lib/mail/fields/received_field.rb +75 -0
  68. data/lib/mail/fields/references_field.rb +55 -0
  69. data/lib/mail/fields/reply_to_field.rb +55 -0
  70. data/lib/mail/fields/resent_bcc_field.rb +55 -0
  71. data/lib/mail/fields/resent_cc_field.rb +55 -0
  72. data/lib/mail/fields/resent_date_field.rb +35 -0
  73. data/lib/mail/fields/resent_from_field.rb +55 -0
  74. data/lib/mail/fields/resent_message_id_field.rb +34 -0
  75. data/lib/mail/fields/resent_sender_field.rb +62 -0
  76. data/lib/mail/fields/resent_to_field.rb +55 -0
  77. data/lib/mail/fields/return_path_field.rb +65 -0
  78. data/lib/mail/fields/sender_field.rb +67 -0
  79. data/lib/mail/fields/structured_field.rb +51 -0
  80. data/lib/mail/fields/subject_field.rb +16 -0
  81. data/lib/mail/fields/to_field.rb +55 -0
  82. data/lib/mail/fields/unstructured_field.rb +191 -0
  83. data/lib/mail/header.rb +265 -0
  84. data/lib/mail/indifferent_hash.rb +146 -0
  85. data/lib/mail/mail.rb +255 -0
  86. data/lib/mail/matchers/has_sent_mail.rb +124 -0
  87. data/lib/mail/message.rb +2059 -0
  88. data/lib/mail/multibyte.rb +42 -0
  89. data/lib/mail/multibyte/chars.rb +474 -0
  90. data/lib/mail/multibyte/exceptions.rb +8 -0
  91. data/lib/mail/multibyte/unicode.rb +392 -0
  92. data/lib/mail/multibyte/utils.rb +60 -0
  93. data/lib/mail/network.rb +14 -0
  94. data/lib/mail/network/delivery_methods/exim.rb +53 -0
  95. data/lib/mail/network/delivery_methods/file_delivery.rb +40 -0
  96. data/lib/mail/network/delivery_methods/sendmail.rb +62 -0
  97. data/lib/mail/network/delivery_methods/smtp.rb +153 -0
  98. data/lib/mail/network/delivery_methods/smtp_connection.rb +74 -0
  99. data/lib/mail/network/delivery_methods/test_mailer.rb +40 -0
  100. data/lib/mail/network/retriever_methods/base.rb +63 -0
  101. data/lib/mail/network/retriever_methods/imap.rb +168 -0
  102. data/lib/mail/network/retriever_methods/pop3.rb +140 -0
  103. data/lib/mail/network/retriever_methods/test_retriever.rb +47 -0
  104. data/lib/mail/parsers/address_lists.rb +64 -0
  105. data/lib/mail/parsers/address_lists.treetop +19 -0
  106. data/lib/mail/parsers/content_disposition.rb +535 -0
  107. data/lib/mail/parsers/content_disposition.treetop +46 -0
  108. data/lib/mail/parsers/content_location.rb +139 -0
  109. data/lib/mail/parsers/content_location.treetop +20 -0
  110. data/lib/mail/parsers/content_transfer_encoding.rb +162 -0
  111. data/lib/mail/parsers/content_transfer_encoding.treetop +20 -0
  112. data/lib/mail/parsers/content_type.rb +967 -0
  113. data/lib/mail/parsers/content_type.treetop +68 -0
  114. data/lib/mail/parsers/date_time.rb +114 -0
  115. data/lib/mail/parsers/date_time.treetop +11 -0
  116. data/lib/mail/parsers/envelope_from.rb +194 -0
  117. data/lib/mail/parsers/envelope_from.treetop +32 -0
  118. data/lib/mail/parsers/message_ids.rb +45 -0
  119. data/lib/mail/parsers/message_ids.treetop +15 -0
  120. data/lib/mail/parsers/mime_version.rb +144 -0
  121. data/lib/mail/parsers/mime_version.treetop +19 -0
  122. data/lib/mail/parsers/phrase_lists.rb +45 -0
  123. data/lib/mail/parsers/phrase_lists.treetop +15 -0
  124. data/lib/mail/parsers/received.rb +71 -0
  125. data/lib/mail/parsers/received.treetop +11 -0
  126. data/lib/mail/parsers/rfc2045.rb +464 -0
  127. data/lib/mail/parsers/rfc2045.treetop +36 -0
  128. data/lib/mail/parsers/rfc2822.rb +5341 -0
  129. data/lib/mail/parsers/rfc2822.treetop +410 -0
  130. data/lib/mail/parsers/rfc2822_obsolete.rb +3768 -0
  131. data/lib/mail/parsers/rfc2822_obsolete.treetop +241 -0
  132. data/lib/mail/part.rb +116 -0
  133. data/lib/mail/parts_list.rb +55 -0
  134. data/lib/mail/patterns.rb +34 -0
  135. data/lib/mail/utilities.rb +215 -0
  136. data/lib/mail/version.rb +24 -0
  137. data/lib/mail/version_specific/ruby_1_8.rb +98 -0
  138. data/lib/mail/version_specific/ruby_1_9.rb +113 -0
  139. data/lib/tasks/corpus.rake +125 -0
  140. data/lib/tasks/treetop.rake +10 -0
  141. metadata +253 -0
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
module Mail #:nodoc:
  module Multibyte
    require 'mail/multibyte/exceptions'
    require 'mail/multibyte/chars'
    require 'mail/multibyte/unicode'

    class << self
      # Sets the proxy class returned when calling mb_chars. Assign your own proxy class here to
      # support other encodings. See the Mail::Multibyte::Chars implementation for an example of
      # how to write one.
      #
      # Example:
      #   Mail::Multibyte.proxy_class = CharsForUTF32
      attr_writer :proxy_class

      # Returns the current proxy class. Falls back to (and remembers) Mail::Multibyte::Chars
      # when no proxy class has been assigned.
      def proxy_class
        @proxy_class ||= Mail::Multibyte::Chars
      end
    end

    # Regular expressions that describe valid byte sequences for a character, keyed by
    # encoding name.
    VALID_CHARACTER = {
      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
      'UTF-8' => /\A(?:
                  [\x00-\x7f] |
                  [\xc2-\xdf] [\x80-\xbf] |
                  \xe0 [\xa0-\xbf] [\x80-\xbf] |
                  [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
                  \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
                  [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
                  \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
      'Shift_JIS' => /\A(?:
                  [\x00-\x7e\xa1-\xdf] |
                  [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
    }
  end
end
41
+
42
+ require 'mail/multibyte/utils'
@@ -0,0 +1,474 @@
1
+ # encoding: utf-8
2
+
3
module Mail #:nodoc:
  module Multibyte #:nodoc:
    # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
    # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
    # encoding safe manner. All the normal String methods are also implemented on the proxy.
    #
    # String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods
    # which would normally return a String object now return a Chars object so methods can be chained.
    #
    #   "The Perfect String  ".mb_chars.downcase.strip.normalize # => "the perfect string"
    #
    # Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made.
    # If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them.
    #
    #   bad.explicit_checking_method "T".mb_chars.downcase.to_s
    #
    # The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
    # encodings you can write your own multibyte string handler and configure it through
    # Mail::Multibyte.proxy_class.
    #
    #   class CharsForUTF32
    #     def size
    #       @wrapped_string.size / 4
    #     end
    #
    #     def self.accepts?(string)
    #       string.length % 4 == 0
    #     end
    #   end
    #
    #   Mail::Multibyte.proxy_class = CharsForUTF32
    class Chars
      attr_reader :wrapped_string
      alias to_s wrapped_string
      alias to_str wrapped_string

      if RUBY_VERSION >= "1.9"
        # Creates a new Chars instance by wrapping _string_.
        #
        # The string is not copied: unless it is frozen, it is tagged as UTF-8 in place.
        def initialize(string)
          @wrapped_string = string
          @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
        end
      else
        def initialize(string) #:nodoc:
          @wrapped_string = string
        end
      end

      # Forward all undefined methods to the wrapped string.
      #
      # Methods whose name ends in <tt>!</tt> are sent to the wrapped string and the proxy itself is returned.
      # For every other method a String result is wrapped in a new proxy; any other result is returned as is.
      def method_missing(method, *args, &block)
        if method.to_s =~ /!$/
          @wrapped_string.__send__(method, *args, &block)
          self
        else
          result = @wrapped_string.__send__(method, *args, &block)
          result.kind_of?(String) ? chars(result) : result
        end
      end

      # Returns +true+ if _obj_ responds to the given method. Private methods are included in the search
      # only if the optional second parameter evaluates to +true+.
      def respond_to?(method, include_private=false)
        super || @wrapped_string.respond_to?(method, include_private) || false
      end

      # Enable more predictable duck-typing on String-like classes. See Object#acts_like?.
      def acts_like_string?
        true
      end

      # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
      def self.consumes?(string)
        # Unpack is a little bit faster than regular expressions.
        string.unpack('U*')
        true
      rescue ArgumentError
        false
      end

      include Comparable

      # Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before,
      # equal or after the object on the right side of the operation. It accepts any object
      # that implements +to_s+:
      #
      #   'é'.mb_chars <=> 'ü'.mb_chars # => -1
      #
      # See <tt>String#<=></tt> for more details.
      def <=>(other)
        @wrapped_string <=> other.to_s
      end

      if RUBY_VERSION < "1.9"
        # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
        # +false+ otherwise.
        def self.wants?(string)
          $KCODE == 'UTF8' && consumes?(string)
        end

        # Returns a new Chars object containing the _other_ object concatenated to the string.
        #
        # Example:
        #   ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl"
        def +(other)
          chars(@wrapped_string + other)
        end

        # Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
        #
        # Example:
        #   'Café périferôl'.mb_chars =~ /ô/ # => 12
        def =~(other)
          translate_offset(@wrapped_string =~ other)
        end

        # Inserts the passed string at specified codepoint offsets.
        #
        # Raises IndexError when _offset_ lies beyond the end of the string.
        #
        # Example:
        #   'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl"
        def insert(offset, fragment)
          unpacked = Unicode.u_unpack(@wrapped_string)
          raise IndexError, "index #{offset} out of string" if offset > unpacked.length

          @wrapped_string.replace(unpacked.insert(offset, *Unicode.u_unpack(fragment)).pack('U*'))
          self
        end

        # Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
        #
        # Example:
        #   'Café'.mb_chars.include?('é') # => true
        def include?(other)
          # We have to redefine this method because Enumerable defines it.
          @wrapped_string.include?(other)
        end

        # Returns the position _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
        #
        # Example:
        #   'Café périferôl'.mb_chars.index('ô') # => 12
        #   'Café périferôl'.mb_chars.index(/\w/u) # => 0
        def index(needle, offset=0)
          wrapped_offset = first(offset).wrapped_string.length
          index = @wrapped_string.index(needle, wrapped_offset)
          index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
        end

        # Returns the position _needle_ in the string, counting in
        # codepoints, searching backward from _offset_ or the end of the
        # string. Returns +nil+ if _needle_ isn't found.
        #
        # Example:
        #   'Café périferôl'.mb_chars.rindex('é') # => 6
        #   'Café périferôl'.mb_chars.rindex(/\w/u) # => 13
        def rindex(needle, offset=nil)
          offset ||= length
          wrapped_offset = first(offset).wrapped_string.length
          index = @wrapped_string.rindex(needle, wrapped_offset)
          index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
        end

        # Returns the number of codepoints in the string
        def size
          Unicode.u_unpack(@wrapped_string).size
        end
        alias_method :length, :size

        # Strips entire range of Unicode whitespace from the right of the string.
        def rstrip
          chars(@wrapped_string.gsub(Unicode::TRAILERS_PAT, ''))
        end

        # Strips entire range of Unicode whitespace from the left of the string.
        def lstrip
          chars(@wrapped_string.gsub(Unicode::LEADERS_PAT, ''))
        end

        # Strips entire range of Unicode whitespace from the right and left of the string.
        def strip
          rstrip.lstrip
        end

        # Returns the codepoint of the first character in the string.
        #
        # Example:
        #   'こんにちは'.mb_chars.ord # => 12371
        def ord
          Unicode.u_unpack(@wrapped_string)[0]
        end

        # Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.rjust(8).to_s
        #   # => "   ¾ cup"
        #
        #   "¾ cup".mb_chars.rjust(8, "·").to_s # Any pad string works, e.g. a non-breaking space
        #   # => "···¾ cup"
        def rjust(integer, padstr=' ')
          justify(integer, :right, padstr)
        end

        # Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.ljust(8).to_s
        #   # => "¾ cup   "
        #
        #   "¾ cup".mb_chars.ljust(8, "·").to_s # Any pad string works, e.g. a non-breaking space
        #   # => "¾ cup···"
        def ljust(integer, padstr=' ')
          justify(integer, :left, padstr)
        end

        # Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
        #
        # Example:
        #
        #   "¾ cup".mb_chars.center(8).to_s
        #   # => " ¾ cup  "
        #
        #   "¾ cup".mb_chars.center(8, "·").to_s # Any pad string works, e.g. a non-breaking space
        #   # => "·¾ cup··"
        def center(integer, padstr=' ')
          justify(integer, :center, padstr)
        end

      else
        # Delegates pattern matching directly to the wrapped string.
        def =~(other)
          @wrapped_string =~ other
        end
      end

      # Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
      # instances instead of String. This makes chaining methods easier.
      #
      # Example:
      #   'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
      def split(*args)
        @wrapped_string.split(*args).map { |i| i.mb_chars }
      end

      # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
      #
      # Raises IndexError when an integer index is out of the string or a string needle is not found, and
      # RangeError when a range starts beyond the end of the string.
      #
      # Example:
      #
      #   s = "Müller"
      #   s.mb_chars[2] = "e" # Replace character with offset 2
      #   s
      #   # => "Müeler"
      #
      #   s = "Müller"
      #   s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
      #   s
      #   # => "Möler"
      def []=(*args)
        replace_by = args.pop
        # Indexed replace with regular expressions already works
        if args.first.is_a?(Regexp)
          @wrapped_string[*args] = replace_by
        else
          result = Unicode.u_unpack(@wrapped_string)
          # Integer rather than Fixnum: Fixnum no longer exists on current Rubies, and Integer
          # matches the same values on older ones.
          if args[0].is_a?(Integer)
            raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
            min = args[0]
            max = args[1].nil? ? min : (min + args[1] - 1)
            range = Range.new(min, max)
            # An integer replacement is treated as a codepoint.
            replace_by = [replace_by].pack('U') if replace_by.is_a?(Integer)
          elsif args.first.is_a?(Range)
            raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
            range = args[0]
          else
            needle = args[0].to_s
            min = index(needle)
            # Mirror String#[]= instead of failing with NoMethodError on nil.
            raise IndexError, "string not matched" if min.nil?
            max = min + Unicode.u_unpack(needle).length - 1
            range = Range.new(min, max)
          end
          result[range] = Unicode.u_unpack(replace_by)
          @wrapped_string.replace(result.pack('U*'))
        end
      end

      # Reverses all characters in the string.
      #
      # Example:
      #   'Café'.mb_chars.reverse.to_s # => 'éfaC'
      def reverse
        chars(Unicode.g_unpack(@wrapped_string).reverse.flatten.pack('U*'))
      end

      # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
      # character.
      #
      # Returns +nil+ when nothing matches the requested slice.
      #
      # Example:
      #   'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
      def slice(*args)
        if args.size > 2
          raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" # Do as if we were native
        elsif (args.size == 2 && !(args.first.is_a?(Numeric) || args.first.is_a?(Regexp)))
          raise TypeError, "cannot convert #{args.first.class} into Integer" # Do as if we were native
        elsif (args.size == 2 && !args[1].is_a?(Numeric))
          raise TypeError, "cannot convert #{args[1].class} into Integer" # Do as if we were native
        elsif args[0].kind_of? Range
          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
          result = cps.nil? ? nil : cps.pack('U*')
        elsif args[0].kind_of? Regexp
          result = @wrapped_string.slice(*args)
        elsif args.size == 1 && args[0].kind_of?(Numeric)
          character = Unicode.u_unpack(@wrapped_string)[args[0]]
          result = character && [character].pack('U')
        else
          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
          result = cps && cps.pack('U*')
        end
        result && chars(result)
      end
      alias_method :[], :slice

      # Limit the byte size of the string to a number of bytes without breaking characters. Usable
      # when the storage for a string is limited for some reason.
      #
      # Example:
      #   s = 'こんにちは'
      #   s.mb_chars.limit(7) # => "こん"
      def limit(limit)
        slice(0...translate_offset(limit))
      end

      # Convert characters in the string to uppercase.
      #
      # Example:
      #   'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
      def upcase
        chars(Unicode.apply_mapping(@wrapped_string, :uppercase_mapping))
      end

      # Convert characters in the string to lowercase.
      #
      # Example:
      #   'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
      def downcase
        chars(Unicode.apply_mapping(@wrapped_string, :lowercase_mapping))
      end

      # Converts the first character to uppercase and the remainder to lowercase.
      #
      # Example:
      #  'über'.mb_chars.capitalize.to_s # => "Über"
      def capitalize
        (slice(0) || chars('')).upcase + (slice(1..-1) || chars('')).downcase
      end

      # Capitalizes the first letter of every word, when possible.
      #
      # Example:
      #   "ÉL QUE SE ENTERÓ".mb_chars.titleize    # => "Él Que Se Enteró"
      #   "日本語".mb_chars.titleize                 # => "日本語"
      def titleize
        chars(downcase.to_s.gsub(/\b('?[\S])/u) { Unicode.apply_mapping $1, :uppercase_mapping })
      end
      alias_method :titlecase, :titleize

      # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
      # passing strings to databases and validations.
      #
      # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
      #   <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
      #   Mail::Multibyte::Unicode.default_normalization_form
      def normalize(form = nil)
        chars(Unicode.normalize(@wrapped_string, form))
      end

      # Performs canonical decomposition on all the characters.
      #
      # Example:
      #   'é'.length                         # => 2
      #   'é'.mb_chars.decompose.to_s.length # => 3
      def decompose
        chars(Unicode.decompose_codepoints(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
      end

      # Performs composition on all the characters.
      #
      # Example:
      #   'é'.length                       # => 3
      #   'é'.mb_chars.compose.to_s.length # => 2
      def compose
        chars(Unicode.compose_codepoints(Unicode.u_unpack(@wrapped_string)).pack('U*'))
      end

      # Returns the number of grapheme clusters in the string.
      #
      # Example:
      #   'क्षि'.mb_chars.length   # => 4
      #   'क्षि'.mb_chars.g_length # => 3
      def g_length
        Unicode.g_unpack(@wrapped_string).length
      end

      # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
      #
      # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
      def tidy_bytes(force = false)
        chars(Unicode.tidy_bytes(@wrapped_string, force))
      end

      %w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
        # Only define a corresponding bang method for methods defined in the proxy; on 1.9 the proxy will
        # exclude lstrip!, rstrip! and strip! because they already work as expected on multibyte strings.
        if public_method_defined?(method)
          # The bang variant stores the non-bang result as the wrapped string and returns the proxy.
          define_method("#{method}!") do |*args|
            @wrapped_string = send(method, *args).to_s
            self
          end
        end
      end

      protected

        # Translates a byte offset into a character (codepoint) offset. When the offset falls inside a
        # multibyte character it is moved back to the preceding character boundary.
        def translate_offset(byte_offset) #:nodoc:
          return nil if byte_offset.nil?
          return 0   if @wrapped_string == ''

          # Slice a binary-tagged copy so the range counts bytes; the wrapped string itself keeps
          # its encoding.
          bytes = @wrapped_string
          bytes = bytes.dup.force_encoding(Encoding::ASCII_8BIT) if bytes.respond_to?(:force_encoding)

          begin
            bytes[0...byte_offset].unpack('U*').length
          rescue ArgumentError
            # The cut landed inside a character: step back one byte and try again.
            byte_offset -= 1
            retry
          end
        end

        # Pads the wrapped string to _integer_ characters on the side(s) selected by _way_
        # (<tt>:right</tt>, <tt>:left</tt> or <tt>:center</tt>) and returns the result as a new proxy.
        def justify(integer, way, padstr=' ') #:nodoc:
          raise ArgumentError, "zero width padding" if padstr.length == 0
          padsize = integer - size
          padsize = padsize > 0 ? padsize : 0
          case way
          when :right
            result = @wrapped_string.dup.insert(0, padding(padsize, padstr))
          when :left
            result = @wrapped_string.dup.insert(-1, padding(padsize, padstr))
          when :center
            # Any odd remainder goes to the right-hand side.
            lpad = padding((padsize / 2.0).floor, padstr)
            rpad = padding((padsize / 2.0).ceil, padstr)
            result = @wrapped_string.dup.insert(0, lpad).insert(-1, rpad)
          end
          chars(result)
        end

        # Builds a pad of exactly _padsize_ characters by repeating _padstr_ and cutting off the excess.
        def padding(padsize, padstr=' ') #:nodoc:
          if padsize != 0
            chars(padstr * ((padsize / Unicode.u_unpack(padstr).size) + 1)).slice(0, padsize)
          else
            ''
          end
        end

        # Wraps _string_ in a new instance of the receiver's class.
        def chars(string) #:nodoc:
          self.class.new(string)
        end
    end
  end
end