mail 2.7.1 → 2.8.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +45 -28
- data/lib/mail/attachments_list.rb +2 -5
- data/lib/mail/body.rb +24 -47
- data/lib/mail/constants.rb +27 -5
- data/lib/mail/elements/address.rb +27 -27
- data/lib/mail/elements/address_list.rb +1 -1
- data/lib/mail/elements/content_disposition_element.rb +1 -1
- data/lib/mail/elements/content_location_element.rb +1 -1
- data/lib/mail/elements/content_transfer_encoding_element.rb +1 -1
- data/lib/mail/elements/content_type_element.rb +8 -4
- data/lib/mail/elements/date_time_element.rb +1 -1
- data/lib/mail/elements/envelope_from_element.rb +13 -7
- data/lib/mail/elements/message_ids_element.rb +14 -5
- data/lib/mail/elements/mime_version_element.rb +1 -1
- data/lib/mail/elements/phrase_list.rb +7 -2
- data/lib/mail/elements/received_element.rb +20 -6
- data/lib/mail/encodings/7bit.rb +5 -0
- data/lib/mail/encodings/base64.rb +2 -2
- data/lib/mail/encodings/quoted_printable.rb +2 -2
- data/lib/mail/encodings.rb +30 -59
- data/lib/mail/envelope.rb +11 -14
- data/lib/mail/field.rb +37 -53
- data/lib/mail/field_list.rb +60 -7
- data/lib/mail/fields/bcc_field.rb +34 -52
- data/lib/mail/fields/cc_field.rb +28 -49
- data/lib/mail/fields/comments_field.rb +27 -37
- data/lib/mail/fields/common_address_field.rb +170 -0
- data/lib/mail/fields/common_date_field.rb +58 -0
- data/lib/mail/fields/common_field.rb +77 -0
- data/lib/mail/fields/common_message_id_field.rb +42 -0
- data/lib/mail/fields/content_description_field.rb +7 -14
- data/lib/mail/fields/content_disposition_field.rb +13 -38
- data/lib/mail/fields/content_id_field.rb +24 -51
- data/lib/mail/fields/content_location_field.rb +11 -25
- data/lib/mail/fields/content_transfer_encoding_field.rb +31 -31
- data/lib/mail/fields/content_type_field.rb +46 -71
- data/lib/mail/fields/date_field.rb +23 -51
- data/lib/mail/fields/from_field.rb +28 -49
- data/lib/mail/fields/in_reply_to_field.rb +38 -49
- data/lib/mail/fields/keywords_field.rb +18 -31
- data/lib/mail/fields/message_id_field.rb +25 -71
- data/lib/mail/fields/mime_version_field.rb +19 -30
- data/lib/mail/fields/named_structured_field.rb +11 -0
- data/lib/mail/fields/named_unstructured_field.rb +11 -0
- data/lib/mail/fields/optional_field.rb +5 -6
- data/lib/mail/fields/{common/parameter_hash.rb → parameter_hash.rb} +12 -10
- data/lib/mail/fields/received_field.rb +43 -57
- data/lib/mail/fields/references_field.rb +35 -49
- data/lib/mail/fields/reply_to_field.rb +28 -49
- data/lib/mail/fields/resent_bcc_field.rb +28 -49
- data/lib/mail/fields/resent_cc_field.rb +28 -49
- data/lib/mail/fields/resent_date_field.rb +5 -29
- data/lib/mail/fields/resent_from_field.rb +28 -49
- data/lib/mail/fields/resent_message_id_field.rb +5 -29
- data/lib/mail/fields/resent_sender_field.rb +27 -56
- data/lib/mail/fields/resent_to_field.rb +28 -49
- data/lib/mail/fields/return_path_field.rb +50 -54
- data/lib/mail/fields/sender_field.rb +34 -55
- data/lib/mail/fields/structured_field.rb +3 -30
- data/lib/mail/fields/subject_field.rb +9 -11
- data/lib/mail/fields/to_field.rb +28 -49
- data/lib/mail/fields/unstructured_field.rb +16 -48
- data/lib/mail/header.rb +69 -110
- data/lib/mail/matchers/attachment_matchers.rb +15 -0
- data/lib/mail/message.rb +46 -64
- data/lib/mail/multibyte/chars.rb +8 -166
- data/lib/mail/multibyte/utils.rb +26 -43
- data/lib/mail/multibyte.rb +1 -11
- data/lib/mail/network/delivery_methods/exim.rb +5 -4
- data/lib/mail/network/delivery_methods/file_delivery.rb +11 -10
- data/lib/mail/network/delivery_methods/logger_delivery.rb +2 -5
- data/lib/mail/network/delivery_methods/sendmail.rb +27 -35
- data/lib/mail/network/delivery_methods/smtp.rb +3 -3
- data/lib/mail/network/delivery_methods/smtp_connection.rb +3 -12
- data/lib/mail/network/delivery_methods/test_mailer.rb +4 -2
- data/lib/mail/network/retriever_methods/base.rb +8 -8
- data/lib/mail/network/retriever_methods/imap.rb +2 -2
- data/lib/mail/network/retriever_methods/pop3.rb +2 -2
- data/lib/mail/network/retriever_methods/test_retriever.rb +2 -1
- data/lib/mail/parsers/address_lists_parser.rb +33070 -33064
- data/lib/mail/parsers/address_lists_parser.rl +7 -0
- data/lib/mail/parsers/content_disposition_parser.rb +833 -827
- data/lib/mail/parsers/content_disposition_parser.rl +7 -0
- data/lib/mail/parsers/content_location_parser.rb +770 -764
- data/lib/mail/parsers/content_location_parser.rl +7 -0
- data/lib/mail/parsers/content_transfer_encoding_parser.rb +474 -468
- data/lib/mail/parsers/content_transfer_encoding_parser.rl +7 -0
- data/lib/mail/parsers/content_type_parser.rb +971 -965
- data/lib/mail/parsers/content_type_parser.rl +7 -0
- data/lib/mail/parsers/date_time_parser.rb +838 -832
- data/lib/mail/parsers/date_time_parser.rl +7 -0
- data/lib/mail/parsers/envelope_from_parser.rb +3623 -3529
- data/lib/mail/parsers/envelope_from_parser.rl +7 -0
- data/lib/mail/parsers/message_ids_parser.rb +5107 -2800
- data/lib/mail/parsers/message_ids_parser.rl +12 -1
- data/lib/mail/parsers/mime_version_parser.rb +463 -457
- data/lib/mail/parsers/mime_version_parser.rl +7 -0
- data/lib/mail/parsers/phrase_lists_parser.rb +836 -830
- data/lib/mail/parsers/phrase_lists_parser.rl +8 -1
- data/lib/mail/parsers/received_parser.rb +8688 -8682
- data/lib/mail/parsers/received_parser.rl +7 -0
- data/lib/mail/parsers/rfc5322.rl +28 -13
- data/lib/mail/parsers.rb +11 -17
- data/lib/mail/part.rb +5 -9
- data/lib/mail/parts_list.rb +57 -0
- data/lib/mail/smtp_envelope.rb +57 -0
- data/lib/mail/utilities.rb +307 -69
- data/lib/mail/version.rb +3 -3
- data/lib/mail/yaml.rb +30 -0
- data/lib/mail.rb +0 -20
- metadata +74 -21
- data/lib/mail/check_delivery_params.rb +0 -60
- data/lib/mail/core_extensions/smtp.rb +0 -28
- data/lib/mail/core_extensions/string.rb +0 -17
- data/lib/mail/fields/common/address_container.rb +0 -17
- data/lib/mail/fields/common/common_address.rb +0 -161
- data/lib/mail/fields/common/common_date.rb +0 -36
- data/lib/mail/fields/common/common_field.rb +0 -52
- data/lib/mail/fields/common/common_message_id.rb +0 -49
- data/lib/mail/version_specific/ruby_1_8.rb +0 -163
- data/lib/mail/version_specific/ruby_1_9.rb +0 -278
@@ -1,163 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
require 'net/imap' # for decode_utf7
|
4
|
-
|
5
|
-
module Mail
|
6
|
-
class Ruby18
|
7
|
-
require 'base64'
|
8
|
-
require 'iconv'
|
9
|
-
|
10
|
-
# Escapes any parenthesis in a string that are unescaped. This can't
|
11
|
-
# use the Ruby 1.9.1 regexp feature of negative look behind so we have
|
12
|
-
# to do two replacement, first unescape everything, then re-escape it
|
13
|
-
def Ruby18.escape_paren( str )
|
14
|
-
re = /\\\)/
|
15
|
-
str = str.gsub(re) { |s| ')'}
|
16
|
-
re = /\\\(/
|
17
|
-
str = str.gsub(re) { |s| '('}
|
18
|
-
re = /([\(\)])/ # Only match unescaped parens
|
19
|
-
str.gsub(re) { |s| '\\' + s }
|
20
|
-
end
|
21
|
-
|
22
|
-
def Ruby18.paren( str )
|
23
|
-
str = $1 if str =~ /^\((.*)?\)$/
|
24
|
-
str = escape_paren( str )
|
25
|
-
'(' + str + ')'
|
26
|
-
end
|
27
|
-
|
28
|
-
def Ruby18.escape_bracket( str )
|
29
|
-
re = /\\\>/
|
30
|
-
str = str.gsub(re) { |s| '>'}
|
31
|
-
re = /\\\</
|
32
|
-
str = str.gsub(re) { |s| '<'}
|
33
|
-
re = /([\<\>])/ # Only match unescaped parens
|
34
|
-
str.gsub(re) { |s| '\\' + s }
|
35
|
-
end
|
36
|
-
|
37
|
-
def Ruby18.bracket( str )
|
38
|
-
str = $1 if str =~ /^\<(.*)?\>$/
|
39
|
-
str = escape_bracket( str )
|
40
|
-
'<' + str + '>'
|
41
|
-
end
|
42
|
-
|
43
|
-
def Ruby18.decode_base64(str)
|
44
|
-
Base64.decode64(str) if str
|
45
|
-
end
|
46
|
-
|
47
|
-
def Ruby18.encode_base64(str)
|
48
|
-
Base64.encode64(str)
|
49
|
-
end
|
50
|
-
|
51
|
-
def Ruby18.has_constant?(klass, string)
|
52
|
-
klass.constants.include?( string )
|
53
|
-
end
|
54
|
-
|
55
|
-
def Ruby18.get_constant(klass, string)
|
56
|
-
klass.const_get( string )
|
57
|
-
end
|
58
|
-
|
59
|
-
def Ruby18.transcode_charset(str, from_encoding, to_encoding = 'UTF-8')
|
60
|
-
case from_encoding
|
61
|
-
when /utf-?7/i
|
62
|
-
decode_utf7(str)
|
63
|
-
else
|
64
|
-
retried = false
|
65
|
-
begin
|
66
|
-
Iconv.conv("#{normalize_iconv_charset_encoding(to_encoding)}//IGNORE", normalize_iconv_charset_encoding(from_encoding), str)
|
67
|
-
rescue Iconv::InvalidEncoding
|
68
|
-
if retried
|
69
|
-
raise
|
70
|
-
else
|
71
|
-
from_encoding = 'ASCII'
|
72
|
-
retried = true
|
73
|
-
retry
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def Ruby18.decode_utf7(str)
|
80
|
-
Net::IMAP.decode_utf7(str)
|
81
|
-
end
|
82
|
-
|
83
|
-
def Ruby18.b_value_encode(str, encoding)
|
84
|
-
# Ruby 1.8 requires an encoding to work
|
85
|
-
raise ArgumentError, "Must supply an encoding" if encoding.nil?
|
86
|
-
encoding = encoding.to_s.upcase.gsub('_', '-')
|
87
|
-
[Encodings::Base64.encode(str), normalize_iconv_charset_encoding(encoding)]
|
88
|
-
end
|
89
|
-
|
90
|
-
def Ruby18.b_value_decode(str)
|
91
|
-
match = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m)
|
92
|
-
if match
|
93
|
-
encoding = match[1]
|
94
|
-
str = Ruby18.decode_base64(match[2])
|
95
|
-
str = transcode_charset(str, encoding)
|
96
|
-
end
|
97
|
-
str
|
98
|
-
end
|
99
|
-
|
100
|
-
def Ruby18.q_value_encode(str, encoding)
|
101
|
-
# Ruby 1.8 requires an encoding to work
|
102
|
-
raise ArgumentError, "Must supply an encoding" if encoding.nil?
|
103
|
-
encoding = encoding.to_s.upcase.gsub('_', '-')
|
104
|
-
[Encodings::QuotedPrintable.encode(str), encoding]
|
105
|
-
end
|
106
|
-
|
107
|
-
def Ruby18.q_value_decode(str)
|
108
|
-
match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m)
|
109
|
-
if match
|
110
|
-
encoding = match[1]
|
111
|
-
string = match[2].gsub(/_/, '=20')
|
112
|
-
# Remove trailing = if it exists in a Q encoding
|
113
|
-
string = string.sub(/\=$/, '')
|
114
|
-
str = Encodings::QuotedPrintable.decode(string)
|
115
|
-
str = transcode_charset(str, encoding)
|
116
|
-
end
|
117
|
-
str
|
118
|
-
end
|
119
|
-
|
120
|
-
def Ruby18.param_decode(str, encoding)
|
121
|
-
str = URI.unescape(str)
|
122
|
-
if encoding
|
123
|
-
transcode_charset(str, encoding)
|
124
|
-
else
|
125
|
-
str
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
def Ruby18.param_encode(str)
|
130
|
-
encoding = $KCODE.to_s.downcase
|
131
|
-
language = Configuration.instance.param_encode_language
|
132
|
-
"#{encoding}'#{language}'#{URI.escape(str)}"
|
133
|
-
end
|
134
|
-
|
135
|
-
def Ruby18.string_byteslice(str, *args)
|
136
|
-
str.slice(*args)
|
137
|
-
end
|
138
|
-
|
139
|
-
private
|
140
|
-
|
141
|
-
def Ruby18.normalize_iconv_charset_encoding(encoding)
|
142
|
-
case encoding.upcase
|
143
|
-
when 'UTF8', 'UTF_8'
|
144
|
-
'UTF-8'
|
145
|
-
when 'UTF16', 'UTF-16'
|
146
|
-
'UTF-16BE'
|
147
|
-
when 'UTF32', 'UTF-32'
|
148
|
-
'UTF-32BE'
|
149
|
-
when 'KS_C_5601-1987'
|
150
|
-
'CP949'
|
151
|
-
else
|
152
|
-
# Fall back to ASCII for charsets that Iconv doesn't recognize
|
153
|
-
begin
|
154
|
-
Iconv.new('UTF-8', encoding)
|
155
|
-
rescue Iconv::InvalidEncoding => e
|
156
|
-
'ASCII'
|
157
|
-
else
|
158
|
-
encoding
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
end
|
163
|
-
end
|
@@ -1,278 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
module Mail
|
5
|
-
class Ruby19
|
6
|
-
class StrictCharsetEncoder
|
7
|
-
def encode(string, charset)
|
8
|
-
case charset
|
9
|
-
when /utf-?7/i
|
10
|
-
Mail::Ruby19.decode_utf7(string)
|
11
|
-
else
|
12
|
-
string.force_encoding(Mail::Ruby19.pick_encoding(charset))
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
class BestEffortCharsetEncoder
|
18
|
-
def encode(string, charset)
|
19
|
-
case charset
|
20
|
-
when /utf-?7/i
|
21
|
-
Mail::Ruby19.decode_utf7(string)
|
22
|
-
else
|
23
|
-
string.force_encoding(pick_encoding(charset))
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
private
|
28
|
-
|
29
|
-
def pick_encoding(charset)
|
30
|
-
charset = case charset
|
31
|
-
when /ansi_x3.110-1983/
|
32
|
-
'ISO-8859-1'
|
33
|
-
when /Windows-?1258/i # Windows-1258 is similar to 1252
|
34
|
-
"Windows-1252"
|
35
|
-
else
|
36
|
-
charset
|
37
|
-
end
|
38
|
-
Mail::Ruby19.pick_encoding(charset)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
class << self
|
43
|
-
attr_accessor :charset_encoder
|
44
|
-
end
|
45
|
-
self.charset_encoder = BestEffortCharsetEncoder.new
|
46
|
-
|
47
|
-
# Escapes any parenthesis in a string that are unescaped this uses
|
48
|
-
# a Ruby 1.9.1 regexp feature of negative look behind
|
49
|
-
def Ruby19.escape_paren( str )
|
50
|
-
re = /(?<!\\)([\(\)])/ # Only match unescaped parens
|
51
|
-
str.gsub(re) { |s| '\\' + s }
|
52
|
-
end
|
53
|
-
|
54
|
-
def Ruby19.paren( str )
|
55
|
-
str = $1 if str =~ /^\((.*)?\)$/
|
56
|
-
str = escape_paren( str )
|
57
|
-
'(' + str + ')'
|
58
|
-
end
|
59
|
-
|
60
|
-
def Ruby19.escape_bracket( str )
|
61
|
-
re = /(?<!\\)([\<\>])/ # Only match unescaped brackets
|
62
|
-
str.gsub(re) { |s| '\\' + s }
|
63
|
-
end
|
64
|
-
|
65
|
-
def Ruby19.bracket( str )
|
66
|
-
str = $1 if str =~ /^\<(.*)?\>$/
|
67
|
-
str = escape_bracket( str )
|
68
|
-
'<' + str + '>'
|
69
|
-
end
|
70
|
-
|
71
|
-
def Ruby19.decode_base64(str)
|
72
|
-
if !str.end_with?("=") && str.length % 4 != 0
|
73
|
-
str = str.ljust((str.length + 3) & ~3, "=")
|
74
|
-
end
|
75
|
-
str.unpack( 'm' ).first
|
76
|
-
end
|
77
|
-
|
78
|
-
def Ruby19.encode_base64(str)
|
79
|
-
[str].pack( 'm' )
|
80
|
-
end
|
81
|
-
|
82
|
-
def Ruby19.has_constant?(klass, string)
|
83
|
-
klass.const_defined?( string, false )
|
84
|
-
end
|
85
|
-
|
86
|
-
def Ruby19.get_constant(klass, string)
|
87
|
-
klass.const_get( string )
|
88
|
-
end
|
89
|
-
|
90
|
-
def Ruby19.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8)
|
91
|
-
to_encoding = to_encoding.to_s if RUBY_VERSION < '1.9.3'
|
92
|
-
to_encoding = Encoding.find(to_encoding)
|
93
|
-
replacement_char = to_encoding == Encoding::UTF_8 ? '�' : '?'
|
94
|
-
charset_encoder.encode(str.dup, from_encoding).encode(to_encoding, :undef => :replace, :invalid => :replace, :replace => replacement_char)
|
95
|
-
end
|
96
|
-
|
97
|
-
# From Ruby stdlib Net::IMAP
|
98
|
-
def Ruby19.encode_utf7(string)
|
99
|
-
string.gsub(/(&)|[^\x20-\x7e]+/) do
|
100
|
-
if $1
|
101
|
-
"&-"
|
102
|
-
else
|
103
|
-
base64 = [$&.encode(Encoding::UTF_16BE)].pack("m0")
|
104
|
-
"&" + base64.delete("=").tr("/", ",") + "-"
|
105
|
-
end
|
106
|
-
end.force_encoding(Encoding::ASCII_8BIT)
|
107
|
-
end
|
108
|
-
|
109
|
-
def Ruby19.decode_utf7(utf7)
|
110
|
-
utf7.gsub(/&([^-]+)?-/n) do
|
111
|
-
if $1
|
112
|
-
($1.tr(",", "/") + "===").unpack("m")[0].encode(Encoding::UTF_8, Encoding::UTF_16BE)
|
113
|
-
else
|
114
|
-
"&"
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
def Ruby19.b_value_encode(str, encoding = nil)
|
120
|
-
encoding = str.encoding.to_s
|
121
|
-
[Ruby19.encode_base64(str), encoding]
|
122
|
-
end
|
123
|
-
|
124
|
-
def Ruby19.b_value_decode(str)
|
125
|
-
match = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m)
|
126
|
-
if match
|
127
|
-
charset = match[1]
|
128
|
-
str = Ruby19.decode_base64(match[2])
|
129
|
-
str = charset_encoder.encode(str, charset)
|
130
|
-
end
|
131
|
-
transcode_to_scrubbed_utf8(str)
|
132
|
-
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
|
133
|
-
warn "Encoding conversion failed #{$!}"
|
134
|
-
str.dup.force_encoding(Encoding::UTF_8)
|
135
|
-
end
|
136
|
-
|
137
|
-
def Ruby19.q_value_encode(str, encoding = nil)
|
138
|
-
encoding = str.encoding.to_s
|
139
|
-
[Encodings::QuotedPrintable.encode(str), encoding]
|
140
|
-
end
|
141
|
-
|
142
|
-
def Ruby19.q_value_decode(str)
|
143
|
-
match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m)
|
144
|
-
if match
|
145
|
-
charset = match[1]
|
146
|
-
string = match[2].gsub(/_/, '=20')
|
147
|
-
# Remove trailing = if it exists in a Q encoding
|
148
|
-
string = string.sub(/\=$/, '')
|
149
|
-
str = Encodings::QuotedPrintable.decode(string)
|
150
|
-
str = charset_encoder.encode(str, charset)
|
151
|
-
# We assume that binary strings hold utf-8 directly to work around
|
152
|
-
# jruby/jruby#829 which subtly changes String#encode semantics.
|
153
|
-
str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT
|
154
|
-
end
|
155
|
-
transcode_to_scrubbed_utf8(str)
|
156
|
-
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
|
157
|
-
warn "Encoding conversion failed #{$!}"
|
158
|
-
str.dup.force_encoding(Encoding::UTF_8)
|
159
|
-
end
|
160
|
-
|
161
|
-
def Ruby19.param_decode(str, encoding)
|
162
|
-
str = uri_parser.unescape(str)
|
163
|
-
str = charset_encoder.encode(str, encoding) if encoding
|
164
|
-
transcode_to_scrubbed_utf8(str)
|
165
|
-
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
|
166
|
-
warn "Encoding conversion failed #{$!}"
|
167
|
-
str.dup.force_encoding(Encoding::UTF_8)
|
168
|
-
end
|
169
|
-
|
170
|
-
def Ruby19.param_encode(str)
|
171
|
-
encoding = str.encoding.to_s.downcase
|
172
|
-
language = Configuration.instance.param_encode_language
|
173
|
-
"#{encoding}'#{language}'#{uri_parser.escape(str)}"
|
174
|
-
end
|
175
|
-
|
176
|
-
def Ruby19.uri_parser
|
177
|
-
@uri_parser ||= URI::Parser.new
|
178
|
-
end
|
179
|
-
|
180
|
-
# Pick a Ruby encoding corresponding to the message charset. Most
|
181
|
-
# charsets have a Ruby encoding, but some need manual aliasing here.
|
182
|
-
#
|
183
|
-
# TODO: add this as a test somewhere:
|
184
|
-
# Encoding.list.map { |e| [e.to_s.upcase == pick_encoding(e.to_s.downcase.gsub("-", "")), e.to_s] }.select {|a,b| !b}
|
185
|
-
# Encoding.list.map { |e| [e.to_s == pick_encoding(e.to_s), e.to_s] }.select {|a,b| !b}
|
186
|
-
def Ruby19.pick_encoding(charset)
|
187
|
-
charset = charset.to_s
|
188
|
-
encoding = case charset.downcase
|
189
|
-
|
190
|
-
# ISO-8859-8-I etc. http://en.wikipedia.org/wiki/ISO-8859-8-I
|
191
|
-
when /^iso[-_]?8859-(\d+)(-i)?$/
|
192
|
-
"ISO-8859-#{$1}"
|
193
|
-
|
194
|
-
# ISO-8859-15, ISO-2022-JP and alike
|
195
|
-
when /^iso[-_]?(\d{4})-?(\w{1,2})$/
|
196
|
-
"ISO-#{$1}-#{$2}"
|
197
|
-
|
198
|
-
# "ISO-2022-JP-KDDI" and alike
|
199
|
-
when /^iso[-_]?(\d{4})-?(\w{1,2})-?(\w*)$/
|
200
|
-
"ISO-#{$1}-#{$2}-#{$3}"
|
201
|
-
|
202
|
-
# UTF-8, UTF-32BE and alike
|
203
|
-
when /^utf[\-_]?(\d{1,2})?(\w{1,2})$/
|
204
|
-
"UTF-#{$1}#{$2}".gsub(/\A(UTF-(?:16|32))\z/, '\\1BE')
|
205
|
-
|
206
|
-
# Windows-1252 and alike
|
207
|
-
when /^windows-?(.*)$/
|
208
|
-
"Windows-#{$1}"
|
209
|
-
|
210
|
-
when '8bit'
|
211
|
-
Encoding::ASCII_8BIT
|
212
|
-
|
213
|
-
# alternatives/misspellings of us-ascii seen in the wild
|
214
|
-
when /^iso[-_]?646(-us)?$/, 'us=ascii'
|
215
|
-
Encoding::ASCII
|
216
|
-
|
217
|
-
# Microsoft-specific alias for MACROMAN
|
218
|
-
when 'macintosh'
|
219
|
-
Encoding::MACROMAN
|
220
|
-
|
221
|
-
# Microsoft-specific alias for CP949 (Korean)
|
222
|
-
when 'ks_c_5601-1987'
|
223
|
-
Encoding::CP949
|
224
|
-
|
225
|
-
# Wrongly written Shift_JIS (Japanese)
|
226
|
-
when 'shift-jis'
|
227
|
-
Encoding::Shift_JIS
|
228
|
-
|
229
|
-
# GB2312 (Chinese charset) is a subset of GB18030 (its replacement)
|
230
|
-
when 'gb2312'
|
231
|
-
Encoding::GB18030
|
232
|
-
|
233
|
-
when 'cp-850'
|
234
|
-
Encoding::CP850
|
235
|
-
|
236
|
-
when 'latin2'
|
237
|
-
Encoding::ISO_8859_2
|
238
|
-
|
239
|
-
else
|
240
|
-
charset
|
241
|
-
end
|
242
|
-
|
243
|
-
convert_to_encoding(encoding)
|
244
|
-
end
|
245
|
-
|
246
|
-
if "string".respond_to?(:byteslice)
|
247
|
-
def Ruby19.string_byteslice(str, *args)
|
248
|
-
str.byteslice(*args)
|
249
|
-
end
|
250
|
-
else
|
251
|
-
def Ruby19.string_byteslice(str, *args)
|
252
|
-
str.unpack('C*').slice(*args).pack('C*').force_encoding(str.encoding)
|
253
|
-
end
|
254
|
-
end
|
255
|
-
|
256
|
-
class << self
|
257
|
-
private
|
258
|
-
|
259
|
-
def convert_to_encoding(encoding)
|
260
|
-
if encoding.is_a?(Encoding)
|
261
|
-
encoding
|
262
|
-
else
|
263
|
-
# Fall back to ASCII for charsets that Ruby doesn't recognize
|
264
|
-
begin
|
265
|
-
Encoding.find(encoding)
|
266
|
-
rescue ArgumentError
|
267
|
-
Encoding::BINARY
|
268
|
-
end
|
269
|
-
end
|
270
|
-
end
|
271
|
-
|
272
|
-
def transcode_to_scrubbed_utf8(str)
|
273
|
-
decoded = str.encode(Encoding::UTF_8, :undef => :replace, :invalid => :replace, :replace => "�")
|
274
|
-
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "�").encode(Encoding::UTF_8)
|
275
|
-
end
|
276
|
-
end
|
277
|
-
end
|
278
|
-
end
|