smail-mime 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/smail/mime/coding_extensions.rb +192 -0
- data/lib/smail/mime/content_fields.rb +189 -0
- data/lib/smail/mime/date.rb +13 -0
- data/lib/smail/mime/header.rb +190 -0
- data/lib/smail/mime/mime.rb +257 -0
- data/lib/smail/mime/version.rb +11 -0
- data/lib/smail/mime.rb +4 -0
- metadata +90 -0
@@ -0,0 +1,192 @@
|
|
1
|
+
#require 'jcode'
|
2
|
+
require 'iconv'
|
3
|
+
|
4
|
+
# Extensions to the String library for encoding and decoding of MIME data.
|
5
|
+
class String

  # Returns true if the string consists entirely of whitespace.
  # (The empty string will return false.)
  def is_space?
    !(self =~ /\A\s+\Z/m).nil?
  end

  # Returns true if the string contains only valid ASCII characters
  # (i.e. no byte over 127).
  #
  # The check is done byte-wise so the answer does not depend on the
  # string's encoding or on how the interpreter counts characters.
  def is_ascii?
    each_byte { |byte| return false if byte > 127 }
    true
  end

  # Returns this string encoded as base64 as defined in RFC2045, section 6.8.
  def encode_base64
    [self].pack("m*")
  end

  # Performs encode_base64 in place, and returns the string.
  def encode_base64!
    replace(encode_base64)
  end

  # Returns this string decoded from base64 as defined in RFC2045, section 6.8.
  #
  # RFC2045 says that every character outside the base64 alphabet is to be
  # ignored, so all such characters (line breaks included) are removed
  # before the data is handed to the decoder.
  def decode_base64
    delete("^A-Za-z0-9+/=").unpack("m*").first
  end

  # Performs decode_base64 in place, and returns the string.
  def decode_base64!
    replace(decode_base64)
  end

  # Returns this string encoded as quoted-printable as defined in RFC2045, section 6.7.
  def encode_quoted_printable
    result = [self].pack("M*")
    # Ruby's quoted printable encoding uses soft line breaks to buffer spaces
    # at the end of lines, rather than encoding them with =20. We fix this.
    result.gsub!(/( +)=\n\n/) { "=20" * $1.length + "\n" }
    # Ruby's quoted printable encode puts a soft line break on the end of any
    # string that doesn't already end in a hard line break, so we have to
    # clean it up.
    result.gsub!(/=\n\Z/, '')
    result
  end

  # Performs encode_quoted_printable in place, and returns the string.
  def encode_quoted_printable!
    replace(encode_quoted_printable)
  end

  # Returns this string decoded from quoted-printable as defined in RFC2045, section 6.7.
  def decode_quoted_printable
    unpack("M*").first
  end

  # Performs decode_quoted_printable in place, and returns the string.
  def decode_quoted_printable!
    replace(decode_quoted_printable)
  end

  # Guesses whether this string is encoded in base64 or quoted-printable.
  #
  # The string is treated as base64 when, ignoring line breaks and spaces,
  # its length (padding included) is a multiple of 4 and everything before
  # the optional one or two trailing '=' padding characters belongs to the
  # base64 alphabet.
  #
  # Returns either :base64 or :quoted_printable
  def guess_mime_encoding
    squashed = delete("\r\n ").strip
    # The padding counts towards the multiple-of-4 length, so it must only
    # be removed for the alphabet check, not for the length check.
    payload = squashed.sub(/={0,2}\z/, '')
    if squashed.length % 4 == 0 && payload.count("^A-Za-z0-9+/") == 0
      :base64
    else
      :quoted_printable
    end
  end

  # Returns the MIME encoding that is likely to produce the shortest
  # encoded string, either :none, :base64, or :quoted_printable.
  #
  # NOTE(review): mb_chars is not defined in this file; presumably it comes
  # from ActiveSupport (a declared gem dependency) -- confirm it is loaded.
  def best_mime_encoding
    if is_ascii?
      :none
    elsif length > (mb_chars.length * 1.1)
      :base64
    else
      :quoted_printable
    end
  end

  # Decodes this string according to method, where method is
  # :base64, :quoted_printable, or :none.
  #
  # If method is not supplied or is nil, guess_mime_encoding is used to
  # try to pick an appropriate method.
  #
  # Method can also be a string: 'q', 'quoted-printable', 'b', or 'base64'
  # This lets you pass in methods directly from Content-Transfer-Encoding
  # headers, or from RFC2047 words. Matching is case-insensitive.
  #
  # Raises ArgumentError for any other method.
  def decode_mime(method = nil)
    method ||= guess_mime_encoding
    method = method.downcase if method.kind_of?(String)
    case method
    when :none
      self
    when :base64, 'b', 'base64'
      decode_base64
    when :quoted_printable, 'q', 'quoted-printable'
      decode_quoted_printable
    else
      raise ArgumentError, "Bad MIME encoding"
    end
  end

  # Performs decode_mime in place, and returns the string.
  def decode_mime!(method = nil)
    replace(decode_mime(method))
  end

  # Converts this string to to_charset from from_charset using Iconv.
  #
  # Because there are cases where charsets are encoded incorrectly on the 'net
  # we also allow for them and attempt to fix them up here. If conversion
  # ultimately fails we remove all characters 0x80 and above, replacing them
  # with ! symbols and effectively making it a US-ASCII (and therefore UTF-8)
  # string.
  def iconv(to_charset, from_charset)
    failed = false
    begin
      converted = Iconv.new(to_charset, from_charset).iconv(self)
    rescue Iconv::IllegalSequence
      case from_charset.downcase
      when 'us-ascii'
        # Some mailers do not send a charset when it should be CP1252,
        # the default Windows Latin charset
        begin
          converted = Iconv.new(to_charset, 'cp1252').iconv(self)
        rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
          failed = true
        end
      when 'ks_c_5601-1987'
        # Microsoft products erroneously use this for what should be CP949
        # see http://tagunov.tripod.com/cjk.html
        begin
          converted = Iconv.new(to_charset, 'cp949').iconv(self)
        rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
          failed = true
        end
      else
        failed = true
      end
    rescue Iconv::InvalidCharacter
      if self =~ /\n$/
        # Some messages can come in with a superfluous new line on the end,
        # which screws up the encoding. (ISO-2022-JP for example.)
        # Retry without it, still decoding from the declared charset.
        begin
          converted = Iconv.new(to_charset, from_charset).iconv(self.chomp) + "\n"
        rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
          converted = self.tr("\200-\377", "\041")
        end
      else
        converted = self.tr("\200-\377", "\041")
      end
    end

    if failed
      # Last resort: let iconv drop what it cannot convert, and if even that
      # fails replace every high byte with '!'.
      begin
        converted = Iconv.new(to_charset + '//IGNORE', from_charset).iconv(self)
      rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
        converted = self.tr("\200-\377", "\041")
      end
    end

    converted
  end

  # Performs iconv in place, and returns the string.
  def iconv!(to_charset, from_charset)
    replace(iconv(to_charset, from_charset))
  end

end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
class SMail #:nodoc:
  class MIME < SMail
    # A structured "Content-*" header value: a type followed by optional
    # "; key=value" parameters.
    class ContentField

      attr_accessor :type_raw # The raw type as parsed from the message.
      attr_reader :params     # The parameters, as a Params hash.

      # Builds the field from the raw header text. With no text the field
      # starts with a nil type and no parameters.
      def initialize(text = nil)
        @params = Params.new
        return if text.nil?

        @type_raw, params_raw = SMail::MIME.decode_content_field(text)
        @params.replace(params_raw)
      end

      # Returns the type as a lower case string (nil if no type is set).
      def type
        @type_raw && @type_raw.downcase
      end

      # Sets the raw type.
      def type=(text)
        @type_raw = text
      end

      # Returns the Content Field as a string suitable for inclusion in an email header.
      def to_s
        "#{@type_raw}#{params}"
      end

      # Hash of parameter names to values that knows how to render itself
      # as the "; key=value; ..." tail of a header.
      class Params < Hash
        # Characters that may not appear in an unquoted parameter value:
        # controls, space and the tspecials of RFC 2045 section 5.1.
        NON_TOKEN_CHARS = /[\x00-\x20\x7f()<>@,;:\\"\/\[\]?=]/

        class << self
          # Returns true if the value is not a plain RFC 2045 token (it is
          # empty or contains a space, control or tspecial character) and
          # therefore has to be written as a quoted-string.
          def needs_quoting?(text)
            value = text.to_s
            value.empty? || !(value =~ NON_TOKEN_CHARS).nil?
          end

          # Returns the value as a quoted-string, with embedded double
          # quotes and backslashes escaped.
          def quote(text)
            escaped = text.to_s.gsub(/["\\]/) { |special| "\\" + special }
            '"' + escaped + '"'
          end
        end

        # Renders the parameters as "; key=value; key2=value2", quoting
        # values where required. Returns "" when there are no parameters.
        def to_s
          return "" if empty?

          pairs = map do |key, value|
            rendered = Params.needs_quoting?(value) ? Params.quote(value) : value.to_s
            "#{key}=#{rendered}"
          end
          '; ' + pairs.join('; ')
        end
      end
    end # ContentField

    # An object representing a Content-Disposition header as specified in RFC2183.
    class ContentDisposition < ContentField

      # Is this an inline part?
      def inline?
        type == 'inline'
      end

      # Is this an attachment part?
      def attachment?
        type == "attachment"
      end

      # Returns the filename if specified
      def filename
        params['filename']
      end

      # Returns the creation date if available or nil.
      def creation_date
        params['creation-date'] # FIXME: parse as a date?
      end

      # Returns the modification date if available or nil.
      def modification_date
        params['modification-date'] # FIXME: parse as a date?
      end

      # Returns the read date if available or nil.
      def read_date
        params['read-date'] # FIXME: parse as a date?
      end

      # Returns the size if available or nil.
      def size
        params['size']
      end

      # FIXME: add all the other parameters specified in RFC2183, also add setters

      # Returns the Content-Disposition as a string suitable for inclusion in an email
      # header. If no disposition type is specified it will default to a disposition
      # type of 'attachment'.
      def to_s
        "#{type_raw || 'attachment'}#{params}"
      end

    end # ContentDisposition

    # An object representing a Content-Type header ("type/subtype" plus
    # parameters).
    class ContentType < ContentField

      attr_accessor :media_type_raw, :media_subtype_raw

      # Parses the header text and splits the type into media type and
      # subtype.
      def initialize(text = nil)
        super(text)
        self.type = @type_raw
      end

      # Returns the media type as a lower case string (nil if unset).
      def media_type
        @media_type_raw && @media_type_raw.downcase
      end

      # Set the media type
      def media_type=(text)
        @media_type_raw = text
      end

      # Returns the media subtype as a lower case string (nil if unset).
      def media_subtype
        @media_subtype_raw && @media_subtype_raw.downcase
      end

      # Set the media subtype
      def media_subtype=(text)
        @media_subtype_raw = text
      end

      # Returns the media 'type/subtype' as a lower case string.
      # Defaults to 'text/plain' when either half is missing.
      def type
        if @media_type_raw.nil? || @media_subtype_raw.nil?
          'text/plain'
        else
          "#{@media_type_raw}/#{@media_subtype_raw}".downcase
        end
      end

      # Set the media 'type/subtype' together. A nil argument is ignored.
      def type=(text = nil)
        return if text.nil?

        @type_raw = text # keep this inherited accessor in sync
        @media_type_raw, @media_subtype_raw = text.split('/', 2)
      end

      # Is this a composite media type as specified in section 5.1 of RFC 2045.
      #
      # Note extension tokens are permitted to be composite but will always be seen
      # as discrete by this code.
      def composite?
        media_type == 'message' || media_type == 'multipart'
      end

      # Is this a discrete media type as specified in section 5.1 of RFC 2045.
      #
      # Note extension tokens are permitted to be composite but will always be seen
      # as discrete by this code.
      def discrete?
        !composite?
      end

      # Returns the full Content-Type header as a string suitable for inclusion in an
      # email header. If either of the media type or subtype are not specified it will
      # default to 'text/plain; charset=us-ascii'.
      def to_s
        if @media_type_raw.nil? || @media_subtype_raw.nil?
          'text/plain; charset=us-ascii'
        else
          "#{@media_type_raw}/#{@media_subtype_raw}#{params}"
        end
      end
    end # ContentType
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class SMail #:nodoc:
  class MIME < SMail
    # A DateTime whose to_s produces an RFC2822 compliant date string,
    # which is the only reason this subclass exists.
    class Date < DateTime
      # Return an RFC2822 compliant date string suitable for use in the Date header.
      def to_s
        # %e pads single digit days with a space, so runs of whitespace are
        # collapsed to a single space afterwards.
        stamp = strftime('%a, %e %b %Y %H:%M:%S %z')
        stamp.gsub(/\s+/, ' ')
      end
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
class SMail #:nodoc:
  class MIME < SMail

    # Source of the regular expression used to find the next RFC2047
    # encoded word in a header: text before, the encoded word, text after.
    PATTERN_RFC2047_FIELD = '(.*?)(=\?(?:[^?]+)\?(?:.)\?(?:[^?]*)\?=)(.*)'

    class << self
      # Parses a Content-* header and returns the bits.
      #
      # Given the contents of a header field such as
      #
      #   text/plain; charset=US-ASCII
      #
      # this will return:
      #
      #   [ 'text/plain', { 'charset' => 'US-ASCII' } ]
      #
      # The type is returned as written (surrounding whitespace removed);
      # the keys of the hash are converted to lower case.
      #
      # This parses Content-Type and Content-Disposition headers according to
      # the description of the Content-Type header in section 5.1 of RFC2045.
      #
      # It also handles continuations and character sets in parameter values
      # as described in sections 3 and 4 of RFC2231.
      def decode_content_field(text)
        s = StringScanner.new(text)

        type = s.scan(/[^;]*/).to_s.strip
        s.skip(/;\s*/)

        params = {}
        charsets = {}
        while (key = s.scan(/[^=]+/))
          s.skip(/=/)
          if s.skip(/"/)
            # Deal with quoted parameters.
            value = s.scan(/(\\.|[^"])*/)
            s.skip(/"/)
            value.gsub!(/\\(.)/, '\1')
          else
            value = s.scan(/[^;\s]+/)
          end

          # Parameter names are case-insensitive and may carry stray
          # whitespace from header folding.
          key = key.strip.downcase

          # A trailing '*' marks an RFC2231 encoded value.
          is_encoded = false
          if key =~ /^(.*)\*$/
            key = $1
            is_encoded = true
          end

          # A trailing '*N' marks section N of a continued value.
          is_continued = false
          if key =~ /^(.*)\*[0-9]+$/
            key = $1
            is_continued = true
          end

          if is_encoded
            value = value.to_s
            # Deal with character sets and languages: charset'language'data
            declaration = /^([^']*)'([^']*)'(.*)$/.match(value)
            if declaration
              charsets[key] = declaration[1].empty? ? 'US-ASCII' : declaration[1]
              value = declaration[3]
            end
            value = value.gsub(/%([[:xdigit:]]{2})/) { $1.hex.chr }
            # Later sections of a continued value reuse the charset declared
            # by the first one; convert from that charset to UTF-8.
            value.iconv!('UTF-8', charsets[key] || 'US-ASCII')
          end

          if is_continued && params[key]
            # Deal with parameter continuations.
            params[key] << value.to_s
          else
            params[key] = value
          end

          s.skip(/\s*;?\s*/) # skip any whitespace before and after a semicolon
        end

        # Some mail clients (I'm looking at you Becky!) don't use RFC2231 parameter
        # value character set information but instead encode the parameters as
        # RFC2047 fields, so let's cycle through them and try to decode, this should
        # not do any harm if they don't have encoded fields
        params.keys.each do |name|
          params[name] = decode_field(params[name])
        end

        [type, params]
      end

      # Decodes any RFC2047 words in a string and returns the string as UTF-8.
      # Uses our iconv to deal with common encoding problems.
      # Returns nil when given nil.
      def decode_field(text)
        return nil if text.nil?

        field_re = Regexp.new(PATTERN_RFC2047_FIELD, Regexp::MULTILINE)
        result = ''.dup
        while (found = field_re.match(text))
          prefix, encoded, text = found[1], found[2], found[3]
          # Whitespace-only text in front of an encoded word is dropped.
          result << prefix unless prefix =~ /\A\s*\Z/m
          result << decode_word(encoded)
        end
        result << text
        result
      end

      # Decodes an RFC2047 word to a UTF-8 string.
      # Uses our iconv to deal with common encoding problems.
      # Text that is not an encoded word is returned unchanged.
      def decode_word(text)
        return text unless text =~ /=\?([^?]+)\?(.)\?([^?]*)\?=/

        charset, method, encoded_string = $1, $2, $3

        # Strip out the RFC2231 language specification if there is one.
        charset = $1 if charset =~ /^([^\*]+)\*?(.*)$/

        # Quoted-printable in RFC2047 substitutes spaces with underscores.
        encoded_string.tr!('_', ' ') if method.downcase == 'q'

        encoded_string.decode_mime(method).iconv('utf-8', charset)
      end

      # Takes the given UTF-8 string, converts it the given character set, and
      # encodes it as an RFC2047 style field.
      #
      # All arguments after text are optional. If a method is not supplied,
      # the String.best_mime_encoding method is used to pick one. The charset
      # defaults to UTF-8, and the line length to 66 characters.
      #
      # Returns '' for nil text; raises ArgumentError for an unknown method.
      def encode_field(text, method = nil, charset = 'UTF-8', line_length = 66)
        return '' if text.nil?

        method ||= text.best_mime_encoding
        method = method.downcase if method.kind_of?(String)
        case method
        when :none
          text
        when :base64, 'b', 'base64'
          encode_base64_field(text, charset, line_length)
        when :quoted_printable, 'q', 'quoted-printable'
          encode_quoted_printable_field(text, charset, line_length)
        else
          raise ArgumentError, "Bad MIME encoding"
        end
      end

      # Encodes text as one or more "Q" encoded words, starting a new word
      # on a continuation line whenever the current one would exceed
      # line_length.
      def encode_quoted_printable_field(text, charset = 'UTF-8', line_length = 66) #:nodoc:
        charset = charset.upcase # copy: never modify the caller's string
        # 7 is the fixed overhead of "=?", "?Q?" and "?=" around each word.
        encoded_line_length = line_length - (charset.length + 7)

        iconv = Iconv.new(charset, 'UTF-8')
        encoded_text = ''.dup
        word = ''.dup
        text.each_char do |char|
          char = iconv.iconv(char)
          # RFC2047 has its own ideas about quoted-printable encoding.
          char.encode_quoted_printable!
          char = case char
                 when "_"  then "=5F"
                 when " "  then "_"
                 when "?"  then "=3F"
                 when "\t" then "=09"
                 else char
                 end
          if word.length + char.length > encoded_line_length
            encoded_text << "=?#{charset}?Q?#{word}?=\n "
            word = ''.dup
          end
          word << char
        end
        encoded_text << "=?#{charset}?Q?#{word}?="
        encoded_text
      end

      # Encodes text as one or more "B" encoded words, starting a new word
      # on a continuation line whenever the current one would exceed
      # line_length.
      def encode_base64_field(text, charset = 'UTF-8', line_length = 66) #:nodoc:
        charset = charset.upcase # copy: never modify the caller's string
        # Base64 turns every 3 input bytes into 4 output characters.
        unencoded_line_length = (line_length - (charset.length + 7)) / 4 * 3

        iconv = Iconv.new(charset, 'UTF-8')
        encoded_text = ''.dup
        word = ''.dup
        text.each_char do |char|
          char = iconv.iconv(char)
          if word.length + char.length > unencoded_line_length
            # The encoder wraps its output with newlines, none of which may
            # appear inside an encoded word.
            encoded_text << "=?#{charset}?B?#{word.encode_base64.delete("\n")}?=\n "
            word = ''.dup
          end
          word << char
        end
        encoded_text << "=?#{charset}?B?#{word.encode_base64.delete("\n")}?="
        encoded_text
      end

    end # self
  end

end
|
@@ -0,0 +1,257 @@
|
|
1
|
+
class SMail
  # A MIME aware message. Header access (header, header_set), the raw
  # @body and to_s are used here but not defined here.
  # NOTE(review): they presumably come from the SMail superclass in the
  # separate smail gem -- confirm.
  class MIME < SMail

    attr_accessor :parts, :preamble, :epilogue
    attr_reader :content_type, :boundary

    # Parses the message text, then reads its Content-Type and splits the
    # body into parts.
    def initialize(text = '')
      super(text)
      self.content_type = header('content-type')
      fill_parts
    end

    # Returns the size of the message in bytes.
    def size
      to_s.length
    end

    # Sets the content type from a ContentType object, a header string, or
    # nil (which gives the default content type).
    # Raises ArgumentError for anything else.
    def content_type=(content_type)
      @content_type =
        case content_type
        when SMail::MIME::ContentType then content_type
        when String then SMail::MIME::ContentType.new(content_type)
        when nil then SMail::MIME::ContentType.new
        else raise ArgumentError, "content type must be a ContentType, a String or nil"
        end
    end

    # Is this a multipart message
    def multipart?
      @content_type.composite?
    end

    # Returns the MIME-Version as a string (unlikely to be anything but '1.0')
    def version
      header('mime-version') || '1.0'
    end

    # Returns the subject in UTF-8
    def subject
      SMail::MIME.decode_field(subject_raw)
    end

    # Sets the subject, performs any necessary encoding
    def subject=(text)
      self.subject_raw = SMail::MIME.encode_field(text)
    end

    # Returns the raw potentially MIME encoded subject
    def subject_raw
      header('subject')
    end

    # Set the subject directly, any necessary MIME encoding is up to the caller
    def subject_raw=(text)
      header_set('subject', text)
    end

    # Returns the date from the Date header as a SMail::MIME::Date (a
    # DateTime), or nil when there is no Date header.
    def date
      date = header('date')
      return nil unless date

      SMail::MIME::Date.parse(date)
    end

    # Returns the raw body of the email including all parts.
    # (This alias must stay above the body method defined below so that it
    # captures the inherited implementation.)
    alias body_raw body

    # Returns the body decoded and converted to UTF-8 if necessary, if this is a
    # multipart message this is not what you suspect: the preamble is returned.
    #
    # The raw body is never modified; a converted copy is returned.
    def body
      return @preamble if multipart? # what if it is message/rfc822 ?

      # Transfer encoding names are case-insensitive (RFC 2045 section 6.1).
      decoded =
        case header('content-transfer-encoding').to_s.strip.downcase
        when 'quoted-printable' then @body.decode_quoted_printable
        when 'base64' then @body.decode_base64
        else @body # no header or an unrecognised encoding
        end

      # convert to UTF-8 if text
      if content_type.media_type == 'text'
        charset = content_type.params['charset'] || 'us-ascii'
        # Non-destructive on purpose: decoded may be @body itself.
        decoded = decoded.iconv('utf-8', charset)
      end

      decoded
    end

    # Returns a string description of the MIME structure of this message.
    #
    # This is useful for debugging and testing. The returned string is
    # formatted as shown in the following example:
    #   multipart/mixed
    #     multipart/alternative
    #       text/plain
    #       multipart/related
    #         text/html
    #         image/gif
    #     application/octet-stream
    def describe_mime_structure(depth = 0)
      result = (' ' * depth) + content_type.type + "\n"
      if multipart?
        parts.each do |part|
          result << part.describe_mime_structure(depth + 1)
        end
      end
      result.chomp! if depth == 0
      result
    end

    # Pulls out any body parts matching the given MIME types and puts them
    # into an array.
    #
    # This is useful for pulling out parts in the appropriate order for
    # rendering. For example calling:
    #   message.flatten_body('text/plain', /^application\/.*$/)
    # should return all the text parts and attached files in the order in
    # which they appear in the original message.
    #
    # The various multipart subtypes are handled sensibly. For example,
    # for multipart/alternative messages, the best matching part (i.e. the
    # last part consisting entirely of the given types) is used.
    def flatten_body(*types)
      types = types.flatten
      return(consists_of_mime_types?(types) ? [self] : []) unless multipart?

      case content_type.type
      when 'multipart/alternative'
        best = parts.reverse.find { |candidate| candidate.consists_of_mime_types?(types) }
        best ? best.flatten_body(types) : []
      when 'multipart/mixed', 'multipart/related'
        # FIXME: For multipart/related, this should look for a start parameter and try that first.
        parts.collect { |part| part.flatten_body(types) }.flatten
      when 'multipart/signed'
        chosen = parts.first
        chosen ? chosen.flatten_body(types) : []
      when 'multipart/appledouble'
        chosen = parts[1]
        chosen ? chosen.flatten_body(types) : []
      else
        # FIXME: should we also have an entry for message/rfc822 etc.
        []
      end
    end

    # Returns true if the message consists entirely of the given mime types.
    #
    # For single part messages this is simple: the Content-Type of the
    # message must be one of the supplied types.
    #
    # For multipart messages it gets a bit more complicated. We try to
    # make sure that the message can be entirely decomposed into
    # just the supplied types.
    #
    # The rules are as follows:
    # [multipart/alternative]
    #   At least one sub-part must consist of the given types.
    # [multipart/mixed]
    #   All sub-parts must consist of the given types.
    # [multipart/related]
    #   The root part (usually the first part) must consist of the
    #   given types.
    # [multipart/signed]
    #   The first part must consist of the given types.
    # [multipart/appledouble]
    #   The second part must consist of the given types. (See RFC 1740.)
    #
    # A multipart message that lacks the required sub-part yields false.
    def consists_of_mime_types?(*types)
      types = types.flatten
      type = content_type.type

      return types.any? { |t| t === type } unless multipart?

      case type
      when 'multipart/alternative'
        parts.any? { |part| part.consists_of_mime_types?(types) }
      when 'multipart/mixed'
        parts.all? { |part| part.consists_of_mime_types?(types) }
      when 'multipart/related', 'multipart/signed', 'message/rfc822', 'message/rfc2822'
        # FIXME: multipart/related should look for a start parameter and try that first.
        chosen = parts.first
        chosen ? chosen.consists_of_mime_types?(types) : false
      when 'multipart/appledouble'
        chosen = parts[1]
        chosen ? chosen.consists_of_mime_types?(types) : false
      else
        false
      end
    end

    private

    # Populates @parts according to the content type.
    def fill_parts
      if content_type.discrete?
        parts_single_part
      else
        parts_multipart
      end
    end

    # A discrete message has no sub-parts.
    def parts_single_part
      @parts = []
    end

    # Splits the raw body of a composite message into its sub-parts, also
    # recording the boundary, preamble and epilogue.
    def parts_multipart
      @parts = []
      @boundary = content_type.params['boundary']

      # An embedded message is a single part holding the whole body.
      if content_type.type == 'message/rfc822' || content_type.type == 'message/rfc2822'
        @parts << SMail::MIME.new(@body)
        return @parts
      end

      return parts_single_part unless @boundary

      delimiter = Regexp.escape(@boundary)

      # Everything after the closing delimiter line is the epilogue.
      epilogue_re = /^--#{delimiter}-- *\r?$/m
      (body, @epilogue) = @body.to_s.split(epilogue_re, 2)
      @epilogue.lstrip! unless @epilogue.nil?

      # An empty body splits into nothing at all.
      bits = (body || '').split(/^--#{delimiter} *\r?$/m)

      @preamble = bits.shift # FIXME is this OK? or better to see a header in the first line?

      bits.each do |bit|
        bit.lstrip!
        @parts << SMail::MIME.new(bit)
      end

      @parts
    end

  end
end
|
data/lib/smail/mime.rb
ADDED
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: smail-mime
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.6
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew Walker
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-17 00:00:00 +11:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: smail
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.5
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: activesupport
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.0.0
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rspec
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.0.5
|
44
|
+
version:
|
45
|
+
description:
|
46
|
+
email: matthew@walker.wattle.id.au
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- lib/smail/mime.rb
|
55
|
+
- lib/smail/mime/coding_extensions.rb
|
56
|
+
- lib/smail/mime/content_fields.rb
|
57
|
+
- lib/smail/mime/date.rb
|
58
|
+
- lib/smail/mime/header.rb
|
59
|
+
- lib/smail/mime/mime.rb
|
60
|
+
- lib/smail/mime/version.rb
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://github.com/mwalker/smail-mime
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: "0"
|
75
|
+
version:
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: "0"
|
81
|
+
version:
|
82
|
+
requirements: []
|
83
|
+
|
84
|
+
rubyforge_project:
|
85
|
+
rubygems_version: 1.3.5
|
86
|
+
signing_key:
|
87
|
+
specification_version: 3
|
88
|
+
summary: A simple MIME email parser
|
89
|
+
test_files: []
|
90
|
+
|