smail-mime 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/smail/mime/coding_extensions.rb +192 -0
- data/lib/smail/mime/content_fields.rb +189 -0
- data/lib/smail/mime/date.rb +13 -0
- data/lib/smail/mime/header.rb +190 -0
- data/lib/smail/mime/mime.rb +257 -0
- data/lib/smail/mime/version.rb +11 -0
- data/lib/smail/mime.rb +4 -0
- metadata +90 -0
@@ -0,0 +1,192 @@
|
|
1
|
+
#require 'jcode'
|
2
|
+
require 'iconv'
|
3
|
+
|
4
|
+
# Extensions to the String library for encoding and decoding of MIME data.
|
5
|
+
class String

  # Returns true if the string consists entirely of whitespace.
  # (The empty string will return false.)
  def is_space?
    # A literal regexp is compiled once instead of on every call; \s+ needs
    # at least one character, which is why "" is not considered space.
    !(self =~ /\A\s+\z/).nil?
  end

  # Returns true if the string contains only valid ASCII characters
  # (i.e. nothing over ASCII 127).
  def is_ascii?
    # Scan bytes directly rather than using tr with a raw "\200-\377" range,
    # so the check does not depend on the string's character encoding.
    each_byte { |byte| return false if byte > 127 }
    true
  end

  # Returns this string encoded as base64 as defined in RFC2045, section 6.8.
  def encode_base64
    [self].pack("m*")
  end

  # Performs encode_base64 in place, and returns the string.
  def encode_base64!
    self.replace(self.encode_base64)
  end

  # Returns this string decoded from base64 as defined in RFC2045, section 6.8.
  def decode_base64
    # RFC2045 says that all characters outside the base64 alphabet are to be
    # ignored, so everything else (line breaks, white space, stray bytes) is
    # removed before handing the data to the decoder. It might be better to
    # follow its advice to only remove line breaks and white space.
    self.tr("^A-Za-z0-9+/=", "").unpack("m*").first
  end

  # Performs decode_base64 in place, and returns the string.
  def decode_base64!
    self.replace(self.decode_base64)
  end

  # Returns this string encoded as quoted-printable as defined in RFC2045, section 6.7.
  def encode_quoted_printable
    result = [self].pack("M*")
    # Ruby's quoted printable encoding uses soft line breaks to buffer spaces
    # at the end of lines, rather than encoding them with =20. We fix this.
    result.gsub!(/( +)=\n\n/) { "=20" * $1.length + "\n" }
    # Ruby's quoted printable encode puts a soft line break on the end of any
    # string that doesn't already end in a hard line break, so we have to
    # clean it up.
    result.gsub!(/=\n\Z/, '')
    result
  end

  # Performs encode_quoted_printable in place, and returns the string.
  def encode_quoted_printable!
    self.replace(self.encode_quoted_printable)
  end

  # Returns this string decoded from quoted-printable as defined in RFC2045, section 6.7.
  def decode_quoted_printable
    self.unpack("M*").first
  end

  # Performs decode_quoted_printable in place, and returns the string.
  def decode_quoted_printable!
    self.replace(self.decode_quoted_printable)
  end

  # Guesses whether this string is encoded in base64 or quoted-printable.
  #
  # The string is treated as base64 when, ignoring CR, LF and spaces, its
  # length (including any trailing "=" padding) is a multiple of 4 and
  # everything before the padding is in the base64 alphabet.
  #
  # Returns either :base64 or :quoted_printable
  def guess_mime_encoding
    compact = self.tr("\r\n ", '').strip
    payload = compact.sub(/=*\Z/, '')
    # The length check must include the padding: padded base64 is a multiple
    # of 4 only while the trailing "=" characters are still counted.
    if compact.length.remainder(4) == 0 && payload.count("^A-Za-z0-9+/") == 0
      :base64
    else
      :quoted_printable
    end
  end

  # Returns the MIME encoding that is likely to produce the shortest
  # encoded string, either :none, :base64, or :quoted_printable.
  #
  # Relies on String#mb_chars, which is not defined in this file.
  def best_mime_encoding
    if self.is_ascii?
      :none
    elsif self.length > (self.mb_chars.length * 1.1)
      :base64
    else
      :quoted_printable
    end
  end

  # Decodes this string according to method, where method is
  # :base64, :quoted_printable, or :none.
  #
  # If method is not supplied or is nil, guess_mime_encoding is used to
  # try to pick an appropriate method.
  #
  # Method can also be a string: 'q', 'quoted-printable', 'b', or 'base64'
  # This lets you pass in methods directly from Content-Transfer-Encoding
  # headers, or from RFC2047 words. Matching is case-insensitive.
  #
  # Raises ArgumentError for any other method.
  def decode_mime(method = nil)
    method ||= guess_mime_encoding
    method = method.downcase if method.kind_of?(String)
    case method
    when :none
      self
    when :base64, 'b', 'base64'
      self.decode_base64
    when :quoted_printable, 'q', 'quoted-printable'
      self.decode_quoted_printable
    else
      raise ArgumentError, "Bad MIME encoding"
    end
  end

  # Performs decode_mime in place, and returns the string.
  def decode_mime!(method = nil)
    self.replace(self.decode_mime(method))
  end

  # Converts this string to to_charset from from_charset using Iconv.
  #
  # Because there are cases where charsets are encoded incorrectly on the 'net
  # we also allow for them and attempt to fix them up here. If conversion
  # ultimately fails we remove all characters 0x80 and above, replacing them
  # with ! symbols and effectively making it a US-ASCII (and therefore UTF-8)
  # string.
  def iconv(to_charset, from_charset)
    failed = false
    begin
      converted = Iconv.new(to_charset, from_charset).iconv(self)
    rescue Iconv::IllegalSequence
      case from_charset.downcase
      when 'us-ascii'
        # Some mailers do not send a charset when it should be CP1252,
        # the default Windows Latin charset
        begin
          converted = Iconv.new(to_charset, 'cp1252').iconv(self)
        rescue Iconv::IllegalSequence
          failed = true
        end
      when 'ks_c_5601-1987'
        # Microsoft products erroneously use this for what should be CP949
        # see http://tagunov.tripod.com/cjk.html
        begin
          converted = Iconv.new(to_charset, 'cp949').iconv(self)
        rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
          failed = true
        end
      else
        failed = true
      end
    rescue Iconv::InvalidCharacter
      if self =~ /\n$/
        # Some messages can come in with a superfluous new line on the end,
        # which screws up the encoding. (ISO-2022-JP for example.)
        begin
          converted = Iconv.new(to_charset, 'iso-2022-jp').iconv(self.chomp) + "\n"
        rescue Iconv::InvalidCharacter
          converted = self.tr("\200-\377", "\041")
        end
      else
        converted = self.tr("\200-\377", "\041")
      end
    end

    if failed
      # Last resort: ask iconv to drop what it cannot convert, and if even
      # that fails replace every high byte with "!".
      begin
        converted = Iconv.new(to_charset + '//IGNORE', from_charset).iconv(self)
      rescue Iconv::InvalidCharacter
        converted = self.tr("\200-\377", "\041")
      end
    end

    converted
  end

  # Performs iconv in place, and returns the string.
  def iconv!(to_charset, from_charset)
    self.replace(self.iconv(to_charset, from_charset))
  end

end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
class SMail #:nodoc:
  class MIME < SMail
    # A structured "Content-*" header value: a type followed by optional
    # "; key=value" parameters.
    class ContentField
      attr_accessor :type_raw # The raw type as parsed from the message.
      attr_reader :params

      # Parses text (the header value) with SMail::MIME.decode_content_field
      # when given; otherwise starts with no type and no parameters.
      def initialize(text = nil)
        @params = Params.new
        unless text.nil?
          (@type_raw, params_raw) = SMail::MIME.decode_content_field(text)
          @params.replace(params_raw)
        end
      end

      # Returns the type as a lower case string
      def type
        @type_raw.nil? ? @type_raw : @type_raw.downcase
      end

      def type=(text)
        @type_raw = text
      end

      # Returns the Content Field as a string suitable for inclusion in an email header.
      def to_s
        "#{@type_raw}#{self.params.to_s}"
      end

      # The parameters of a content field, keyed by parameter name.
      class Params < Hash
        # An RFC 2045 "token": one or more characters, none of which is a
        # space, a control character or one of the tspecials.
        TOKEN = /\A[^\x00-\x20\x7f()<>@,;:\\"\/\[\]?=]+\z/

        class << self
          # FIXME: These two should probably be moved

          # Returns true when text cannot be written as a bare token and so
          # has to be sent as a quoted-string (this includes the empty string).
          def needs_quoting?(text)
            text.to_s !~ TOKEN
          end

          # Returns text as a quoted-string, backslash-escaping any embedded
          # double quotes and backslashes.
          def quote(text)
            '"' + text.to_s.gsub(/(["\\])/) { "\\#{$1}" } + '"'
          end
        end

        # Returns the parameters as "; key=value; key=value", or "" when
        # there are none. Values are quoted only where required.
        def to_s
          return "" if self.empty?

          pairs = self.map do |key, value|
            text = value.to_s
            text = Params.quote(text) if Params.needs_quoting?(text)
            "#{key}=#{text}"
          end
          '; ' + pairs.join('; ')
        end
      end
    end # ContentField

    # An object representing a Content-Disposition header as specified in RFC2183.
    class ContentDisposition < ContentField

      # Is this an inline part?
      def inline?
        type == 'inline'
      end

      # Is this an attachment part?
      def attachment?
        type == "attachment"
      end

      # Returns the filename if specified
      def filename
        self.params['filename']
      end

      # Returns the creation date if available or nil.
      def creation_date
        self.params['creation-date'] # FIXME: parse as a date?
      end

      # Returns the modification date if available or nil.
      def modification_date
        self.params['modification-date'] # FIXME: parse as a date?
      end

      # Returns the read date if available or nil.
      def read_date
        self.params['read-date'] # FIXME: parse as a date?
      end

      # Returns the size if available or nil.
      def size
        self.params['size']
      end

      # FIXME: add all the other parameters specified in RFC2183, also add setters

      # Returns the Content-Disposition as a string suitable for inclusion in an email
      # header. If no disposition type is specified it will default to a disposition
      # type of 'attachment'.
      def to_s
        "#{self.type_raw || 'attachment'}#{self.params.to_s}"
      end

    end # ContentDisposition

    # An object representing a Content-Type header, split into media type
    # and media subtype.
    class ContentType < ContentField

      attr_accessor :media_type_raw, :media_subtype_raw

      def initialize(text = nil)
        super(text)
        # Split whatever the parent parsed into media type and subtype.
        self.type = @type_raw
      end

      # Returns the media type as a lower case string, or nil if unset.
      def media_type
        @media_type_raw.nil? ? @media_type_raw : @media_type_raw.downcase
      end

      # Set the media type
      def media_type=(text)
        @media_type_raw = text
      end

      # Returns the media subtype as a lower case string, or nil if unset.
      def media_subtype
        @media_subtype_raw.nil? ? @media_subtype_raw : @media_subtype_raw.downcase
      end

      # Set the media subtype
      def media_subtype=(text)
        @media_subtype_raw = text
      end

      # Returns the media 'type/subtype' as a lower case string.
      def type
        # Default to 'text/plain'
        if @media_type_raw.nil? or @media_subtype_raw.nil?
          'text/plain'
        else
          "#{@media_type_raw}/#{@media_subtype_raw}".downcase
        end
      end

      # Set the media 'type/subtype' together. Passing nil changes nothing.
      def type=(text = nil)
        unless text.nil?
          @type_raw = text # keep this inherited accessor in sync
          (@media_type_raw, @media_subtype_raw) = text.split('/', 2)
        end
      end

      # Is this a composite media type as specified in section 5.1 of RFC 2045.
      #
      # Note extension tokens are permitted to be composite but will always be seen
      # as discrete by this code.
      def composite?
        media_type == 'message' or media_type == 'multipart'
      end

      # Is this a discrete media type as specified in section 5.1 of RFC 2045.
      #
      # Note extension tokens are permitted to be composite but will always be seen
      # as discrete by this code.
      def discrete?
        !composite?
      end

      # Returns the full Content-Type header as a string suitable for inclusion in an
      # email header. If either of the media type or subtype are not specified it will
      # default to 'text/plain; charset=us-ascii'.
      def to_s
        # Default to 'text/plain; charset=us-ascii'
        if @media_type_raw.nil? or @media_subtype_raw.nil?
          'text/plain; charset=us-ascii'
        else
          "#{@media_type_raw}/#{@media_subtype_raw}#{self.params.to_s}"
        end
      end
    end # ContentType
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class SMail #:nodoc:
  class MIME < SMail
    # A DateTime whose string form is an RFC2822 date, so it can be written
    # straight into a Date header.
    class Date < DateTime
      # Return an RFC2822 compliant date string suitable for use in the Date header.
      def to_s
        # %e pads single-digit days with a space; collapse the resulting
        # run of whitespace into a single space.
        stamp = strftime('%a, %e %b %Y %H:%M:%S %z')
        stamp.gsub(/\s+/, ' ')
      end
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
class SMail #:nodoc:
  class MIME < SMail

    # Source of the regexp that splits a string into the text before the
    # first RFC2047 encoded word, the encoded word itself, and the rest.
    PATTERN_RFC2047_FIELD = '(.*?)(=\?(?:[^?]+)\?(?:.)\?(?:[^?]*)\?=)(.*)'

    class << self
      # Parses a Content-* header and returns the bits.
      #
      # Given the contents of a header field such as
      #
      #   text/plain; charset=US-ASCII
      #
      # this will return:
      #
      #   [ 'text/plain', { 'charset' => 'US-ASCII' } ]
      #
      # The type is returned as found in the header; the keys of the hash
      # are converted to lower case.
      #
      # This parses Content-Type and Content-Disposition headers according to
      # the description of the Content-Type header in section 5.1 of RFC2045.
      #
      # It also handles continuations and character sets in parameter values
      # as described in sections 3 and 4 of RFC2231.
      def decode_content_field(text)
        s = StringScanner.new(text)

        type = s.scan(/[^;]*/)
        s.skip(/;\s*/)

        params = {}
        charsets = {}
        while key = s.scan(/[^=]+/)
          # Parameter names are case-insensitive; normalise them so lookups
          # such as params['charset'] work whatever the sender's casing.
          key = key.strip.downcase
          s.skip(/=/)
          if s.skip(/"/)
            # Deal with quoted parameters.
            value = s.scan(/(\\.|[^"])*/)
            s.skip(/"/)
            value.gsub!(/\\(.)/, '\1')
          else
            value = s.scan(/[^;\s]+/)
          end

          is_encoded = false
          if key =~ /^(.*)\*$/
            key = $1
            is_encoded = true
          end

          is_continued = false
          if key =~ /^(.*)\*[0-9]+$/
            key = $1
            is_continued = true
          end

          if is_encoded && value
            # Deal with character sets and languages.
            if value =~ /^([^']*)'([^']*)'(.*)$/
              charset, rest = $1, $3
              charsets[key] = charset.empty? ? 'US-ASCII' : charset
              value = rest
            end
            value.gsub!(/%([[:xdigit:]]{2})/) { $1.hex.chr }
            # String#iconv! takes (to_charset, from_charset). Only the first
            # segment of a continued parameter carries the charset, so later
            # segments reuse it and US-ASCII is the fallback.
            value.iconv!('UTF-8', charsets[key] || 'US-ASCII')
          end

          if is_continued and params[key]
            # Deal with parameter continuations.
            params[key] << value.to_s
          else
            params[key] = value
          end

          s.skip(/\s*;?\s*/) # skip any whitespace before and after a semicolon
        end

        # Some mail clients (I'm looking at you Becky!) don't use RFC2231 parameter
        # value character set information but instead encode the parameters as
        # RFC2047 fields, so lets cycle through them and try to decode, this should
        # not do any harm if they don't have encoded fields
        params.keys.each do |name|
          params[name] = self.decode_field(params[name])
        end

        [type, params]
      end

      # Decodes any RFC2047 words in a string and returns the string as UTF-8.
      # Uses our iconv to deal with common encoding problems.
      # Returns nil when text is nil.
      def decode_field(text)
        return nil if text.nil?
        word_re = Regexp.new(PATTERN_RFC2047_FIELD, Regexp::MULTILINE)
        result = ''
        while text =~ word_re
          prefix, encoded, text = $1, $2, $3
          # Whitespace-only text in front of an encoded word is dropped.
          result << prefix unless prefix =~ /\A\s*\Z/
          result << decode_word(encoded)
        end
        result << text
        result
      end

      # Decodes an RFC2047 word to a UTF-8 string.
      # Uses our iconv to deal with common encoding problems.
      # Text that is not an encoded word is returned unchanged.
      def decode_word(text)
        return text unless text =~ /=\?([^?]+)\?(.)\?([^?]*)\?=/

        charset, method, encoded_string = $1, $2, $3

        # Strip out the RFC2231 language specification if there is one.
        charset = $1 if charset =~ /^([^\*]+)\*?(.*)$/

        # Quoted-printable in RFC2047 substitutes spaces with underscores.
        encoded_string.tr!('_', ' ') if method.downcase == 'q'

        encoded_string.decode_mime(method).iconv('utf-8', charset)
      end

      # Takes the given UTF-8 string, converts it the given character set, and
      # encodes it as an RFC2047 style field.
      #
      # All arguments after text are optional. If a method is not supplied,
      # the String.best_mime_encoding method is used to pick one. The charset
      # defaults to UTF-8, and the line length to 66 characters.
      #
      # Returns '' when text is nil. Raises ArgumentError for an unknown method.
      def encode_field(text, method = nil, charset = 'UTF-8', line_length = 66)
        return '' if text.nil?
        method ||= text.best_mime_encoding
        method = method.downcase if method.kind_of?(String)
        case method
        when :none
          text
        when :base64, 'b', 'base64'
          encode_base64_field(text, charset, line_length)
        when :quoted_printable, 'q', 'quoted-printable'
          encode_quoted_printable_field(text, charset, line_length)
        else
          raise ArgumentError, "Bad MIME encoding"
        end
      end

      def encode_quoted_printable_field(text, charset = 'UTF-8', line_length = 66) #:nodoc:
        # Work on a copy so the caller's charset string is not modified.
        charset = charset.upcase
        # Room left for data once "=?", "?Q?" and "?=" are accounted for.
        encoded_line_length = line_length - (charset.length + 7)

        iconv = Iconv.new(charset, 'UTF-8')
        encoded_text = ''
        word = ''
        text.each_char do |char|
          char = iconv.iconv(char)
          # RFC2047 has its own ideas about quoted-printable encoding.
          char.encode_quoted_printable!
          char = case char
                 when "_" then "=5F"
                 when " " then "_"
                 when "?" then "=3F"
                 when "\t" then "=09"
                 else char
                 end
          if word.length + char.length > encoded_line_length
            encoded_text << "=?#{charset}?Q?#{word}?=\n "
            word = ''
          end
          word << char
        end
        encoded_text << "=?#{charset}?Q?#{word}?="
        encoded_text
      end

      def encode_base64_field(text, charset = 'UTF-8', line_length = 66) #:nodoc:
        # Work on a copy so the caller's charset string is not modified.
        charset = charset.upcase
        # Every 3 unencoded bytes become 4 base64 characters.
        unencoded_line_length = (line_length - (charset.length + 7)) / 4 * 3

        iconv = Iconv.new(charset, 'UTF-8')
        encoded_text = ''
        word = ''
        text.each_char do |char|
          char = iconv.iconv(char)
          if word.length + char.length > unencoded_line_length
            encoded_text << "=?#{charset}?B?#{word.encode_base64.chomp}?=\n "
            word = ''
          end
          word << char
        end
        encoded_text << "=?#{charset}?B?#{word.encode_base64.chomp}?="
        encoded_text
      end

    end # self
  end

end
|
@@ -0,0 +1,257 @@
|
|
1
|
+
class SMail
  # A MIME message: an SMail message whose Content-Type is parsed and whose
  # body is split into parts.
  class MIME < SMail
    attr_accessor :parts, :preamble, :epilogue
    attr_reader :content_type, :boundary

    def initialize(text = '')
      super(text)
      self.content_type = self.header('content-type')
      fill_parts
    end

    # Returns the size of the message in bytes.
    def size
      text = self.to_s
      # length counts characters on encoding-aware Rubies; prefer the byte
      # count where the interpreter offers one.
      text.respond_to?(:bytesize) ? text.bytesize : text.length
    end

    # Sets the content type from a ContentType object, a header string, or
    # nil (which gives a default ContentType). Raises ArgumentError for
    # anything else.
    def content_type=(content_type)
      case content_type
      when SMail::MIME::ContentType
        @content_type = content_type
      when String
        self.content_type = SMail::MIME::ContentType.new(content_type)
      when nil
        self.content_type = SMail::MIME::ContentType.new
      else
        raise ArgumentError
      end
    end

    # Is this a multipart message
    def multipart?
      @content_type.composite?
    end

    # Returns the MIME-Version as a string (unlikely to be anything but '1.0')
    def version
      self.header('mime-version') || '1.0'
    end

    # Returns the subject in UTF-8
    def subject
      SMail::MIME.decode_field(subject_raw)
    end

    # Sets the subject, performs any necessary encoding
    def subject=(text)
      self.subject_raw = SMail::MIME.encode_field(text)
    end

    # Returns the raw potentially MIME encoded subject
    def subject_raw
      self.header('subject')
    end

    # Set the subject directly, any necessary MIME encoding is up to the caller
    def subject_raw=(text)
      self.header_set('subject', text)
    end

    # Returns the date from the Date header as a SMail::MIME::Date (a
    # DateTime), or nil when there is no Date header.
    def date
      date = self.header('date')
      return nil unless date
      SMail::MIME::Date.parse(date)
    end

    # Returns the raw body of the email including all parts
    alias body_raw body

    # Returns the body decoded and converted to UTF-8 if necessary, if this is a
    # multipart message this is not what you suspect: only the preamble is
    # returned.
    def body
      return @preamble if self.multipart? # what if it is message/rfc822 ?

      # Transfer encoding names are case-insensitive; a missing header or an
      # unrecognised encoding leaves the body as it is.
      encoding = self.header('content-transfer-encoding').to_s.strip.downcase
      decoded = case encoding
                when 'quoted-printable' then @body.decode_quoted_printable
                when 'base64' then @body.decode_base64
                else @body
                end

      # convert to UTF-8 if text
      if self.content_type.media_type == 'text'
        charset = self.content_type.params['charset'] || 'us-ascii'
        # Convert into a new string: decoded may be @body itself, and
        # converting in place would corrupt the raw body for later calls.
        decoded = decoded.iconv('utf-8', charset)
      end

      decoded
    end

    # Returns a string description of the MIME structure of this message.
    #
    # This is useful for debugging and testing. The returned string is
    # formatted as shown in the following example:
    #   multipart/mixed
    #    multipart/alternative
    #     text/plain
    #     multipart/related
    #      text/html
    #      image/gif
    #    application/octet-stream
    def describe_mime_structure(depth = 0)
      result = (' '*depth) + self.content_type.type + "\n"
      if self.multipart?
        self.parts.each do |part|
          result << part.describe_mime_structure(depth+1)
        end
      end
      result.chomp! if depth == 0
      result
    end

    # Pulls out any body parts matching the given MIME types and puts them
    # into an array.
    #
    # This is useful for pulling out parts in the appropriate order for
    # rendering. For example calling:
    #   message.flatten_body('text/plain', /^application\/.*$/)
    # should return all the text parts and attached files in the order in
    # which they appear in the original message.
    #
    # The various multipart subtypes are handled sensibly. For example,
    # for multipart/alternative messages, the best matching part (i.e. the
    # last part consisting entirely of the given types) is used.
    def flatten_body(*types)
      types = types.flatten
      return(self.consists_of_mime_types?(types) ? [self] : []) unless self.multipart?

      case self.content_type.type
      when 'multipart/alternative'
        best = self.parts.reverse.find {|candidate| candidate.consists_of_mime_types?(types) }
        best ? best.flatten_body(types) : []
      when 'multipart/mixed', 'multipart/related'
        # FIXME: For multipart/related, this should look for a start parameter and try that first.
        self.parts.collect {|part| part.flatten_body(types) }.flatten
      when 'multipart/signed'
        # A message without the expected part contributes nothing.
        signed = self.parts.first
        signed ? signed.flatten_body(types) : []
      when 'multipart/appledouble'
        data_fork = self.parts[1]
        data_fork ? data_fork.flatten_body(types) : []
      else
        # FIXME: should we also have an entry for message/rfc822 etc.
        []
      end
    end

    # Returns true if the message consists entirely of the given mime types.
    #
    # For single part messages this is simple: the Content-Type of the
    # message must be one of the supplied types.
    #
    # For multipart messages it gets a bit more complicated. We try to
    # make sure that the message can be entirely decomposed into
    # just the supplied types.
    #
    # The rules are as follows:
    # [multipart/alternative]
    #   At least one sub-part must consist of the given types.
    # [multipart/mixed]
    #   All sub-parts must consist of the given types.
    # [multipart/related]
    #   The root part (usually the first part) must consist of the
    #   given types.
    # [multipart/signed]
    #   The first part must consist of the given types.
    # [multipart/appledouble]
    #   The second part must consist of the given types. (See RFC 1740.)
    # [message/rfc822, message/rfc2822]
    #   The enclosed message must consist of the given types.
    #
    # A required part that is missing counts as not matching.
    def consists_of_mime_types?(*types)
      types = types.flatten
      type = self.content_type.type

      return types.any? {|t| t === type } unless self.multipart?

      case type
      when 'multipart/alternative'
        self.parts.any? {|part| part.consists_of_mime_types?(types) }
      when 'multipart/mixed'
        self.parts.all? {|part| part.consists_of_mime_types?(types) }
      when 'multipart/related', 'multipart/signed', 'message/rfc822', 'message/rfc2822'
        # FIXME: multipart/related should look for a start parameter and try that first.
        root = self.parts.first
        root ? root.consists_of_mime_types?(types) : false
      when 'multipart/appledouble'
        data_fork = self.parts[1]
        data_fork ? data_fork.consists_of_mime_types?(types) : false
      else
        false
      end
    end

    private

    # Populates @parts according to the content type.
    def fill_parts
      if self.content_type.discrete?
        parts_single_part
      else
        parts_multipart
      end
    end

    # A single part message has no sub-parts.
    def parts_single_part
      @parts = []
    end

    # Splits the raw body into preamble, parts and epilogue using the
    # boundary parameter; an enclosed message/rfc822 becomes a single part.
    def parts_multipart
      @parts = []
      @boundary = self.content_type.params['boundary']

      if self.content_type.type == 'message/rfc822' or self.content_type.type == 'message/rfc2822'
        @parts << SMail::MIME.new(@body)
        return @parts
      end

      return parts_single_part unless @boundary

      delimiter = Regexp.escape(@boundary)

      # Boundary lines may carry trailing spaces or tabs and a CR.
      epilogue_re = Regexp.new("^--#{delimiter}--[ \t]*\r?$", Regexp::MULTILINE)
      (body, @epilogue) = @body.split(epilogue_re, 2)
      body ||= '' # splitting an empty body yields no pieces at all
      @epilogue.lstrip! unless @epilogue.nil?

      bits_re = Regexp.new("^--#{delimiter}[ \t]*\r?$", Regexp::MULTILINE)
      bits = body.split(bits_re)

      @preamble = bits.shift # FIXME is this OK? or better to see a header in the first line?

      bits.each do |bit|
        bit.lstrip!
        @parts << SMail::MIME.new(bit)
      end

      @parts
    end

  end
end
|
data/lib/smail/mime.rb
ADDED
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: smail-mime
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.6
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew Walker
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-17 00:00:00 +11:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: smail
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.5
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: activesupport
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.0.0
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rspec
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.0.5
|
44
|
+
version:
|
45
|
+
description:
|
46
|
+
email: matthew@walker.wattle.id.au
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- lib/smail/mime.rb
|
55
|
+
- lib/smail/mime/coding_extensions.rb
|
56
|
+
- lib/smail/mime/content_fields.rb
|
57
|
+
- lib/smail/mime/date.rb
|
58
|
+
- lib/smail/mime/header.rb
|
59
|
+
- lib/smail/mime/mime.rb
|
60
|
+
- lib/smail/mime/version.rb
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://github.com/mwalker/smail-mime
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: "0"
|
75
|
+
version:
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: "0"
|
81
|
+
version:
|
82
|
+
requirements: []
|
83
|
+
|
84
|
+
rubyforge_project:
|
85
|
+
rubygems_version: 1.3.5
|
86
|
+
signing_key:
|
87
|
+
specification_version: 3
|
88
|
+
summary: A simple MIME email parser
|
89
|
+
test_files: []
|
90
|
+
|