astrotrain 0.5.4 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/Gemfile +8 -0
  2. data/LICENSE +18 -17
  3. data/Rakefile +118 -103
  4. data/astrotrain.gemspec +87 -136
  5. data/lib/astrotrain.rb +47 -51
  6. data/lib/astrotrain/attachment.rb +55 -0
  7. data/lib/astrotrain/message.rb +221 -235
  8. data/lib/astrotrain/transports/http_post.rb +67 -0
  9. data/lib/astrotrain/transports/resque.rb +63 -0
  10. data/test/fixtures/bad_email_format.txt +15 -0
  11. data/test/fixtures/basic.txt +4 -1
  12. data/test/fixtures/iso-8859-1.txt +1 -0
  13. data/test/message_test.rb +146 -457
  14. data/test/test_helper.rb +20 -42
  15. data/test/transport_test.rb +98 -100
  16. metadata +100 -243
  17. data/.gitignore +0 -26
  18. data/README +0 -47
  19. data/VERSION +0 -1
  20. data/config/sample.rb +0 -12
  21. data/lib/astrotrain/api.rb +0 -52
  22. data/lib/astrotrain/logged_mail.rb +0 -48
  23. data/lib/astrotrain/mapping.rb +0 -162
  24. data/lib/astrotrain/mapping/http_post.rb +0 -18
  25. data/lib/astrotrain/mapping/jabber.rb +0 -28
  26. data/lib/astrotrain/mapping/transport.rb +0 -55
  27. data/lib/astrotrain/tmail.rb +0 -58
  28. data/lib/astrotrain/worker.rb +0 -65
  29. data/lib/vendor/rest-client/README.rdoc +0 -104
  30. data/lib/vendor/rest-client/Rakefile +0 -84
  31. data/lib/vendor/rest-client/bin/restclient +0 -65
  32. data/lib/vendor/rest-client/foo.diff +0 -66
  33. data/lib/vendor/rest-client/lib/rest_client.rb +0 -188
  34. data/lib/vendor/rest-client/lib/rest_client/net_http_ext.rb +0 -23
  35. data/lib/vendor/rest-client/lib/rest_client/payload.rb +0 -185
  36. data/lib/vendor/rest-client/lib/rest_client/request_errors.rb +0 -75
  37. data/lib/vendor/rest-client/lib/rest_client/resource.rb +0 -103
  38. data/lib/vendor/rest-client/rest-client.gemspec +0 -18
  39. data/lib/vendor/rest-client/spec/base.rb +0 -5
  40. data/lib/vendor/rest-client/spec/master_shake.jpg +0 -0
  41. data/lib/vendor/rest-client/spec/payload_spec.rb +0 -71
  42. data/lib/vendor/rest-client/spec/request_errors_spec.rb +0 -44
  43. data/lib/vendor/rest-client/spec/resource_spec.rb +0 -52
  44. data/lib/vendor/rest-client/spec/rest_client_spec.rb +0 -219
  45. data/test/api_test.rb +0 -32
  46. data/test/logged_mail_test.rb +0 -67
  47. data/test/mapping_test.rb +0 -129
data/lib/astrotrain.rb CHANGED
@@ -1,56 +1,52 @@
1
1
  module Astrotrain
2
- class ProcessingCancelled < StandardError; end
3
-
4
- CALLBACK_TYPES = [:pre_mapping, :pre_processing, :post_processing]
5
- class << self
6
- attr_accessor :root, :lib_root, :callbacks
7
- end
8
-
9
- def self.load(root = Dir.pwd)
10
- self.root = File.expand_path(root)
11
- self.lib_root = File.expand_path(File.dirname(__FILE__))
12
- load_dependencies
13
- yield if block_given?
14
- %w(tmail message mapping logged_mail mapping/transport mapping/http_post mapping/jabber).each do |lib|
15
- require "astrotrain/#{lib}"
16
- end
17
- Astrotrain::Mail::ALLOW_MULTIPLE['delivered-to'] = true
18
- end
19
-
20
- def self.callback(name, *args, &block)
21
- found = callbacks[name]
22
- if block
23
- found << block
24
- else
25
- found.each { |cback| cback.call(*args) }
26
- end
27
- found
2
+ VERSION = '0.6.0'
3
+
4
+ require 'utf8'
5
+ require 'charlock_holmes'
6
+ require 'addressable/uri'
7
+ require 'faraday'
8
+ require 'astrotrain/attachment'
9
+ require 'astrotrain/message'
10
+
11
+ # Processes an Astrotrain message.
12
+ #
13
+ # message - Astrotrain::Message instance.
14
+ # destination - String URL to deliver the message. The scheme selects
15
+ # which Transport module to use (http://, resque://)
16
+ # options - Optional hash of options:
17
+ # :recipient - The main String recipient of the email.
18
+ # :payload - Optional hash to be sent with the request.
19
+ #
20
+ # Returns nothing.
21
+ def self.deliver(message, destination, options = {})
22
+ uri = Addressable::URI.parse(destination.to_s)
23
+ klass = Transports.load(uri.scheme)
24
+ klass.deliver(message, destination,
25
+ :recipient => options[:recipient],
26
+ :extra => options[:payload])
28
27
  end
29
28
 
30
- def self.clear_callbacks
31
- self.callbacks = CALLBACK_TYPES.inject({}) { |memo, ctype| memo.update(ctype => []) }
32
- end
33
-
34
- clear_callbacks
35
-
36
- private
37
- # help me ryan tomayko, you're my only help
38
- def self.load_dependencies
39
- require 'rubygems'
40
- gem 'addressable', '2.0.2'
41
-
42
- dm_ver = "0.9.11"
43
- gem "data_objects", dm_ver
44
- gem "dm-core", dm_ver # The datamapper ORM
45
- gem "dm-aggregates", dm_ver # Provides your DM models with count, sum, avg, min, max, etc.
46
- gem "dm-timestamps", dm_ver # Automatically populate created_at, created_on, etc. when those properties are present.
47
- gem "dm-types", dm_ver # Provides additional types, including csv, json, yaml.
48
- gem "dm-validations", dm_ver # Validation framework
49
-
50
- $LOAD_PATH.unshift File.join(lib_root, 'vendor', 'rest-client', 'lib')
51
-
52
- %w(dm-core dm-aggregates dm-timestamps dm-types dm-validations tmail rest_client).each do |lib|
53
- require lib
29
+ # Transports are responsible for getting this email where it is supposed
30
+ # to go.
31
+ #
32
+ # All Transports should conform to this API:
33
+ #
34
+ # Transports::HttpPost.deliver(message, destination, :recipient => main_recipient, :extra => extra_payload)
35
+ #
36
+ module Transports
37
+ MAP = {:http => :http_post, :resque => :resque}
38
+
39
+ def self.load(key)
40
+ key = key.to_sym if key
41
+ value = MAP[key]
42
+ if !value
43
+ raise ArgumentError, "No transport #{key.inspect} found in #{MAP.keys.inspect}"
44
+ elsif value.is_a?(Module)
45
+ value
46
+ else
47
+ require "astrotrain/transports/#{value}"
48
+ MAP[key]
49
+ end
54
50
  end
55
51
  end
56
- end
52
+ end
@@ -0,0 +1,55 @@
1
+ module Astrotrain
2
+ # Simple class that wraps a Mail part attachment in the IO API for
3
+ # Faraday
4
+ class Attachment
5
+ def initialize(part)
6
+ @data = nil
7
+ @part = part
8
+ @is_read = false
9
+ end
10
+
11
+ def content_type
12
+ @part.content_type
13
+ end
14
+
15
+ def filename
16
+ @part.filename
17
+ end
18
+
19
+ alias local_path filename
20
+ alias original_filename filename
21
+
22
+ def read(value = nil)
23
+ if read?
24
+ nil
25
+ else
26
+ @is_read = true
27
+ data
28
+ end
29
+ end
30
+
31
+ def read?
32
+ @is_read == true
33
+ end
34
+
35
+ def data
36
+ @data ||= @part.body.to_s
37
+ end
38
+
39
+ def length
40
+ data.size
41
+ end
42
+
43
+ def attached?
44
+ !filename.nil?
45
+ end
46
+
47
+ def ==(other)
48
+ super || (filename == other.filename && content_type == other.content_type)
49
+ end
50
+
51
+ def inspect
52
+ %(#<Message::Attachment filename=#{filename.inspect} content_type=#{content_type.inspect}>)
53
+ end
54
+ end
55
+ end
@@ -1,313 +1,290 @@
1
- require 'digest/sha1'
2
- require 'fileutils'
3
- require 'tempfile'
1
+ # encoding: UTF-8
2
+
3
+ require 'mail'
4
4
  require 'set'
5
+ require 'iconv'
6
+
5
7
 
6
8
  module Astrotrain
7
9
  # Wrapper around a Mail object
8
10
  class Message
9
- attr_accessor :body
11
+ EMAIL_REGEX = /[\w\.\_\%\+\-]+[^\.]@[\w\-\_\.]+/
12
+
13
+ # Reference to the internal Mail object that parsed the raw email.
10
14
  attr_reader :mail
11
15
 
16
+ # Reference to the original file that this Mail came from.
17
+ attr_reader :path
18
+
12
19
  class << self
13
- attr_reader :queue_path, :archive_path
14
20
  attr_accessor :recipient_header_order, :skipped_headers
15
21
  end
16
22
 
17
- def self.queue_path=(path)
18
- if path
19
- path = File.expand_path(path)
20
- FileUtils.mkdir_p path
21
- end
22
- @queue_path = path
23
- end
24
-
25
- def self.archive_path=(path)
26
- if path
27
- path = File.expand_path(path)
28
- FileUtils.mkdir_p path
29
- end
30
- @archive_path = path
31
- end
23
+ # Astrotrain::Message#headers does not show these headers
24
+ self.skipped_headers = Set.new %w(to cc from subject delivered-to
25
+ x-original-to received)
32
26
 
33
- self.skipped_headers = Set.new %w(date from subject delivered-to x-original-to received)
27
+ # This is the default order that Astrotrain will search for a matching
28
+ # recipient.
34
29
  self.recipient_header_order = %w(original_to delivered_to to)
35
- self.queue_path = File.join(Astrotrain.root, 'queue')
36
-
37
- # Dumps the raw text into the queue_path. Not really recommended, since you should
38
- # set the queue_path to the directory your incoming emails are dumped into.
39
- def self.queue(raw)
40
- filename = nil
41
- digest = Digest::SHA1.hexdigest(raw)
42
- while filename.nil? || File.exist?(filename)
43
- filename = File.join(queue_path, Digest::SHA1.hexdigest(digest + rand.to_s))
44
- end
45
- File.open filename, 'wb' do |f|
46
- f.write raw
47
- end
48
- filename
49
- end
50
30
 
51
- # Parses the given raw email text and processes it with a matching Mapping.
52
- def self.receive(raw, file = nil)
53
- message = parse(raw)
54
- Astrotrain.callback(:pre_mapping, message)
55
- Mapping.process(message, file)
56
- message
57
- rescue Astrotrain::ProcessingCancelled
31
+ # Public: Reads and parses the email file at the given path into an Astrotrain::Message instance.
32
+ #
33
+ # path - String path to the file.
34
+ #
35
+ # Returns Astrotrain::Message instance.
36
+ def self.read(path)
37
+ new(::Mail.read(path), path)
58
38
  end
59
39
 
60
- # Processes the given file. It parses it by reading the contents, and optionally
61
- # archives or removes the original file.
62
- def self.receive_file(path)
63
- raw = IO.read(path)
64
- logged_path = path
65
- if archive_path
66
- daily_archive_path = archive_path / Time.now.year.to_s / Time.now.month.to_s / Time.now.day.to_s
67
- FileUtils.mkdir_p(daily_archive_path)
68
- logged_path = daily_archive_path / File.basename(path)
69
- FileUtils.mv path, logged_path if path != logged_path
70
- end
71
- receive(raw, logged_path)
72
- end
73
-
74
- # Parses the raw email headers into a Astrotrain::Message instance.
40
+ # Public: Parses the raw email text into an Astrotrain::Message instance.
41
+ #
42
+ # raw - String of the email content
43
+ #
44
+ # Returns Astrotrain::Message instance.
75
45
  def self.parse(raw)
76
- new Mail.parse(raw)
77
- end
78
-
79
- def self.parse_email_addresses(value)
80
- emails = value.split(",")
81
- collection = []
82
- emails.each do |addr|
83
- addr.strip!
84
- next if addr.blank?
85
- header = parse_email_address(addr.to_s)
86
- collection << unescape(header[:email]) if !header[:email].blank?
87
- end
88
- collection
46
+ new(::Mail.new(raw))
89
47
  end
90
48
 
91
- def self.parse_email_address(email)
92
- return {} if email.blank?
93
- begin
94
- header = TMail::Address.parse(email)
95
- parsed = {:name => header.name}
96
- if header.is_a?(TMail::AddressGroup)
97
- header = header[0]
98
- end
99
- if !header.blank?
100
- parsed[:email] = header.address
101
- end
102
- parsed
103
- rescue SyntaxError, TMail::SyntaxError
104
- email = email.scan(/\<([^\>]+)\>/)[0]
105
- if email.blank?
106
- return {:name => nil, :email => nil}
107
- else
108
- email = email[0]
109
- retry
110
- end
111
- end
112
- end
113
-
114
- # Stolen from Rack/Camping, remove the "+" => " " translation
115
- def self.unescape(s)
116
- s.gsub!(/((?:%[0-9a-fA-F]{2})+)/n){
117
- [$1.delete('%')].pack('H*')
118
- }
119
- s
120
- end
121
-
122
- def initialize(mail)
49
+ def initialize(mail, path = nil)
50
+ @body = @html = @attachments = nil
51
+ @path = path
123
52
  @mail = mail
124
- @mapping = nil
125
- @attachments = []
126
53
  @recipients = {}
127
54
  end
128
55
 
129
- # Gets the recipients of an email using the To/Delivered-To/X-Original-To headers.
130
- # It's not always straightforward which email we want when dealing with filters
131
- # and forward rules.
56
+ # Public: Gets the recipients of an email using the
57
+ # To/Delivered-To/X-Original-To headers. It's not always straightforward
58
+ # which email we want when dealing with filters and forward rules.
59
+ #
60
+ # order - Array of email header names that specifies the order that the
61
+ # list of recipient emails is assembled. Valid strings are:
62
+ # 'original_to', 'delivered_to', and 'to'.
63
+ #
64
+ # Returns Array of possible recipients.
132
65
  def recipients(order = nil)
133
66
  if !@recipients.key?(order)
134
67
  order = self.class.recipient_header_order if order.blank?
68
+ order.push :body
135
69
  recipients = []
136
70
 
137
- order.each do |key|
138
- parse_email_headers(send("recipients_from_#{key}"), recipients)
71
+ emails = order.inject([]) do |memo, key|
72
+ memo.push *send("recipients_from_#{key}")
139
73
  end
140
- parse_email_headers recipients_from_body, recipients
141
74
 
142
- recipients.flatten!
143
- recipients.uniq!
144
- @recipients[order] = recipients
145
- else
146
- @recipients[order]
75
+ @recipients[order] = emails.map! { |em| em.address }
76
+ @recipients[order].uniq!
147
77
  end
78
+ @recipients[order]
148
79
  end
149
80
 
150
- def recipients_from_to
151
- @recipient_from_to ||= [@mail['to'].to_s]
81
+ # Public: Unquotes and converts the From header to UTF-8.
82
+ #
83
+ # Returns Array of Mail::Address objects
84
+ def from
85
+ @from ||= unquoted_address_header(:from)
152
86
  end
87
+ alias sender from
153
88
 
154
- def recipients_from_delivered_to
155
- @recipient_from_delivered_to ||= begin
156
- delivered = @mail['Delivered-To']
157
- if delivered.respond_to?(:first)
158
- delivered.map! { |a| a.to_s }
159
- else
160
- [delivered.to_s]
161
- end
162
- end
89
+ # Public: Unquotes and converts the To header to UTF-8.
90
+ #
91
+ # Returns Array of Mail::Address objects
92
+ def to
93
+ @to ||= unquoted_address_header(:to)
163
94
  end
164
95
 
165
- def recipients_from_original_to
166
- @recipient_from_original_to ||= [@mail['X-Original-To'].to_s]
167
- end
168
-
169
- def recipients_from_body
170
- @recipients_from_body ||= body.scan(/<[\w\.\_\%\+\-]+@[\w\-\_\.]+>/)
171
- end
172
-
173
- def sender
174
- @sender ||= TMail::Unquoter.unquote_and_convert_to(@mail['from'].to_s, "utf-8")
96
+ # Public: Unquotes and converts the Cc header to UTF-8.
97
+ #
98
+ # Returns Array of Mail::Address objects
99
+ def cc
100
+ @cc ||= unquoted_address_header(:cc)
175
101
  end
176
102
 
103
+ # Public: Unquotes and converts the Subject header to UTF-8.
104
+ #
105
+ # Returns String
177
106
  def subject
178
107
  @mail.subject
179
- rescue Iconv::InvalidCharacter
180
- @mail.quoted_subject
181
108
  end
182
109
 
110
+ # Public: Gets the unique message-id for the email, with the surrounding
111
+ # `<` and `>` parsed out.
112
+ #
113
+ # Returns String
183
114
  def message_id
184
- @message_id ||= header('message-id').to_s.gsub(/^<|>$/, '')
115
+ @mail.message_id
185
116
  end
186
117
 
118
+ # Public: Gets the text/plain body of the email.
119
+ #
120
+ # Returns String
187
121
  def body
188
- @body ||= process_message_body(:body)
122
+ process_message_body if !@body
123
+ @body
189
124
  end
190
125
 
126
+ # Public: Gets the html body of the email.
127
+ #
128
+ # Returns String
191
129
  def html
192
- @html ||= process_message_body(:html)
130
+ process_message_body if !@html
131
+ @html
193
132
  end
194
133
 
134
+ # Public: Gets the attachments in the email.
135
+ #
136
+ # Returns Array of Astrotrain::Attachment objects.
195
137
  def attachments
196
- @attachments ||= process_message_body(:attachments)
197
- end
198
-
199
- def raw
200
- @mail.port.to_s
201
- end
202
-
203
- def header(key)
204
- headers[key]
138
+ process_message_body if !@attachments
139
+ @attachments
205
140
  end
206
141
 
142
+ # Public: Builds a hash of headers, skipping the keys specified in
143
+ # #skipped_headers. If header values cannot be parsed, the original
144
+ # raw value is provided.
145
+ #
146
+ # Returns Hash of the headers with String keys and values.
207
147
  def headers
208
148
  @headers ||= begin
209
- h = {}
210
- @mail.header.each do |key, value|
211
- next if self.class.skipped_headers.include?(key)
212
- h[key] = read_header(key)
149
+ @mail.header.fields.inject({}) do |memo, field|
150
+ name = field.name.downcase.to_s
151
+ header = unquoted_header(name)
152
+ self.class.skipped_headers.include?(name) ?
153
+ memo :
154
+ memo.update(name => self.class.unescape(unquoted_header(name)))
213
155
  end
214
- h
215
156
  end
216
157
  end
217
158
 
218
- class Attachment
219
- def initialize(part)
220
- @part = part
221
- @is_read = false
222
- end
223
-
224
- def content_type
225
- @part.content_type
226
- end
227
-
228
- def filename
229
- @filename ||= @part.type_param("name") || @part.disposition_param('filename')
230
- end
231
-
232
- # For IO API compatibility when used with Rest-Client
233
- def close
234
- end
159
+ # UTILITY METHODS
235
160
 
236
- alias path filename
161
+ # Parses the 'To' header for email address.
162
+ #
163
+ # Returns Array of Mail::Address objects
164
+ def recipients_from_to
165
+ to
166
+ end
237
167
 
238
- def read(value = nil)
239
- if read?
240
- nil
241
- else
242
- @is_read = true
243
- data
244
- end
245
- end
168
+ # Parses the 'Delivered-To' header for email address.
169
+ #
170
+ # Returns Array of Mail::Address objects
171
+ def recipients_from_delivered_to
172
+ @recipients_from_delivered_to ||= unquoted_address_header('delivered-to')
173
+ end
246
174
 
247
- def read?
248
- @is_read == true
249
- end
175
+ # Parses the 'X-Original-To' header for email address.
176
+ #
177
+ # Returns Array of Mail::Address objects
178
+ def recipients_from_original_to
179
+ @recipients_from_original_to ||= unquoted_address_header('x-original-to')
180
+ end
250
181
 
251
- def data
252
- @part.body
182
+ # Parses out all email addresses from the body of the email.
183
+ #
184
+ # Returns Array of Mail::Address objects
185
+ def recipients_from_body
186
+ @recipients_from_body ||= begin
187
+ emails_from_body = body.scan(EMAIL_REGEX)
188
+ address_list_for(emails_from_body)
253
189
  end
190
+ end
254
191
 
255
- def attached?
256
- !filename.nil?
192
+ # Parses the quoted header values: `=?...?=`.
193
+ #
194
+ # key - String or Symbol header name
195
+ #
196
+ # Returns unquoted String.
197
+ def unquoted_header(key)
198
+ if header = @mail[key]
199
+ value = header.respond_to?(:map) ?
200
+ header.map { |h| h.value }.join("\n") :
201
+ header.value
202
+ Mail::Encodings.value_decode(value)
203
+ else
204
+ ''
257
205
  end
206
+ end
258
207
 
259
- def ==(other)
260
- super || (filename == other.filename && content_type == other.content_type)
208
+ # Parses the given header for email addresses. Handles the case where some
209
+ # keys return arrays if there are multiple values.
210
+ #
211
+ # key - String or Symbol header name
212
+ #
213
+ # Returns Array of Mail::Address objects
214
+ def unquoted_address_header(key)
215
+ if header = @mail[key]
216
+ emails = if header.respond_to?(:value)
217
+ [header.value]
218
+ else
219
+ header.map { |h| h.value }
220
+ end
221
+ address_list_for(emails)
222
+ else
223
+ []
261
224
  end
225
+ end
262
226
 
263
- def inspect
264
- %(#<Message::Attachment filename=#{filename.inspect} content_type=#{content_type.inspect}>)
265
- end
227
+ # Uses Mail::AddressList to parse the given comma separated emails.
228
+ #
229
+ # emails - Array of String emails (foo@example.com, Bar <bar@example.com...)
230
+ #
231
+ # Returns Array of Mail::Address objects
232
+ def address_list_for(emails)
233
+ emails = emails * ", "
234
+ list = Mail::AddressList.new(self.class.unescape(emails))
235
+ addrs = list.addresses.each { |a| a.decoded }
236
+ addrs.uniq!
237
+ addrs
238
+ rescue Mail::Field::ParseError
239
+ address_list_for(emails.scan(EMAIL_REGEX))
266
240
  end
267
241
 
268
- protected
269
- def read_header(key)
270
- header = @mail.header[key]
271
- begin
272
- header.to_s
273
- rescue
274
- header.raw_body
275
- end
242
+ # Stolen from Rack/Camping, remove the "+" => " " translation
243
+ def self.unescape(s)
244
+ s.gsub!(/((?:%[0-9a-fA-F]{2})+)/n){
245
+ [$1.delete('%')].pack('H*')
246
+ }
247
+ s
276
248
  end
277
249
 
278
- def process_message_body(var = nil)
250
+ # Parses the mail's parts, assembling the plain/HTML Strings, as well as
251
+ # any attachments.
252
+ #
253
+ # Returns nothing.
254
+ def process_message_body
255
+ @attachments = []
279
256
  if @mail.multipart?
280
- @attachments.clear
281
257
  @body, @html = [], []
282
258
  scan_parts(@mail)
283
259
  @body = @body.join("\n")
284
260
  @html = @html.join("\n")
285
261
  else
286
- if @mail.content_type == 'text/html'
287
- @html = @mail.body
262
+ if @mail.content_type =~ /text\/html/
263
+ @html = @mail.body.to_s
288
264
  @body = ''
289
265
  else
290
- @body = @mail.body
266
+ @body = @mail.body.to_s
291
267
  @html = ''
292
268
  end
293
269
  end
294
- if !@mail.charset
295
- @body = convert_to_utf8(@body)
296
- @html = convert_to_utf8(@html)
297
- end
298
- instance_variable_get "@#{var}" if var
270
+
271
+ @body = convert_to_utf8(@body)
272
+ @html = convert_to_utf8(@html)
299
273
  end
300
274
 
275
+ # Recursive method to scan all the parts of the given part.
276
+ #
277
+ # Returns nothing.
301
278
  def scan_parts(message)
302
279
  message.parts.each do |part|
303
280
  if part.multipart?
304
281
  scan_parts(part)
305
282
  else
306
283
  case part.content_type
307
- when 'text/plain'
308
- @body << part.body
309
- when 'text/html'
310
- @html << part.body
284
+ when /text\/plain/
285
+ @body << part.body.to_s
286
+ when /text\/html/
287
+ @html << part.body.to_s
311
288
  else
312
289
  att = Attachment.new(part)
313
290
  @attachments << att if att.attached?
@@ -316,27 +293,36 @@ module Astrotrain
316
293
  end
317
294
  end
318
295
 
319
- def parse_email_headers(values, collection)
320
- values.each do |value|
321
- if !value.blank?
322
- collection.push *self.class.parse_email_addresses(value)
323
- end
296
+ # Converts a given String to UTF-8.
297
+ # If the message has no charset assigned, we'll attempt to detect it
298
+ # then convert it to UTF-8.
299
+ #
300
+ # s - unconverted String in the wrong character set
301
+ #
302
+ # Returns converted String.
303
+ def convert_to_utf8(s)
304
+ # If this string is already valid UTF-8 just hand it back
305
+ return s if s.as_utf8.valid?
306
+
307
+ # First lets try to detect the encoding if the message didn't specify
308
+ if !@mail.charset && detection = CharlockHolmes::EncodingDetector.detect(s)
309
+ @mail.charset = detection[:encoding]
324
310
  end
325
- end
326
311
 
327
- # Attempts to run iconv conversions in common charsets to UTF-8. Needed for
328
- # those crappy emails that don't properly specify a charset in the headers.
329
- ICONV_CONVERSIONS = %w(utf-8 ISO-8859-1 ISO-8859-2 ISO-8859-3 ISO-8859-4 ISO-8859-5 ISO-8859-6 ISO-8859-7 ISO-8859-8 ISO-8859-9
330
- ISO-8859-15 GB2312)
331
- def convert_to_utf8(s)
332
- ICONV_CONVERSIONS.each do |from|
333
- begin
334
- return Iconv.iconv(ICONV_CONVERSIONS[0], from, s).to_s
335
- rescue Iconv::IllegalSequence
336
- ensure
337
- s
338
- end
312
+ # if the encoding was already set or we just detected it AND it's not already
313
+ # set to UTF-8 - try to transcode the body into UTF-8
314
+ if @mail.charset && @mail.charset != 'UTF-8'
315
+ s = CharlockHolmes::Converter.convert s, @mail.charset, 'UTF-8'
339
316
  end
317
+
318
+ # By the time we get here, `s` is either UTF-8 or we need to force it to be
319
+ # But, even if it's UTF-8 we could be in the case where the charset on the
320
+ # message was set to UTF-8 but is in fact invalid.
321
+ # So for either case, we want to make sure the output is valid UTF-8 - even
322
+ # if it means mutating the invalid string.
323
+ # Also we're not reusing the String::UTF8 version of `s` from above here
324
+ # because by this point, it may be a new string.
325
+ s.as_utf8.clean.as_raw
340
326
  end
341
327
  end
342
- end
328
+ end