astrotrain 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/Gemfile +8 -0
  2. data/LICENSE +18 -17
  3. data/Rakefile +118 -103
  4. data/astrotrain.gemspec +87 -136
  5. data/lib/astrotrain.rb +47 -51
  6. data/lib/astrotrain/attachment.rb +55 -0
  7. data/lib/astrotrain/message.rb +221 -235
  8. data/lib/astrotrain/transports/http_post.rb +67 -0
  9. data/lib/astrotrain/transports/resque.rb +63 -0
  10. data/test/fixtures/bad_email_format.txt +15 -0
  11. data/test/fixtures/basic.txt +4 -1
  12. data/test/fixtures/iso-8859-1.txt +1 -0
  13. data/test/message_test.rb +146 -457
  14. data/test/test_helper.rb +20 -42
  15. data/test/transport_test.rb +98 -100
  16. metadata +100 -243
  17. data/.gitignore +0 -26
  18. data/README +0 -47
  19. data/VERSION +0 -1
  20. data/config/sample.rb +0 -12
  21. data/lib/astrotrain/api.rb +0 -52
  22. data/lib/astrotrain/logged_mail.rb +0 -48
  23. data/lib/astrotrain/mapping.rb +0 -162
  24. data/lib/astrotrain/mapping/http_post.rb +0 -18
  25. data/lib/astrotrain/mapping/jabber.rb +0 -28
  26. data/lib/astrotrain/mapping/transport.rb +0 -55
  27. data/lib/astrotrain/tmail.rb +0 -58
  28. data/lib/astrotrain/worker.rb +0 -65
  29. data/lib/vendor/rest-client/README.rdoc +0 -104
  30. data/lib/vendor/rest-client/Rakefile +0 -84
  31. data/lib/vendor/rest-client/bin/restclient +0 -65
  32. data/lib/vendor/rest-client/foo.diff +0 -66
  33. data/lib/vendor/rest-client/lib/rest_client.rb +0 -188
  34. data/lib/vendor/rest-client/lib/rest_client/net_http_ext.rb +0 -23
  35. data/lib/vendor/rest-client/lib/rest_client/payload.rb +0 -185
  36. data/lib/vendor/rest-client/lib/rest_client/request_errors.rb +0 -75
  37. data/lib/vendor/rest-client/lib/rest_client/resource.rb +0 -103
  38. data/lib/vendor/rest-client/rest-client.gemspec +0 -18
  39. data/lib/vendor/rest-client/spec/base.rb +0 -5
  40. data/lib/vendor/rest-client/spec/master_shake.jpg +0 -0
  41. data/lib/vendor/rest-client/spec/payload_spec.rb +0 -71
  42. data/lib/vendor/rest-client/spec/request_errors_spec.rb +0 -44
  43. data/lib/vendor/rest-client/spec/resource_spec.rb +0 -52
  44. data/lib/vendor/rest-client/spec/rest_client_spec.rb +0 -219
  45. data/test/api_test.rb +0 -32
  46. data/test/logged_mail_test.rb +0 -67
  47. data/test/mapping_test.rb +0 -129
data/lib/astrotrain.rb CHANGED
@@ -1,56 +1,52 @@
1
1
  module Astrotrain
2
- class ProcessingCancelled < StandardError; end
3
-
4
- CALLBACK_TYPES = [:pre_mapping, :pre_processing, :post_processing]
5
- class << self
6
- attr_accessor :root, :lib_root, :callbacks
7
- end
8
-
9
- def self.load(root = Dir.pwd)
10
- self.root = File.expand_path(root)
11
- self.lib_root = File.expand_path(File.dirname(__FILE__))
12
- load_dependencies
13
- yield if block_given?
14
- %w(tmail message mapping logged_mail mapping/transport mapping/http_post mapping/jabber).each do |lib|
15
- require "astrotrain/#{lib}"
16
- end
17
- Astrotrain::Mail::ALLOW_MULTIPLE['delivered-to'] = true
18
- end
19
-
20
- def self.callback(name, *args, &block)
21
- found = callbacks[name]
22
- if block
23
- found << block
24
- else
25
- found.each { |cback| cback.call(*args) }
26
- end
27
- found
2
+ VERSION = '0.6.0'
3
+
4
+ require 'utf8'
5
+ require 'charlock_holmes'
6
+ require 'addressable/uri'
7
+ require 'faraday'
8
+ require 'astrotrain/attachment'
9
+ require 'astrotrain/message'
10
+
11
+ # Processes an Astrotrain message.
12
+ #
13
+ # message - Astrotrain::Message instance.
14
+ # destination - String URL to deliver the message. The scheme selects
15
+ # which Transport module to use (http://, resque://)
16
+ # options - Optional hash of options:
17
+ # :recipient - The main String recipient of the email.
18
+ # :payload - Optional hash to be sent with the request.
19
+ #
20
+ # Returns nothing.
21
+ def self.deliver(message, destination, options = {})
22
+ uri = Addressable::URI.parse(destination.to_s)
23
+ klass = Transports.load(uri.scheme)
24
+ klass.deliver(message, destination,
25
+ :recipient => options[:recipient],
26
+ :extra => options[:payload])
28
27
  end
29
28
 
30
- def self.clear_callbacks
31
- self.callbacks = CALLBACK_TYPES.inject({}) { |memo, ctype| memo.update(ctype => []) }
32
- end
33
-
34
- clear_callbacks
35
-
36
- private
37
- # help me ryan tomayko, you're my only help
38
- def self.load_dependencies
39
- require 'rubygems'
40
- gem 'addressable', '2.0.2'
41
-
42
- dm_ver = "0.9.11"
43
- gem "data_objects", dm_ver
44
- gem "dm-core", dm_ver # The datamapper ORM
45
- gem "dm-aggregates", dm_ver # Provides your DM models with count, sum, avg, min, max, etc.
46
- gem "dm-timestamps", dm_ver # Automatically populate created_at, created_on, etc. when those properties are present.
47
- gem "dm-types", dm_ver # Provides additional types, including csv, json, yaml.
48
- gem "dm-validations", dm_ver # Validation framework
49
-
50
- $LOAD_PATH.unshift File.join(lib_root, 'vendor', 'rest-client', 'lib')
51
-
52
- %w(dm-core dm-aggregates dm-timestamps dm-types dm-validations tmail rest_client).each do |lib|
53
- require lib
29
+ # Transports are responsible for getting this email where it is supposed
30
+ # to go.
31
+ #
32
+ # All Transports should conform to this API:
33
+ #
34
+ # Transports::HttpPost.deliver(message, destination, :recipient => main_recipient, :extra => extra_payload)
35
+ #
36
+ module Transports
37
+ MAP = {:http => :http_post, :resque => :resque}
38
+
39
+ def self.load(key)
40
+ key = key.to_sym if key
41
+ value = MAP[key]
42
+ if !value
43
+ raise ArgumentError, "No transport #{key.inspect} found in #{MAP.keys.inspect}"
44
+ elsif value.is_a?(Module)
45
+ value
46
+ else
47
+ require "astrotrain/transports/#{value}"
48
+ MAP[key]
49
+ end
54
50
  end
55
51
  end
56
- end
52
+ end
@@ -0,0 +1,55 @@
1
+ module Astrotrain
2
+ # Simple class that wraps a TMail part attachment in the IO API for
3
+ # Faraday
4
+ class Attachment
5
+ def initialize(part)
6
+ @data = nil
7
+ @part = part
8
+ @is_read = false
9
+ end
10
+
11
+ def content_type
12
+ @part.content_type
13
+ end
14
+
15
+ def filename
16
+ @part.filename
17
+ end
18
+
19
+ alias local_path filename
20
+ alias original_filename filename
21
+
22
+ def read(value = nil)
23
+ if read?
24
+ nil
25
+ else
26
+ @is_read = true
27
+ data
28
+ end
29
+ end
30
+
31
+ def read?
32
+ @is_read == true
33
+ end
34
+
35
+ def data
36
+ @data ||= @part.body.to_s
37
+ end
38
+
39
+ def length
40
+ data.size
41
+ end
42
+
43
+ def attached?
44
+ !filename.nil?
45
+ end
46
+
47
+ def ==(other)
48
+ super || (filename == other.filename && content_type == other.content_type)
49
+ end
50
+
51
+ def inspect
52
+ %(#<Message::Attachment filename=#{filename.inspect} content_type=#{content_type.inspect}>)
53
+ end
54
+ end
55
+ end
@@ -1,313 +1,290 @@
1
- require 'digest/sha1'
2
- require 'fileutils'
3
- require 'tempfile'
1
+ # encoding: UTF-8
2
+
3
+ require 'mail'
4
4
  require 'set'
5
+ require 'iconv'
6
+
5
7
 
6
8
  module Astrotrain
7
9
  # Wrapper around a TMail object
8
10
  class Message
9
- attr_accessor :body
11
+ EMAIL_REGEX = /[\w\.\_\%\+\-]+[^\.]@[\w\-\_\.]+/
12
+
13
+ # Reference to the internal Mail object that parsed the raw email.
10
14
  attr_reader :mail
11
15
 
16
+ # Reference to the original file that this Mail came from.
17
+ attr_reader :path
18
+
12
19
  class << self
13
- attr_reader :queue_path, :archive_path
14
20
  attr_accessor :recipient_header_order, :skipped_headers
15
21
  end
16
22
 
17
- def self.queue_path=(path)
18
- if path
19
- path = File.expand_path(path)
20
- FileUtils.mkdir_p path
21
- end
22
- @queue_path = path
23
- end
24
-
25
- def self.archive_path=(path)
26
- if path
27
- path = File.expand_path(path)
28
- FileUtils.mkdir_p path
29
- end
30
- @archive_path = path
31
- end
23
+ # Astrotrain::Message#headers does not show these headers
24
+ self.skipped_headers = Set.new %w(to cc from subject delivered-to
25
+ x-original-to received)
32
26
 
33
- self.skipped_headers = Set.new %w(date from subject delivered-to x-original-to received)
27
+ # This is the default order that Astrotrain will search for a matching
28
+ # recipient.
34
29
  self.recipient_header_order = %w(original_to delivered_to to)
35
- self.queue_path = File.join(Astrotrain.root, 'queue')
36
-
37
- # Dumps the raw text into the queue_path. Not really recommended, since you should
38
- # set the queue_path to the directory your incoming emails are dumped into.
39
- def self.queue(raw)
40
- filename = nil
41
- digest = Digest::SHA1.hexdigest(raw)
42
- while filename.nil? || File.exist?(filename)
43
- filename = File.join(queue_path, Digest::SHA1.hexdigest(digest + rand.to_s))
44
- end
45
- File.open filename, 'wb' do |f|
46
- f.write raw
47
- end
48
- filename
49
- end
50
30
 
51
- # Parses the given raw email text and processes it with a matching Mapping.
52
- def self.receive(raw, file = nil)
53
- message = parse(raw)
54
- Astrotrain.callback(:pre_mapping, message)
55
- Mapping.process(message, file)
56
- message
57
- rescue Astrotrain::ProcessingCancelled
31
+ # Public: Reads the email file at the given path into an Astrotrain::Message instance.
32
+ #
33
+ # path - String path to the file.
34
+ #
35
+ # Returns Astrotrain::Message instance.
36
+ def self.read(path)
37
+ new(::Mail.read(path), path)
58
38
  end
59
39
 
60
- # Processes the given file. It parses it by reading the contents, and optionally
61
- # archives or removes the original file.
62
- def self.receive_file(path)
63
- raw = IO.read(path)
64
- logged_path = path
65
- if archive_path
66
- daily_archive_path = archive_path / Time.now.year.to_s / Time.now.month.to_s / Time.now.day.to_s
67
- FileUtils.mkdir_p(daily_archive_path)
68
- logged_path = daily_archive_path / File.basename(path)
69
- FileUtils.mv path, logged_path if path != logged_path
70
- end
71
- receive(raw, logged_path)
72
- end
73
-
74
- # Parses the raw email headers into a Astrotrain::Message instance.
40
+ # Public: Parses the raw email headers into an Astrotrain::Message instance.
41
+ #
42
+ # raw - String of the email content
43
+ #
44
+ # Returns Astrotrain::Message instance.
75
45
  def self.parse(raw)
76
- new Mail.parse(raw)
77
- end
78
-
79
- def self.parse_email_addresses(value)
80
- emails = value.split(",")
81
- collection = []
82
- emails.each do |addr|
83
- addr.strip!
84
- next if addr.blank?
85
- header = parse_email_address(addr.to_s)
86
- collection << unescape(header[:email]) if !header[:email].blank?
87
- end
88
- collection
46
+ new(::Mail.new(raw))
89
47
  end
90
48
 
91
- def self.parse_email_address(email)
92
- return {} if email.blank?
93
- begin
94
- header = TMail::Address.parse(email)
95
- parsed = {:name => header.name}
96
- if header.is_a?(TMail::AddressGroup)
97
- header = header[0]
98
- end
99
- if !header.blank?
100
- parsed[:email] = header.address
101
- end
102
- parsed
103
- rescue SyntaxError, TMail::SyntaxError
104
- email = email.scan(/\<([^\>]+)\>/)[0]
105
- if email.blank?
106
- return {:name => nil, :email => nil}
107
- else
108
- email = email[0]
109
- retry
110
- end
111
- end
112
- end
113
-
114
- # Stolen from Rack/Camping, remove the "+" => " " translation
115
- def self.unescape(s)
116
- s.gsub!(/((?:%[0-9a-fA-F]{2})+)/n){
117
- [$1.delete('%')].pack('H*')
118
- }
119
- s
120
- end
121
-
122
- def initialize(mail)
49
+ def initialize(mail, path = nil)
50
+ @body = @html = @attachments = nil
51
+ @path = path
123
52
  @mail = mail
124
- @mapping = nil
125
- @attachments = []
126
53
  @recipients = {}
127
54
  end
128
55
 
129
- # Gets the recipients of an email using the To/Delivered-To/X-Original-To headers.
130
- # It's not always straightforward which email we want when dealing with filters
131
- # and forward rules.
56
+ # Public: Gets the recipients of an email using the
57
+ # To/Delivered-To/X-Original-To headers. It's not always straightforward
58
+ # which email we want when dealing with filters and forward rules.
59
+ #
60
+ # order - Array of email header names that specifies the order that the
61
+ # list of recipient emails is assembled. Valid strings are:
62
+ # 'original_to', 'delivered_to', and 'to'.
63
+ #
64
+ # Returns Array of possible recipients.
132
65
  def recipients(order = nil)
133
66
  if !@recipients.key?(order)
134
67
  order = self.class.recipient_header_order if order.blank?
68
+ order.push :body
135
69
  recipients = []
136
70
 
137
- order.each do |key|
138
- parse_email_headers(send("recipients_from_#{key}"), recipients)
71
+ emails = order.inject([]) do |memo, key|
72
+ memo.push *send("recipients_from_#{key}")
139
73
  end
140
- parse_email_headers recipients_from_body, recipients
141
74
 
142
- recipients.flatten!
143
- recipients.uniq!
144
- @recipients[order] = recipients
145
- else
146
- @recipients[order]
75
+ @recipients[order] = emails.map! { |em| em.address }
76
+ @recipients[order].uniq!
147
77
  end
78
+ @recipients[order]
148
79
  end
149
80
 
150
- def recipients_from_to
151
- @recipient_from_to ||= [@mail['to'].to_s]
81
+ # Public: Unquotes and converts the From header to UTF-8.
82
+ #
83
+ # Returns Array of Mail::Address objects
84
+ def from
85
+ @from ||= unquoted_address_header(:from)
152
86
  end
87
+ alias sender from
153
88
 
154
- def recipients_from_delivered_to
155
- @recipient_from_delivered_to ||= begin
156
- delivered = @mail['Delivered-To']
157
- if delivered.respond_to?(:first)
158
- delivered.map! { |a| a.to_s }
159
- else
160
- [delivered.to_s]
161
- end
162
- end
89
+ # Public: Unquotes and converts the To header to UTF-8.
90
+ #
91
+ # Returns Array of Mail::Address objects
92
+ def to
93
+ @to ||= unquoted_address_header(:to)
163
94
  end
164
95
 
165
- def recipients_from_original_to
166
- @recipient_from_original_to ||= [@mail['X-Original-To'].to_s]
167
- end
168
-
169
- def recipients_from_body
170
- @recipients_from_body ||= body.scan(/<[\w\.\_\%\+\-]+@[\w\-\_\.]+>/)
171
- end
172
-
173
- def sender
174
- @sender ||= TMail::Unquoter.unquote_and_convert_to(@mail['from'].to_s, "utf-8")
96
+ # Public: Unquotes and converts the Cc header to UTF-8.
97
+ #
98
+ # Returns Array of Mail::Address objects
99
+ def cc
100
+ @cc ||= unquoted_address_header(:cc)
175
101
  end
176
102
 
103
+ # Public: Unquotes and converts the Subject header to UTF-8.
104
+ #
105
+ # Returns String
177
106
  def subject
178
107
  @mail.subject
179
- rescue Iconv::InvalidCharacter
180
- @mail.quoted_subject
181
108
  end
182
109
 
110
+ # Public: Gets the unique message-id for the email, with the surrounding
111
+ # `<` and `>` parsed out.
112
+ #
113
+ # Returns String
183
114
  def message_id
184
- @message_id ||= header('message-id').to_s.gsub(/^<|>$/, '')
115
+ @mail.message_id
185
116
  end
186
117
 
118
+ # Public: Gets the plain/text body of the email.
119
+ #
120
+ # Returns String
187
121
  def body
188
- @body ||= process_message_body(:body)
122
+ process_message_body if !@body
123
+ @body
189
124
  end
190
125
 
126
+ # Public: Gets the html body of the email.
127
+ #
128
+ # Returns String
191
129
  def html
192
- @html ||= process_message_body(:html)
130
+ process_message_body if !@html
131
+ @html
193
132
  end
194
133
 
134
+ # Public: Gets the attachments in the email.
135
+ #
136
+ # Returns Array of Astrotrain::Attachment objects.
195
137
  def attachments
196
- @attachments ||= process_message_body(:attachments)
197
- end
198
-
199
- def raw
200
- @mail.port.to_s
201
- end
202
-
203
- def header(key)
204
- headers[key]
138
+ process_message_body if !@attachments
139
+ @attachments
205
140
  end
206
141
 
142
+ # Public: Builds a hash of headers, skipping the keys specified in
143
+ # #skipped_headers. If header values cannot be parsed, the original
144
+ # raw value is provided.
145
+ #
146
+ # Returns Hash of the headers with String keys and values.
207
147
  def headers
208
148
  @headers ||= begin
209
- h = {}
210
- @mail.header.each do |key, value|
211
- next if self.class.skipped_headers.include?(key)
212
- h[key] = read_header(key)
149
+ @mail.header.fields.inject({}) do |memo, field|
150
+ name = field.name.downcase.to_s
151
+ header = unquoted_header(name)
152
+ self.class.skipped_headers.include?(name) ?
153
+ memo :
154
+ memo.update(name => self.class.unescape(unquoted_header(name)))
213
155
  end
214
- h
215
156
  end
216
157
  end
217
158
 
218
- class Attachment
219
- def initialize(part)
220
- @part = part
221
- @is_read = false
222
- end
223
-
224
- def content_type
225
- @part.content_type
226
- end
227
-
228
- def filename
229
- @filename ||= @part.type_param("name") || @part.disposition_param('filename')
230
- end
231
-
232
- # For IO API compatibility when used with Rest-Client
233
- def close
234
- end
159
+ # UTILITY METHODS
235
160
 
236
- alias path filename
161
+ # Parses the 'To' header for email address.
162
+ #
163
+ # Returns Array of Mail::Address objects
164
+ def recipients_from_to
165
+ to
166
+ end
237
167
 
238
- def read(value = nil)
239
- if read?
240
- nil
241
- else
242
- @is_read = true
243
- data
244
- end
245
- end
168
+ # Parses the 'Delivered-To' header for email address.
169
+ #
170
+ # Returns Array of Mail::Address objects
171
+ def recipients_from_delivered_to
172
+ @recipients_from_delivered_to ||= unquoted_address_header('delivered-to')
173
+ end
246
174
 
247
- def read?
248
- @is_read == true
249
- end
175
+ # Parses the 'X-Original-To' header for email address.
176
+ #
177
+ # Returns Array of Mail::Address objects
178
+ def recipients_from_original_to
179
+ @recipients_from_original_to ||= unquoted_address_header('x-original-to')
180
+ end
250
181
 
251
- def data
252
- @part.body
182
+ # Parses out all email addresses from the body of the email.
183
+ #
184
+ # Returns Array of Mail::Address objects
185
+ def recipients_from_body
186
+ @recipients_from_body ||= begin
187
+ emails_from_body = body.scan(EMAIL_REGEX)
188
+ address_list_for(emails_from_body)
253
189
  end
190
+ end
254
191
 
255
- def attached?
256
- !filename.nil?
192
+ # Parses the quoted header values: `=?...?=`.
193
+ #
194
+ # key - String or Symbol header name
195
+ #
196
+ # Returns unquoted String.
197
+ def unquoted_header(key)
198
+ if header = @mail[key]
199
+ value = header.respond_to?(:map) ?
200
+ header.map { |h| h.value }.join("\n") :
201
+ header.value
202
+ Mail::Encodings.value_decode(value)
203
+ else
204
+ ''
257
205
  end
206
+ end
258
207
 
259
- def ==(other)
260
- super || (filename == other.filename && content_type == other.content_type)
208
+ # Parses the given header for email addresses. Handles the case where some
209
+ # keys return arrays if there are multiple values.
210
+ #
211
+ # key - String or Symbol header name
212
+ #
213
+ # Returns Array of Mail::Address objects
214
+ def unquoted_address_header(key)
215
+ if header = @mail[key]
216
+ emails = if header.respond_to?(:value)
217
+ [header.value]
218
+ else
219
+ header.map { |h| h.value }
220
+ end
221
+ address_list_for(emails)
222
+ else
223
+ []
261
224
  end
225
+ end
262
226
 
263
- def inspect
264
- %(#<Message::Attachment filename=#{filename.inspect} content_type=#{content_type.inspect}>)
265
- end
227
+ # Uses Mail::AddressList to parse the given comma separated emails.
228
+ #
229
+ # emails - Array of String emails (foo@example.com, Bar <bar@example.com...)
230
+ #
231
+ # Returns Array of Mail::Address objects
232
+ def address_list_for(emails)
233
+ emails = emails * ", "
234
+ list = Mail::AddressList.new(self.class.unescape(emails))
235
+ addrs = list.addresses.each { |a| a.decoded }
236
+ addrs.uniq!
237
+ addrs
238
+ rescue Mail::Field::ParseError
239
+ address_list_for(emails.scan(EMAIL_REGEX))
266
240
  end
267
241
 
268
- protected
269
- def read_header(key)
270
- header = @mail.header[key]
271
- begin
272
- header.to_s
273
- rescue
274
- header.raw_body
275
- end
242
+ # Stolen from Rack/Camping, remove the "+" => " " translation
243
+ def self.unescape(s)
244
+ s.gsub!(/((?:%[0-9a-fA-F]{2})+)/n){
245
+ [$1.delete('%')].pack('H*')
246
+ }
247
+ s
276
248
  end
277
249
 
278
- def process_message_body(var = nil)
250
+ # Parses the mail's parts, assembling the plain/HTML Strings, as well as
251
+ # any attachments.
252
+ #
253
+ # Returns nothing.
254
+ def process_message_body
255
+ @attachments = []
279
256
  if @mail.multipart?
280
- @attachments.clear
281
257
  @body, @html = [], []
282
258
  scan_parts(@mail)
283
259
  @body = @body.join("\n")
284
260
  @html = @html.join("\n")
285
261
  else
286
- if @mail.content_type == 'text/html'
287
- @html = @mail.body
262
+ if @mail.content_type =~ /text\/html/
263
+ @html = @mail.body.to_s
288
264
  @body = ''
289
265
  else
290
- @body = @mail.body
266
+ @body = @mail.body.to_s
291
267
  @html = ''
292
268
  end
293
269
  end
294
- if !@mail.charset
295
- @body = convert_to_utf8(@body)
296
- @html = convert_to_utf8(@html)
297
- end
298
- instance_variable_get "@#{var}" if var
270
+
271
+ @body = convert_to_utf8(@body)
272
+ @html = convert_to_utf8(@html)
299
273
  end
300
274
 
275
+ # Recursive method to scan all the parts of the given part.
276
+ #
277
+ # Returns nothing.
301
278
  def scan_parts(message)
302
279
  message.parts.each do |part|
303
280
  if part.multipart?
304
281
  scan_parts(part)
305
282
  else
306
283
  case part.content_type
307
- when 'text/plain'
308
- @body << part.body
309
- when 'text/html'
310
- @html << part.body
284
+ when /text\/plain/
285
+ @body << part.body.to_s
286
+ when /text\/html/
287
+ @html << part.body.to_s
311
288
  else
312
289
  att = Attachment.new(part)
313
290
  @attachments << att if att.attached?
@@ -316,27 +293,36 @@ module Astrotrain
316
293
  end
317
294
  end
318
295
 
319
- def parse_email_headers(values, collection)
320
- values.each do |value|
321
- if !value.blank?
322
- collection.push *self.class.parse_email_addresses(value)
323
- end
296
+ # Converts a given String to UTF-8.
297
+ # If the message has no charset assigned, we'll attempt to detect it
298
+ # then convert it to UTF-8.
299
+ #
300
+ # s - unconverted String in the wrong character set
301
+ #
302
+ # Returns converted String.
303
+ def convert_to_utf8(s)
304
+ # If this string is already valid UTF-8 just hand it back
305
+ return s if s.as_utf8.valid?
306
+
307
+ # First lets try to detect the encoding if the message didn't specify
308
+ if !@mail.charset && detection = CharlockHolmes::EncodingDetector.detect(s)
309
+ @mail.charset = detection[:encoding]
324
310
  end
325
- end
326
311
 
327
- # Attempts to run iconv conversions in common charsets to UTF-8. Needed for
328
- # those crappy emails that don't properly specify a charset in the headers.
329
- ICONV_CONVERSIONS = %w(utf-8 ISO-8859-1 ISO-8859-2 ISO-8859-3 ISO-8859-4 ISO-8859-5 ISO-8859-6 ISO-8859-7 ISO-8859-8 ISO-8859-9
330
- ISO-8859-15 GB2312)
331
- def convert_to_utf8(s)
332
- ICONV_CONVERSIONS.each do |from|
333
- begin
334
- return Iconv.iconv(ICONV_CONVERSIONS[0], from, s).to_s
335
- rescue Iconv::IllegalSequence
336
- ensure
337
- s
338
- end
312
+ # if the encoding was already set or we just detected it AND it's not already
313
+ # set to UTF-8 - try to transcode the body into UTF-8
314
+ if @mail.charset && @mail.charset != 'UTF-8'
315
+ s = CharlockHolmes::Converter.convert s, @mail.charset, 'UTF-8'
339
316
  end
317
+
318
+ # By the time we get here, `s` is either UTF-8 or we need to force it to be
319
+ # But, even if it's UTF-8 we could be in the case where the charset on the
320
+ # message was set to UTF-8 but is in fact invalid.
321
+ # So for either case, we want to make sure the output is valid UTF-8 - even
322
+ # if it means mutating the invalid string.
323
+ # Also we're not reusing the String::UTF8 version of `s` from above here
324
+ # because by this point, it may be a new string.
325
+ s.as_utf8.clean.as_raw
340
326
  end
341
327
  end
342
- end
328
+ end