ruby-msg 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,287 @@
1
+ require 'rubygems'
2
+ require 'tmail'
3
+
4
+ # these will be removed later
5
+ require 'time'
6
+ require 'mime'
7
+
8
+ # there is some Msg specific stuff in here.
9
+
10
# Msg specific addition to TMail's mail class.
class TMail::Mail
  # Writes +str+ to the mail's body port as-is and returns +str+.
  def quoted_body=(str)
    body_port.wopen do |port|
      port.write(str)
    end
    str
  end
end
16
+
17
+ module Mapi
18
+ class Message
19
# Returns the (memoized) Mime object for this message.
#
# It is seeded from the transport message headers property, when there is one.
# Those headers may be a bare application/ms-tnef root with little else, may be
# a complete message with boundaries, or may be missing entirely; sometimes
# they claim multipart without any boundary, which is why +true+ is passed as
# the second argument (per the original note, to be more forgiving --
# presumably a lenient-parsing flag; confirm against Mime.new).
def mime
  unless @mime
    # @mime has to be assigned before populate_headers runs, because
    # populate_headers goes through #headers, which calls back into #mime.
    @mime = Mime.new(props.transport_message_headers.to_s, true)
    populate_headers
  end
  @mime
end
32
+
33
# Shortcut for the header collection of #mime.
def headers
  mime_root = mime
  mime_root.headers
end
36
+
37
+ # copy data from msg properties storage to standard mime. headers
38
+ # i've now seen it where the existing headers had heaps on stuff, and the msg#props had
39
+ # practically nothing. think it was because it was a tnef - msg conversion done by exchange.
40
# Copies data from the message properties into the mime headers: From,
# To/Cc/Bcc, Subject, Date and a handful of simple one-to-one mappings.
# Header values are stored as arrays of strings.
def populate_headers
  # construct a From value.
  # when the sender address type is SMTP the properties win over whatever the
  # transport headers said. otherwise an existing From header is left alone,
  # as the transport headers can be the only place the real sender address exists.
  name, email = props.sender_name, props.sender_email_address
  if props.sender_addrtype == 'SMTP'
    headers['From'] = if name and email and name != email
      [%{"#{name}" <#{email}>}]
    else
      [email || name]
    end
  elsif !headers.has_key?('From')
    # some messages were never sent, so the sender properties aren't filled out.
    if email
      # no smtp sender address available (eg only X.400), so a fake one is built.
      # the logic was picked by the original author to produce correct addresses
      # for their own organisation; treat the result as a best-effort guess.
      user = name ? name.sub(/(.*), (.*)/, "\\2.\\1") : email[/\w+$/].downcase
      domain = (email[%r{^/O=([^/]+)}i, 1].downcase + '.com' rescue email)
      headers['From'] = [name ? %{"#{name}" <#{user}@#{domain}>} : "<#{user}@#{domain}>"]
    elsif name
      # only a name is available.
      headers['From'] = [%{"#{name}"}]
    end
    # with neither name nor email there is nothing to add.
  end

  # recipients, in a fixed To, Cc, Bcc order. values are assigned as utf8
  # strings; any encoding is left to the mime class.
  recips_by_type = recipients.group_by { |r| r.type }
  [:to, :cc, :bcc].each do |type|
    # a type with no recipients has no entry in a plain group_by hash
    recips = recips_by_type[type] || []
    # if we can, sort recipients by the numerical part of the ole name,
    # otherwise just leave the order as it is
    recips = (recips.sort_by { |r| r.obj.name[/\d{8}$/].hex } rescue recips)
    # using , for separation, not ;. see issue #4
    headers[type.to_s.capitalize] = [recips.join(', ')] unless recips.empty?
  end
  headers['Subject'] = [props.subject] if props.subject

  # fill in a date value. an existing Date header is never touched.
  if !headers.has_key?('Date')
    # first available time property wins, in this preference order
    keys = %w[message_delivery_time client_submit_time last_modification_time creation_time]
    time = nil
    keys.each do |key|
      time = props.send(key)
      break if time
    end
    time = nil unless Date === time

    # no timezone information is available here; the value is converted as-is.
    headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
  end

  # some very simplistic mapping between internet message headers and the
  # mapi properties. each entry is [property, header name, optional filter];
  # a filter returning nil/false suppresses the header.
  mapi_header_map = [
    [:internet_message_id, 'Message-ID'],
    [:in_reply_to_id, 'In-Reply-To'],
    # don't set these values if they're equal to the defaults anyway
    [:importance, 'Importance', proc { |val| val.to_s == '1' ? nil : val }],
    [:priority, 'Priority', proc { |val| val.to_s == '1' ? nil : val }],
    [:sensitivity, 'Sensitivity', proc { |val| val.to_s == '0' ? nil : val }],
    [:conversation_topic, 'Thread-Topic'],
    [:read_receipt_requested, 'Disposition-Notification-To', proc { |val| from }]
  ]
  mapi_header_map.each do |mapi, mime, *f|
    # nothing to map when the property is unset; in particular an existing
    # header must not be replaced by an empty string.
    val = props.send(mapi)
    next unless val
    next if f[0] and !(val = f[0].call(val))
    headers[mime] = [val.to_s]
  end
end
145
+
146
+ # redundant?
147
# The part of the message class following "IPM.", downcased.
# Returns nil if any StandardError is raised while extracting it
# (for instance when the message class is missing or does not match).
def type
  props.message_class[/IPM\.(.*)/, 1].downcase
rescue StandardError
  nil
end
150
+
151
+ # shortcuts to some things from the headers
152
# Defines #from, #to, #cc, #bcc and #subject. Each returns the values of the
# corresponding header joined with a space, or nil when the header is absent.
%w[From To Cc Bcc Subject].each do |header_name|
  define_method(header_name.downcase) do
    headers.has_key?(header_name) ? headers[header_name].join(' ') : nil
  end
end
155
+
156
# Builds the TMail object representing the message body.
#
# When an rtf or html body exists, the result is a multipart/alternative
# container holding a text/plain part (if there is a plain body) and a
# text/html part (if there is an html body). The rtf body itself is not
# serialized, so a message whose only body is rtf yields a container with no
# parts -- a known shortcoming (FIXME in the original).
# Otherwise a single text/plain mail is returned.
def body_to_tmail
  unless props.body_rtf or props.body_html
    # check no header case. content type? etc?
    Log.debug "taking that other path"
    # body can be nil, hence the to_s
    return TMail::Mail.parse("Content-Type: text/plain\r\n\r\n" + props.body.to_s)
  end

  alternative = TMail::Mail.new
  # its actually possible for plain body to be empty, but the others not.
  if props.body
    alternative.parts << TMail::Mail.parse("Content-Type: text/plain\r\n\r\n" + props.body)
  end
  # the html may have been unwrapped from the rtf, if the rtf includes the html
  if props.body_html
    alternative.parts << TMail::Mail.parse("Content-Type: text/html\r\n\r\n" + props.body_html)
  end
  alternative['Content-Type'] = 'multipart/alternative'
  alternative
end
183
+
184
# Converts the message to a TMail mail object.
#
# Intended for IPM.Note (the email type); a warning is logged for any other
# message class, but the conversion is still attempted. YMMV.
#
# Raises NotImplementedError if the message has attachments, as wrapping the
# body and attachments in a multipart/mixed (or multipart/related) tree has
# not been ported to TMail yet.
def to_tmail
  Log.warn "to_tmail used on a #{props.message_class}" unless props.message_class == 'IPM.Note'
  # we always have a body. it is either a single text/plain, or a
  # multipart/alternative holding the plain and html bodies.
  mail = body_to_tmail

  unless attachments.empty?
    raise NotImplementedError, 'converting messages with attachments to TMail is not supported yet'
  end

  # add the standard preamble if its multipart. only the root gets it, not
  # nested multipart chunks.
  mail.quoted_body = "This is a multi-part message in MIME format.\r\n" if mail.multipart?

  # now that we have a root, we can mix in all our headers
  headers.each do |key, vals|
    # don't overwrite the content-type, encoding style stuff
    next if mail[key]
    # temporary hack carried over from the original
    next if key =~ /content-type/i and vals[0] =~ /base64/
    mail[key] = vals.first
  end

  mail
end
234
+ end
235
+
236
class Attachment
  # Converts the attachment to a TMail part with base64 encoded content.
  #
  # The content type comes from the attach_mime_tag property, falling back to
  # application/octet-stream. Content-Location and Content-ID are copied when
  # the corresponding properties are set.
  #
  # Raises NotImplementedError for embedded messages and embedded ole objects,
  # which have not been ported to TMail yet.
  def to_tmail
    # TODO: smarter mime typing.
    mimetype = props.attach_mime_tag || 'application/octet-stream'
    part = TMail::Mail.parse "Content-Type: #{mimetype}\r\n\r\n"
    part['Content-Disposition'] = %{attachment; filename="#{filename}"}
    part['Content-Transfer-Encoding'] = 'base64'
    part['Content-Location'] = props.attach_content_location if props.attach_content_location
    part['Content-ID'] = props.attach_content_id if props.attach_content_id

    if @embedded_msg or @embedded_ole
      raise NotImplementedError, 'converting embedded messages or ole objects to TMail is not supported yet'
    end

    # this assumes that data is a stream that responds to read. the to_s guards
    # against read returning nil.
    data_str = data.read.to_s
    # pack('m') produces the same output as Base64.encode64, without needing
    # the base64 library to be loaded.
    part.body = [data_str].pack('m').gsub(/\n/, "\r\n")
    part
  end
end
274
+
275
class Msg < Message
  # Extends the generic header population with a last-resort Date: if no Date
  # header exists after the normal processing, the latest timestamp found on
  # the ole dirents (modify time, else create time) is used, in a similar vein
  # to msgconvert.pl.
  def populate_headers
    super
    return if headers.has_key?('Date')
    stamps = @root.ole.dirents.map { |dirent| dirent.modify_time || dirent.create_time }
    time = stamps.compact.sort.last
    headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
  end
end
286
+ end
287
+
@@ -0,0 +1,440 @@
1
+ require 'rubygems'
2
+ require 'ole/storage'
3
+ require 'mapi'
4
+ require 'mapi/rtf'
5
+
6
+ module Mapi
7
+ #
8
+ # = Introduction
9
+ #
10
+ # Primary class interface to the vagaries of .msg files.
11
+ #
12
+ # The core of the work is done by the <tt>Msg::PropertyStore</tt> class.
13
+ #
14
+ class Msg < Message
15
+ #
16
+ # = Introduction
17
+ #
18
+ # A big component of +Msg+ files is the property store, which holds
19
+ # all the key/value pairs of properties. The message itself, and all
20
+ # its <tt>Attachment</tt>s and <tt>Recipient</tt>s have an instance of
21
+ # this class.
22
+ #
23
+ # = Storage model
24
+ #
25
+ # Property keys (tags?) can be either simple hex numbers, in the
26
+ # range 0x0000 - 0xffff, or they can be named properties. In fact,
27
+ # properties in the range 0x0000 to 0x7fff are supposed to be the non-
28
+ # named properties, and can be considered to be in the +PS_MAPI+
29
+ # namespace. (correct?)
30
+ #
31
+ # Named properties are serialized in the 0x8000 to 0xffff range,
32
+ # and are referenced as a guid and long/string pair.
33
+ #
34
+ # There are key ranges, which can be used to imply things generally
35
+ # about keys.
36
+ #
37
+ # Further, we can give symbolic names to most keys, coming from
38
+ # constants in various places. Eg:
39
+ #
40
+ # 0x0037 => subject
41
+ # {00062002-0000-0000-C000-000000000046}/0x8218 => response_status
42
+ # # displayed as categories in outlook
43
+ # {00020329-0000-0000-C000-000000000046}/"Keywords" => categories
44
+ #
45
+ # Further, there are completely different names, coming from other
46
+ # object models that get mapped to these things (CDO's model,
47
+ # Outlook's model etc). Eg "urn:schemas:httpmail:subject"
48
+ # I think these can be ignored though, as they aren't defined clearly
49
+ # in terms of mapi properties, and i'm really just trying to make
50
+ # a mapi property store. (It should also be relatively easy to
51
+ # support them later.)
52
+ #
53
+ # = Usage
54
+ #
55
+ # The api is driven by a desire to have the simple stuff "just work", ie
56
+ #
57
+ # properties.subject
58
+ # properties.display_name
59
+ #
60
+ # There also needs to be a way to look up properties more specifically:
61
+ #
62
+ # properties[0x0037] # => gets the subject
63
+ # properties[0x0037, PS_MAPI] # => still gets the subject
64
+ # properties['Keywords', PS_PUBLIC_STRINGS] # => gets outlook's categories array
65
+ #
66
+ # The abbreviated versions work by "resolving" the symbols to full keys:
67
+ #
68
+ # # the guid here is just PS_PUBLIC_STRINGS
69
+ # properties.resolve :keywords # => #<Key {00020329-0000-0000-c000-000000000046}/"Keywords">
70
+ # # the result here is actually also a key
71
+ # k = properties.resolve :subject # => 0x0037
72
+ # # it has a guid
73
+ # k.guid == Msg::Properties::PS_MAPI # => true
74
+ #
75
+ # = Parsing
76
+ #
77
+ # There are three objects that need to be parsed to load a +Msg+ property store:
78
+ #
79
+ # 1. The +nameid+ directory (<tt>Properties.parse_nameid</tt>)
80
+ # 2. The many +substg+ objects, whose names should match <tt>Properties::SUBSTG_RX</tt>
81
+ # (<tt>Properties#parse_substg</tt>)
82
+ # 3. The +properties+ file (<tt>Properties#parse_properties</tt>)
83
+ #
84
+ # Understanding of the formats is by no means perfect.
85
+ #
86
+ # = TODO
87
+ #
88
+ # * While the key objects are sufficient, the value objects are just plain
89
+ # ruby types. It currently isn't possible to write to the values, or to know
90
+ # which encoding the value had.
91
+ # * Update this doc.
92
+ # * Perhaps change from eager loading, to be load-on-demand.
93
+ #
94
+ class PropertyStore
95
include PropertySet::Constants
Key = PropertySet::Key

# Decoders for substg objects, keyed by the property type code taken from the
# object name. Each proc receives the ole object and returns the ruby value.
#
# note that binary and default both use obj.open, not the block form. that
# means the result should be closed later, which is not done. as it is only
# read from it shouldn't matter, but it is not really good - FIXME
# TODO: change these to use mapi symbolic const names
ENCODINGS = {
  # seems to be used when its going to be a directory instead of a file, eg
  # nested ole (3701 usually). in which case we shouldn't get here right?
  0x000d => proc { |entry| entry },
  # unicode
  0x001f => proc { |entry| Ole::Types::FROM_UTF16.iconv(entry.read) },
  # ascii. FIXME hack: used a[0..-2] before, which seemed right sometimes but
  # chopped the text for some others, hence chomping a trailing NUL instead.
  0x001e => proc { |entry| entry.read.chomp(0.chr) },
  # binary?
  0x0102 => proc { |entry| entry.open },
  :default => proc { |entry| entry.open }
}

SUBSTG_RX = /^__substg1\.0_([0-9A-F]{4})([0-9A-F]{4})(?:-([0-9A-F]{8}))?$/
PROPERTIES_RX = /^__properties_version1\.0$/
NAMEID_RX = /^__nameid_version1\.0$/
# matches any object name the property store knows about
VALID_RX = Regexp.union(SUBSTG_RX, PROPERTIES_RX, NAMEID_RX)

attr_reader :nameid
118
+
119
# Creates an empty store: no nameid map, and an empty key => value table.
def initialize
  # not exactly a cache currently
  @cache = {}
  @nameid = nil
end
124
+
125
+ #--
126
+ # The parsing methods
127
+ #++
128
+
129
# Builds a new store, loads the properties found under +obj+ into it and
# returns the store.
def self.load obj
  store = new
  store.load(obj)
  store
end
134
+
135
+ # Parse properties from the +Dirent+ obj
136
# Parse properties from the +Dirent+ obj.
#
# The nameid directory is handled first, as it provides the map needed for
# the named properties that follow. Then every file child is parsed either as
# the properties file or as a substg object, depending on its name.
def load obj
  nameid_obj = obj.children.find { |child| child.name =~ NAMEID_RX }
  if nameid_obj
    @nameid = PropertyStore.parse_nameid nameid_obj
    # hack to make it available to all msg files from the same ole storage object
    # FIXME - come up with a neater way
    class << obj.ole
      attr_accessor :msg_nameid
    end
    obj.ole.msg_nameid = @nameid
  elsif obj.ole
    # the accessor only exists if an earlier load added it to this storage
    @nameid = begin
      obj.ole.msg_nameid
    rescue StandardError
      nil
    end
  end

  # now parse the actual properties. directories matching the substg pattern
  # should arguably be decoded as properties too (0x000d being the dir
  # encoding), but currently only files are handled.
  obj.children.each do |child|
    next unless child.file?
    case child.name
    when PROPERTIES_RX
      parse_properties child
    when SUBSTG_RX
      # captures are key, encoding and the optional multivalue offset, all hex
      key, encoding, offset = $~.captures.map { |digits| digits && digits.hex }
      parse_substg key, encoding, offset, child
    end
  end
end
162
+
163
+ # Read nameid from the +Dirent+ obj, which is used for mapping of named properties keys to
164
+ # proxy keys in the 0x8000 - 0xffff range.
165
+ # Returns a hash of integer -> Key.
166
# Read nameid from the +Dirent+ obj, which is used for mapping of named
# properties keys to proxy keys in the 0x8000 - 0xffff range.
# Returns a hash of integer -> Key.
#
# Three children of +obj+ are read: the guid stream (..0002), the property
# entry stream (..0003) and the string name stream (..0004). Any other
# children are currently ignored.
def self.parse_nameid obj
  remaining = obj.children.dup
  guids_obj, props_obj, names_obj =
    %w[__substg1.0_00020102 __substg1.0_00030102 __substg1.0_00040102].map do |name|
      remaining.delete(obj / name)
    end

  # parse guids
  # these are the guids for named properties (other than builtin ones).
  # PS_PUBLIC_STRINGS is not stored in the stream, so it is prepended here.
  stream_guids = guids_obj.read.scan(/.{16}/m).map { |str| Ole::Types.load_guid str }
  guids = [PS_PUBLIC_STRINGS] + stream_guids

  # the string ids for named properties are not parsed up front, as they're
  # referred to by offset, not index. each is a long giving the byte length,
  # followed by the utf16 data.
  names_data = names_obj.read

  # parse actual props. each entry is 8 bytes: a long (numeric id, or offset
  # into the names data), then two shorts (flags and index).
  props = props_obj.read.scan(/.{8}/m).map do |str|
    flags, offset = str[4..-1].unpack 'v2'
    # the property will be serialised as this pseudo property, mapping it to this named property
    pseudo_prop = 0x8000 + offset
    named = flags & 1 == 1
    prop = if named
      # unpack returns an array; take the single value explicitly, as a
      # splat assignment of a one element array yields an array on ruby 1.9+
      str_off = str.unpack('V').first
      len = names_data[str_off, 4].unpack('V').first
      Ole::Types::FROM_UTF16.iconv names_data[str_off + 4, len]
    else
      a, b = str.unpack('v2')
      Log.debug "b not 0" if b != 0
      a
    end
    # a bit sus
    guid_off = flags >> 1
    # missing a few builtin PS_*
    # NOTE(review): for guid_off < 2 the index below goes negative and so
    # picks a guid from the end of the array - confirm the intended mapping.
    Log.debug "guid off < 2 (#{guid_off})" if guid_off < 2
    guid = guids[guid_off - 2]
    [pseudo_prop, Key.new(prop, guid)]
  end

  #Log.warn "* ignoring #{remaining.length} objects in nameid" unless remaining.empty?
  # this leaves a bunch of other unknown chunks of data with completely unknown meaning.
  Hash[*props.flatten]
end
217
+
218
+ # Parse an +Dirent+, as per <tt>msgconvert.pl</tt>. This is how larger properties, such
219
+ # as strings, binary blobs, and other ole sub-directories (eg nested Msg) are stored.
220
# Parse an +Dirent+, as per <tt>msgconvert.pl</tt>. This is how larger
# properties, such as strings, binary blobs, and other ole sub-directories
# (eg nested Msg) are stored.
#
# +key+ and +encoding+ are the integers decoded from the object name, and
# +offset+ is the element index for multivalue encodings (nil otherwise).
def parse_substg key, encoding, offset, obj
  multivalue = (encoding & 0x1000) != 0
  if multivalue
    # there is typically one object with no offset first, whose data is a
    # series of numbers equal to the lengths of all the sub parts. it only
    # gives an implied array size, so it is ignored.
    return unless offset
    # remove multivalue flag for individual pieces
    encoding &= ~0x1000
  else
    Log.warn "offset specified for non-multivalue encoding #{obj.name}" if offset
    offset = nil
  end

  encoder = ENCODINGS[encoding]
  if !encoder
    Log.warn "unknown encoding #{encoding}"
    encoder = ENCODINGS[:default]
  end
  add_property key, encoder.call(obj), offset
end
246
+
247
+ # For parsing the +properties+ file. Smaller properties are serialized in one chunk,
248
+ # such as longs, bools, times etc. The parsing has problems.
249
# For parsing the +properties+ file. Smaller properties are serialized in one
# chunk, such as longs, bools, times etc. The parsing has problems.
#
# After an optional pad at the start (expected to be 0 or 8 NUL bytes), the
# data is handled as 16 byte records. The first long of each record holds the
# property id in its high 16 bits and the type code in its low 16 bits; the
# value is read from byte 8 onwards.
def parse_properties obj
  data = obj.read
  # don't really understand this that well...
  pad = data.length % 16
  padding_ok = (pad == 0 || pad == 8) && data[0...pad] == "\000" * pad
  unless padding_ok
    Log.warn "padding was not as expected #{pad} (#{data.length}) -> #{data[0...pad].inspect}"
  end

  data[pad..-1].scan(/.{16}/m).each do |record|
    tag = record.unpack('V')[0]
    key = tag >> 16
    type_code = tag & 0xffff
    # records with a zero property id make no sense. probably a
    # serialization of some internal outlook structure...
    next if key == 0

    case type_code
    when 0x0003 # long
      # don't know what all the other data is for
      add_property key, *record[8, 4].unpack('V')
    when 0x000b # boolean
      # again, heaps more data than needed. and its not always 0 or 1, they
      # are in fact quite big numbers. this is wrong.
      add_property key, record[8, 4].unpack('V')[0] != 0
    when 0x0040 # systime
      add_property key, Ole::Types.load_time(record[8..-1])
    end
    # everything else is ignored. that includes 0x0102, 0x001e, 0x001f, 0x000d
    # and the multivalue 0x101e / 0x101f, which are skipped on purpose.
  end
end
283
+
284
# Stores +value+ under +key+.
#
# Integer keys in the named property range (0x8000 and up) are mapped through
# the nameid table when possible; every other key (and any named range key
# that can't be mapped, after a warning) is wrapped in a plain Key.
# With +pos+ given, the value is stored at that index of an array kept under
# the key (multivalue properties); otherwise it replaces whatever was there.
def add_property key, value, pos=nil
  in_named_range = Integer === key && key >= 0x8000
  mapped = (in_named_range && @nameid) ? @nameid[key] : nil
  if mapped
    key = mapped
  else
    if in_named_range
      if @nameid
        # possibly hit for a named property in the PS_MAPI guid
        Log.warn "property in named range not in nameid #{key.inspect}"
      else
        Log.warn "no nameid section yet named properties used"
      end
    end
    key = Key.new key
  end

  if pos
    slots = (@cache[key] ||= [])
    # this is actually a trickier problem. the issue is more that the values
    # must all be of the same type.
    Log.warn "duplicate property" unless Array === slots
    slots[pos] = value
  else
    # take the last.
    Log.warn "duplicate property #{key.inspect}" if @cache[key]
    @cache[key] = value
  end
end
313
+
314
+ # delegate to cache
315
# Delegates any unknown method to the underlying key => value hash.
def method_missing name, *args, &block
  @cache.send name, *args, &block
end

# Keeps respond_to? in agreement with the delegation done by method_missing.
def respond_to_missing? name, include_private = false
  @cache.respond_to?(name, include_private) || super
end
318
+ end
319
+
320
+ # these 2 will actually be of the form
321
+ # 1\.0_#([0-9A-Z]{8}), where $1 is the 0 based index number in hex
322
+ # should i parse that and use it as an index, or just return in
323
+ # file order? probably should use it later...
324
# names of the attachment and recipient sub-directories
ATTACH_RX = /^__attach_version1\.0_.*/
RECIP_RX = /^__recip_version1\.0_.*/
# every object name that is expected directly under a msg root
VALID_RX = Regexp.union(PropertyStore::VALID_RX, ATTACH_RX, RECIP_RX)

# the root Dirent this Msg was created from
attr_reader :root
# whether #close should also close the ole storage that owns the root
attr_accessor :close_parent
330
+
331
+ # Alternate constructor, to create an +Msg+ directly from +arg+ and +mode+, passed
332
+ # directly to Ole::Storage (ie either filename or seekable IO object).
333
# Alternate constructor, to create an +Msg+ directly from +arg+ and +mode+,
# passed directly to Ole::Storage (ie either filename or seekable IO object).
#
# Without a block the opened Msg is returned, and the caller is responsible
# for calling #close. With a block, the Msg is yielded and closed afterwards,
# and the value of the block is returned.
# If constructing the Msg fails, the ole storage is closed before the error
# is re-raised.
def self.open arg, mode=nil
  ole = Ole::Storage.open(arg, mode)
  msg = begin
    new ole.root
  rescue StandardError
    # don't leave the storage open when the msg can't be loaded
    ole.close
    raise
  end
  # we will close the ole when we are #closed
  msg.close_parent = true
  return msg unless block_given?
  begin
    yield msg
  ensure
    msg.close
  end
end
344
+
345
+ # Create an Msg from +root+, an <tt>Ole::Storage::Dirent</tt> object
346
# Create an Msg from +root+, an <tt>Ole::Storage::Dirent</tt> object.
# The properties under +root+ are loaded straight away, and a warning is
# logged for any children with unrecognised names.
def initialize root
  @root = root
  @close_parent = false
  store = PropertyStore.load(@root)
  super PropertySet.new(store)
  Msg.warn_unknown @root
end
352
+
353
# Bit of validation: logs a warning with the number of children of +obj+
# whose names don't match VALID_RX. Extra objects aren't a problem, it would
# just be interesting to know what they are. Doesn't check dir/file stuff.
def self.warn_unknown obj
  _known, unknown = obj.children.partition { |child| child.name =~ VALID_RX }
  return if unknown.empty?
  Log.warn "skipped #{unknown.length} unknown msg object(s)"
end
359
+
360
# Closes the owning ole storage, but only when this Msg was flagged (through
# close_parent) as being responsible for it.
def close
  return unless @close_parent
  @root.ole.close
end
363
+
364
# The valid attachments of the message: one Attachment per attach directory
# under the root, minus those reporting themselves as not valid. Memoized.
def attachments
  return @attachments if @attachments
  attach_dirs = @root.children.select { |child| child.dir? and child.name =~ ATTACH_RX }
  candidates = attach_dirs.map { |dir| Attachment.new dir }
  @attachments = candidates.select { |attach| attach.valid? }
end
370
+
371
# The recipients of the message: one Recipient per recip directory under the
# root, in the order the children are listed. Memoized.
def recipients
  return @recipients if @recipients
  recip_dirs = @root.children.select { |child| child.dir? and child.name =~ RECIP_RX }
  @recipients = recip_dirs.map { |dir| Recipient.new dir }
end
376
+
377
# Wraps an attach directory of the .msg. Besides loading the properties, it
# looks for an embedded ole object (the 3701 property stored as a directory),
# and if that object looks like an outlook message, loads it as a nested Msg.
class Attachment < Mapi::Attachment
  attr_reader :obj, :properties
  alias props :properties

  def initialize obj
    @obj = obj
    @embedded_ole = nil
    @embedded_msg = nil

    super PropertySet.new(PropertyStore.load(@obj))
    Msg.warn_unknown @obj

    @obj.children.each do |child|
      # temp hack. PropertyStore doesn't do directory properties atm - FIXME
      next unless child.dir? and child.name =~ PropertyStore::SUBSTG_RX and
                  $1 == '3701' and $2.downcase == '000d'

      @embedded_ole = child
      class << @embedded_ole
        # the name stored after the first 32 bytes of the "\001CompObj"
        # stream, or nil if there is no such stream
        def compobj
          stream = self["\001CompObj"]
          stream ? stream.read[/^.{32}([^\x00]+)/m, 1] : nil
        end

        # the compobj name when there is one. failing that, a guess: anything
        # with more than 2 msg-like children is taken to be an outlook message
        def embedded_type
          declared = compobj
          return declared if declared
          msg_like = children.select { |child| child.name =~ /__(substg|properties|recip|attach|nameid)/ }
          msg_like.length > 2 ? 'Microsoft Office Outlook Message' : nil
        end
      end

      if @embedded_ole.embedded_type == 'Microsoft Office Outlook Message'
        @embedded_msg = Msg.new @embedded_ole
      end
    end
  end

  # false for the particularly strange case of empty attachments, which was
  # noticed when handling embedded ole object attachments
  def valid?
    !props.raw.keys.empty?
  end
end
422
+
423
+ #
424
+ # +Recipient+ serves as a container for the +recip+ directories in the .msg.
425
+ # It has things like office_location, business_telephone_number, but I don't
426
+ # think enough to make a vCard out of?
427
+ #
428
# Container for one of the +recip+ directories in the .msg. The properties
# are loaded on construction, and unknown children are reported through
# Msg.warn_unknown.
class Recipient < Mapi::Recipient
  attr_reader :obj, :properties
  alias props :properties

  def initialize obj
    @obj = obj
    store = PropertyStore.load(@obj)
    super PropertySet.new(store)
    Msg.warn_unknown @obj
  end
end
438
+ end
439
+ end
440
+