ruby-msg 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,287 @@
1
+ require 'rubygems'
2
+ require 'tmail'
3
+
4
+ # these will be removed later
5
+ require 'time'
6
+ require 'mime'
7
+
8
+ # there is some Msg specific stuff in here.
9
+
10
# Reopens TMail::Mail to add a writer for the body text.
class TMail::Mail
  # Writes +str+ through the mail's body port (opened with +wopen+) and
  # returns +str+.
  # NOTE(review): the name suggests +str+ is expected to be already encoded
  # body text - confirm against TMail's body handling.
  def quoted_body=(str)
    body_port.wopen do |port|
      port.write(str)
    end
    str
  end
end
16
+
17
+ module Mapi
18
+ class Message
19
# Lazily builds and memoizes the root Mime object for this message.
#
# The object is parsed from the transport message headers property. That
# property may be missing (+to_s+ turns nil into an empty string), may only
# describe an application/ms-tnef root, or may be a complete header set.
# Per the original author's note, the trailing +true+ is there to make the
# parser forgiving of multipart headers that carry no boundaries.
#
# @mime is assigned before #populate_headers is called because that method
# reads the headers back through #headers, which re-enters this method.
def mime
  unless @mime
    @mime = Mime.new(props.transport_message_headers.to_s, true)
    populate_headers
  end
  @mime
end
32
+
33
# Header table of the message's root mime object (built on demand by #mime).
def headers
  root = mime
  root.headers
end
36
+
37
+ # copy data from msg properties storage to standard mime. headers
38
+ # i've now seen it where the existing headers had heaps of stuff, and the msg#props had
39
+ # practically nothing. think it was because it was a tnef - msg conversion done by exchange.
40
# Copies data from the MAPI property store (+props+) into the MIME headers.
#
# Fills in, in this order: From, To/Cc/Bcc, Subject, Date (only when no Date
# header exists yet) and a few headers that map directly onto single MAPI
# properties. Every value is stored as a one-element array.
#
# Fixes relative to the previous version:
# * a recipient type with no recipients no longer raises NoMethodError
#   (Enumerable#group_by returns a plain Hash, so the lookup can be nil);
# * an unset MAPI property no longer replaces an existing transport header
#   with an empty string.
def populate_headers
  # --- From -------------------------------------------------------------
  # The transport headers are sometimes the only place holding the sender's
  # real address, so From is only overwritten unconditionally when the
  # properties carry an SMTP address.
  name, email = props.sender_name, props.sender_email_address
  if props.sender_addrtype == 'SMTP'
    headers['From'] = if name and email and name != email
      [%{"#{name}" <#{email}>}]
    else
      [email || name]
    end
  elsif !headers.has_key?('From')
    # No SMTP sender and nothing from the transport headers either (e.g. a
    # message that was never sent). Synthesise something.
    if email
      # Only a non-SMTP (X.400 style) address is available. The address is
      # faked from the display name ("Last, First" -> "First.Last") or the
      # trailing word of the address, plus the /O= organisation with ".com"
      # appended. This heuristic was tuned for the original author's
      # organisation.
      user = name ? name.sub(/(.*), (.*)/, "\\2.\\1") : email[/\w+$/].downcase
      domain = (email[%r{^/O=([^/]+)}i, 1].downcase + '.com' rescue email)
      headers['From'] = [name ? %{"#{name}" <#{user}@#{domain}>} : "<#{user}@#{domain}>"]
    elsif name
      # Only a display name is known.
      headers['From'] = [%{"#{name}"}]
    end
    # With neither name nor email, From is simply left unset.
  end

  # --- To / Cc / Bcc ----------------------------------------------------
  # Values are assigned as plain strings; any encoding is left to the mime
  # layer.
  recips_by_type = recipients.group_by { |r| r.type }
  [:to, :cc, :bcc].each do |type|
    # A type without recipients has no entry in the hash.
    recips = recips_by_type[type] || []
    # Best effort: order by the numeric suffix of the ole object name, and
    # keep the existing order when that is not possible.
    recips = (recips.sort_by { |r| r.obj.name[/\d{8}$/].hex } rescue recips)
    # ", " is the separator (not ";"), see issue #4.
    headers[type.to_s.capitalize] = [recips.join(', ')] unless recips.empty?
  end

  headers['Subject'] = [props.subject] if props.subject

  # --- Date -------------------------------------------------------------
  # An existing Date header is never touched.
  unless headers.has_key?('Date')
    # Preference order; the first property that has a value wins.
    keys = %w[message_delivery_time client_submit_time last_modification_time creation_time]
    time = keys.inject(nil) { |found, key| found || props.send(key) }
    time = nil unless Date === time
    # No time zone information is available here, so none is applied.
    headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
  end

  # --- direct property -> header mappings ---------------------------------
  # Each entry is [mapi property, header name, optional filter]. A filter
  # returning nil/false suppresses the header (used to skip default values).
  mapi_header_map = [
    [:internet_message_id, 'Message-ID'],
    [:in_reply_to_id, 'In-Reply-To'],
    # don't set these values if they're equal to the defaults anyway
    [:importance, 'Importance', proc { |val| val.to_s == '1' ? nil : val }],
    [:priority, 'Priority', proc { |val| val.to_s == '1' ? nil : val }],
    [:sensitivity, 'Sensitivity', proc { |val| val.to_s == '0' ? nil : val }],
    [:conversation_topic, 'Thread-Topic'],
    # the receipt goes to the sender, whatever the property's own value is
    [:read_receipt_requested, 'Disposition-Notification-To', proc { |val| from }]
  ]
  mapi_header_map.each do |mapi, header_name, *f|
    val = props.send(mapi)
    # Without a property value there is nothing to write; whatever the
    # transport headers already contain is kept.
    next unless val
    next if f[0] and !(val = f[0].call(val))
    headers[header_name] = [val.to_s]
  end
end
145
+
146
+ # redundant?
147
# Lower-cased remainder of the message class after "IPM." (for example
# "note" for "IPM.Note"). Returns nil when deriving it raises a
# StandardError, e.g. no message class or no "IPM." match.
def type
  props.message_class[/IPM\.(.*)/, 1].downcase
rescue StandardError
  nil
end
150
+
151
+ # shortcuts to some things from the headers
152
# Defines the readers #from, #to, #cc, #bcc and #subject. Each one joins the
# values of the corresponding header with a single space, or returns nil when
# that header is absent.
%w[From To Cc Bcc Subject].each do |header_name|
  define_method(header_name.downcase) do
    headers[header_name].join(' ') if headers.has_key?(header_name)
  end
end
155
+
156
# Builds the body part of the mail as a TMail::Mail.
#
# * With an rtf or html body present, the result is a multipart/alternative
#   container holding a text/plain part (when a plain body exists) and a
#   text/html part (when an html body exists). The rtf body itself is not
#   serialized, so a message whose only body is rtf yields a container with
#   no parts (known limitation, FIXME in the original).
# * Otherwise the result is a single text/plain mail; a nil body becomes an
#   empty string.
def body_to_tmail
  unless props.body_rtf or props.body_html
    Log.debug "taking that other path"
    return TMail::Mail.parse("Content-Type: text/plain\r\n\r\n" + props.body.to_s)
  end

  container = TMail::Mail.new
  # the plain body can be empty even though the others are not
  if (plain = props.body)
    container.parts << TMail::Mail.parse("Content-Type: text/plain\r\n\r\n" + plain)
  end
  # the html may have been unwrapped from the rtf by the property layer
  if (html = props.body_html)
    container.parts << TMail::Mail.parse("Content-Type: text/html\r\n\r\n" + html)
  end
  container['Content-Type'] = 'multipart/alternative'
  container
end
183
+
184
# Converts the message into a TMail::Mail: the body from #body_to_tmail with
# the message headers mixed in.
#
# Intended for IPM.Note (plain email); other message classes are converted
# too, with a warning.
#
# Raises NotImplementedError when the message has attachments. Wrapping the
# body and attachments in a multipart/mixed (or multipart/related, for
# inline cid: references) tree has not been ported to TMail yet; the
# previous version carried that logic as unreachable code after the raise.
def to_tmail
  Log.warn "to_tmail used on a #{props.message_class}" unless props.message_class == 'IPM.Note'
  # we always have a body
  mail = body_to_tmail

  unless attachments.empty?
    raise NotImplementedError, 'attachments are not supported by to_tmail yet'
  end

  # Standard preamble for multipart bodies. Only the root gets it, not any
  # nested multipart chunk.
  mail.quoted_body = "This is a multi-part message in MIME format.\r\n" if mail.multipart?

  # now that we have a root, we can mix in all our headers
  headers.each do |key, vals|
    # don't overwrite anything the body already set (content-type etc.)
    next if mail[key]
    # temporary hack: never copy a content-type header whose value mentions base64
    next if key =~ /content-type/i and vals[0] =~ /base64/
    mail[key] = vals.first
  end

  mail
end
234
+ end
235
+
236
+ class Attachment
237
# Converts the attachment into a base64 encoded TMail::Mail part.
#
# The content type comes from the attach_mime_tag property, falling back to
# application/octet-stream. Content-Location and Content-ID are copied when
# the corresponding properties are set.
#
# Raises NotImplementedError for embedded messages and embedded ole objects;
# the code that used to follow those raises was unreachable (and referred to
# an undefined +mime+), so it has been removed.
#
# The payload may be a stream (anything responding to #read), a String, or
# nil; nil yields an empty body instead of a NoMethodError.
def to_tmail
  # TODO: smarter mime typing.
  mimetype = props.attach_mime_tag || 'application/octet-stream'
  part = TMail::Mail.parse "Content-Type: #{mimetype}\r\n\r\n"
  part['Content-Disposition'] = %{attachment; filename="#{filename}"}
  part['Content-Transfer-Encoding'] = 'base64'
  part['Content-Location'] = props.attach_content_location if props.attach_content_location
  part['Content-ID'] = props.attach_content_id if props.attach_content_id

  if @embedded_msg or @embedded_ole
    raise NotImplementedError, 'embedded messages and ole objects are not supported by to_tmail yet'
  end

  source = data
  payload = source.respond_to?(:read) ? source.read.to_s : source.to_s
  # pack('m') produces the same output as Base64.encode64, without needing
  # the base64 library to be loaded.
  part.body = [payload].pack('m').gsub(/\n/, "\r\n")
  part
end
273
+ end
274
+
275
+ class Msg < Message
276
# Runs the generic header population, then - only if that left no Date
# header - falls back to the newest timestamp among the ole dirents (the
# modify time of each, or its create time when there is no modify time),
# in a similar vein to msgconvert.pl.
def populate_headers
  super
  return if headers.has_key?('Date')

  stamps = @root.ole.dirents.map { |dirent| dirent.modify_time || dirent.create_time }
  newest = stamps.compact.sort.last
  headers['Date'] = [Time.iso8601(newest.to_s).rfc2822] if newest
end
285
+ end
286
+ end
287
+
@@ -0,0 +1,440 @@
1
+ require 'rubygems'
2
+ require 'ole/storage'
3
+ require 'mapi'
4
+ require 'mapi/rtf'
5
+
6
+ module Mapi
7
+ #
8
+ # = Introduction
9
+ #
10
+ # Primary class interface to the vagaries of .msg files.
11
+ #
12
+ # The core of the work is done by the <tt>Msg::PropertyStore</tt> class.
13
+ #
14
+ class Msg < Message
15
+ #
16
+ # = Introduction
17
+ #
18
+ # A big component of +Msg+ files is the property store, which holds
19
+ # all the key/value pairs of properties. The message itself, and all
20
+ # its <tt>Attachment</tt>s and <tt>Recipient</tt>s have an instance of
21
+ # this class.
22
+ #
23
+ # = Storage model
24
+ #
25
+ # Property keys (tags?) can be either simple hex numbers, in the
26
+ # range 0x0000 - 0xffff, or they can be named properties. In fact,
27
+ # properties in the range 0x0000 to 0x7fff are supposed to be the non-
28
+ # named properties, and can be considered to be in the +PS_MAPI+
29
+ # namespace. (correct?)
30
+ #
31
+ # Named properties are serialized in the 0x8000 to 0xffff range,
32
+ # and are referenced as a guid and long/string pair.
33
+ #
34
+ # There are key ranges, which can be used to imply things generally
35
+ # about keys.
36
+ #
37
+ # Further, we can give symbolic names to most keys, coming from
38
+ # constants in various places. Eg:
39
+ #
40
+ # 0x0037 => subject
41
+ # {00062002-0000-0000-C000-000000000046}/0x8218 => response_status
42
+ # # displayed as categories in outlook
43
+ # {00020329-0000-0000-C000-000000000046}/"Keywords" => categories
44
+ #
45
+ # Further, there are completely different names, coming from other
46
+ # object models that get mapped to these things (CDO's model,
47
+ # Outlook's model etc). Eg "urn:schemas:httpmail:subject"
48
+ # I think these can be ignored though, as they aren't defined clearly
49
+ # in terms of mapi properties, and i'm really just trying to make
50
+ # a mapi property store. (It should also be relatively easy to
51
+ # support them later.)
52
+ #
53
+ # = Usage
54
+ #
55
+ # The api is driven by a desire to have the simple stuff "just work", ie
56
+ #
57
+ # properties.subject
58
+ # properties.display_name
59
+ #
60
+ # There also needs to be a way to look up properties more specifically:
61
+ #
62
+ # properties[0x0037] # => gets the subject
63
+ # properties[0x0037, PS_MAPI] # => still gets the subject
64
+ # properties['Keywords', PS_PUBLIC_STRINGS] # => gets outlook's categories array
65
+ #
66
+ # The abbreviated versions work by "resolving" the symbols to full keys:
67
+ #
68
+ # # the guid here is just PS_PUBLIC_STRINGS
69
+ # properties.resolve :keywords # => #<Key {00020329-0000-0000-c000-000000000046}/"Keywords">
70
+ # # the result here is actually also a key
71
+ # k = properties.resolve :subject # => 0x0037
72
+ # # it has a guid
73
+ # k.guid == Msg::Properties::PS_MAPI # => true
74
+ #
75
+ # = Parsing
76
+ #
77
+ # There are three objects that need to be parsed to load a +Msg+ property store:
78
+ #
79
+ # 1. The +nameid+ directory (<tt>Properties.parse_nameid</tt>)
80
+ # 2. The many +substg+ objects, whose names should match <tt>Properties::SUBSTG_RX</tt>
81
+ # (<tt>Properties#parse_substg</tt>)
82
+ # 3. The +properties+ file (<tt>Properties#parse_properties</tt>)
83
+ #
84
+ # Understanding of the formats is by no means perfect.
85
+ #
86
+ # = TODO
87
+ #
88
+ # * While the key objects are sufficient, the value objects are just plain
89
+ # ruby types. It currently isn't possible to write to the values, or to know
90
+ # which encoding the value had.
91
+ # * Update this doc.
92
+ # * Perhaps change from eager loading, to be load-on-demand.
93
+ #
94
+ class PropertyStore
95
+ include PropertySet::Constants
96
+ Key = PropertySet::Key
97
+
98
+ # note that binary and default both use obj.open. not the block form. this means we should
99
+ # #close it later, which we don't. as we're only reading though, it shouldn't matter right?
100
+ # not really good though FIXME
101
+ # change these to use mapi symbolic const names
102
# Decoders for +substg+ payloads, keyed by the 16 bit type code parsed from
# the object's name. Each decoder receives the ole object; :default is the
# fallback for unrecognised codes.
#
# The binary and default decoders use the non-block form of +open+, so the
# returned io is never closed here (FIXME). TODO: use the mapi symbolic
# constant names for the keys.
hand_back   = proc { |obj| obj }
open_stream = proc { |obj| obj.open }
ENCODINGS = {
  # seems to be used for directories (eg nested ole, usually 3701), in which
  # case this decoder should not really be reached
  0x000d => hand_back,
  # unicode
  0x001f => proc { |obj| Ole::Types::FROM_UTF16.iconv obj.read },
  # ascii. FIXME: a[0..-2] was tried before but chopped some texts, hence
  # removing only a trailing NUL
  0x001e => proc { |obj| obj.read.chomp 0.chr },
  # binary?
  0x0102 => open_stream,
  :default => open_stream
}

# Name patterns of the objects a property store is loaded from. SUBSTG_RX
# captures the property key, the type code and an optional 8 digit suffix.
SUBSTG_RX = /^__substg1\.0_([0-9A-F]{4})([0-9A-F]{4})(?:-([0-9A-F]{8}))?$/
PROPERTIES_RX = /^__properties_version1\.0$/
NAMEID_RX = /^__nameid_version1\.0$/
VALID_RX = Regexp.union(SUBSTG_RX, PROPERTIES_RX, NAMEID_RX)
116
+
117
+ attr_reader :nameid
118
+
119
# Starts with no nameid map and an empty property table. @cache holds the
# parsed key => value pairs (not exactly a cache currently).
def initialize
  @nameid, @cache = nil, {}
end
123
+ end
124
+
125
+ #--
126
+ # The parsing methods
127
+ #++
128
+
129
# Convenience constructor: creates a store, loads it from +obj+ and returns
# the store (not the return value of #load).
def self.load(obj)
  new.tap { |store| store.load(obj) }
end
134
+
135
+ # Parse properties from the +Dirent+ obj
136
# Parses the properties found directly under the dirent +obj+.
#
# The nameid directory is handled first because it provides the map needed
# for named (user defined) properties. When +obj+ has one, the parsed map is
# also stored on the ole storage object (through an accessor added to that
# one object) so that other stores loaded from the same storage can reuse
# it - a hack, FIXME. When +obj+ has none, a map previously stored on the
# storage is used, or nil if there is none.
#
# After that every file child is dispatched on its name: the properties
# file goes to #parse_properties, substg files go to #parse_substg with the
# hex fields of the name converted to integers (a missing suffix is nil).
# Directory children are skipped for now.
def load(obj)
  nameid_dirent = obj.children.find { |entry| entry.name =~ NAMEID_RX }
  if nameid_dirent
    @nameid = PropertyStore.parse_nameid(nameid_dirent)
    storage = obj.ole
    class << storage
      attr_accessor :msg_nameid
    end
    obj.ole.msg_nameid = @nameid
  elsif obj.ole
    @nameid = begin
      obj.ole.msg_nameid
    rescue StandardError
      nil
    end
  end

  obj.children.each do |entry|
    next unless entry.file?

    case entry.name
    when PROPERTIES_RX
      parse_properties(entry)
    when SUBSTG_RX
      key, encoding, offset = $~.captures.map { |digits| digits && digits.hex }
      parse_substg(key, encoding, offset, entry)
    end
  end
end
162
+
163
+ # Read nameid from the +Dirent+ obj, which is used for mapping of named properties keys to
164
+ # proxy keys in the 0x8000 - 0xffff range.
165
+ # Returns a hash of integer -> Key.
166
# Reads the nameid directory +obj+, which maps named properties to proxy
# keys in the 0x8000 - 0xffff range.
#
# Three streams are read from +obj+: the guids (00020102), the property
# entries (00030102) and the string names (00040102).
#
# Returns a hash of integer proxy key -> Key.
#
# Fix: the string offset and length used to be read with
# <tt>x = *str.unpack('V')</tt>, which yields an Array (not an Integer) on
# Ruby >= 1.9 and made the string lookup fail. They are now taken with
# +first+, which behaves the same on every Ruby version.
def self.parse_nameid obj
  guids_obj, props_obj, names_obj =
    %w[__substg1.0_00020102 __substg1.0_00030102 __substg1.0_00040102].map { |name| obj / name }

  # The guid stream holds the guids for named properties other than the
  # builtin ones, 16 bytes each. PS_PUBLIC_STRINGS is prepended so that a
  # guid index of 2 resolves to it and index 3 to the first stream entry.
  stream_guids = guids_obj.read.scan(/.{16}/m).map { |str| Ole::Types.load_guid str }
  guids = [PS_PUBLIC_STRINGS] + stream_guids

  # The string names are not parsed up front because the entries refer to
  # them by byte offset, not by index. At each offset there is a 4 byte
  # little endian length followed by that many bytes of UTF-16 text.
  names_data = names_obj.read

  # Each entry is 8 bytes: a 4 byte numeric id or string offset, then two
  # 16 bit values (flags, and the index of the proxy property).
  props = props_obj.read.scan(/.{8}/m).map do |str|
    flags, offset = str[4..-1].unpack 'v2'
    # the property is serialised under this pseudo property number
    pseudo_prop = 0x8000 + offset
    named = flags & 1 == 1
    prop = if named
      str_off = str.unpack('V').first
      len = names_data[str_off, 4].unpack('V').first
      Ole::Types::FROM_UTF16.iconv names_data[str_off + 4, len]
    else
      a, b = str.unpack('v2')
      Log.debug "b not 0" if b != 0
      a
    end
    # the remaining flag bits select the guid
    guid_off = flags >> 1
    # NOTE(review): indexes below 2 presumably stand for builtin property
    # sets that are not in +guids+; as written they wrap around to the end
    # of the array because the computed index is negative - confirm.
    Log.debug "guid off < 2 (#{guid_off})" if guid_off < 2
    guid = guids[guid_off - 2]
    [pseudo_prop, Key.new(prop, guid)]
  end

  # Any other objects inside the nameid directory are ignored; their
  # meaning is unknown.
  Hash[*props.flatten]
end
217
+
218
+ # Parse an +Dirent+, as per <tt>msgconvert.pl</tt>. This is how larger properties, such
219
+ # as strings, binary blobs, and other ole sub-directories (eg nested Msg) are stored.
220
# Stores the property held by one +substg+ dirent, as per
# <tt>msgconvert.pl</tt>. This is how larger values (strings, binary blobs,
# nested ole directories) are stored.
#
# +key+, +encoding+ and +offset+ are the integers parsed from the dirent
# name. Bit 0x1000 of +encoding+ marks a multivalue property:
#
# * multivalue without +offset+: skipped (returns nil). That object
#   typically only lists the lengths of the individual pieces.
# * multivalue with +offset+: the flag is cleared and the value is stored
#   at position +offset+ of the property's array.
# * otherwise any +offset+ is ignored, with a warning.
#
# Unknown encodings are decoded with ENCODINGS[:default], with a warning.
def parse_substg(key, encoding, offset, obj)
  multivalue = (encoding & 0x1000) != 0
  if multivalue
    return unless offset
    # remove multivalue flag for individual pieces
    encoding &= ~0x1000
  else
    Log.warn "offset specified for non-multivalue encoding #{obj.name}" if offset
    offset = nil
  end

  encoder = ENCODINGS.fetch(encoding) do
    Log.warn "unknown encoding #{encoding}"
    ENCODINGS[:default]
  end
  add_property(key, encoder.call(obj), offset)
end
246
+
247
+ # For parsing the +properties+ file. Smaller properties are serialized in one chunk,
248
+ # such as longs, bools, times etc. The parsing has problems.
249
# Parses the +properties+ file, where the small fixed size values (longs,
# booleans, times) are serialized. The format is only partially understood.
#
# The data is read as 16 byte records after a leading pad of
# <tt>length % 16</tt> bytes; a pad other than 0 or 8 zero bytes is logged.
# The first 4 bytes of a record are a little endian tag: high 16 bits are
# the property key, low 16 bits the type. Records with key 0 are skipped.
#
# Types handled: 0x0003 (long, bytes 8-11), 0x000b (boolean, bytes 8-11 non
# zero) and 0x0040 (systime, bytes 8-15 through Ole::Types.load_time).
# String, binary and directory types (including the multivalue string
# types) are ignored on purpose, and so is everything else.
def parse_properties(obj)
  raw = obj.read
  # don't really understand this that well...
  pad = raw.length % 16
  padding_ok = (pad == 0 || pad == 8) && raw[0...pad] == "\000" * pad
  Log.warn "padding was not as expected #{pad} (#{raw.length}) -> #{raw[0...pad].inspect}" unless padding_ok

  raw[pad..-1].scan(/.{16}/m).each do |record|
    tag = record.unpack('V').first
    key, type = tag >> 16, tag & 0xffff
    # key 0 makes no sense; probably a serialization of some internal
    # outlook structure
    next if key == 0

    case type
    when 0x0102, 0x001e, 0x001f, 0x101e, 0x101f, 0x000d
      # ignored on purpose. not sure what the data here is for
    when 0x0003 # long
      # don't know what the rest of the record is for
      add_property key, record[8, 4].unpack('V').first
    when 0x000b # boolean
      # the stored numbers are not always 0 or 1, so anything non zero
      # counts as true. this may be wrong.
      add_property key, record[8, 4].unpack('V')[0] != 0
    when 0x0040 # systime
      add_property key, Ole::Types.load_time(record[8..-1])
    end
  end
end
283
+
284
# Records +value+ under +key+.
#
# Integer keys of 0x8000 and above are in the named property range and are
# translated through the nameid map. When there is no map, or the map has
# no entry for the key, a warning is logged and the raw number is wrapped
# in a Key like any other key.
#
# With +pos+ the value is one piece of a multivalue property and is stored
# at that index of an array kept under the key. Without +pos+ the value
# replaces whatever was stored before (the last one wins), with a warning
# on duplicates.
def add_property(key, value, pos = nil)
  in_named_range = Integer === key && key >= 0x8000
  mapped = in_named_range && @nameid ? @nameid[key] : nil
  if mapped
    key = mapped
  else
    if in_named_range
      if @nameid
        # probably a named property in the PS_MAPI guid
        Log.warn "property in named range not in nameid #{key.inspect}"
      else
        Log.warn "no nameid section yet named properties used"
      end
    end
    key = Key.new(key)
  end

  unless pos
    # take the last.
    Log.warn "duplicate property #{key.inspect}" if @cache[key]
    return @cache[key] = value
  end

  @cache[key] ||= []
  # the real issue is that all the pieces must be of the same type
  Log.warn "duplicate property" unless Array === @cache[key]
  @cache[key][pos] = value
end
313
+
314
+ # delegate to cache
315
# Forwards every message the store does not define itself to the backing
# @cache hash, passing the arguments and block through unchanged.
def method_missing(name, *args, &block)
  @cache.__send__(name, *args, &block)
end
318
+ end
319
+
320
+ # these 2 will actually be of the form
321
+ # 1\.0_#([0-9A-Z]{8}), where $1 is the 0 based index number in hex
322
+ # should i parse that and use it as an index, or just return in
323
+ # file order? probably should use it later...
324
# Name patterns of the attachment and recipient sub-directories.
ATTACH_RX = %r{^__attach_version1\.0_.*}
RECIP_RX = %r{^__recip_version1\.0_.*}
# Every name recognised directly under a message: the property store's own
# objects plus the attachment and recipient directories.
VALID_RX = Regexp.union(PropertyStore::VALID_RX, ATTACH_RX, RECIP_RX)

# the dirent this Msg was created from
attr_reader :root
# whether #close should also close the ole storage that owns +root+
attr_accessor :close_parent
330
+
331
+ # Alternate constructor, to create an +Msg+ directly from +arg+ and +mode+, passed
332
+ # directly to Ole::Storage (ie either filename or seekable IO object).
333
# Alternate constructor: creates an +Msg+ directly from +arg+ and +mode+,
# which are passed straight to Ole::Storage.open (ie either a filename or a
# seekable IO object).
#
# The Msg owns the storage it opened: +close_parent+ is set, so closing the
# Msg closes the storage too.
#
# With a block, the Msg is yielded and closed afterwards (even if the block
# raises) and the block's value is returned. Without a block the open Msg
# is returned and the caller is responsible for calling #close.
#
# If building the Msg from the storage fails, the storage is closed before
# the error is re-raised, so no open storage is left behind.
def self.open(arg, mode = nil)
  ole = Ole::Storage.open(arg, mode)
  msg = begin
    new(ole.root)
  rescue StandardError
    ole.close
    raise
  end
  # we will close the ole when we are #closed
  msg.close_parent = true
  return msg unless block_given?

  begin
    yield msg
  ensure
    msg.close
  end
end
344
+
345
+ # Create an Msg from +root+, an <tt>Ole::Storage::Dirent</tt> object
346
# Creates an Msg from +root+, an <tt>Ole::Storage::Dirent</tt> object.
#
# The properties are loaded from +root+ right away and handed to the
# superclass wrapped in a PropertySet. Unrecognised children of +root+ are
# reported through Msg.warn_unknown. The ole storage is not closed by #close
# unless +close_parent+ is set afterwards.
def initialize(root)
  @root, @close_parent = root, false
  store = PropertyStore.load(@root)
  super(PropertySet.new(store))
  Msg.warn_unknown(@root)
end
352
+
353
# Light validation: logs a warning with the number of children of +obj+
# whose names match none of the known patterns (VALID_RX). Extra objects
# are not an error, and no check is made on whether a child is a file or a
# directory.
def self.warn_unknown(obj)
  unknown_count = obj.children.count { |child| child.name !~ VALID_RX }
  Log.warn "skipped #{unknown_count} unknown msg object(s)" unless unknown_count.zero?
end
359
+
360
# Closes the ole storage that owns the root dirent, but only when this Msg
# is responsible for it (+close_parent+ set, as Msg.open does).
def close
  return unless @close_parent

  @root.ole.close
end
363
+
364
# The message's attachments, built once and memoized: one Attachment per
# directory child of the root whose name matches ATTACH_RX, keeping only
# those that report themselves as valid.
def attachments
  return @attachments if @attachments

  attach_dirs = @root.children.select { |entry| entry.dir? && entry.name =~ ATTACH_RX }
  @attachments = attach_dirs.map { |dir| Attachment.new(dir) }.select(&:valid?)
end
370
+
371
# The message's recipients, built once and memoized: one Recipient per
# directory child of the root whose name matches RECIP_RX, in child order.
def recipients
  return @recipients if @recipients

  recip_dirs = @root.children.select { |entry| entry.dir? && entry.name =~ RECIP_RX }
  @recipients = recip_dirs.map { |dir| Recipient.new(dir) }
end
376
+
377
# An attachment directory of a .msg file.
#
# Besides its properties, an attachment may carry an embedded ole object: a
# directory child named like a substg with key 3701 and type 000d. That
# child is kept in @embedded_ole, and when it looks like an Outlook message
# it is additionally parsed into @embedded_msg.
class Attachment < Mapi::Attachment
  attr_reader :obj, :properties
  alias_method :props, :properties

  # +obj+ is the attachment's dirent. Properties are loaded immediately and
  # unrecognised children are reported through Msg.warn_unknown.
  def initialize(obj)
    @obj = obj
    @embedded_ole = @embedded_msg = nil

    super(PropertySet.new(PropertyStore.load(@obj)))
    Msg.warn_unknown(@obj)

    @obj.children.each do |child|
      # temp hack. PropertyStore doesn't do directory properties atm - FIXME
      next unless child.dir?
      next unless child.name =~ PropertyStore::SUBSTG_RX
      next unless $1 == '3701' and $2.downcase == '000d'

      @embedded_ole = child
      # helpers added to this one dirent only
      class << @embedded_ole
        # Text captured after the first 32 bytes of the "\001CompObj"
        # stream (everything up to the next NUL), or nil without a stream.
        def compobj
          stream = self["\001CompObj"]
          return nil unless stream
          stream.read[/^.{32}([^\x00]+)/m, 1]
        end

        # The CompObj name when there is one. Otherwise a guess: more than
        # two children with msg style names means an Outlook message. nil
        # when nothing can be determined.
        def embedded_type
          declared = compobj
          return declared if declared
          msg_like = children.select { |entry| entry.name =~ /__(substg|properties|recip|attach|nameid)/ }
          return 'Microsoft Office Outlook Message' if msg_like.length > 2
          nil
        end
      end

      if @embedded_ole.embedded_type == 'Microsoft Office Outlook Message'
        @embedded_msg = Msg.new(@embedded_ole)
      end
    end
  end

  # False for the strange case of an attachment without any properties,
  # first noticed when handling embedded ole object attachments.
  def valid?
    !props.raw.keys.empty?
  end
end
422
+
423
+ #
424
+ # +Recipient+ serves as a container for the +recip+ directories in the .msg.
425
+ # It has things like office_location, business_telephone_number, but I don't
426
+ # think enough to make a vCard out of?
427
+ #
428
# Container for one +recip+ directory of the .msg file.
class Recipient < Mapi::Recipient
  attr_reader :obj, :properties
  alias_method :props, :properties

  # +obj+ is the recipient's dirent. Its properties are loaded immediately
  # and passed to the superclass wrapped in a PropertySet; unrecognised
  # children are reported through Msg.warn_unknown.
  def initialize(obj)
    @obj = obj
    store = PropertyStore.load(@obj)
    super(PropertySet.new(store))
    Msg.warn_unknown(@obj)
  end
end
438
+ end
439
+ end
440
+