ruby-msg 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +108 -113
- data/Rakefile +42 -28
- data/bin/mapitool +195 -0
- data/lib/mapi.rb +109 -0
- data/lib/mapi/convert.rb +61 -0
- data/lib/mapi/convert/contact.rb +142 -0
- data/lib/mapi/convert/note-mime.rb +274 -0
- data/lib/mapi/convert/note-tmail.rb +287 -0
- data/lib/mapi/msg.rb +440 -0
- data/lib/mapi/property_set.rb +269 -0
- data/lib/mapi/pst.rb +1806 -0
- data/lib/mapi/rtf.rb +169 -0
- data/lib/mapi/types.rb +51 -0
- data/lib/rtf.rb +0 -9
- data/test/test_convert_contact.rb +60 -0
- data/test/test_convert_note.rb +66 -0
- data/test/test_mime.rb +4 -2
- data/test/test_msg.rb +29 -0
- data/test/test_property_set.rb +116 -0
- data/test/test_types.rb +17 -0
- metadata +78 -48
- data/bin/msgtool +0 -65
- data/lib/msg.rb +0 -522
- data/lib/msg/properties.rb +0 -532
- data/lib/msg/rtf.rb +0 -236
@@ -0,0 +1,287 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'tmail'
|
3
|
+
|
4
|
+
# these will be removed later
|
5
|
+
require 'time'
|
6
|
+
require 'mime'
|
7
|
+
|
8
|
+
# there is some Msg specific stuff in here.
|
9
|
+
|
10
|
+
# Msg-specific extension of TMail's mail object.
class TMail::Mail
  # Writes +str+ straight into the body port, replacing whatever was there,
  # and hands +str+ back to the caller.
  def quoted_body=(str)
    body_port.wopen do |port|
      port.write(str)
    end
    str
  end
end
|
16
|
+
|
17
|
+
module Mapi
|
18
|
+
class Message
|
19
|
+
# Returns the Mime object holding this message's headers, building it on
# first use and memoizing it in @mime.
def mime
  unless @mime
    # if the transport headers exist at all, they can be helpful. we may get
    # an application/ms-tnef mime root (little more than headers), nothing at
    # all, or - when received from external - the full thing, boundaries
    # included. sometimes it is multipart with no boundaries, which throws an
    # error, so we are more forgiving here (second argument).
    #
    # @mime has to be assigned before populate_headers runs, because
    # populate_headers reaches back into it through #headers.
    @mime = Mime.new(props.transport_message_headers.to_s, true)
    populate_headers
  end
  @mime
end
|
32
|
+
|
33
|
+
# Shortcut to the header table of the memoized mime object.
def headers
  mime.headers
end
|
36
|
+
|
37
|
+
# copy data from msg properties storage to standard mime. headers
|
38
|
+
# i've now seen it where the existing headers had heaps on stuff, and the msg#props had
|
39
|
+
# practically nothing. think it was because it was a tnef - msg conversion done by exchange.
|
40
|
+
# Copies data from the mapi property storage into the mime headers.
#
# Reads +props+ and +recipients+, and writes into +headers+ (arrays of strings
# keyed by header name). Values are assigned as-is (utf8 strings); no encoded
# word handling is done here.
#
# * From: rebuilt from the sender properties when the sender is SMTP, otherwise
#   only synthesised when the transport headers carry no From at all.
# * To / Cc / Bcc: rebuilt from the recipients of that type, when there are any.
# * Subject: taken from props when present.
# * Date: only filled in when the transport headers have none.
# * Remaining simple headers: overwritten from the matching mapi property, but
#   only when that property actually has a value.
def populate_headers
  # construct a From value.
  # should this only be done when headers don't exist already? maybe not: if a
  # mail is sent, then modified and saved, the headers could be wrong. on the
  # other hand a mail sent by an internal user can have transport headers (one
  # recipient was external) and then the only place the sender's email address
  # exists is in those headers - so we only overwrite From for smtp senders.
  name, email = props.sender_name, props.sender_email_address
  if props.sender_addrtype == 'SMTP'
    headers['From'] = if name and email and name != email
      [%{"#{name}" <#{email}>}]
    else
      [email || name]
    end
  elsif !headers.has_key?('From')
    # some messages were never sent, so the sender stuff isn't filled out.
    # what about marking whether we think the email was sent or not? or draft?
    # for partition into an eventual Inbox, Sent, Draft mbox set?
    if email
      # no smtp sender address available (only X.400), so a fake one is created.
      # the logic was picked so that it generates correct addresses for the
      # original author's organisation; the user part is based on the alias.
      user = name ? name.sub(/(.*), (.*)/, "\\2.\\1") : email[/\w+$/].downcase
      domain = (email[%r{^/O=([^/]+)}i, 1].downcase + '.com' rescue email)
      headers['From'] = [name ? %{"#{name}" <#{user}@#{domain}>} : "<#{user}@#{domain}>"]
    elsif name
      # we only have a name
      headers['From'] = [%{"#{name}"}]
    end
    # else there is no sender information at all; nothing is set. FIXME
  end

  recips_by_type = recipients.group_by { |recip| recip.type }
  # the types are wanted in a specific order.
  [:to, :cc, :bcc].each do |type|
    # a native group_by hash has no entry for a type without recipients, so
    # fall back to an empty list instead of calling methods on nil.
    recips = recips_by_type[type] || []
    # if we can, sort recipients by the numerical part of the ole name,
    # otherwise leave the order alone.
    recips = (recips.sort_by { |recip| recip.obj.name[/\d{8}$/].hex } rescue recips)
    # switched to using , for separation, not ;. see issue #4
    headers[type.to_s.capitalize] = [recips.join(', ')] unless recips.empty?
  end
  headers['Subject'] = [props.subject] if props.subject

  # fill in a date value. by default, we won't mess with an existing value here
  if !headers.has_key?('Date')
    # we want a received date; take the first property that has a value, in
    # this preference order.
    keys = %w[message_delivery_time client_submit_time last_modification_time creation_time]
    time = nil
    keys.each { |key| break if (time = props.send(key)) }
    time = nil unless Date === time

    # no timezone info is available; the value is emitted as it is stored.
    # .localtime could be added here if desired, but that feels wrong.
    headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
  end

  # some very simplistic mapping between internet message headers and the
  # mapi properties. any of these could be causing duplicates due to case
  # issues. an optional third element filters/transforms the value; returning
  # nil from it suppresses the header.
  mapi_header_map = [
    [:internet_message_id, 'Message-ID'],
    [:in_reply_to_id, 'In-Reply-To'],
    # don't set these values if they're equal to the defaults anyway
    [:importance, 'Importance', proc { |val| val.to_s == '1' ? nil : val }],
    [:priority, 'Priority', proc { |val| val.to_s == '1' ? nil : val }],
    [:sensitivity, 'Sensitivity', proc { |val| val.to_s == '0' ? nil : val }],
    # yeah?
    [:conversation_topic, 'Thread-Topic'],
    # not sure of the distinction here
    # :originator_delivery_report_requested ??
    [:read_receipt_requested, 'Disposition-Notification-To', proc { |val| from }]
  ]
  mapi_header_map.each do |mapi, header_name, *filter|
    # a property without a value must never clobber a header that came from
    # the transport headers (it used to be replaced by an empty string).
    next unless val = props.send(mapi)
    next if filter[0] and !(val = filter[0].call(val))
    headers[header_name] = [val.to_s]
  end
end
|
145
|
+
|
146
|
+
# redundant?
|
147
|
+
# Lower-cased part of the message class that follows "IPM." (for example
# "note" for "IPM.Note"), or nil when there is no message class or it has no
# "IPM." component.
#
# Only the two expected "no value" cases are mapped to nil; other errors are
# no longer swallowed by a blanket rescue.
def type
  message_class = props.message_class
  return nil unless message_class
  suffix = message_class.to_s[/IPM\.(.*)/, 1]
  suffix && suffix.downcase
end
|
150
|
+
|
151
|
+
# shortcuts to some things from the headers
|
152
|
+
# Defines #from, #to, #cc, #bcc and #subject. Each returns the values of the
# corresponding header joined by a single space, or nil when the header is
# not present.
%w[From To Cc Bcc Subject].each do |header_name|
  define_method(header_name.downcase) do
    next unless headers.has_key?(header_name)
    headers[header_name].join(' ')
  end
end
|
155
|
+
|
156
|
+
# Builds the TMail part that carries the message body.
#
# Without an rtf or html body the result is a single text/plain part. With
# one, the result is a multipart/alternative container holding the plain body
# (when present) and the html body (when present). The rtf body itself is
# currently never emitted, so it is possible to get a container with no body
# at all when rtf is the only body - that is not really acceptable. FIXME
def body_to_tmail
  unless props.body_rtf or props.body_html
    # check no header case. content type? etc?
    Log.debug "taking that other path"
    # body can be nil, hence the to_s
    return TMail::Mail.parse("Content-Type: text/plain\r\n\r\n" + props.body.to_s)
  end

  # should plain come first?
  alternative = TMail::Mail.new
  # its actually possible for the plain body to be empty, but the others not.
  if props.body
    alternative.parts << TMail::Mail.parse("Content-Type: text/plain\r\n\r\n" + props.body)
  end
  # this may be automatically unwrapped from the rtf if the rtf includes the html
  if props.body_html
    alternative.parts << TMail::Mail.parse("Content-Type: text/html\r\n\r\n" + props.body_html)
  end
  # temporarily disabled the rtf. its just showing up as an attachment anyway.
  #mime.parts << Mime.new("Content-Type: text/rtf\r\n\r\n" + props.body_rtf) if props.body_rtf
  alternative['Content-Type'] = 'multipart/alternative'
  alternative
end
|
183
|
+
|
184
|
+
# Converts the message to a TMail mail object.
#
# Intended for IPM.Note (the email type); other message classes are converted
# on a best-effort basis after a warning. YMMV.
#
# Raises NotImplementedError when the message has attachments: wrapping the
# body and the attachments in a multipart/mixed (or multipart/related, for
# parts carrying Content-ID / Content-Location) tree has not been ported to
# tmail yet. The previous, unreachable sketch of that code was written against
# the Mime class and has been dropped in favour of this note.
def to_tmail
  Log.warn "to_tmail used on a #{props.message_class}" unless props.message_class == 'IPM.Note'

  # we always have a body
  mail = body_to_tmail

  unless attachments.empty?
    raise NotImplementedError, 'conversion of messages with attachments to tmail is not implemented'
  end

  # at this point, mail is either
  # - a single text/plain, consisting of the body ('taking that other path'. rare)
  # - a multipart/alternative, consisting of a few bodies (plain and html body. common)
  # we add this standard preamble if its multipart. only the root gets it, not
  # nested multipart chunks.
  mail.quoted_body = "This is a multi-part message in MIME format.\r\n" if mail.multipart?

  # now that we have a root, we can mix in all our headers
  headers.each do |key, vals|
    # don't overwrite the content-type, encoding style stuff
    next if mail[key]
    # some new temporary hacks
    next if key =~ /content-type/i and vals[0] =~ /base64/
    mail[key] = vals.first
  end

  mail
end
|
234
|
+
end
|
235
|
+
|
236
|
+
class Attachment
  # Converts the attachment to a base64 encoded TMail part.
  #
  # The content type comes from the attach_mime_tag property, falling back to
  # application/octet-stream (TODO: smarter mime typing). Content-Location and
  # Content-ID are copied over when the properties are set.
  #
  # Raises NotImplementedError for embedded messages and embedded ole objects:
  # serialising those (as message/rfc822, respectively as a fresh ole storage)
  # has only been written against the Mime class and is not ported to tmail.
  def to_tmail
    raise NotImplementedError, 'embedded messages cannot be converted to tmail yet' if @embedded_msg
    raise NotImplementedError, 'embedded ole objects cannot be converted to tmail yet' if @embedded_ole

    mimetype = props.attach_mime_tag || 'application/octet-stream'
    part = TMail::Mail.parse "Content-Type: #{mimetype}\r\n\r\n"
    part['Content-Disposition'] = %{attachment; filename="#{filename}"}
    part['Content-Transfer-Encoding'] = 'base64'
    part['Content-Location'] = props.attach_content_location if props.attach_content_location
    part['Content-ID'] = props.attach_content_id if props.attach_content_id

    # data is normally a stream, but it has been seen to be nil, and could be
    # a plain string; cope with all three instead of blindly calling #read.
    raw = data
    raw = raw.read if raw.respond_to?(:read)
    data_str = raw.to_s

    # pack('m') produces exactly what Base64.encode64 does, without relying on
    # the base64 library having been loaded by someone else.
    part.body = [data_str].pack('m').gsub(/\n/, "\r\n")
    part
  end
end
|
274
|
+
|
275
|
+
class Msg < Message
  # Runs the generic header population, then - if there is still no Date
  # header - derives one from the ole storage: the most recent of the dirent
  # timestamps (modify time, falling back to create time). This is in a
  # similar vein to what msgconvert.pl does.
  def populate_headers
    super
    return if headers.has_key?('Date')

    stamps = @root.ole.dirents.map { |dirent| dirent.modify_time || dirent.create_time }
    time = stamps.compact.max
    headers['Date'] = [Time.iso8601(time.to_s).rfc2822] if time
  end
end
|
286
|
+
end
|
287
|
+
|
data/lib/mapi/msg.rb
ADDED
@@ -0,0 +1,440 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'ole/storage'
|
3
|
+
require 'mapi'
|
4
|
+
require 'mapi/rtf'
|
5
|
+
|
6
|
+
module Mapi
|
7
|
+
#
|
8
|
+
# = Introduction
|
9
|
+
#
|
10
|
+
# Primary class interface to the vagaries of .msg files.
|
11
|
+
#
|
12
|
+
# The core of the work is done by the <tt>Msg::PropertyStore</tt> class.
|
13
|
+
#
|
14
|
+
class Msg < Message
|
15
|
+
#
|
16
|
+
# = Introduction
|
17
|
+
#
|
18
|
+
# A big component of +Msg+ files is the property store, which holds
|
19
|
+
# all the key/value pairs of properties. The message itself, and all
|
20
|
+
# its <tt>Attachment</tt>s and <tt>Recipient</tt>s have an instance of
|
21
|
+
# this class.
|
22
|
+
#
|
23
|
+
# = Storage model
|
24
|
+
#
|
25
|
+
# Property keys (tags?) can be either simple hex numbers, in the
|
26
|
+
# range 0x0000 - 0xffff, or they can be named properties. In fact,
|
27
|
+
# properties in the range 0x0000 to 0x7fff are supposed to be the non-
|
28
|
+
# named properties, and can be considered to be in the +PS_MAPI+
|
29
|
+
# namespace. (correct?)
|
30
|
+
#
|
31
|
+
# Named properties are serialized in the 0x8000 to 0xffff range,
|
32
|
+
# and are referenced as a guid and long/string pair.
|
33
|
+
#
|
34
|
+
# There are key ranges, which can be used to imply things generally
|
35
|
+
# about keys.
|
36
|
+
#
|
37
|
+
# Further, we can give symbolic names to most keys, coming from
|
38
|
+
# constants in various places. Eg:
|
39
|
+
#
|
40
|
+
# 0x0037 => subject
|
41
|
+
# {00062002-0000-0000-C000-000000000046}/0x8218 => response_status
|
42
|
+
# # displayed as categories in outlook
|
43
|
+
# {00020329-0000-0000-C000-000000000046}/"Keywords" => categories
|
44
|
+
#
|
45
|
+
# Further, there are completely different names, coming from other
|
46
|
+
# object models that get mapped to these things (CDO's model,
|
47
|
+
# Outlook's model etc). Eg "urn:schemas:httpmail:subject"
|
48
|
+
# I think these can be ignored though, as they aren't defined clearly
|
49
|
+
# in terms of mapi properties, and i'm really just trying to make
|
50
|
+
# a mapi property store. (It should also be relatively easy to
|
51
|
+
# support them later.)
|
52
|
+
#
|
53
|
+
# = Usage
|
54
|
+
#
|
55
|
+
# The api is driven by a desire to have the simple stuff "just work", ie
|
56
|
+
#
|
57
|
+
# properties.subject
|
58
|
+
# properties.display_name
|
59
|
+
#
|
60
|
+
# There also needs to be a way to look up properties more specifically:
|
61
|
+
#
|
62
|
+
# properties[0x0037] # => gets the subject
|
63
|
+
# properties[0x0037, PS_MAPI] # => still gets the subject
|
64
|
+
# properties['Keywords', PS_PUBLIC_STRINGS] # => gets outlook's categories array
|
65
|
+
#
|
66
|
+
# The abbreviated versions work by "resolving" the symbols to full keys:
|
67
|
+
#
|
68
|
+
# # the guid here is just PS_PUBLIC_STRINGS
|
69
|
+
# properties.resolve :keywords # => #<Key {00020329-0000-0000-c000-000000000046}/"Keywords">
|
70
|
+
# # the result here is actually also a key
|
71
|
+
# k = properties.resolve :subject # => 0x0037
|
72
|
+
# # it has a guid
|
73
|
+
# k.guid == Msg::Properties::PS_MAPI # => true
|
74
|
+
#
|
75
|
+
# = Parsing
|
76
|
+
#
|
77
|
+
# There are three objects that need to be parsed to load a +Msg+ property store:
|
78
|
+
#
|
79
|
+
# 1. The +nameid+ directory (<tt>Properties.parse_nameid</tt>)
|
80
|
+
# 2. The many +substg+ objects, whose names should match <tt>Properties::SUBSTG_RX</tt>
|
81
|
+
# (<tt>Properties#parse_substg</tt>)
|
82
|
+
# 3. The +properties+ file (<tt>Properties#parse_properties</tt>)
|
83
|
+
#
|
84
|
+
# Understanding of the formats is by no means perfect.
|
85
|
+
#
|
86
|
+
# = TODO
|
87
|
+
#
|
88
|
+
# * While the key objects are sufficient, the value objects are just plain
|
89
|
+
# ruby types. It currently isn't possible to write to the values, or to know
|
90
|
+
# which encoding the value had.
|
91
|
+
# * Update this doc.
|
92
|
+
# * Perhaps change from eager loading, to be load-on-demand.
|
93
|
+
#
|
94
|
+
class PropertyStore
|
95
|
+
include PropertySet::Constants
|
96
|
+
Key = PropertySet::Key
|
97
|
+
|
98
|
+
# note that binary and default both use obj.open. not the block form. this means we should
|
99
|
+
# #close it later, which we don't. as we're only reading though, it shouldn't matter right?
|
100
|
+
# not really good though FIXME
|
101
|
+
# change these to use mapi symbolic const names
|
102
|
+
# Decoders for substg objects, keyed by the numeric encoding (property type)
# taken from the object name. Each proc receives the ole object and returns
# the decoded value. :default is used for encodings without an entry.
#
# The table is frozen: it is shared by every PropertyStore and is only ever
# read.
ENCODINGS = {
  # seems to be used when its going to be a directory instead of a file. eg
  # nested ole. 3701 usually. in which case we shouldn't get here right?
  0x000d => proc { |obj| obj },
  # unicode
  0x001f => proc { |obj| Ole::Types::FROM_UTF16.iconv obj.read },
  # ascii
  # FIXME hack did a[0..-2] before, seems right sometimes, but for some others
  # it chopped the text. chomp
  0x001e => proc { |obj| obj.read.chomp 0.chr },
  # binary?
  0x0102 => proc { |obj| obj.open },
  :default => proc { |obj| obj.open }
}.freeze
|
111
|
+
|
112
|
+
SUBSTG_RX = /^__substg1\.0_([0-9A-F]{4})([0-9A-F]{4})(?:-([0-9A-F]{8}))?$/
|
113
|
+
PROPERTIES_RX = /^__properties_version1\.0$/
|
114
|
+
NAMEID_RX = /^__nameid_version1\.0$/
|
115
|
+
VALID_RX = /#{SUBSTG_RX}|#{PROPERTIES_RX}|#{NAMEID_RX}/
|
116
|
+
|
117
|
+
attr_reader :nameid
|
118
|
+
|
119
|
+
# Creates an empty store. Properties are added by #load / #add_property.
def initialize
  # key => value table (not exactly a cache currently)
  @cache = {}
  # named property map; stays nil until #load finds or inherits one
  @nameid = nil
end
|
124
|
+
|
125
|
+
#--
|
126
|
+
# The parsing methods
|
127
|
+
#++
|
128
|
+
|
129
|
+
# Convenience constructor: builds a new store, parses the properties found
# under +obj+ into it, and returns the store.
def self.load(obj)
  store = new
  store.load(obj)
  store
end
|
134
|
+
|
135
|
+
# Parse properties from the +Dirent+ obj
|
136
|
+
# Parses the properties stored under +obj+ into this store.
#
# The nameid directory is handled first, since it provides the map needed to
# resolve named (user defined) properties. When +obj+ has no nameid directory
# of its own, the map previously attached to the ole storage is reused.
def load(obj)
  nameid_obj = obj.children.find { |child| child.name =~ NAMEID_RX }
  if nameid_obj
    @nameid = PropertyStore.parse_nameid(nameid_obj)
    # hack to make it available to all msg files from the same ole storage object
    # FIXME - come up with a neater way
    class << obj.ole
      attr_accessor :msg_nameid
    end
    obj.ole.msg_nameid = @nameid
  elsif obj.ole
    # the accessor only exists if an earlier load attached it
    @nameid = begin
      obj.ole.msg_nameid
    rescue StandardError
      nil
    end
  end

  # now parse the actual properties. dirs that match the substg pattern should
  # probably be decoded as properties too (0x000d is just the dir encoding),
  # but currently only files are looked at.
  obj.children.each do |child|
    next unless child.file?
    case child.name
    when PROPERTIES_RX
      parse_properties(child)
    when SUBSTG_RX
      # captures are: key, encoding, optional multivalue offset (nil if absent)
      numbers = $~.captures.map { |num| num.hex rescue nil }
      parse_substg(*(numbers + [child]))
    end
  end
end
|
162
|
+
|
163
|
+
# Read nameid from the +Dirent+ obj, which is used for mapping of named properties keys to
|
164
|
+
# proxy keys in the 0x8000 - 0xffff range.
|
165
|
+
# Returns a hash of integer -> Key.
|
166
|
+
# Read nameid from the +Dirent+ obj, which is used for mapping of named
# properties keys to proxy keys in the 0x8000 - 0xffff range.
# Returns a hash of integer -> Key.
#
# Three streams are read: the guid stream (packed 16 byte guids), the entry
# stream (8 byte records) and the string stream (names referenced by byte
# offset from the entry records).
def self.parse_nameid obj
  remaining = obj.children.dup
  guids_obj, props_obj, names_obj =
    %w[__substg1.0_00020102 __substg1.0_00030102 __substg1.0_00040102].map do |name|
      remaining.delete(obj / name)
    end

  # parse guids
  # these are the guids for named properties other than the builtin ones
  # (PS_MAPI and PS_PUBLIC_STRINGS, which have fixed indexes).
  stream_guids = guids_obj.read.scan(/.{16}/m).map { |str| Ole::Types.load_guid str }
  guids = [PS_PUBLIC_STRINGS] + stream_guids

  # the string ids for named properties are not parsed up front, as they're
  # referred to by offset not index. they are simply sequentially packed, as a
  # long giving the string length, then the string, padded to a 4 byte
  # multiple, and repeat.
  names_data = names_obj.read

  # parse actual props.
  pairs = props_obj.read.scan(/.{8}/m).map do |entry|
    flags, index = entry[4..-1].unpack 'v2'
    # the property will be serialised as this pseudo property, mapping it to
    # this named property
    pseudo_prop = 0x8000 + index
    named = (flags & 1) == 1
    prop = if named
      # unpack returns an array; take the element explicitly (a bare splat
      # assignment yields an Array on ruby >= 1.9)
      str_off = entry.unpack('V').first
      len = names_data[str_off, 4].unpack('V').first
      Ole::Types::FROM_UTF16.iconv names_data[str_off + 4, len]
    else
      a, b = entry.unpack('v2')
      Log.debug "b not 0" if b != 0
      a
    end
    # guid index: 2 is PS_PUBLIC_STRINGS, 3 and up index the guid stream.
    # anything lower must not be used as an array index (it would wrap around
    # and pick an unrelated guid); it is resolved to the builtin PS_MAPI.
    guid_off = flags >> 1
    guid = if guid_off < 2
      Log.debug "guid off < 2 (#{guid_off})"
      PS_MAPI
    else
      guids[guid_off - 2]
    end
    [pseudo_prop, Key.new(prop, guid)]
  end

  #Log.warn "* ignoring #{remaining.length} objects in nameid" unless remaining.empty?
  # this leaves a bunch of other unknown chunks of data with completely unknown meaning.
  Hash[pairs]
end
|
217
|
+
|
218
|
+
# Parse an +Dirent+, as per <tt>msgconvert.pl</tt>. This is how larger properties, such
|
219
|
+
# as strings, binary blobs, and other ole sub-directories (eg nested Msg) are stored.
|
220
|
+
# Parse a +Dirent+, as per <tt>msgconvert.pl</tt>. This is how larger
# properties, such as strings, binary blobs, and other ole sub-directories
# (eg nested Msg) are stored.
#
# +key+ and +encoding+ are the numbers taken from the object name, +offset+ is
# the optional multivalue index (nil when the name carries none).
def parse_substg(key, encoding, offset, obj)
  multivalue = (encoding & 0x1000) != 0
  if multivalue
    # there is typically one object with no offset first, whose data is a
    # series of numbers equal to the lengths of all the sub parts (an implied
    # array size, presumably for pre-allocation). it is ignored.
    return unless offset
    # remove multivalue flag for individual pieces
    encoding &= ~0x1000
  else
    Log.warn "offset specified for non-multivalue encoding #{obj.name}" if offset
    # offset is only meaningful for multivalue encodings
    offset = nil
  end

  encoder = ENCODINGS[encoding]
  unless encoder
    Log.warn "unknown encoding #{encoding}"
    encoder = ENCODINGS[:default]
  end
  add_property(key, encoder[obj], offset)
end
|
246
|
+
|
247
|
+
# For parsing the +properties+ file. Smaller properties are serialized in one chunk,
|
248
|
+
# such as longs, bools, times etc. The parsing has problems.
|
249
|
+
# For parsing the +properties+ file. Smaller properties are serialized in one
# chunk, such as longs, bools, times etc. The parsing has problems.
#
# After an optional zero-filled lead-in (0 or 8 bytes is what is expected),
# the data is walked in 16 byte records. Only long, boolean and systime
# records are decoded; everything else is skipped.
def parse_properties(obj)
  raw = obj.read
  # don't really understand this that well...
  pad = raw.length % 16
  expected_padding = (pad == 0 || pad == 8) && raw[0...pad] == "\000" * pad
  unless expected_padding
    Log.warn "padding was not as expected #{pad} (#{raw.length}) -> #{raw[0...pad].inspect}"
  end

  raw[pad..-1].scan(/.{16}/m).each do |record|
    property, encoding = ('%08x' % record.unpack('V')).scan(/.{4}/)
    # doesn't make any sense to me. probably because its a serialization of
    # some internal outlook structure...
    next if property == '0000'
    key = property.hex

    case encoding
    when '0003' # long
      # don't know what all the other data is for
      add_property(key, *record[8, 4].unpack('V'))
    when '000b' # boolean
      # again, heaps more data than needed. and its not always 0 or 1.
      # they are in fact quite big numbers. this is wrong.
      add_property(key, record[8, 4].unpack('V')[0] != 0)
    when '0040' # systime
      # seems to work:
      add_property(key, Ole::Types.load_time(record[8..-1]))
    end
    # everything else is ignored: '0102', '001e', '001f', '000d' and the
    # multivalue '101e' / '101f' on purpose (not sure what the record is for
    # in those cases), any other encoding silently.
  end
end
|
283
|
+
|
284
|
+
# Stores +value+ under +key+.
#
# Integer keys in the named property range (0x8000 and up) are mapped through
# the nameid table; all other keys, and named keys that cannot be resolved,
# are wrapped in a plain Key.
#
# With +pos+ the value is one piece of a multivalue property and is stored at
# that index of an array. Without it the value replaces whatever was stored
# before ("take the last"), with a warning.
def add_property key, value, pos=nil
  # map keys in the named property range through nameid
  if Integer === key and key >= 0x8000
    if !@nameid
      Log.warn "no nameid section yet named properties used"
      key = Key.new key
    elsif real_key = @nameid[key]
      key = real_key
    else
      # i think i hit these when i have a named property, in the PS_MAPI
      # guid
      Log.warn "property in named range not in nameid #{key.inspect}"
      key = Key.new key
    end
  else
    key = Key.new key
  end
  if pos
    existing = @cache[key]
    unless Array === existing
      # a non-array value is already there. indexing into it would either
      # corrupt it (String#[]=) or raise, so it is replaced, as in the scalar
      # case below. the underlying issue is really that all values for a key
      # must be of the same type.
      Log.warn "duplicate property" if existing
      @cache[key] = []
    end
    @cache[key][pos] = value
  else
    # take the last.
    Log.warn "duplicate property #{key.inspect}" if @cache[key]
    @cache[key] = value
  end
end
|
313
|
+
|
314
|
+
# delegate to cache
|
315
|
+
# delegate to cache: any method this class does not define itself is
# forwarded, with its arguments and block, to the underlying hash.
def method_missing name, *args, &block
  @cache.send name, *args, &block
end

# Keeps #respond_to? in agreement with the delegation above.
def respond_to_missing? name, include_private=false
  @cache.respond_to?(name, include_private) || super
end
|
318
|
+
end
|
319
|
+
|
320
|
+
# these 2 will actually be of the form
|
321
|
+
# 1\.0_#([0-9A-Z]{8}), where $1 is the 0 based index number in hex
|
322
|
+
# should i parse that and use it as an index, or just return in
|
323
|
+
# file order? probably should use it later...
|
324
|
+
ATTACH_RX = /^__attach_version1\.0_.*/
|
325
|
+
RECIP_RX = /^__recip_version1\.0_.*/
|
326
|
+
VALID_RX = /#{PropertyStore::VALID_RX}|#{ATTACH_RX}|#{RECIP_RX}/
|
327
|
+
|
328
|
+
attr_reader :root
|
329
|
+
attr_accessor :close_parent
|
330
|
+
|
331
|
+
# Alternate constructor, to create an +Msg+ directly from +arg+ and +mode+, passed
|
332
|
+
# directly to Ole::Storage (ie either filename or seekable IO object).
|
333
|
+
# Alternate constructor, to create an +Msg+ directly from +arg+ and +mode+,
# passed directly to Ole::Storage (ie either filename or seekable IO object).
#
# Without a block the msg is returned and the caller is responsible for
# calling #close. With a block the msg is yielded, closed afterwards, and the
# block's value is returned.
#
# If constructing the msg fails, the freshly opened ole storage is closed
# before the error is re-raised, so it is not leaked.
def self.open arg, mode=nil
  ole = Ole::Storage.open(arg, mode)
  begin
    msg = new ole.root
  rescue
    ole.close
    raise
  end
  # we will close the ole when we are #closed
  msg.close_parent = true
  return msg unless block_given?
  begin
    yield msg
  ensure
    msg.close
  end
end
|
344
|
+
|
345
|
+
# Create an Msg from +root+, an <tt>Ole::Storage::Dirent</tt> object
|
346
|
+
# Create an Msg from +root+, an <tt>Ole::Storage::Dirent</tt> object.
#
# The properties under +root+ are loaded eagerly, and a warning is logged for
# children of +root+ that are not recognised.
def initialize(root)
  @root = root
  # only Msg.open flips this; a bare Msg never closes a storage it was handed
  @close_parent = false
  store = PropertyStore.load(@root)
  super(PropertySet.new(store))
  Msg.warn_unknown(@root)
end
|
352
|
+
|
353
|
+
# Bit of validation: logs a warning with the number of children of +obj+
# whose names match none of the known patterns. Not important if there is
# extra stuff, though it would be interesting to know what it is. Doesn't
# check dir/file stuff.
def self.warn_unknown(obj)
  unknown = obj.children.select { |child| child.name !~ VALID_RX }
  return if unknown.empty?
  Log.warn "skipped #{unknown.length} unknown msg object(s)"
end
|
359
|
+
|
360
|
+
# Closes the underlying ole storage, but only when close_parent is set
# (Msg.open sets it).
def close
  return unless @close_parent
  @root.ole.close
end
|
363
|
+
|
364
|
+
# The valid attachments of this message, one per attach directory under the
# root. Built on first use and memoized.
def attachments
  @attachments ||= begin
    attach_dirs = @root.children.select { |child| child.dir? and child.name =~ ATTACH_RX }
    candidates = attach_dirs.map { |dir| Attachment.new(dir) }
    candidates.select { |attach| attach.valid? }
  end
end
|
370
|
+
|
371
|
+
# The recipients of this message, one per recip directory under the root.
# Built on first use and memoized.
def recipients
  @recipients ||= begin
    recip_dirs = @root.children.select { |child| child.dir? and child.name =~ RECIP_RX }
    recip_dirs.map { |dir| Recipient.new(dir) }
  end
end
|
376
|
+
|
377
|
+
# An attachment stored in an attach directory of the .msg.
class Attachment < Mapi::Attachment
  attr_reader :obj, :properties
  alias props :properties

  # +obj+ is the attach directory. Its properties are loaded eagerly. A child
  # directory named like substg 3701 with encoding 000d is kept as the
  # embedded ole object; when that object looks like an outlook message it is
  # additionally wrapped in a Msg (@embedded_msg).
  def initialize(obj)
    @obj = obj
    @embedded_ole = nil
    @embedded_msg = nil

    super(PropertySet.new(PropertyStore.load(@obj)))
    Msg.warn_unknown(@obj)

    @obj.children.each do |child|
      # temp hack. PropertyStore doesn't do directory properties atm - FIXME
      embedded = child.dir? && child.name =~ PropertyStore::SUBSTG_RX &&
        $1 == '3701' && $2.downcase == '000d'
      next unless embedded

      @embedded_ole = child
      class << @embedded_ole
        # Text found after the first 32 bytes of the "\001CompObj" stream, up
        # to the first NUL; nil when there is no such stream.
        def compobj
          stream = self["\001CompObj"]
          return nil unless stream
          stream.read[/^.{32}([^\x00]+)/m, 1]
        end

        # The compobj text if there is one. Otherwise a guess: more than two
        # msg-like children means an outlook message. nil when unknown.
        def embedded_type
          declared = compobj
          return declared if declared
          # try to guess more
          msg_like = children.select { |child| child.name =~ /__(substg|properties|recip|attach|nameid)/ }
          msg_like.length > 2 ? 'Microsoft Office Outlook Message' : nil
        end
      end

      if @embedded_ole.embedded_type == 'Microsoft Office Outlook Message'
        @embedded_msg = Msg.new(@embedded_ole)
      end
    end
  end

  # False for the particularly strange case of empty attachments, noticed
  # when handling embedded ole object attachments: ones with no properties.
  def valid?
    !props.raw.keys.empty?
  end
end
|
422
|
+
|
423
|
+
#
|
424
|
+
# +Recipient+ serves as a container for the +recip+ directories in the .msg.
|
425
|
+
# It has things like office_location, business_telephone_number, but I don't
|
426
|
+
# think enough to make a vCard out of?
|
427
|
+
#
|
428
|
+
# A recipient stored in a recip directory of the .msg.
class Recipient < Mapi::Recipient
  attr_reader :obj, :properties
  alias props :properties

  # +obj+ is the recip directory; its properties are loaded eagerly and a
  # warning is logged for children that are not recognised.
  def initialize(obj)
    @obj = obj
    store = PropertyStore.load(@obj)
    super(PropertySet.new(store))
    Msg.warn_unknown(@obj)
  end
end
|
438
|
+
end
|
439
|
+
end
|
440
|
+
|