ruby-msg 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +108 -113
- data/Rakefile +42 -28
- data/bin/mapitool +195 -0
- data/lib/mapi.rb +109 -0
- data/lib/mapi/convert.rb +61 -0
- data/lib/mapi/convert/contact.rb +142 -0
- data/lib/mapi/convert/note-mime.rb +274 -0
- data/lib/mapi/convert/note-tmail.rb +287 -0
- data/lib/mapi/msg.rb +440 -0
- data/lib/mapi/property_set.rb +269 -0
- data/lib/mapi/pst.rb +1806 -0
- data/lib/mapi/rtf.rb +169 -0
- data/lib/mapi/types.rb +51 -0
- data/lib/rtf.rb +0 -9
- data/test/test_convert_contact.rb +60 -0
- data/test/test_convert_note.rb +66 -0
- data/test/test_mime.rb +4 -2
- data/test/test_msg.rb +29 -0
- data/test/test_property_set.rb +116 -0
- data/test/test_types.rb +17 -0
- metadata +78 -48
- data/bin/msgtool +0 -65
- data/lib/msg.rb +0 -522
- data/lib/msg/properties.rb +0 -532
- data/lib/msg/rtf.rb +0 -236
data/lib/mapi/property_set.rb
ADDED
@@ -0,0 +1,269 @@
require 'yaml'
require 'mapi/types'
require 'mapi/rtf'
require 'rtf'

module Mapi
  #
  # The Mapi::PropertySet class is used to wrap the lower level Msg or Pst property stores,
  # and provide a consistent and more friendly interface. It allows you to just say:
  #
  #   properties.subject
  #
  # instead of:
  #
  #   properites.raw[0x0037, PS_MAPI]
  #
  # The underlying store can be just a hash, or lazily loading directly from the file. A good
  # compromise is to cache all the available keys, and just return the values on demand, rather
  # than load up many possibly unwanted values.
  #
  class PropertySet
    # the property set guid constants
    # these guids are all defined with the macro DEFINE_OLEGUID in mapiguid.h.
    # see http://doc.ddart.net/msdn/header/include/mapiguid.h.html
    oleguid = proc do |prefix|
      Ole::Types::Clsid.parse "{#{prefix}-0000-0000-c000-000000000046}"
    end

    NAMES = {
      oleguid['00020328'] => 'PS_MAPI',
      oleguid['00020329'] => 'PS_PUBLIC_STRINGS',
      oleguid['00020380'] => 'PS_ROUTING_EMAIL_ADDRESSES',
      oleguid['00020381'] => 'PS_ROUTING_ADDRTYPE',
      oleguid['00020382'] => 'PS_ROUTING_DISPLAY_NAME',
      oleguid['00020383'] => 'PS_ROUTING_ENTRYID',
      oleguid['00020384'] => 'PS_ROUTING_SEARCH_KEY',
      # string properties in this namespace automatically get added to the internet headers
      oleguid['00020386'] => 'PS_INTERNET_HEADERS',
      # theres are bunch of outlook ones i think
      # http://blogs.msdn.com/stephen_griffin/archive/2006/05/10/outlook-2007-beta-documentation-notification-based-indexing-support.aspx
      # IPM.Appointment
      oleguid['00062002'] => 'PSETID_Appointment',
      # IPM.Task
      oleguid['00062003'] => 'PSETID_Task',
      # used for IPM.Contact
      oleguid['00062004'] => 'PSETID_Address',
      oleguid['00062008'] => 'PSETID_Common',
      # didn't find a source for this name. it is for IPM.StickyNote
      oleguid['0006200e'] => 'PSETID_Note',
      # for IPM.Activity. also called the journal?
      oleguid['0006200a'] => 'PSETID_Log',
    }

    module Constants
      NAMES.each { |guid, name| const_set name, guid }
    end

    include Constants

    # +Properties+ are accessed by <tt>Key</tt>s, which are coerced to this class.
    # Includes a bunch of methods (hash, ==, eql?) to allow it to work as a key in
    # a +Hash+.
    #
    # Also contains the code that maps keys to symbolic names.
    class Key
      include Constants

      attr_reader :code, :guid
      def initialize code, guid=PS_MAPI
        @code, @guid = code, guid
      end

      def to_sym
        # hmmm, for some stuff, like, eg, the message class specific range, sym-ification
        # of the key depends on knowing our message class. i don't want to store anything else
        # here though, so if that kind of thing is needed, it can be passed to this function.
        # worry about that when some examples arise.
        case code
        when Integer
          if guid == PS_MAPI # and < 0x8000 ?
            # the hash should be updated now that i've changed the process
            TAGS['%04x' % code].first[/_(.*)/, 1].downcase.to_sym rescue code
          else
            # handle other guids here, like mapping names to outlook properties, based on the
            # outlook object model.
            NAMED_MAP[self].to_sym rescue code
          end
        when String
          # return something like
          # note that named properties don't go through the map at the moment. so #categories
          # doesn't work yet
          code.downcase.to_sym
        end
      end

      def to_s
        to_sym.to_s
      end

      # FIXME implement these
      def transmittable?
        # etc, can go here too
      end

      # this stuff is to allow it to be a useful key
      def hash
        [code, guid].hash
      end

      def == other
        hash == other.hash
      end

      alias eql? :==

      def inspect
        # maybe the way to do this, would be to be able to register guids
        # in a global lookup, which are used by Clsid#inspect itself, to
        # provide symbolic names...
        guid_str = NAMES[guid] || "{#{guid.format}}"
        if Integer === code
          hex = '0x%04x' % code
          if guid == PS_MAPI
            # just display as plain hex number
            hex
          else
            "#<Key #{guid_str}/#{hex}>"
          end
        else
          # display full guid and code
          "#<Key #{guid_str}/#{code.inspect}>"
        end
      end
    end

    # duplicated here for now
    SUPPORT_DIR = File.dirname(__FILE__) + '/../..'

    # data files that provide for the code to symbolic name mapping
    # guids in named_map are really constant references to the above
    TAGS = YAML.load_file "#{SUPPORT_DIR}/data/mapitags.yaml"
    NAMED_MAP = YAML.load_file("#{SUPPORT_DIR}/data/named_map.yaml").inject({}) do |hash, (key, value)|
      hash.update Key.new(key[0], const_get(key[1])) => value
    end

    attr_reader :raw

    # +raw+ should be an hash-like object that maps <tt>Key</tt>s to values. Should respond_to?
    # [], keys, values, each, and optionally []=, and delete.
    def initialize raw
      @raw = raw
    end

    # resolve +arg+ (could be key, code, string, or symbol), and possible +guid+ to a key.
    # returns nil on failure
    def resolve arg, guid=nil
      if guid; Key.new arg, guid
      else
        case arg
        when Key; arg
        when Integer; Key.new arg
        else sym_to_key[arg.to_sym]
        end
      end
    end

    # this is the function that creates a symbol to key mapping. currently this works by making a
    # pass through the raw properties, but conceivably you could map symbols to keys using the
    # mapitags directly. problem with that would be that named properties wouldn't map automatically,
    # but maybe thats not too important.
    def sym_to_key
      return @sym_to_key if @sym_to_key
      @sym_to_key = {}
      raw.keys.each do |key|
        sym = key.to_sym
        unless Symbol === sym
          Log.debug "couldn't find symbolic name for key #{key.inspect}"
          next
        end
        if @sym_to_key[sym]
          Log.warn "duplicate key #{key.inspect}"
          # we give preference to PS_MAPI keys
          @sym_to_key[sym] = key if key.guid == PS_MAPI
        else
          # just assign
          @sym_to_key[sym] = key
        end
      end
      @sym_to_key
    end

    def keys
      sym_to_key.keys
    end

    def values
      sym_to_key.values.map { |key| raw[key] }
    end

    def [] arg, guid=nil
      raw[resolve(arg, guid)]
    end

    def []= arg, *args
      args.unshift nil if args.length == 1
      guid, value = args
      # FIXME this won't really work properly. it would need to go
      # to TAGS to resolve, as it often won't be there already...
      raw[resolve(arg, guid)] = value
    end

    def method_missing name, *args
      if name.to_s !~ /\=$/ and args.empty?
        self[name]
      elsif name.to_s =~ /(.*)\=$/ and args.length == 1
        self[$1] = args[0]
      else
        super
      end
    end

    def to_h
      sym_to_key.inject({}) { |hash, (sym, key)| hash.update sym => raw[key] }
    end

    def inspect
      "#<#{self.class} " + to_h.sort_by { |k, v| k.to_s }.map do |k, v|
        v = v.inspect
        "#{k}=#{v.length > 32 ? v[0..29] + '..."' : v}"
      end.join(' ') + '>'
    end

    # -----

    # temporary pseudo tags

    # for providing rtf to plain text conversion. later, html to text too.
    def body
      return @body if defined?(@body)
      @body = (self[:body] rescue nil)
      # last resort
      if !@body or @body.strip.empty?
        Log.warn 'creating text body from rtf'
        @body = (::RTF::Converter.rtf2text body_rtf rescue nil)
      end
      @body
    end

    # for providing rtf decompression
    def body_rtf
      return @body_rtf if defined?(@body_rtf)
      @body_rtf = (RTF.rtfdecompr rtf_compressed.read rescue nil)
    end

    # for providing rtf to html conversion
    def body_html
      return @body_html if defined?(@body_html)
      @body_html = (self[:body_html].read rescue nil)
      @body_html = (RTF.rtf2html body_rtf rescue nil) if !@body_html or @body_html.strip.empty?
      # last resort
      if !@body_html or @body_html.strip.empty?
        Log.warn 'creating html body from rtf'
        @body_html = (::RTF::Converter.rtf2text body_rtf, :html rescue nil)
      end
      @body_html
    end
  end
end
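A rough usage sketch of the PropertySet API above (hash-backed store; the 0x0037 subject tag and the symbolic lookup follow the class comment, and assume the gem's data/mapitags.yaml is available):

    require 'mapi'

    key   = Mapi::PropertySet::Key.new 0x0037          # PR_SUBJECT, PS_MAPI by default
    props = Mapi::PropertySet.new key => 'hello world'

    props[0x0037]    # => "hello world"  - lookup by mapi tag
    props[:subject]  # => "hello world"  - lookup by symbolic name via sym_to_key
    props.subject    # => "hello world"  - method_missing falls through to #[]
    props.to_h       # => {:subject => "hello world"}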
data/lib/mapi/pst.rb
ADDED
@@ -0,0 +1,1806 @@
#
# = Introduction
#
# This file is mostly an attempt to port libpst to ruby, and simplify it in the process. It
# will leverage much of the existing MAPI => MIME conversion developed for Msg files, and as
# such is purely concerned with the file structure details.
#
# = TODO
#
# 1. solve recipient table problem (test4).
# this is done. turns out it was due to id2 clashes. find better solution
# 2. check parse consistency. an initial conversion of a 30M file to pst, shows
# a number of messages conveting badly. compare with libpst too.
# 3. xattribs
# 4. generalise the Mapi stuff better
# 5. refactor index load
# 6. msg serialization?
#

=begin

quick plan for cleanup.

have working tests for 97 and 03 file formats, so safe.

want to fix up:

64 bit unpacks scattered around. its ugly. not sure how best to handle it, but am slightly tempted
to override String#unpack to support a 64 bit little endian unpack (like L vs N/V, for Q). one way or
another need to fix it. Could really slow everything else down if its parsing the unpack strings twice,
once in ruby, for every single unpack i do :/

the index loading process, and the lack of shared code between normal vs 64 bit variants, and Index vs Desc.
should be able to reduce code by factor of 4. also think I should move load code into the class too. then
maybe have something like:

class Header
  def index_class
    version_2003 ? Index64 : Index
  end
end

def load_idx
  header.index_class.load_index
end

OR

def initialize
  @header = ...
  extend @header.index_class::Load
  load_idx
end

need to think about the role of the mapi code, and Pst::Item etc, but that layer can come later.

=end

require 'mapi'
require 'enumerator'
require 'ostruct'
require 'ole/ranges_io'

module Mapi
  class Pst
    class FormatError < StandardError
    end

    # unfortunately there is no Q analogue which is little endian only.
    # this translates T as an unsigned quad word, little endian byte order, to
    # not pollute the rest of the code.
    #
    # didn't want to override String#unpack, cause its too hacky, and incomplete.
    def self.unpack str, unpack_spec
      return str.unpack(unpack_spec) unless unpack_spec['T']
      @unpack_cache ||= {}
      t_offsets, new_spec = @unpack_cache[unpack_spec]
      unless t_offsets
        t_offsets = []
        offset = 0
        new_spec = ''
        unpack_spec.scan(/([^\d])_?(\*|\d+)?/o) do
          num_elems = $1.downcase == 'a' ? 1 : ($2 || 1).to_i
          if $1 == 'T'
            num_elems.times { |i| t_offsets << offset + i }
            new_spec << "V#{num_elems * 2}"
          else
            new_spec << $~[0]
          end
          offset += num_elems
        end
        @unpack_cache[unpack_spec] = [t_offsets, new_spec]
      end
      a = str.unpack(new_spec)
      t_offsets.each do |offset|
        low, high = a[offset, 2]
        a[offset, 2] = low && high ? low + (high << 32) : nil
      end
      a
    end
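    # eg, a quick sketch of what the 'T' handling buys (made-up bytes): 'TV' is run as 'V2V',
    # then the first two 32-bit words are folded into one 64-bit value:
    #
    #   data = [0x89abcdef, 0x01234567, 0x1cd].pack('V3')
    #   Pst.unpack(data, 'TV')   # => [0x0123456789abcdef, 0x1cd]
    #   data.unpack('V3')        # => [0x89abcdef, 0x01234567, 0x1cd]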

    #
    # this is the header and encryption encapsulation code
    # ----------------------------------------------------------------------------
    #

    # class which encapsulates the pst header
    class Header
      SIZE = 512
      MAGIC = 0x2142444e

      # these are the constants defined in libpst.c, that
      # are referenced in pst_open()
      INDEX_TYPE_OFFSET = 0x0A
      FILE_SIZE_POINTER = 0xA8
      FILE_SIZE_POINTER_64 = 0xB8
      SECOND_POINTER = 0xBC
      INDEX_POINTER = 0xC4
      SECOND_POINTER_64 = 0xE0
      INDEX_POINTER_64 = 0xF0
      ENC_OFFSET = 0x1CD

      attr_reader :magic, :index_type, :encrypt_type, :size
      attr_reader :index1_count, :index1, :index2_count, :index2
      attr_reader :version
      def initialize data
        @magic = data.unpack('N')[0]
        @index_type = data[INDEX_TYPE_OFFSET]
        @version = {0x0e => 1997, 0x17 => 2003}[@index_type]

        if version_2003?
          # don't know?
          # >> data1.unpack('V*').zip(data2.unpack('V*')).enum_with_index.select { |(c, d), i| c != d and not [46, 56, 60].include?(i) }.select { |(a, b), i| b == 0 }.map { |(a, b), i| [a / 256, i] }
          # [8, 76], [32768, 84], [128, 89]
          # >> data1.unpack('C*').zip(data2.unpack('C*')).enum_with_index.select { |(c, d), i| c != d and not [184..187, 224..227, 240..243].any? { |r| r === i } }.select { |(a, b), i| b == 0 and ((Math.log(a) / Math.log(2)) % 1) < 0.0001 }
          # [[[2, 0], 61], [[2, 0], 76], [[2, 0], 195], [[2, 0], 257], [[8, 0], 305], [[128, 0], 338], [[128, 0], 357]]
          # i have only 2 psts to base this guess on, so i can't really come up with anything that looks reasonable yet. not sure what the offset is. unfortunately there is so much in the header
          # that isn't understood...
          @encrypt_type = 1

          @index2_count, @index2 = data[SECOND_POINTER_64 - 4, 8].unpack('V2')
          @index1_count, @index1 = data[INDEX_POINTER_64 - 4, 8].unpack('V2')

          @size = data[FILE_SIZE_POINTER_64, 4].unpack('V')[0]
        else
          @encrypt_type = data[ENC_OFFSET]

          @index2_count, @index2 = data[SECOND_POINTER - 4, 8].unpack('V2')
          @index1_count, @index1 = data[INDEX_POINTER - 4, 8].unpack('V2')

          @size = data[FILE_SIZE_POINTER, 4].unpack('V')[0]
        end

        validate!
      end

      def version_2003?
        version == 2003
      end

      def encrypted?
        encrypt_type != 0
      end

      def validate!
        raise FormatError, "bad signature on pst file (#{'0x%x' % magic})" unless magic == MAGIC
        raise FormatError, "only index types 0x0e and 0x17 are handled (#{'0x%x' % index_type})" unless [0x0e, 0x17].include?(index_type)
        raise FormatError, "only encrytion types 0 and 1 are handled (#{encrypt_type.inspect})" unless [0, 1].include?(encrypt_type)
      end
    end
171
|
+
|
172
|
+
# compressible encryption! :D
|
173
|
+
#
|
174
|
+
# simple substitution. see libpst.c
|
175
|
+
# maybe test switch to using a String#tr!
|
176
|
+
class CompressibleEncryption
|
177
|
+
DECRYPT_TABLE = [
|
178
|
+
0x47, 0xf1, 0xb4, 0xe6, 0x0b, 0x6a, 0x72, 0x48,
|
179
|
+
0x85, 0x4e, 0x9e, 0xeb, 0xe2, 0xf8, 0x94, 0x53, # 0x0f
|
180
|
+
0xe0, 0xbb, 0xa0, 0x02, 0xe8, 0x5a, 0x09, 0xab,
|
181
|
+
0xdb, 0xe3, 0xba, 0xc6, 0x7c, 0xc3, 0x10, 0xdd, # 0x1f
|
182
|
+
0x39, 0x05, 0x96, 0x30, 0xf5, 0x37, 0x60, 0x82,
|
183
|
+
0x8c, 0xc9, 0x13, 0x4a, 0x6b, 0x1d, 0xf3, 0xfb, # 0x2f
|
184
|
+
0x8f, 0x26, 0x97, 0xca, 0x91, 0x17, 0x01, 0xc4,
|
185
|
+
0x32, 0x2d, 0x6e, 0x31, 0x95, 0xff, 0xd9, 0x23, # 0x3f
|
186
|
+
0xd1, 0x00, 0x5e, 0x79, 0xdc, 0x44, 0x3b, 0x1a,
|
187
|
+
0x28, 0xc5, 0x61, 0x57, 0x20, 0x90, 0x3d, 0x83, # 0x4f
|
188
|
+
0xb9, 0x43, 0xbe, 0x67, 0xd2, 0x46, 0x42, 0x76,
|
189
|
+
0xc0, 0x6d, 0x5b, 0x7e, 0xb2, 0x0f, 0x16, 0x29, # 0x5f
|
190
|
+
0x3c, 0xa9, 0x03, 0x54, 0x0d, 0xda, 0x5d, 0xdf,
|
191
|
+
0xf6, 0xb7, 0xc7, 0x62, 0xcd, 0x8d, 0x06, 0xd3, # 0x6f
|
192
|
+
0x69, 0x5c, 0x86, 0xd6, 0x14, 0xf7, 0xa5, 0x66,
|
193
|
+
0x75, 0xac, 0xb1, 0xe9, 0x45, 0x21, 0x70, 0x0c, # 0x7f
|
194
|
+
0x87, 0x9f, 0x74, 0xa4, 0x22, 0x4c, 0x6f, 0xbf,
|
195
|
+
0x1f, 0x56, 0xaa, 0x2e, 0xb3, 0x78, 0x33, 0x50, # 0x8f
|
196
|
+
0xb0, 0xa3, 0x92, 0xbc, 0xcf, 0x19, 0x1c, 0xa7,
|
197
|
+
0x63, 0xcb, 0x1e, 0x4d, 0x3e, 0x4b, 0x1b, 0x9b, # 0x9f
|
198
|
+
0x4f, 0xe7, 0xf0, 0xee, 0xad, 0x3a, 0xb5, 0x59,
|
199
|
+
0x04, 0xea, 0x40, 0x55, 0x25, 0x51, 0xe5, 0x7a, # 0xaf
|
200
|
+
0x89, 0x38, 0x68, 0x52, 0x7b, 0xfc, 0x27, 0xae,
|
201
|
+
0xd7, 0xbd, 0xfa, 0x07, 0xf4, 0xcc, 0x8e, 0x5f, # 0xbf
|
202
|
+
0xef, 0x35, 0x9c, 0x84, 0x2b, 0x15, 0xd5, 0x77,
|
203
|
+
0x34, 0x49, 0xb6, 0x12, 0x0a, 0x7f, 0x71, 0x88, # 0xcf
|
204
|
+
0xfd, 0x9d, 0x18, 0x41, 0x7d, 0x93, 0xd8, 0x58,
|
205
|
+
0x2c, 0xce, 0xfe, 0x24, 0xaf, 0xde, 0xb8, 0x36, # 0xdf
|
206
|
+
0xc8, 0xa1, 0x80, 0xa6, 0x99, 0x98, 0xa8, 0x2f,
|
207
|
+
0x0e, 0x81, 0x65, 0x73, 0xe4, 0xc2, 0xa2, 0x8a, # 0xef
|
208
|
+
0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2,
|
209
|
+
0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec # 0xff
|
210
|
+
]
|
211
|
+
|
212
|
+
ENCRYPT_TABLE = [nil] * 256
|
213
|
+
DECRYPT_TABLE.each_with_index { |i, j| ENCRYPT_TABLE[i] = j }
|
214
|
+
|
215
|
+
def self.decrypt_alt encrypted
|
216
|
+
decrypted = ''
|
217
|
+
encrypted.length.times { |i| decrypted << DECRYPT_TABLE[encrypted[i]] }
|
218
|
+
decrypted
|
219
|
+
end
|
220
|
+
|
221
|
+
def self.encrypt_alt decrypted
|
222
|
+
encrypted = ''
|
223
|
+
decrypted.length.times { |i| encrypted << ENCRYPT_TABLE[decrypted[i]] }
|
224
|
+
encrypted
|
225
|
+
end
|
226
|
+
|
227
|
+
# an alternate implementation that is possibly faster....
|
228
|
+
# TODO - bench
|
229
|
+
DECRYPT_STR, ENCRYPT_STR = [DECRYPT_TABLE, (0...256)].map do |values|
|
230
|
+
values.map { |i| i.chr }.join.gsub(/([\^\-\\])/, "\\\\\\1")
|
231
|
+
end
|
232
|
+
|
233
|
+
def self.decrypt encrypted
|
234
|
+
encrypted.tr ENCRYPT_STR, DECRYPT_STR
|
235
|
+
end
|
236
|
+
|
237
|
+
def self.encrypt decrypted
|
238
|
+
decrypted.tr DECRYPT_STR, ENCRYPT_STR
|
239
|
+
end
|
240
|
+
end
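    # eg, the two substitutions are inverses, so a round trip is a no-op:
    #
    #   CompressibleEncryption.decrypt(CompressibleEncryption.encrypt('hello'))  # => "hello"
    #
    # the _alt versions do the same thing a byte at a time through the lookup tables
    # (written against 1.8-era String#[], which returns an integer).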

    class RangesIOEncryptable < RangesIO
      def initialize io, mode='r', params={}
        mode, params = 'r', mode if Hash === mode
        @decrypt = !!params[:decrypt]
        super
      end

      def encrypted?
        @decrypt
      end

      def read limit=nil
        buf = super
        buf = CompressibleEncryption.decrypt(buf) if encrypted?
        buf
      end
    end

    attr_reader :io, :header, :idx, :desc, :special_folder_ids

    # corresponds to
    # * pst_open
    # * pst_load_index
    def initialize io
      @io = io
      io.pos = 0
      @header = Header.new io.read(Header::SIZE)

      # would prefer this to be in Header#validate, but it doesn't have the io size.
      # should perhaps downgrade this to just be a warning...
      raise FormatError, "header size field invalid (#{header.size} != #{io.size}}" unless header.size == io.size

      load_idx
      load_desc
      load_xattrib

      @special_folder_ids = {}
    end

    def encrypted?
      @header.encrypted?
    end

    # until i properly fix logging...
    def warn s
      Mapi::Log.warn s
    end

    #
    # this is the index and desc record loading code
    # ----------------------------------------------------------------------------
    #

    ToTree = Module.new

    module Index2
      BLOCK_SIZE = 512
      module RecursiveLoad
        def load_chain
          #...
        end
      end

      module Base
        def read
          #...
        end
      end

      class Version1997 < Struct.new(:a)#...)
        SIZE = 12

        include RecursiveLoad
        include Base
      end

      class Version2003 < Struct.new(:a)#...)
        SIZE = 24

        include RecursiveLoad
        include Base
      end
    end

    module Desc2
      module Base
        def desc
          #...
        end
      end

      class Version1997 < Struct.new(:a)#...)
        #include Index::RecursiveLoad
        include Base
      end

      class Version2003 < Struct.new(:a)#...)
        #include Index::RecursiveLoad
        include Base
      end
    end

    # more constants from libpst.c
    # these relate to the index block
    ITEM_COUNT_OFFSET = 0x1f0 # count byte
    LEVEL_INDICATOR_OFFSET = 0x1f3 # node or leaf
    BACKLINK_OFFSET = 0x1f8 # backlink u1 value

    # these 3 classes are used to hold various file records

    # pst_index
    class Index < Struct.new(:id, :offset, :size, :u1)
      UNPACK_STR = 'VVvv'
      SIZE = 12
      BLOCK_SIZE = 512 # index blocks was 516 but bogus
      COUNT_MAX = 41 # max active items (ITEM_COUNT_OFFSET / Index::SIZE = 41)

      attr_accessor :pst
      def initialize data
        data = Pst.unpack data, UNPACK_STR if String === data
        super(*data)
      end

      def type
        @type ||= begin
          if id & 0x2 == 0
            :data
          else
            first_byte, second_byte = read.unpack('CC')
            if first_byte == 1
              raise second_byte unless second_byte == 1
              :data_chain_header
            elsif first_byte == 2
              raise second_byte unless second_byte == 0
              :id2_assoc
            else
              raise FormatError, 'unknown first byte for block - %p' % first_byte
            end
          end
        end
      end

      def data?
        (id & 0x2) == 0
      end

      def read decrypt=true
        # only data blocks are every encrypted
        decrypt = false unless data?
        pst.pst_read_block_size offset, size, decrypt
      end

      # show all numbers in hex
      def inspect
        super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }.sub(/Index /, "Index type=#{type.inspect}, ")
      end
    end

    # mostly guesses.
    ITEM_COUNT_OFFSET_64 = 0x1e8
    LEVEL_INDICATOR_OFFSET_64 = 0x1eb # diff of 3 between these 2 as above...

    # will maybe inherit from Index64, in order to get the same #type function.
    class Index64 < Index
      UNPACK_STR = 'TTvvV'
      SIZE = 24
      BLOCK_SIZE = 512
      COUNT_MAX = 20 # bit of a guess really. 512 / 24 = 21, but doesn't leave enough header room

      # this is the extra item on the end of the UNPACK_STR above
      attr_accessor :u2

      def initialize data
        data = Pst.unpack data, UNPACK_STR if String === data
        @u2 = data.pop
        super data
      end

      def inspect
        super.sub(/>$/, ', u2=%p>' % u2)
      end

      def self.load_chain io, header
        load_idx_rec io, header.index1, 0, 0
      end

      # almost identical to load code for Index, just different offsets and unpack strings.
      # can probably merge them, or write a generic load_tree function or something.
      def self.load_idx_rec io, offset, linku1, start_val
        io.seek offset
        buf = io.read BLOCK_SIZE
        idxs = []

        item_count = buf[ITEM_COUNT_OFFSET_64]
        raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX

        #idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
        #raise 'blah 1' unless idx.id == linku1

        if buf[LEVEL_INDICATOR_OFFSET_64] == 0
          # leaf pointers
          # split the data into item_count index objects
          buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
            idx = new data
            # first entry
            raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
            #idx.pst = self
            break if idx.id == 0
            idxs << idx
          end
        else
          # node pointers
          # split the data into item_count table pointers
          buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
            start, u1, offset = Pst.unpack data, 'T3'
            # for the first value, we expect the start to be equal
            raise 'blah 3' if i == 0 and start_val != 0 and start != start_val
            break if start == 0
            idxs += load_idx_rec io, offset, u1, start
          end
        end

        idxs
      end
    end

    # pst_desc
    class Desc64 < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id, :u2)
      UNPACK_STR = 'T3VV'
      SIZE = 32
      BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
      COUNT_MAX = 15 # guess as per Index64

      include RecursivelyEnumerable

      attr_accessor :pst
      attr_reader :children
      def initialize data
        super(*Pst.unpack(data, UNPACK_STR))
        @children = []
      end

      def desc
        pst.idx_from_id idx_id
      end

      def list_index
        pst.idx_from_id idx2_id
      end

      def self.load_chain io, header
        load_desc_rec io, header.index2, 0, 0x21
      end

      def self.load_desc_rec io, offset, linku1, start_val
        io.seek offset
        buf = io.read BLOCK_SIZE
        descs = []
        item_count = buf[ITEM_COUNT_OFFSET_64]

        # not real desc
        #desc = Desc.new buf[BACKLINK_OFFSET, 4]
        #raise 'blah 1' unless desc.desc_id == linku1

        if buf[LEVEL_INDICATOR_OFFSET_64] == 0
          # leaf pointers
          raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX
          # split the data into item_count desc objects
          buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
            desc = new data
            # first entry
            raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
            break if desc.desc_id == 0
            descs << desc
          end
        else
          # node pointers
          raise "have too many active items in index (#{item_count})" if item_count > Index64::COUNT_MAX
          # split the data into item_count table pointers
          buf[0, Index64::SIZE * item_count].scan(/.{#{Index64::SIZE}}/mo).each_with_index do |data, i|
            start, u1, offset = Pst.unpack data, 'T3'
            # for the first value, we expect the start to be equal note that ids -1, so even for the
            # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
            # that the first desc record is always 33...
            # thats because 0x21 is the pst root itself...
            raise 'blah 3' if i == 0 and start_val != -1 and start != start_val
            # this shouldn't really happen i'd imagine
            break if start == 0
            descs += load_desc_rec io, offset, u1, start
          end
        end

        descs
      end

      def each_child(&block)
        @children.each(&block)
      end
    end

    # _pst_table_ptr_struct
    class TablePtr < Struct.new(:start, :u1, :offset)
      UNPACK_STR = 'V3'
      SIZE = 12

      def initialize data
        data = data.unpack(UNPACK_STR) if String === data
        super(*data)
      end
    end

    # pst_desc
    # idx_id is a pointer to an idx record which gets the primary data stream for the Desc record.
    # idx2_id gets you an idx record, that when read gives you an ID2 association list, which just maps
    # another set of ids to index values
    class Desc < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id)
      UNPACK_STR = 'V4'
      SIZE = 16
      BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
      COUNT_MAX = 31 # max active desc records (ITEM_COUNT_OFFSET / Desc::SIZE = 31)

      include ToTree

      attr_accessor :pst
      attr_reader :children
      def initialize data
        super(*data.unpack(UNPACK_STR))
        @children = []
      end

      def desc
        pst.idx_from_id idx_id
      end

      def list_index
        pst.idx_from_id idx2_id
      end

      # show all numbers in hex
      def inspect
        super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }
      end
    end
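    # eg, a rough sketch of how desc records chain to idx records (file name made up,
    # using the loaders defined below):
    #
    #   pst  = Pst.new File.open('sample.pst', 'rb')
    #   root = pst.desc_from_id 0x21    # 0x21 is the pst root, as per load_desc_rec below
    #   root.children                   # toplevel folders hang off the root desc
    #   node = root.children.first
    #   node.desc                       # Index record for the node's primary data stream
    #   node.list_index                 # Index record for its id2 association list, if any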

    # corresponds to
    # * _pst_build_id_ptr
    def load_idx
      @idx = []
      @idx_offsets = []
      if header.version_2003?
        @idx = Index64.load_chain io, header
        @idx.each { |idx| idx.pst = self }
      else
        load_idx_rec header.index1, header.index1_count, 0
      end

      # we'll typically be accessing by id, so create a hash as a lookup cache
      @idx_from_id = {}
      @idx.each do |idx|
        warn "there are duplicate idx records with id #{idx.id}" if @idx_from_id[idx.id]
        @idx_from_id[idx.id] = idx
      end
    end

    # load the flat idx table, which maps ids to file ranges. this is the recursive helper
    #
    # corresponds to
    # * _pst_build_id_ptr
    def load_idx_rec offset, linku1, start_val
      @idx_offsets << offset

      #_pst_read_block_size(pf, offset, BLOCK_SIZE, &buf, 0, 0) < BLOCK_SIZE)
      buf = pst_read_block_size offset, Index::BLOCK_SIZE, false

      item_count = buf[ITEM_COUNT_OFFSET]
      raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX

      idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
      raise 'blah 1' unless idx.id == linku1

      if buf[LEVEL_INDICATOR_OFFSET] == 0
        # leaf pointers
        # split the data into item_count index objects
        buf[0, Index::SIZE * item_count].scan(/.{#{Index::SIZE}}/mo).each_with_index do |data, i|
          idx = Index.new data
          # first entry
          raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
          idx.pst = self
          # this shouldn't really happen i'd imagine
          break if idx.id == 0
          @idx << idx
        end
      else
        # node pointers
        # split the data into item_count table pointers
        buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
          table = TablePtr.new data
          # for the first value, we expect the start to be equal
          raise 'blah 3' if i == 0 and start_val != 0 and table.start != start_val
          # this shouldn't really happen i'd imagine
          break if table.start == 0
          load_idx_rec table.offset, table.u1, table.start
        end
      end
    end

    # most access to idx objects will use this function
    #
    # corresponds to
    # * _pst_getID
    def idx_from_id id
      @idx_from_id[id]
    end

    # corresponds to
    # * _pst_build_desc_ptr
    # * record_descriptor
    def load_desc
      @desc = []
      @desc_offsets = []
      if header.version_2003?
        @desc = Desc64.load_chain io, header
        @desc.each { |desc| desc.pst = self }
      else
        load_desc_rec header.index2, header.index2_count, 0x21
      end

      # first create a lookup cache
      @desc_from_id = {}
      @desc.each do |desc|
        desc.pst = self
        warn "there are duplicate desc records with id #{desc.desc_id}" if @desc_from_id[desc.desc_id]
        @desc_from_id[desc.desc_id] = desc
      end

      # now turn the flat list of loaded desc records into a tree

      # well, they have no parent, so they're more like, the toplevel descs.
      @orphans = []
      # now assign each node to the parents child array, putting the orphans in the above
      @desc.each do |desc|
        parent = @desc_from_id[desc.parent_desc_id]
        # note, besides this, its possible to create other circular structures.
        if parent == desc
          # this actually happens usually, for the root_item it appears.
          #warn "desc record's parent is itself (#{desc.inspect})"
          # maybe add some more checks in here for circular structures
        elsif parent
          parent.children << desc
          next
        end
        @orphans << desc
      end

      # maybe change this to some sort of sane-ness check. orphans are expected
      # warn "have #{@orphans.length} orphan desc record(s)." unless @orphans.empty?
    end

    # load the flat list of desc records recursively
    #
    # corresponds to
    # * _pst_build_desc_ptr
    # * record_descriptor
    def load_desc_rec offset, linku1, start_val
      @desc_offsets << offset

      buf = pst_read_block_size offset, Desc::BLOCK_SIZE, false
      item_count = buf[ITEM_COUNT_OFFSET]

      # not real desc
      desc = Desc.new buf[BACKLINK_OFFSET, 4]
      raise 'blah 1' unless desc.desc_id == linku1

      if buf[LEVEL_INDICATOR_OFFSET] == 0
        # leaf pointers
        raise "have too many active items in index (#{item_count})" if item_count > Desc::COUNT_MAX
        # split the data into item_count desc objects
        buf[0, Desc::SIZE * item_count].scan(/.{#{Desc::SIZE}}/mo).each_with_index do |data, i|
          desc = Desc.new data
          # first entry
          raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
          # this shouldn't really happen i'd imagine
          break if desc.desc_id == 0
          @desc << desc
        end
      else
        # node pointers
        raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX
        # split the data into item_count table pointers
        buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
          table = TablePtr.new data
          # for the first value, we expect the start to be equal note that ids -1, so even for the
          # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
          # that the first desc record is always 33...
          raise 'blah 3' if i == 0 and start_val != -1 and table.start != start_val
          # this shouldn't really happen i'd imagine
          break if table.start == 0
          load_desc_rec table.offset, table.u1, table.start
        end
      end
    end

    # as for idx
    #
    # corresponds to:
    # * _pst_getDptr
    def desc_from_id id
      @desc_from_id[id]
    end

    # corresponds to
    # * pst_load_extended_attributes
    def load_xattrib
      unless desc = desc_from_id(0x61)
        warn "no extended attributes desc record found"
        return
      end
      unless desc.desc
        warn "no desc idx for extended attributes"
        return
      end
      if desc.list_index
      end
      #warn "skipping loading xattribs"
      # FIXME implement loading xattribs
    end

    # corresponds to:
    # * _pst_read_block_size
    # * _pst_read_block ??
    # * _pst_ff_getIDblock_dec ??
    # * _pst_ff_getIDblock ??
    def pst_read_block_size offset, size, decrypt=true
      io.seek offset
      buf = io.read size
      warn "tried to read #{size} bytes but only got #{buf.length}" if buf.length != size
      encrypted? && decrypt ? CompressibleEncryption.decrypt(buf) : buf
    end

    #
    # id2
    # ----------------------------------------------------------------------------
    #

    class ID2Assoc < Struct.new(:id2, :id, :table2)
      UNPACK_STR = 'V3'
      SIZE = 12

      def initialize data
        data = data.unpack(UNPACK_STR) if String === data
        super(*data)
      end
    end

    class ID2Assoc64 < Struct.new(:id2, :u1, :id, :table2)
      UNPACK_STR = 'VVT2'
      SIZE = 24

      def initialize data
        if String === data
          data = Pst.unpack data, UNPACK_STR
        end
        super(*data)
      end

      def self.load_chain idx
        buf = idx.read
        type, count = buf.unpack 'v2'
        unless type == 0x0002
          raise 'unknown id2 type 0x%04x' % type
          #return
        end
        id2 = []
        count.times do |i|
          assoc = new buf[8 + SIZE * i, SIZE]
          id2 << assoc
          if assoc.table2 != 0
            id2 += load_chain idx.pst.idx_from_id(assoc.table2)
          end
        end
        id2
      end
    end

    class ID2Mapping
      attr_reader :list
      def initialize pst, list
        @pst = pst
        @list = list
        # create a lookup.
        @id_from_id2 = {}
        @list.each do |id2|
          # NOTE we take the last value seen value if there are duplicates. this "fixes"
          # test4-o1997.pst for the time being.
          warn "there are duplicate id2 records with id #{id2.id2}" if @id_from_id2[id2.id2]
          next if @id_from_id2[id2.id2]
          @id_from_id2[id2.id2] = id2.id
        end
      end

      # TODO: fix logging
      def warn s
        Mapi::Log.warn s
      end

      # corresponds to:
      # * _pst_getID2
      def [] id
        #id2 = @list.find { |x| x.id2 == id }
        id = @id_from_id2[id]
        id and @pst.idx_from_id(id)
      end
    end

    def load_idx2 idx
      if header.version_2003?
        id2 = ID2Assoc64.load_chain idx
      else
        id2 = load_idx2_rec idx
      end
      ID2Mapping.new self, id2
    end

    # corresponds to
    # * _pst_build_id2
    def load_idx2_rec idx
      # i should perhaps use a idx chain style read here?
      buf = pst_read_block_size idx.offset, idx.size, false
      type, count = buf.unpack 'v2'
      unless type == 0x0002
        raise 'unknown id2 type 0x%04x' % type
        #return
      end
      id2 = []
      count.times do |i|
        assoc = ID2Assoc.new buf[4 + ID2Assoc::SIZE * i, ID2Assoc::SIZE]
        id2 << assoc
        if assoc.table2 != 0
          id2 += load_idx2_rec idx_from_id(assoc.table2)
        end
      end
      id2
    end

    class RangesIOIdxChain < RangesIOEncryptable
      def initialize pst, idx_head
        @idxs = pst.id2_block_idx_chain idx_head
        # whether or not a given idx needs encrypting
        decrypts = @idxs.map do |idx|
          decrypt = (idx.id & 2) != 0 ? false : pst.encrypted?
        end.uniq
        raise NotImplementedError, 'partial encryption in RangesIOID2' if decrypts.length > 1
        decrypt = decrypts.first
        # convert idxs to ranges
        ranges = @idxs.map { |idx| [idx.offset, idx.size] }
        super pst.io, :ranges => ranges, :decrypt => decrypt
      end
    end

    class RangesIOID2 < RangesIOIdxChain
      def self.new pst, id2, idx2
        RangesIOIdxChain.new pst, idx2[id2]
      end
    end

    # corresponds to:
    # * _pst_ff_getID2block
    # * _pst_ff_getID2data
    # * _pst_ff_compile_ID
    def id2_block_idx_chain idx
      if (idx.id & 0x2) == 0
        [idx]
      else
        buf = idx.read
        type, fdepth, count = buf[0, 4].unpack 'CCv'
        unless type == 1 # libpst.c:3958
          warn 'Error in idx_chain - %p, %p, %p - attempting to ignore' % [type, fdepth, count]
          return [idx]
        end
        # there are 4 unaccounted for bytes here, 4...8
        if header.version_2003?
          ids = buf[8, count * 8].unpack("T#{count}")
        else
          ids = buf[8, count * 4].unpack('V*')
        end
        if fdepth == 1
          ids.map { |id| idx_from_id id }
        else
          ids.map { |id| id2_block_idx_chain idx_from_id(id) }.flatten
        end
      end
    end

    #
    # main block parsing code. gets raw properties
    # ----------------------------------------------------------------------------
    #

    # the job of this class, is to take a desc record, and be able to enumerate through the
    # mapi properties of the associated thing.
    #
    # corresponds to
    # * _pst_parse_block
    # * _pst_process (in some ways. although perhaps thats more the Item::Properties#add_property)
    class BlockParser
      include Mapi::Types::Constants

      TYPES = {
        0xbcec => 1,
        0x7cec => 2,
        # type 3 is removed. an artifact of not handling the indirect blocks properly in libpst.
      }

      PR_SUBJECT = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_SUBJECT' }.first.hex
      PR_BODY_HTML = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_BODY_HTML' }.first.hex

      # this stuff could maybe be moved to Ole::Types? or leverage it somehow?
      # whether or not a type is immeidate is more a property of the pst encoding though i expect.
      # what i probably can add is a generic concept of whether a type is of variadic length or not.

      # these lists are very incomplete. think they are largely copied from libpst

      IMMEDIATE_TYPES = [
        PT_SHORT, PT_LONG, PT_BOOLEAN
      ]

      INDIRECT_TYPES = [
        PT_DOUBLE, PT_OBJECT,
        0x0014, # whats this? probably something like PT_LONGLONG, given the correspondence with the
                # ole variant types. (= VT_I8)
        PT_STRING8, PT_UNICODE, # unicode isn't in libpst, but added here for outlook 2003 down the track
        PT_SYSTIME,
        0x0048, # another unknown
        0x0102, # this is PT_BINARY vs PT_CLSID
        #0x1003, # these are vector types, but they're commented out for now because i'd expect that
        #0x1014, # there's extra decoding needed that i'm not doing. (probably just need a simple
        #        # PT_* => unpack string mapping for the immediate types, and just do unpack('V*') etc
        #0x101e,
        #0x1102
      ]

      # the attachment and recipient arrays appear to be always stored with these fixed
      # id2 values. seems strange. are there other extra streams? can find out by making higher
      # level IO wrapper, which has the id2 value, and doing the diff of available id2 values versus
      # used id2 values in properties of an item.
      ID2_ATTACHMENTS = 0x671
      ID2_RECIPIENTS = 0x692

      attr_reader :desc, :data, :data_chunks, :offset_tables
      def initialize desc
        raise FormatError, "unable to get associated index record for #{desc.inspect}" unless desc.desc
        @desc = desc
        #@data = desc.desc.read
        if Pst::Index === desc.desc
          #@data = RangesIOIdxChain.new(desc.pst, desc.desc).read
          idxs = desc.pst.id2_block_idx_chain desc.desc
          # this gets me the plain index chain.
        else
          # fake desc
          #@data = desc.desc.read
          idxs = [desc.desc]
        end

        @data_chunks = idxs.map { |idx| idx.read }
        @data = @data_chunks.first

        load_header

        @index_offsets = [@index_offset] + @data_chunks[1..-1].map { |chunk| chunk.unpack('v')[0] }
        @offset_tables = []
        @ignored = []
        @data_chunks.zip(@index_offsets).each do |chunk, offset|
          ignore = chunk[offset, 2].unpack('v')[0]
          @ignored << ignore
          # p ignore
          @offset_tables.push offset_table = []
          # maybe its ok if there aren't to be any values ?
          raise FormatError if offset == 0
          offsets = chunk[offset + 2..-1].unpack('v*')
          #p offsets
          offsets[0, ignore + 2].each_cons 2 do |from, to|
            #next if to == 0
            raise FormatError, [from, to].inspect if from > to
            offset_table << [from, to]
          end
        end

        @offset_table = @offset_tables.first
        @idxs = idxs

        # now, we may have multiple different blocks
      end

      # a given desc record may or may not have associated idx2 data. we lazily load it here, so it will never
      # actually be requested unless get_data_indirect actually needs to use it.
      def idx2
        return @idx2 if @idx2
        raise FormatError, 'idx2 requested but no idx2 available' unless desc.list_index
        # should check this can't return nil
        @idx2 = desc.pst.load_idx2 desc.list_index
      end

      def load_header
        @index_offset, type, @offset1 = data.unpack 'vvV'
        raise FormatError, 'unknown block type signature 0x%04x' % type unless TYPES[type]
        @type = TYPES[type]
      end

      # based on the value of offset, return either some data from buf, or some data from the
      # id2 chain id2, where offset is some key into a lookup table that is stored as the id2
      # chain. i think i may need to create a BlockParser class that wraps up all this mess.
      #
      # corresponds to:
      # * _pst_getBlockOffsetPointer
      # * _pst_getBlockOffset
      def get_data_indirect offset
        return get_data_indirect_io(offset).read

        if offset == 0
          nil
        elsif (offset & 0xf) == 0xf
          RangesIOID2.new(desc.pst, offset, idx2).read
        else
          low, high = offset & 0xf, offset >> 4
          raise FormatError if low != 0 or (high & 0x1) != 0 or (high / 2) > @offset_table.length
          from, to = @offset_table[high / 2]
          data[from...to]
        end
      end

      def get_data_indirect_io offset
        if offset == 0
          nil
        elsif (offset & 0xf) == 0xf
          if idx2[offset]
            RangesIOID2.new desc.pst, offset, idx2
          else
            warn "tried to get idx2 record for #{offset} but failed"
            return StringIO.new('')
          end
        else
          low, high = offset & 0xf, offset >> 4
          if low != 0 or (high & 0x1) != 0
            # raise FormatError,
            warn "bad - #{low} #{high} (1)"
            return StringIO.new('')
          end
          # lets see which block it should come from.
          block_idx, i = high.divmod 4096
          unless block_idx < @data_chunks.length
            warn "bad - block_idx to high (not #{block_idx} < #{@data_chunks.length})"
            return StringIO.new('')
          end
          data_chunk, offset_table = @data_chunks[block_idx], @offset_tables[block_idx]
          if i / 2 >= offset_table.length
            warn "bad - #{low} #{high} - #{i / 2} >= #{offset_table.length} (2)"
            return StringIO.new('')
          end
          #warn "ok - #{low} #{high} #{offset_table.length}"
          from, to = offset_table[i / 2]
          StringIO.new data_chunk[from...to]
        end
      end
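      # eg, for a table reference like offset = 0x40: low = 0x40 & 0xf = 0, high = 0x40 >> 4 = 4,
      # high.divmod(4096) => [0, 4], so it names the third [from, to] pair (i / 2 = 2) of the
      # first data chunk's offset table. a value with a low nibble of 0xf (eg 0x010f) is instead
      # an id2 reference, read through RangesIOID2 above.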
|
1105
|
+
|
1106
|
+
def handle_indirect_values key, type, value
|
1107
|
+
case type
|
1108
|
+
when PT_BOOLEAN
|
1109
|
+
value = value != 0
|
1110
|
+
when *IMMEDIATE_TYPES # not including PT_BOOLEAN which we just did above
|
1111
|
+
# no processing current applied (needed?).
|
1112
|
+
when *INDIRECT_TYPES
|
1113
|
+
# the value is a pointer
|
1114
|
+
if String === value # ie, value size > 4 above
|
1115
|
+
value = StringIO.new value
|
1116
|
+
else
|
1117
|
+
value = get_data_indirect_io(value)
|
1118
|
+
end
|
1119
|
+
# keep strings as immediate values for now, for compatability with how i set up
|
1120
|
+
# Msg::Properties::ENCODINGS
|
1121
|
+
if value
|
1122
|
+
if type == PT_STRING8
|
1123
|
+
value = value.read
|
1124
|
+
elsif type == PT_UNICODE
|
1125
|
+
value = Ole::Types::FROM_UTF16.iconv value.read
|
1126
|
+
end
|
1127
|
+
end
|
1128
|
+
# special subject handling
|
1129
|
+
if key == PR_BODY_HTML and value
|
1130
|
+
# to keep the msg code happy, which thinks body_html will be an io
|
1131
|
+
# although, in 2003 version, they are 0102 already
|
1132
|
+
value = StringIO.new value unless value.respond_to?(:read)
|
1133
|
+
end
|
1134
|
+
if key == PR_SUBJECT and value
|
1135
|
+
ignore, offset = value.unpack 'C2'
|
1136
|
+
offset = (offset == 1 ? nil : offset - 3)
|
1137
|
+
value = value[2..-1]
|
1138
|
+
=begin
|
1139
|
+
index = value =~ /^[A-Z]*:/ ? $~[0].length - 1 : nil
|
1140
|
+
unless ignore == 1 and offset == index
|
1141
|
+
warn 'something wrong with subject hack'
|
1142
|
+
$x = [ignore, offset, value]
|
1143
|
+
require 'irb'
|
1144
|
+
IRB.start
|
1145
|
+
exit
|
1146
|
+
end
|
1147
|
+
=end
|
1148
|
+
=begin
|
1149
|
+
new idea:
|
1150
|
+
|
1151
|
+
making sense of the \001\00[156] i've seen prefixing subject. i think its to do with the placement
|
1152
|
+
of the ':', or the ' '. And perhaps an optimization to do with thread topic, and ignoring the prefixes
|
1153
|
+
added by mailers. thread topic is equal to subject with all that crap removed.
|
1154
|
+
|
1155
|
+
can test by creating some mails with bizarre subjects.
|
1156
|
+
|
1157
|
+
subject="\001\005RE: blah blah"
|
1158
|
+
subject="\001\001blah blah"
|
1159
|
+
subject="\001\032Out of Office AutoReply: blah blah"
|
1160
|
+
subject="\001\020Undeliverable: blah blah"
|
1161
|
+
|
1162
|
+
looks like it
|
1163
|
+
|
1164
|
+
=end
|
1165
|
+
|
1166
|
+
# now what i think, is that perhaps, value[offset..-1] ...
|
1167
|
+
# or something like that should be stored as a special tag. ie, do a double yield
|
1168
|
+
# for this case. probably PR_CONVERSATION_TOPIC, in which case i'd write instead:
|
1169
|
+
# yield [PR_SUBJECT, ref_type, value]
|
1170
|
+
# yield [PR_CONVERSATION_TOPIC, ref_type, value[offset..-1]
|
1171
|
+
# next # to skip the yield.
|
1172
|
+
end

        # special handling for embedded objects
        # used for attach_data for attached messages. in which case attach_method should == 5,
        # for embedded object.
        if type == PT_OBJECT and value
          value = value.read if value.respond_to?(:read)
          id2, unknown = value.unpack 'V2'
          io = RangesIOID2.new desc.pst, id2, idx2

          # hacky
          desc2 = OpenStruct.new(:desc => io, :pst => desc.pst, :list_index => desc.list_index, :children => [])
          # put nil instead of desc.list_index, otherwise the attachment is attached to itself ad infinitum.
          # should try and fix that FIXME
          # this shouldn't be done always. for an attached message, yes, but for an attached
          # meta file, for example, it shouldn't. difference between embedded_ole vs embedded_msg
          # really.
          # note that in the case where it's an embedded ole, you actually get a regular serialized ole
          # object, so i need to create an ole storage object on a rangesioidxchain!
          # eg:
=begin
att.props.display_name # => "Picture (Metafile)"
io = att.props.attach_data
io.read(32).unpack('H*') # => ["d0cf11e0a1b11ae100000.... note the docfile signature.
# plug some missing rangesio holes:
def io.rewind; seek 0; end
def io.flush; raise IOError; end
ole = Ole::Storage.open io
puts ole.root.to_tree

- #<Dirent:"Root Entry">
  |- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000...">
  |- #<Dirent:"CONTENTS" size=65696 data="\327\315\306\232\000...">
  \- #<Dirent:"\003MailStream" size=12 data="\001\000\000\000[...">
=end
          # until properly fixed, i have disabled this code here, so this will break
          # nested messages temporarily.
          #value = Item.new desc2, RawPropertyStore.new(desc2).to_a
          #desc2.list_index = nil
          value = io
        end
      # this is PT_MV_STRING8, i guess.
      # should probably have the 0x1000 flag, and do the or-ring.
      # example of 0x1102 is PR_OUTLOOK_2003_ENTRYIDS. less sure about that one.
      when 0x101e, 0x1102
        # example data:
        # 0x802b "\003\000\000\000\020\000\000\000\030\000\000\000#\000\000\000BusinessCompetitionFavorites"
        # this 0x802b would be an extended attribute for categories / keywords.
        value = get_data_indirect_io(value).read unless String === value
        num = value.unpack('V')[0]
        offsets = value[4, 4 * num].unpack("V#{num}")
        value = (offsets + [value.length]).to_enum(:each_cons, 2).map { |from, to| value[from...to] }
        value.map! { |str| StringIO.new str } if type == 0x1102
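        # a small worked example of the slicing above (an illustration only, based on the sample data):
        #
        #   value = "\003\000\000\000\020\000\000\000\030\000\000\000#\000\000\000BusinessCompetitionFavorites"
        #   num = value.unpack('V')[0]                    # => 3
        #   offsets = value[4, 4 * num].unpack("V#{num}") # => [16, 24, 35]
        #   (offsets + [value.length]).to_enum(:each_cons, 2).map { |from, to| value[from...to] }
        #   # => ["Business", "Competition", "Favorites"]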
      else
        name = Mapi::Types::DATA[type].first rescue nil
        warn '0x%04x %p' % [key, get_data_indirect_io(value).read]
        raise NotImplementedError, 'unsupported mapi property type - 0x%04x (%p)' % [type, name]
      end
      [key, type, value]
    end
  end

=begin
* recipients:

affects: ["0x200764", "0x2011c4", "0x201b24", "0x201b44", "0x201ba4", "0x201c24", "0x201cc4", "0x202504"]

after adding the rawpropertystoretable fix, all except the second parse properly, and satisfy:

item.props.display_to == item.recipients.map { |r| r.props.display_name if r.props.recipient_type == 1 }.compact * '; '

only the second still has a problem

#[#<struct Pst::Desc desc_id=0x2011c4, idx_id=0x397c, idx2_id=0x398a, parent_desc_id=0x8082>]

think this is related to a multi block #data3. ie, when you use @x * rec_size, and it
goes > 8190, or thereabouts, then it stuffs up. probably there is header gunk, or something,
similar to when #data is multi block.

same problem affects the attachment table in test4.

fixed that issue. round data3 ranges to rec_size.

fix other issue with attached objects.

all recipients and attachments in test2 are fine.

only remaining issue is test4 recipients of 200044. strange.
=end

  # RawPropertyStore is used to iterate through the properties of an item, or the auxiliary
  # data for an attachment. its just a parser for the way the properties are serialized, when the
  # properties don't have to conform to a column structure.
  #
  # structure of this chunk of data is often
  # header, property keys, data values, and then indexes.
  # the property keys have the value in them. the value can be the actual value if its a short type,
  # otherwise you look up the value in the indices, where you get the offsets to use in the
  # main data body. due to the indirect thing though, any of these parts could actually come
  # from a separate stream.
  class RawPropertyStore < BlockParser
    include Enumerable

    attr_reader :length
    def initialize desc
      super
      raise FormatError, "expected type 1 - got #{@type}" unless @type == 1

      # the way that offset works, data1 may be a subset of buf, or something from id2. if its from buf,
      # it will be offset based on index_offset and offset. so it could be some random chunk of data anywhere
      # in the thing.
      header_data = get_data_indirect @offset1
      raise FormatError if header_data.length < 8
      signature, offset2 = header_data.unpack 'V2'
      #p [@type, signature]
      raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != 0x000602b5
      # this is actually a big chunk of tag tuples.
      @index_data = get_data_indirect offset2
      @length = @index_data.length / 8
    end

    # iterate through the property tuples
    def each
      length.times do |i|
        key, type, value = handle_indirect_values(*@index_data[8 * i, 8].unpack('vvV'))
        yield key, type, value
      end
    end
  end
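  # usage sketch (assuming you already have a Pst::Desc in hand, eg from Pst#desc_from_id):
  #
  #   store = RawPropertyStore.new desc
  #   store.each do |key, type, value|
  #     puts 'prop 0x%04x (type 0x%04x) = %p' % [key, type, value]
  #   end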

  # RawPropertyStoreTable is kind of like a database table.
  # it has a fixed set of columns.
  # #[] is kind of like getting a row from the table.
  # those rows are currently encapsulated by Row, which has #each like
  # RawPropertyStore.
  # only used for the recipients array, and the attachments array. completely lazy, doesn't
  # load any of the properties upon creation.
  class RawPropertyStoreTable < BlockParser
    class Column < Struct.new(:ref_type, :type, :ind2_off, :size, :slot)
      def initialize data
        super(*data.unpack('v3CC'))
      end

      def nice_type_name
        Mapi::Types::DATA[ref_type].first[/_(.*)/, 1].downcase rescue '0x%04x' % ref_type
      end

      def nice_prop_name
        Mapi::PropertyStore::TAGS['%04x' % type].first[/_(.*)/, 1].downcase rescue '0x%04x' % type
      end

      def inspect
        "#<#{self.class} name=#{nice_prop_name.inspect}, type=#{nice_type_name.inspect}>"
      end
    end

    include Enumerable

    attr_reader :length, :index_data, :data2, :data3, :rec_size
    def initialize desc
      super
      raise FormatError, "expected type 2 - got #{@type}" unless @type == 2

      header_data = get_data_indirect @offset1
      # seven_c_blk
      # often: u1 == u2 and u3 == u2 + 2, then rec_size == u3 + 4. wtf
      seven_c, @num_list, u1, u2, u3, @rec_size, b_five_offset,
        ind2_offset, u7, u8 = header_data[0, 22].unpack('CCv4V2v2')
      @index_data = header_data[22..-1]

      raise FormatError if @num_list != schema.length or seven_c != 0x7c
      # another check
      min_size = schema.inject(0) { |total, col| total + col.size }
      # seem to have at max, 8 padding bytes on the end of the record. not sure if it means
      # anything. maybe its just space that hasn't been reclaimed due to columns being
      # removed or something. probably should just check lower bound.
      range = (min_size..min_size + 8)
      warn "rec_size seems wrong (#{range} !=== #{rec_size})" unless range === rec_size

      header_data2 = get_data_indirect b_five_offset
      raise FormatError if header_data2.length < 8
      signature, offset2 = header_data2.unpack 'V2'
      # ??? seems a bit iffy
      # there's probably more to the differences than this, and the data2 difference below
      expect = desc.pst.header.version_2003? ? 0x000404b5 : 0x000204b5
      raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != expect

      # this holds all the row data
      # handle multiple block issue.
      @data3_io = get_data_indirect_io ind2_offset
      if RangesIOIdxChain === @data3_io
        @data3_idxs =
        # modify ranges
        ranges = @data3_io.ranges.map { |offset, size| [offset, size / @rec_size * @rec_size] }
        @data3_io.instance_variable_set :@ranges, ranges
      end
      @data3 = @data3_io.read

      # there must be something to the data in data2. i think data2 is the array of objects essentially.
      # currently its only used to imply a length
      # actually, at size 6, its just some auxiliary data. i'm thinking either Vv/vV, for 97, and something
      # wider for 03. the second value is just the index (0...length), and the first value is
      # some kind of offset i expect. actually, they were all id2 values, in another case.
      # so maybe they're get_data_indirect values too?
      # actually, it turned out they were identical to the PR_ATTACHMENT_ID2 values...
      # id2_values = ie, data2.unpack('v*').to_enum(:each_slice, 3).transpose[0]
      # table[i].assoc(PR_ATTACHMENT_ID2).last == id2_values[i], for all i.
      @data2 = get_data_indirect(offset2) rescue nil
      #if data2
      #  @length = (data2.length / 6.0).ceil
      #else
      # the above / 6, may have been ok for 97 files, but the new 0x0004 style block must have
      # different size records... just use this instead:
      # hmmm, actually, we can still figure it out:
      @length = @data3.length / @rec_size
      #end

      # lets try and at least use data2 for a warning for now
      if data2
        data2_rec_size = desc.pst.header.version_2003? ? 8 : 6
        warn 'something seems wrong with data3' unless @length == (data2.length / data2_rec_size)
      end
    end

    def schema
      @schema ||= index_data.scan(/.{8}/m).map { |data| Column.new data }
    end

    def [] idx
      # handle funky rounding
      Row.new self, idx * @rec_size
    end

    def each
      length.times { |i| yield self[i] }
    end

    class Row
      include Enumerable

      def initialize array_parser, x
        @array_parser, @x = array_parser, x
      end

      # iterate through the property tuples
      def each
        (@array_parser.index_data.length / 8).times do |i|
          ref_type, type, ind2_off, size, slot = @array_parser.index_data[8 * i, 8].unpack 'v3CC'
          # check this rescue too
          value = @array_parser.data3[@x + ind2_off, size]
          # if INDIRECT_TYPES.include? ref_type
          if size <= 4
            value = value.unpack('V')[0]
          end
          #p ['0x%04x' % ref_type, '0x%04x' % type, (Msg::Properties::MAPITAGS['%04x' % type].first[/^.._(.*)/, 1].downcase rescue nil),
          #  value_orig, value, (get_data_indirect(value_orig.unpack('V')[0]) rescue nil), size, ind2_off, slot]
          key, type, value = @array_parser.handle_indirect_values type, ref_type, value
          yield key, type, value
        end
      end
    end
  end
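  # usage sketch (this table is what AttachmentTable / RecipientTable below wrap - assumes a
  # desc-like object whose idx2 chain points at a type 2 block):
  #
  #   table = RawPropertyStoreTable.new desc2
  #   table.length  # number of rows, ie recipients or attachments
  #   table[0].to_a # first row as [key, type, value] tuples, decoded on demand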

  class AttachmentTable < BlockParser
    # a "fake" MAPI property name for this constant. if you get a mapi property with
    # this value, it is the id2 value to use to get attachment data.
    PR_ATTACHMENT_ID2 = 0x67f2

    attr_reader :desc, :table
    def initialize desc
      @desc = desc
      # no super, we only actually want BlockParser2#idx2
      @table = nil
      return unless desc.list_index
      return unless idx = idx2[ID2_ATTACHMENTS]
      # FIXME make a fake desc.
      @desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
      @table = RawPropertyStoreTable.new @desc2
    end

    def to_a
      return [] if !table
      table.map do |attachment|
        attachment = attachment.to_a
        #p attachment
        # potentially merge with yet more properties
        # this still seems pretty broken - especially the property overlap
        if attachment_id2 = attachment.assoc(PR_ATTACHMENT_ID2)
          #p attachment_id2.last
          #p idx2[attachment_id2.last]
          @desc2.desc = idx2[attachment_id2.last]
          RawPropertyStore.new(@desc2).each do |a, b, c|
            record = attachment.assoc a
            attachment << record = [] unless record
            record.replace [a, b, c]
          end
        end
        attachment
      end
    end
  end

  # there is no equivalent to this in libpst. ID2_RECIPIENTS was just guessed given the above
  # AttachmentTable.
  class RecipientTable < BlockParser
    attr_reader :desc, :table
    def initialize desc
      @desc = desc
      # no super, we only actually want BlockParser2#idx2
      @table = nil
      return unless desc.list_index
      return unless idx = idx2[ID2_RECIPIENTS]
      # FIXME make a fake desc.
      desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
      @table = RawPropertyStoreTable.new desc2
    end

    def to_a
      return [] if !table
      table.map { |x| x.to_a }
    end
  end

  #
  # higher level item code. wraps up the raw properties above, and gives nice
  # objects to work with. handles item relationships too.
  # ----------------------------------------------------------------------------
  #

  def self.make_property_set property_list
    hash = property_list.inject({}) do |hash, (key, type, value)|
      hash.update PropertySet::Key.new(key) => value
    end
    PropertySet.new hash
  end
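  # eg (a small illustration - 0x0037 is PR_SUBJECT, so the friendly accessor is #subject):
  #
  #   props = Pst.make_property_set [[0x0037, 0x001e, 'hello world']]
  #   props.subject # => "hello world"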

  class Attachment < Mapi::Attachment
    def initialize list
      super Pst.make_property_set(list)

      @embedded_msg = props.attach_data if Item === props.attach_data
    end
  end

  class Recipient < Mapi::Recipient
    def initialize list
      super Pst.make_property_set(list)
    end
  end

  class Item < Mapi::Message
    class EntryID < Struct.new(:u1, :entry_id, :id)
      UNPACK_STR = 'VA16V'

      def initialize data
        data = data.unpack(UNPACK_STR) if String === data
        super(*data)
      end
    end
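    # ie a 24 byte blob - a 4 byte u1, a 16 byte entry_id chunk, then the 4 byte id that
    # desc_from_id wants. rough example (made-up bytes, only the layout matters):
    #
    #   eid = EntryID.new [0, 0.chr * 16, 0x2011c4].pack('VA16V')
    #   eid.id # => 0x2011c4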

    include RecursivelyEnumerable

    attr_accessor :type, :parent

    def initialize desc, list, type=nil
      @desc = desc
      super Pst.make_property_set(list)

      # this is kind of weird, but the ids of the special folders are stored in a hash
      # when the root item is loaded
      if ipm_wastebasket_entryid
        desc.pst.special_folder_ids[ipm_wastebasket_entryid] = :wastebasket
      end

      if finder_entryid
        desc.pst.special_folder_ids[finder_entryid] = :finder
      end

      # and then here, those are used, along with a crappy heuristic to determine if we are an
      # item
=begin
i think the low bits of the desc_id can give some info on the type.

it seems that 0x4 is for regular messages (and maybe contacts etc)
0x2 is for folders, and 0x8 is for special things like rules etc, that aren't visible.
=end
      unless type
        type = props.valid_folder_mask || ipm_subtree_entryid || props.content_count || props.subfolders ? :folder : :message
        if type == :folder
          type = desc.pst.special_folder_ids[desc.desc_id] || type
        end
      end

      @type = type
    end

    def each_child
      id = ipm_subtree_entryid
      if id
        root = @desc.pst.desc_from_id id
        raise "couldn't find root" unless root
        raise 'both kinds of children' unless @desc.children.empty?
        children = root.children
        # lets look up the other ids we have.
        # typically the wastebasket one "deleted items" is in the children already, but
        # the search folder isn't.
        extras = [ipm_wastebasket_entryid, finder_entryid].compact.map do |id|
          root = @desc.pst.desc_from_id id
          warn "couldn't find root for id #{id}" unless root
          root
        end.compact
        # i do this instead of union, so as not to mess with the order of the
        # existing children.
        children += (extras - children)
        children
      else
        @desc.children
      end.each do |desc|
        item = @desc.pst.pst_parse_item(desc)
        item.parent = self
        yield item
      end
    end

    def path
      parents, item = [], self
      parents.unshift item while item = item.parent
      # remove root
      parents.shift
      parents.map { |item| item.props.display_name or raise 'unable to construct path' } * '/'
    end
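    # eg for a nested folder (illustrative names only - the dummy root is dropped):
    #
    #   folder.path # => "Inbox/Lists/ruby-talk"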

    def children
      to_enum(:each_child).to_a
    end

    # these are still around because they do different stuff

    # Top of Personal Folder Record
    def ipm_subtree_entryid
      @ipm_subtree_entryid ||= EntryID.new(props.ipm_subtree_entryid.read).id rescue nil
    end

    # Deleted Items Folder Record
    def ipm_wastebasket_entryid
      @ipm_wastebasket_entryid ||= EntryID.new(props.ipm_wastebasket_entryid.read).id rescue nil
    end

    # Search Root Record
    def finder_entryid
      @finder_entryid ||= EntryID.new(props.finder_entryid.read).id rescue nil
    end

    # all these have been replaced with the method_missing below
=begin
    # States which folders are valid for this message store
    #def valid_folder_mask
    #  props[0x35df]
    #end

    # Number of emails stored in a folder
    def content_count
      props[0x3602]
    end

    # Has children
    def subfolders
      props[0x360a]
    end
=end

    # i think i will change these, so they can inherit the laziness from RawPropertyStoreTable.
    # so if you want the last attachment, you can get it without creating the others perhaps.
    # it just has to handle the no table at all case a bit more gracefully.

    def attachments
      @attachments ||= AttachmentTable.new(@desc).to_a.map { |list| Attachment.new list }
    end

    def recipients
      #[]
      @recipients ||= RecipientTable.new(@desc).to_a.map { |list| Recipient.new list }
    end

    def each_recursive(&block)
      #p :self => self
      children.each do |child|
        #p :child => child
        block[child]
        child.each_recursive(&block)
      end
    end

    def inspect
      attrs = %w[display_name subject sender_name subfolders]
      # attrs = %w[display_name valid_folder_mask ipm_wastebasket_entryid finder_entryid content_count subfolders]
      str = attrs.map { |a| b = props.send a; " #{a}=#{b.inspect}" if b }.compact * ','

      type_s = type == :message ? 'Message' : type == :folder ? 'Folder' : type.to_s.capitalize + 'Folder'
      str2 = 'desc_id=0x%x' % @desc.desc_id

      !str.empty? ? "#<Pst::#{type_s} #{str2}#{str}>" : "#<Pst::#{type_s} #{str2} props=#{props.inspect}>" #\n" + props.transport_message_headers + ">"
    end
  end

  # corresponds to
  # * _pst_parse_item
  def pst_parse_item desc
    Item.new desc, RawPropertyStore.new(desc).to_a
  end

  #
  # other random code
  # ----------------------------------------------------------------------------
  #

  def dump_debug_info
    puts "* pst header"
    p header

=begin
Looking at the output of this, for blank-o1997.pst, i see this part:
...
- (26624,516) desc block data (overlap of 4 bytes)
- (27136,516) desc block data (gap of 508 bytes)
- (28160,516) desc block data (gap of 2620 bytes)
...

which confirms my belief that the block size for idx and desc is more likely 512
=end
    if 0 + 0 == 0
      puts '* file range usage'
      file_ranges =
        # these 3 things, should account for most of the data in the file.
        [[0, Header::SIZE, 'pst file header']] +
        @idx_offsets.map { |offset| [offset, Index::BLOCK_SIZE, 'idx block data'] } +
        @desc_offsets.map { |offset| [offset, Desc::BLOCK_SIZE, 'desc block data'] } +
        @idx.map { |idx| [idx.offset, idx.size, 'idx id=0x%x (%s)' % [idx.id, idx.type]] }
      (file_ranges.sort_by { |idx| idx.first } + [nil]).to_enum(:each_cons, 2).each do |(offset, size, name), next_record|
        # i think there is a padding of the size out to 64 bytes
        # which is equivalent to padding out the final offset, because i think the offset is
        # similarly oriented
        pad_amount = 64
        warn 'i am wrong about the offset padding' if offset % pad_amount != 0
        # so, assuming i'm not wrong about that, then we can calculate how much padding is needed.
        pad = pad_amount - (size % pad_amount)
        pad = 0 if pad == pad_amount
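        # eg for a 516 byte range: 516 % 64 == 4, so pad == 60, and the range is treated as
        # occupying 576 bytes when looking for a gap or overlap below.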
        gap = next_record ? next_record.first - (offset + size + pad) : 0
        extra = case gap <=> 0
          when -1; ["overlap of #{gap.abs} bytes"]
          when 0;  []
          when +1; ["gap of #{gap} bytes"]
        end
        # how about we check that padding
        @io.pos = offset + size
        pad_bytes = @io.read(pad)
        extra += ["padding not all zero"] unless pad_bytes == 0.chr * pad
        puts "- #{offset}:#{size}+#{pad} #{name.inspect}" + (extra.empty? ? '' : ' [' + extra * ', ' + ']')
      end
    end

    # i think the idea of the idx, and indeed the idx2, is just to be able to
    # refer to data indirectly, which means it can get moved around, and you just update
    # the idx table. it is simply a list of file offsets and sizes.
    # not sure i get how id2 plays into it though....
    # the sizes seem to be all even. is that a co-incidence? and the ids are all even. that
    # seems to be related to something else (see the (id & 2) == 1 stuff)
    puts '* idx entries'
    @idx.each { |idx| puts "- #{idx.inspect}" }

    # if you look at the desc tree, you notice a few things:
    # 1. there is a desc that seems to be the parent of all the folders, messages etc.
    #    it is the one whose parent is itself.
    #    one of its children is referenced as the subtree_entryid of the first desc item,
    #    the root.
    # 2. typically only 2 types of desc records have idx2_id != 0. messages themselves,
    #    and the desc with id = 0x61 - the xattrib container. everything else uses the
    #    regular ids to find its data. i think it should be reframed as small blocks and
    #    big blocks, but i'll look into it more.
    #
    # idx_id and idx2_id are for getting to the data. desc_id and parent_desc_id just define
    # the parent <-> child relationship, and the desc_ids are how the items are referred to in
    # entryids.
    # note that these aren't unique! eg for 0, 4 etc. i expect these'd never change, as the ids
    # are stored in entryids. whereas the idx and idx2 could be a bit more volatile.
    puts '* desc tree'
    # make a dummy root hold everything just for convenience
    root = Desc.new ''
    def root.inspect; "#<Pst::Root>"; end
    root.children.replace @orphans
    # this still loads the whole thing as a string for gsub. should use a direct output io
    # version.
    puts root.to_tree.gsub(/, (parent_desc_id|idx2_id)=0x0(?!\d)/, '')

    # this is fairly easy to understand, its just an attempt to display the pst items in a tree form
    # which resembles what you'd see in outlook.
    puts '* item tree'
    # now streams directly
    root_item.to_tree STDOUT
  end

  def root_desc
    @desc.first
  end

  def root_item
    item = pst_parse_item root_desc
    item.type = :root
    item
  end

  def root
    root_item
  end

  # depth first search of all items
  include Enumerable

  def each(&block)
    root = self.root
    block[root]
    root.each_recursive(&block)
  end
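  # so a pst behaves as a flat collection of items, eg (a sketch, assuming Pst.new takes the
  # already-opened pst io):
  #
  #   pst = Pst.new open('test.pst', 'rb')
  #   folders = pst.select { |item| item.type == :folder }
  #   folders.map { |folder| folder.path }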

  def name
    @name ||= root_item.props.display_name
  end

  def inspect
    "#<Pst name=#{name.inspect} io=#{io.inspect}>"
  end
end
end