libis-mapi 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/COPYING +20 -0
- data/ChangeLog +108 -0
- data/Home.md +133 -0
- data/Rakefile +56 -0
- data/bin/mapitool +204 -0
- data/data/mapitags.yaml +4168 -0
- data/data/named_map.yaml +114 -0
- data/data/types.yaml +15 -0
- data/lib/mapi/base.rb +104 -0
- data/lib/mapi/convert/contact.rb +142 -0
- data/lib/mapi/convert/note-mime.rb +288 -0
- data/lib/mapi/convert/note-tmail.rb +293 -0
- data/lib/mapi/convert.rb +69 -0
- data/lib/mapi/helper.rb +46 -0
- data/lib/mapi/mime.rb +227 -0
- data/lib/mapi/msg.rb +516 -0
- data/lib/mapi/property_set.rb +329 -0
- data/lib/mapi/pst.rb +1995 -0
- data/lib/mapi/rtf.rb +297 -0
- data/lib/mapi/types.rb +51 -0
- data/lib/mapi/version.rb +3 -0
- data/lib/mapi.rb +5 -0
- data/ruby-msg.gemspec +26 -0
- metadata +102 -0
data/lib/mapi/pst.rb
ADDED
@@ -0,0 +1,1995 @@
|
|
1
|
+
#
|
2
|
+
# = Introduction
|
3
|
+
#
|
4
|
+
# This file is mostly an attempt to port libpst to ruby, and simplify it in the process. It
|
5
|
+
# will leverage much of the existing MAPI => MIME conversion developed for Msg files, and as
|
6
|
+
# such is purely concerned with the file structure details.
|
7
|
+
#
|
8
|
+
# = TODO
|
9
|
+
#
|
10
|
+
# 1. solve recipient table problem (test4).
|
11
|
+
# this is done. turns out it was due to id2 clashes. find better solution
|
12
|
+
# 2. check parse consistency. an initial conversion of a 30M file to pst, shows
|
13
|
+
# a number of messages converting badly. compare with libpst too.
|
14
|
+
# 3. xattribs
|
15
|
+
# 4. generalise the Mapi stuff better
|
16
|
+
# 5. refactor index load
|
17
|
+
# 6. msg serialization?
|
18
|
+
#
|
19
|
+
|
20
|
+
=begin
|
21
|
+
|
22
|
+
quick plan for cleanup.
|
23
|
+
|
24
|
+
have working tests for 97 and 03 file formats, so safe.
|
25
|
+
|
26
|
+
want to fix up:
|
27
|
+
|
28
|
+
64 bit unpacks scattered around. its ugly. not sure how best to handle it, but am slightly tempted
|
29
|
+
to override String#unpack to support a 64 bit little endian unpack (like L vs N/V, for Q). one way or
|
30
|
+
another need to fix it. Could really slow everything else down if its parsing the unpack strings twice,
|
31
|
+
once in ruby, for every single unpack i do :/
|
32
|
+
|
33
|
+
the index loading process, and the lack of shared code between normal vs 64 bit variants, and Index vs Desc.
|
34
|
+
should be able to reduce code by factor of 4. also think I should move load code into the class too. then
|
35
|
+
maybe have something like:
|
36
|
+
|
37
|
+
class Header
|
38
|
+
def index_class
|
39
|
+
version_2003 ? Index64 : Index
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def load_idx
|
44
|
+
header.index_class.load_index
|
45
|
+
end
|
46
|
+
|
47
|
+
OR
|
48
|
+
|
49
|
+
def initialize
|
50
|
+
@header = ...
|
51
|
+
extend @header.index_class::Load
|
52
|
+
load_idx
|
53
|
+
end
|
54
|
+
|
55
|
+
need to think about the role of the mapi code, and Pst::Item etc, but that layer can come later.
|
56
|
+
|
57
|
+
=end
|
58
|
+
|
59
|
+
require 'mapi'
|
60
|
+
require 'enumerator'
|
61
|
+
require 'ostruct'
|
62
|
+
require 'ole/ranges_io'
|
63
|
+
require 'mapi/helper'
|
64
|
+
|
65
|
+
module Mapi
|
66
|
+
# Read Outlook's pst file
|
67
|
+
class Pst
|
68
|
+
# Raised when pst data does not have the structure this reader expects
# (bad signature, unhandled index or encryption type, inconsistent sizes).
class FormatError < StandardError; end
|
70
|
+
|
71
|
+
# unfortunately there is no Q analogue which is little endian only.
|
72
|
+
# this translates T as an unsigned quad word, little endian byte order, to
|
73
|
+
# not pollute the rest of the code.
|
74
|
+
#
|
75
|
+
# didn't want to override String#unpack, cause its too hacky, and incomplete.
|
76
|
+
#
|
77
|
+
# @param str [String]
|
78
|
+
# @param unpack_spec [String]
|
79
|
+
# @return [Array]
|
80
|
+
# @private
|
81
|
+
def self.unpack str, unpack_spec
  # Specs without the pseudo-directive `T` go straight to String#unpack.
  return str.unpack(unpack_spec) unless unpack_spec['T']

  # `T` stands for an unsigned 64 bit little endian integer. String#unpack has
  # spelled that `Q<` since Ruby 1.9.3, so the directive is translated and the
  # native implementation deals with counts, `*`, skips (`x`) and short input
  # (a `T` without a full 8 bytes of data still comes back as nil).
  #
  # The previous hand-rolled version split each T into two `V`s and merged
  # them afterwards; it miscounted result slots for `*` counts and for
  # directives that produce no element, e.g. "T*" or "x4T".
  @unpack_cache ||= {}
  native_spec = (@unpack_cache[unpack_spec] ||= unpack_spec.gsub('T', 'Q<'))
  str.unpack(native_spec)
end
|
108
|
+
|
109
|
+
# @param str [String]
|
110
|
+
# @param size [Integer]
|
111
|
+
# @param count [Integer]
|
112
|
+
# @return [Array<String>]
|
113
|
+
# @private
|
114
|
+
def self.split_per str, size, count
  # A negative count means "as many whole records as fit into str".
  wanted = count < 0 ? str.length / size : count
  # Slices past the end of str come back short or nil, exactly as String#[] gives them.
  (0...wanted).map { |index| str[index * size, size] }
end
|
120
|
+
|
121
|
+
#
|
122
|
+
# this is the header and encryption encapsulation code
|
123
|
+
# ----------------------------------------------------------------------------
|
124
|
+
#
|
125
|
+
|
126
|
+
# class which encapsulates the pst header
|
127
|
+
#
|
128
|
+
# @private
|
129
|
+
class Header
  # number of header bytes Pst#initialize reads and hands to Header.new
  SIZE = 512
  # expected value of the first four bytes read big endian (the ASCII text "!BDN")
  MAGIC = 0x2142444e

  # these are the constants defined in libpst.c, that
  # are referenced in pst_open()
  INDEX_TYPE_OFFSET = 0x0A
  FILE_SIZE_POINTER = 0xA8
  FILE_SIZE_POINTER_64 = 0xB8
  SECOND_POINTER = 0xBC
  INDEX_POINTER = 0xC4
  SECOND_POINTER_64 = 0xE0
  INDEX_POINTER_64 = 0xF0
  ENC_OFFSET = 0x1CD

  # @return [Integer]
  attr_reader :magic
  # @return [Integer]
  attr_reader :index_type
  # @return [Integer]
  attr_reader :encrypt_type
  # @return [Integer]
  attr_reader :size
  # @return [Integer]
  attr_reader :block_btree_count
  # @return [Integer]
  attr_reader :block_btree
  # @return [Integer]
  attr_reader :node_btree_count
  # @return [Integer]
  attr_reader :node_btree
  # @return [Integer, nil] 1997 or 2003, nil for an unknown index type
  attr_reader :version

  # Parses the raw header bytes and validates them.
  #
  # @param data [String] the leading bytes of the pst file
  # @raise [FormatError] if the header is truncated or fails {#validate!}
  def initialize data
    unless data && data.length > INDEX_TYPE_OFFSET
      raise FormatError, "pst header is truncated (#{data ? data.length : 0} bytes)"
    end

    @magic = data.unpack('N')[0]
    @index_type = data[INDEX_TYPE_OFFSET].ord
    @version = {0x0e => 1997, 0x17 => 2003, 0x24 => 2003}[@index_type]

    # one past the last header byte the selected layout reads below; without
    # this check a short header surfaced as NoMethodError on nil
    required = version_2003? ? INDEX_POINTER_64 + 8 : ENC_OFFSET + 1
    if data.length < required
      raise FormatError, "pst header is truncated (#{data.length} of #{required} bytes)"
    end

    if version_2003?
      # don't know?
      # >> data1.unpack('V*').zip(data2.unpack('V*')).enum_with_index.select { |(c, d), i| c != d and not [46, 56, 60].include?(i) }.select { |(a, b), i| b == 0 }.map { |(a, b), i| [a / 256, i] }
      # [8, 76], [32768, 84], [128, 89]
      # >> data1.unpack('C*').zip(data2.unpack('C*')).enum_with_index.select { |(c, d), i| c != d and not [184..187, 224..227, 240..243].any? { |r| r === i } }.select { |(a, b), i| b == 0 and ((Math.log(a) / Math.log(2)) % 1) < 0.0001 }
      # [[[2, 0], 61], [[2, 0], 76], [[2, 0], 195], [[2, 0], 257], [[8, 0], 305], [[128, 0], 338], [[128, 0], 357]]
      # i have only 2 psts to base this guess on, so i can't really come up with anything that looks reasonable yet. not sure what the offset is. unfortunately there is so much in the header
      # that isn't understood...
      @encrypt_type = 1

      @node_btree_count, @node_btree = Pst.unpack(data[SECOND_POINTER_64 - 8, 16], "T2")
      @block_btree_count, @block_btree = Pst.unpack(data[INDEX_POINTER_64 - 8, 16], "T2")

      # the file size is a 64 bit field in this layout. reading only its low
      # 32 bits made every file of 4GB or more fail the header/io size
      # comparison done by the caller.
      @size = Pst.unpack(data[FILE_SIZE_POINTER_64, 8], "T")[0]
    else
      @encrypt_type = data[ENC_OFFSET].ord

      @node_btree_count, @node_btree = data[SECOND_POINTER - 4, 8].unpack('V2')
      @block_btree_count, @block_btree = data[INDEX_POINTER - 4, 8].unpack('V2')

      @size = data[FILE_SIZE_POINTER, 4].unpack('V')[0]
    end

    validate!
  end

  # return `true` if pst is an Unicode version. Unicode version also uses 64-bit file pointer.
  # otherwise return `false` where pst is an ANSI version. ANSI version uses 32-bit file pointer.
  #
  # @return [Boolean]
  def version_2003?
    version == 2003
  end

  # @return [Boolean] true unless the encryption type is 0
  def encrypted?
    encrypt_type != 0
  end

  # Checks signature, index type and encryption type.
  #
  # @raise [FormatError] on the first value that is not handled
  def validate!
    raise FormatError, "bad signature on pst file (#{'0x%x' % magic})" unless magic == MAGIC
    raise FormatError, "only index types 0x0e, 0x17 and 0x24 are handled (#{'0x%x' % index_type})" unless [0x0e, 0x17, 0x24].include?(index_type)
    raise FormatError, "only encrytion types 0 and 1 are handled (#{encrypt_type.inspect})" unless [0, 1].include?(encrypt_type)
  end
end
|
212
|
+
|
213
|
+
# compressible encryption! :D
|
214
|
+
#
|
215
|
+
# simple substitution. see libpst.c
|
216
|
+
# maybe test switch to using a String#tr!
|
217
|
+
#
|
218
|
+
# @private
|
219
|
+
class CompressibleEncryption
  # Byte substitution table: an encrypted byte value is the index, the
  # decrypted byte value is the entry.
  DECRYPT_TABLE = [
    0x47, 0xf1, 0xb4, 0xe6, 0x0b, 0x6a, 0x72, 0x48,
    0x85, 0x4e, 0x9e, 0xeb, 0xe2, 0xf8, 0x94, 0x53, # 0x0f
    0xe0, 0xbb, 0xa0, 0x02, 0xe8, 0x5a, 0x09, 0xab,
    0xdb, 0xe3, 0xba, 0xc6, 0x7c, 0xc3, 0x10, 0xdd, # 0x1f
    0x39, 0x05, 0x96, 0x30, 0xf5, 0x37, 0x60, 0x82,
    0x8c, 0xc9, 0x13, 0x4a, 0x6b, 0x1d, 0xf3, 0xfb, # 0x2f
    0x8f, 0x26, 0x97, 0xca, 0x91, 0x17, 0x01, 0xc4,
    0x32, 0x2d, 0x6e, 0x31, 0x95, 0xff, 0xd9, 0x23, # 0x3f
    0xd1, 0x00, 0x5e, 0x79, 0xdc, 0x44, 0x3b, 0x1a,
    0x28, 0xc5, 0x61, 0x57, 0x20, 0x90, 0x3d, 0x83, # 0x4f
    0xb9, 0x43, 0xbe, 0x67, 0xd2, 0x46, 0x42, 0x76,
    0xc0, 0x6d, 0x5b, 0x7e, 0xb2, 0x0f, 0x16, 0x29, # 0x5f
    0x3c, 0xa9, 0x03, 0x54, 0x0d, 0xda, 0x5d, 0xdf,
    0xf6, 0xb7, 0xc7, 0x62, 0xcd, 0x8d, 0x06, 0xd3, # 0x6f
    0x69, 0x5c, 0x86, 0xd6, 0x14, 0xf7, 0xa5, 0x66,
    0x75, 0xac, 0xb1, 0xe9, 0x45, 0x21, 0x70, 0x0c, # 0x7f
    0x87, 0x9f, 0x74, 0xa4, 0x22, 0x4c, 0x6f, 0xbf,
    0x1f, 0x56, 0xaa, 0x2e, 0xb3, 0x78, 0x33, 0x50, # 0x8f
    0xb0, 0xa3, 0x92, 0xbc, 0xcf, 0x19, 0x1c, 0xa7,
    0x63, 0xcb, 0x1e, 0x4d, 0x3e, 0x4b, 0x1b, 0x9b, # 0x9f
    0x4f, 0xe7, 0xf0, 0xee, 0xad, 0x3a, 0xb5, 0x59,
    0x04, 0xea, 0x40, 0x55, 0x25, 0x51, 0xe5, 0x7a, # 0xaf
    0x89, 0x38, 0x68, 0x52, 0x7b, 0xfc, 0x27, 0xae,
    0xd7, 0xbd, 0xfa, 0x07, 0xf4, 0xcc, 0x8e, 0x5f, # 0xbf
    0xef, 0x35, 0x9c, 0x84, 0x2b, 0x15, 0xd5, 0x77,
    0x34, 0x49, 0xb6, 0x12, 0x0a, 0x7f, 0x71, 0x88, # 0xcf
    0xfd, 0x9d, 0x18, 0x41, 0x7d, 0x93, 0xd8, 0x58,
    0x2c, 0xce, 0xfe, 0x24, 0xaf, 0xde, 0xb8, 0x36, # 0xdf
    0xc8, 0xa1, 0x80, 0xa6, 0x99, 0x98, 0xa8, 0x2f,
    0x0e, 0x81, 0x65, 0x73, 0xe4, 0xc2, 0xa2, 0x8a, # 0xef
    0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2,
    0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec  # 0xff
  ].freeze

  # Inverse of DECRYPT_TABLE: decrypted byte value => encrypted byte value.
  ENCRYPT_TABLE = [nil] * 256
  DECRYPT_TABLE.each_with_index { |plain, cipher| ENCRYPT_TABLE[plain] = cipher }
  ENCRYPT_TABLE.freeze

  # Table driven decryption, one byte at a time.
  #
  # Works on byte values and packs the result, so it yields a binary string.
  # The previous version indexed the table with one-character Strings, which
  # raises TypeError on Ruby 1.9 and later.
  #
  # @param encrypted [String]
  # @return [String]
  def self.decrypt_alt encrypted
    encrypted.unpack('C*').map { |byte| DECRYPT_TABLE[byte] }.pack('C*')
  end

  # Table driven encryption, the inverse of {decrypt_alt}.
  #
  # @param decrypted [String]
  # @return [String]
  def self.encrypt_alt decrypted
    decrypted.unpack('C*').map { |byte| ENCRYPT_TABLE[byte] }.pack('C*')
  end

  # an alternate implementation that is possibly faster....
  # TODO - bench
  #
  # Both strings are String#tr character sets; `^`, `-` and `\` are backslash
  # escaped so that tr treats them as literal bytes.
  DECRYPT_STR, ENCRYPT_STR = [DECRYPT_TABLE, (0...256)].map do |values|
    values.map { |i| i.chr }.join.gsub(/([\^\-\\])/, "\\\\\\1")
  end

  # @param encrypted [String]
  # @return [String] a new string with every byte substituted via DECRYPT_TABLE
  def self.decrypt encrypted
    encrypted.tr ENCRYPT_STR, DECRYPT_STR
  end

  # @param decrypted [String]
  # @return [String] a new string with every byte substituted via ENCRYPT_TABLE
  def self.encrypt decrypted
    decrypted.tr DECRYPT_STR, ENCRYPT_STR
  end
end
|
284
|
+
|
285
|
+
# @return [IO]
|
286
|
+
# @private
|
287
|
+
attr_reader :io

# Header parsed from the start of {#io}.
#
# @return [Header]
# @private
attr_reader :header

# Flat list of block records, filled by {#load_block_btree}.
#
# @return [Array<BlockPtr>]
# @private
attr_reader :blocks

# Flat list of node records, filled by {#load_node_btree}.
#
# @return [Array<NodePtr>]
# @private
attr_reader :nodes

# Starts out empty when the pst is opened.
#
# @return [Hash<Integer, Symbol>]
# @private
attr_reader :special_folder_ids

# The helper given to {#initialize}, or a default Helper.
#
# @return [Helper]
# @private
attr_reader :helper
|
308
|
+
|
309
|
+
# @param io [IO]
|
310
|
+
# @param helper [Helper,nil]
|
311
|
+
def initialize io, helper=nil
  # corresponds to
  # * pst_open
  # * pst_load_index
  #
  # Rewinds io, parses the header, then loads the block and node indexes.
  # Raises FormatError when the header is missing, short or inconsistent.

  @io = io
  io.pos = 0
  @helper = helper || Helper.new

  # IO#read returns nil for an empty stream and a short string for a tiny
  # one; neither can hold a pst header, so fail with a format error instead
  # of letting the header parser trip over nil.
  header_data = io.read(Header::SIZE)
  if header_data.nil? || header_data.length < Header::SIZE
    raise FormatError, "file too short for a pst header (#{header_data ? header_data.length : 0} of #{Header::SIZE} bytes)"
  end
  @header = Header.new header_data

  # would prefer this to be in Header#validate, but it doesn't have the io size.
  # should perhaps downgrade this to just be a warning...
  # (the message used to end in "}" instead of ")")
  raise FormatError, "header size field invalid (#{header.size} != #{io.size})" unless header.size == io.size

  load_block_btree
  load_node_btree
  load_xattrib

  @special_folder_ids = {}
end
|
331
|
+
|
332
|
+
# @return [Boolean]
|
333
|
+
# @private
|
334
|
+
def encrypted?
  # the header is the only place that knows the encryption type
  @header.encrypted?
end
|
337
|
+
|
338
|
+
# until i properly fix logging...
|
339
|
+
#
|
340
|
+
# @private
|
341
|
+
def warn(s)
  # forwards the message unchanged to the library wide logger
  Mapi::Log.warn(s)
end
|
344
|
+
|
345
|
+
#
|
346
|
+
# this is the index and desc record loading code
|
347
|
+
# ----------------------------------------------------------------------------
|
348
|
+
#
|
349
|
+
|
350
|
+
# @private
|
351
|
+
ToTree = Module.new

# more constants from libpst.c
# these relate to the index block (32 bit layout; see the *_64 variants for
# the values used when the file is 64 bit)

# offset of the byte holding the number of active items in an index block
# @private
ITEM_COUNT_OFFSET = 0x1f0 # count byte

# offset of the byte telling leaf blocks (0) from node blocks
# @private
LEVEL_INDICATOR_OFFSET = 0x1f3 # node or leaf

# offset of the id an index block stores about itself; the loaders compare
# it with the id held by the parent entry
# @private
BACKLINK_OFFSET = 0x1f8 # backlink u1 value
|
363
|
+
|
364
|
+
# these 3 classes are used to hold various file records
|
365
|
+
|
366
|
+
# pst_index
|
367
|
+
#
|
368
|
+
# @private
|
369
|
+
class BlockPtr < Struct.new(:id, :offset, :size, :u1)
  UNPACK_STR32 = 'VVvv'
  UNPACK_STR64 = 'TTvv'
  SIZE32 = 12
  SIZE64 = 24
  BLOCK_SIZE = 512 # index blocks was 516 but bogus
  COUNT_MAX32 = 41 # max active items (ITEM_COUNT_OFFSET / SIZE32 = 41)
  COUNT_MAX64 = 20 # bit of a guess really. 512 / 24 = 21, but doesn't leave enough header room

  # The pst this record was loaded from; needed by {#read}.
  #
  # @return [Pst]
  attr_accessor :pst

  # @param data [String, Array] a packed record, or its already unpacked values
  # @param is64 [Boolean] selects the 64 bit record layout when data is a String
  def initialize data, is64
    data = Pst.unpack data, (is64 ? UNPACK_STR64 : UNPACK_STR32) if String === data
    super(*data)
  end

  # Classifies the block. Data blocks are recognised from the id alone; for
  # every other block the first two bytes are read from the file.
  #
  # @return [Symbol] :data, :data_chain_header or :id2_assoc
  # @raise [FormatError] if the leading bytes are not a known combination
  def type
    @type ||= begin
      if data?
        :data
      else
        first_byte, second_byte = read.unpack('CC')
        case first_byte
        when 1
          # this used to be `raise second_byte`, which fails with a TypeError
          # because an Integer is not an exception
          raise FormatError, 'unexpected second byte for block - %p' % second_byte unless second_byte == 1
          :data_chain_header
        when 2
          raise FormatError, 'unexpected second byte for block - %p' % second_byte unless second_byte == 0
          :id2_assoc
        else
          raise FormatError, 'unknown first byte for block - %p' % first_byte
        end
      end
    end
  end

  # @return [Boolean] true when bit 0x2 of the id is clear
  def data?
    (id & 0x2) == 0
  end

  # Reads the block content through the owning pst.
  #
  # @param decrypt [Boolean] request decryption; ignored for non-data blocks
  # @return [String]
  def read decrypt=true
    # only data blocks are ever encrypted
    decrypt = false unless data?
    pst.pst_read_block_size offset, size, decrypt
  end

  # show all numbers in hex, and the block type where it can be determined
  def inspect
    text = super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }
    # working out the type of a non-data block means reading it, which is
    # only possible once the record is attached to a pst
    return text unless id && (data? || pst)
    # the pattern used to be /Index /, the former class name, so it never matched
    text.sub(/BlockPtr /) { |name| "#{name}type=#{type.inspect}, " }
  end
end
|
425
|
+
|
426
|
+
# mostly guesses.
|
427
|
+
|
428
|
+
# @private
|
429
|
+
ITEM_COUNT_OFFSET_64 = 0x1e8

# 64 bit counterpart of LEVEL_INDICATOR_OFFSET
# @private
LEVEL_INDICATOR_OFFSET_64 = 0x1eb # diff of 3 between these 2 as above...
|
433
|
+
|
434
|
+
# _pst_table_ptr_struct
|
435
|
+
#
|
436
|
+
# @private
|
437
|
+
class TablePtr < Struct.new(:start, :u1, :offset)
  UNPACK_STR32 = 'V3'
  UNPACK_STR64 = 'T3'
  SIZE32 = 12
  SIZE64 = 24

  # Builds the pointer from a packed record or from already unpacked values.
  #
  # @param data [String, Array] packed record, or the values for start, u1 and offset
  # @param is64 [Boolean] selects the 64 bit record layout when data is a String
  def initialize(data, is64)
    if data.is_a?(String)
      layout = is64 ? UNPACK_STR64 : UNPACK_STR32
      data = Pst.unpack(data, layout)
    end
    super(*data)
  end
end
|
450
|
+
|
451
|
+
# pst_desc
|
452
|
+
# idx_id is a pointer to an idx record which gets the primary data stream for the Desc record.
|
453
|
+
# idx2_id gets you an idx record, that when read gives you an ID2 association list, which just maps
|
454
|
+
# another set of ids to index values
|
455
|
+
#
|
456
|
+
# @private
|
457
|
+
class NodePtr < Struct.new(:node_id, :block_id, :sub_block_id, :parent_node_id)
  UNPACK_STR32 = 'V4'
  UNPACK_STR64 = 'T3V'
  SIZE32 = 16
  SIZE64 = 32
  BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
  COUNT_MAX64 = 15
  COUNT_MAX32 = 31 # max active desc records (ITEM_COUNT_OFFSET / Desc::SIZE = 31)

  include ToTree

  # The pst this record belongs to; every read below goes through it.
  #
  # @return [Pst]
  attr_accessor :pst

  # Nodes whose parent_node_id points at this node.
  #
  # @return [Array]
  attr_reader :children

  # @param data [String] one packed node record
  # @param is64 [Boolean] selects the 64 bit record layout
  def initialize(data, is64)
    layout = is64 ? UNPACK_STR64 : UNPACK_STR32
    super(*Pst.unpack(data, layout))
    @children = []
  end

  # Disabled on purpose: always raises.
  # (it used to resolve block_id through pst.block_from_id)
  #
  # @return [BlockPtr]
  def block
    raise "DO NOT USE"
  end

  # Disabled on purpose: always raises.
  # (it used to resolve sub_block_id through pst.block_from_id)
  #
  # @return [BlockPtr]
  def sub_block
    raise "DO NOT USE"
  end

  # Read node data. The result is loaded once and then reused.
  #
  # @return [Array<String>]
  def read_main_array
    return @read_main if @read_main

    chunks = []
    pst.load_node_main_data_to(node_id, chunks)
    @read_main = chunks
  end

  # Locate and read node sub data by its local id
  #
  # @param local_node_id [Integer]
  # @return [Array<String>]
  def read_sub_array(local_node_id)
    [].tap { |chunks| pst.load_node_sub_data_to(node_id, local_node_id, chunks) }
  end

  # Collects the local node ids found below this node (debugging aid).
  #
  # @return [Array<Integer>]
  def get_local_node_list
    [].tap { |ids| pst.get_local_node_list_to(node_id, ids) }
  end

  # Check if there is a sub data exists, where it is identified by its local id
  #
  # @param local_node_id [Integer]
  # @return [Boolean]
  def has_sub(local_node_id)
    #TODO fixme - this reads the whole sub data just to see whether there is any
    !read_sub_array(local_node_id).empty?
  end

  # show all numbers in hex
  def inspect
    super.gsub(/=(\d+)/) { '=0x%x' % Regexp.last_match(1).to_i }
  end
end
|
535
|
+
|
536
|
+
# corresponds to
|
537
|
+
# * _pst_build_id_ptr
|
538
|
+
#
|
539
|
+
# @private
|
540
|
+
def load_block_btree
  # start from a clean slate, then let the recursive loader fill @blocks
  @blocks = []
  @block_offsets = []
  load_block_tree(header.block_btree, header.block_btree_count, 0)

  # blocks are nearly always looked up by id, so index them. the lowest id
  # bit is masked off; on a clash the record loaded last wins.
  @block_from_id = {}
  @blocks.each do |block|
    key = block.id & ~1
    warn "there are duplicate idx records with id #{key}" if @block_from_id[key]
    @block_from_id[key] = block
  end
end
|
553
|
+
|
554
|
+
# load the flat idx table, which maps ids to file ranges. this is the recursive helper
|
555
|
+
#
|
556
|
+
# corresponds to
|
557
|
+
# * _pst_build_id_ptr
|
558
|
+
#
|
559
|
+
# @private
|
560
|
+
def load_block_tree offset, linku1, start_val
  # offset    - file offset of the index block to load
  # linku1    - id the block must report about itself (its backlink)
  # start_val - id expected in the first entry, 0 to skip that check
  @block_offsets << offset

  #_pst_read_block_size(pf, offset, BLOCK_SIZE, &buf, 0, 0) < BLOCK_SIZE)
  buf = pst_read_block_size offset, BlockPtr::BLOCK_SIZE, false

  # everything up to and including the backlink is read below. a short read
  # used to end in NoMethodError on nil; report it as a format problem.
  backlink_size = is64 ? 8 : 4
  if buf.nil? || buf.length < BACKLINK_OFFSET + backlink_size
    raise FormatError, 'truncated index block at offset 0x%x' % offset
  end

  item_count = buf[is64 ? ITEM_COUNT_OFFSET_64 : ITEM_COUNT_OFFSET].ord
  level = buf[is64 ? LEVEL_INDICATOR_OFFSET_64 : LEVEL_INDICATOR_OFFSET].ord
  count_max = is64 ? BlockPtr::COUNT_MAX64 : BlockPtr::COUNT_MAX32
  raise "have too many active items in index (#{item_count})" if item_count > count_max

  this_node_id = is64 ? Pst.unpack(buf[BACKLINK_OFFSET, 8], "T").first : buf[BACKLINK_OFFSET, 4].unpack("V").first
  raise 'blah 1' unless this_node_id == linku1

  if level == 0
    # leaf pointers
    size = is64 ? BlockPtr::SIZE64 : BlockPtr::SIZE32

    # split the data into item_count index objects
    Pst.split_per(buf, size, item_count).each_with_index do |data, i|
      idx = BlockPtr.new data, is64
      # first entry
      raise 'blah 3' if i == 0 && start_val != 0 && idx.id != start_val
      idx.pst = self
      # this shouldn't really happen i'd imagine
      raise "OHNO" if idx.id == 0
      @blocks << idx
    end
  else
    # node pointers
    size = is64 ? TablePtr::SIZE64 : TablePtr::SIZE32
    # split the data into item_count table pointers
    Pst.split_per(buf, size, item_count).each_with_index do |data, i|
      table = TablePtr.new data, is64
      # for the first value, we expect the start to be equal
      raise 'blah 3' if i == 0 && start_val != 0 && table.start != start_val
      # this shouldn't really happen i'd imagine
      raise "OHNO" if table.start == 0
      load_block_tree table.offset, table.u1, table.start
    end
  end
end
|
602
|
+
|
603
|
+
# most access to idx objects will use this function
|
604
|
+
#
|
605
|
+
# corresponds to
|
606
|
+
# * _pst_getID
|
607
|
+
#
|
608
|
+
# @param id [Integer]
|
609
|
+
# @return [BlockPtr]
|
610
|
+
# @private
|
611
|
+
def block_from_id(id)
  # the lookup table is keyed by the id with its lowest bit cleared
  key = id & ~1
  @block_from_id[key]
end
|
614
|
+
|
615
|
+
# corresponds to
|
616
|
+
# * _pst_build_desc_ptr
|
617
|
+
# * record_descriptor
|
618
|
+
#
|
619
|
+
# @private
|
620
|
+
def load_node_btree
  @nodes = []
  @node_offsets = []
  load_node_tree(header.node_btree, header.node_btree_count, 0x21)

  # first create a lookup cache, attaching every node to this pst on the way.
  # on a duplicate id the record loaded last wins.
  @node_from_id = {}
  @nodes.each do |node|
    node.pst = self
    node_id = node.node_id
    warn "there are duplicate desc records with id #{node_id}" if @node_from_id[node_id]
    @node_from_id[node_id] = node
  end

  # now turn the flat list of loaded desc records into a tree

  # well, they have no parent, so they're more like, the toplevel descs.
  @orphans = []
  # now assign each node to the parents child array, putting the orphans in the above
  @nodes.each do |node|
    parent = @node_from_id[node.parent_node_id]
    # note, besides a node being its own parent, its possible to create other
    # circular structures.
    # being its own parent actually happens usually, for the root_item it appears.
    #warn "desc record's parent is itself (#{desc.inspect})"
    # maybe add some more checks in here for circular structures
    if parent && parent != node
      parent.children << node
    else
      @orphans << node
    end
  end

  # maybe change this to some sort of sane-ness check. orphans are expected
  # warn "have #{@orphans.length} orphan desc record(s)." unless @orphans.empty?
end
|
655
|
+
|
656
|
+
# @return [Boolean]
|
657
|
+
# @private
|
658
|
+
def is64
  # the 64 bit record layouts are used exactly when the header reports version 2003
  @header.version_2003?
end
|
661
|
+
|
662
|
+
# load the flat list of desc records recursively
|
663
|
+
#
|
664
|
+
# corresponds to
|
665
|
+
# * _pst_build_desc_ptr
|
666
|
+
# * record_descriptor
|
667
|
+
#
|
668
|
+
# @private
|
669
|
+
def load_node_tree offset, linku1, start_val
  # offset    - file offset of the descriptor block to load
  # linku1    - id the block must report about itself (its backlink)
  # start_val - id expected in the first entry of the block
  @node_offsets << offset

  buf = pst_read_block_size offset, NodePtr::BLOCK_SIZE, false

  # everything up to and including the backlink is read below. a short read
  # used to end in NoMethodError on nil; report it as a format problem.
  backlink_size = is64 ? 8 : 4
  if buf.nil? || buf.length < BACKLINK_OFFSET + backlink_size
    raise FormatError, 'truncated descriptor block at offset 0x%x' % offset
  end

  item_count = buf[is64 ? ITEM_COUNT_OFFSET_64 : ITEM_COUNT_OFFSET].ord
  level = buf[is64 ? LEVEL_INDICATOR_OFFSET_64 : LEVEL_INDICATOR_OFFSET].ord

  # not real desc
  this_node_id = is64 ? Pst.unpack(buf[BACKLINK_OFFSET, 8], "T").first : buf[BACKLINK_OFFSET, 4].unpack("V").first
  raise 'blah 1' unless this_node_id == linku1

  if level == 0
    # leaf pointers
    size = is64 ? NodePtr::SIZE64 : NodePtr::SIZE32
    count_max = is64 ? NodePtr::COUNT_MAX64 : NodePtr::COUNT_MAX32

    raise "have too many active items in index (#{item_count})" if item_count > count_max
    # split the data into item_count desc objects
    Pst.split_per(buf, size, item_count).each_with_index do |data, i|
      node = NodePtr.new data, is64
      # first entry
      raise 'blah 3' if i == 0 && start_val != 0 && node.node_id != start_val
      # this shouldn't really happen i'd imagine
      break if node.node_id == 0
      @nodes << node
    end
  else
    # node pointers
    size = is64 ? TablePtr::SIZE64 : TablePtr::SIZE32
    count_max = is64 ? BlockPtr::COUNT_MAX64 : BlockPtr::COUNT_MAX32

    raise "have too many active items in index (#{item_count})" if item_count > count_max
    # split the data into item_count table pointers
    Pst.split_per(buf, size, item_count).each_with_index do |data, i|
      table = TablePtr.new data, is64
      # for the first value, we expect the start to be equal note that ids -1, so even for the
      # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
      # that the first desc record is always 33...
      raise 'blah 3' if i == 0 && start_val != -1 && table.start != start_val
      # this shouldn't really happen i'd imagine
      break if table.start == 0
      load_node_tree table.offset, table.u1, table.start
    end
  end
end
|
714
|
+
|
715
|
+
# as for idx
|
716
|
+
#
|
717
|
+
# corresponds to:
|
718
|
+
# * _pst_getDptr
|
719
|
+
#
|
720
|
+
# @param id [Integer]
|
721
|
+
# @return [NodePtr]
|
722
|
+
#
|
723
|
+
# @private
|
724
|
+
def node_from_id(id)
  # plain lookup in the cache built by load_node_btree; nil for an unknown id
  @node_from_id[id]
end
|
727
|
+
|
728
|
+
# corresponds to
|
729
|
+
# * pst_load_extended_attributes
|
730
|
+
#
|
731
|
+
# @private
|
732
|
+
def load_xattrib
  # not implemented yet (xattribs are item 3 on the TODO list); does nothing
end
|
734
|
+
|
735
|
+
# corresponds to:
|
736
|
+
# * _pst_read_block_size
|
737
|
+
# * _pst_read_block ??
|
738
|
+
# * _pst_ff_getIDblock_dec ??
|
739
|
+
# * _pst_ff_getIDblock ??
|
740
|
+
#
|
741
|
+
# @param offset [Integer]
|
742
|
+
# @param size [Integer]
|
743
|
+
# @param decrypt [Boolean]
|
744
|
+
# @return [String]
|
745
|
+
# @private
|
746
|
+
def pst_read_block_size offset, size, decrypt=true
  # Reads size bytes at offset. A short read is reported through warn and the
  # bytes that were available are returned. The result is decrypted only when
  # the pst is encrypted and decrypt is true.
  io.seek offset
  # IO#read returns nil rather than an empty string when a positive length
  # is requested at end of file; treat that as the shortest possible read so
  # the warning below is reached instead of NoMethodError on nil.
  buf = io.read(size) || String.new
  warn "tried to read #{size} bytes but only got #{buf.length}" if buf.length != size
  encrypted? && decrypt ? CompressibleEncryption.decrypt(buf) : buf
end
|
752
|
+
|
753
|
+
# @param node_id [Integer]
|
754
|
+
# @param list [Array<String>]
|
755
|
+
# @private
|
756
|
+
def load_node_main_data_to node_id, list
  # Appends the main data of the node with the given id to list.
  # (the message used to say "node_is")
  raise 'node_id must be Integer' unless Integer === node_id
  node = node_from_id node_id
  # an unknown id used to surface as NoMethodError on nil
  raise 'unknown node id 0x%x' % node_id unless node
  load_main_block_to node.block_id, list
end
|
761
|
+
|
762
|
+
# @param node_id [Integer]
|
763
|
+
# @param local_node_id [Integer]
|
764
|
+
# @param list [Array<String>]
|
765
|
+
# @private
|
766
|
+
def load_node_sub_data_to node_id, local_node_id, list
  # Appends to list the sub data stored under local_node_id of the node with
  # the given id.
  # (the first message used to say "node_is")
  raise 'node_id must be Integer' unless Integer === node_id
  raise 'local_node_id must be Integer' unless Integer === local_node_id
  node = node_from_id node_id
  # an unknown id used to surface as NoMethodError on nil
  raise 'unknown node id 0x%x' % node_id unless node
  load_sub_block_to node.sub_block_id, local_node_id, list
end
|
772
|
+
|
773
|
+
# for debug
|
774
|
+
#
|
775
|
+
# @param node_id [String]
|
776
|
+
# @param list [Array<String>]
|
777
|
+
# @private
|
778
|
+
def get_local_node_list_to(node_id, list)
  # node_id is looked up with node_from_id; the ids found below that node's
  # sub block are appended to list
  sub_block_id = node_from_id(node_id).sub_block_id
  get_local_node_list_of_sub_block_to(sub_block_id, list)
end
|
782
|
+
|
783
|
+
# for debug
|
784
|
+
#
|
785
|
+
# @param sub_block_id [String]
|
786
|
+
# @param list [Array<String>]
|
787
|
+
# @private
|
788
|
+
def get_local_node_list_of_sub_block_to sub_block_id, list
  # Walks the sub block tree rooted at sub_block_id and appends every local
  # node id found in its leaf (SLBLOCK) entries to list.
  # (a leftover `p ["WALK", ...]` that printed to stdout on every call was removed)
  return if sub_block_id == 0

  sub_block = block_from_id sub_block_id
  raise 'must not be data' if sub_block.data?

  # SLBLOCK or SIBLOCK
  data = sub_block.read

  btype = data[0].ord
  raise 'btype != 2' if btype != 2

  # entries follow the block header: 8 bytes in 64 bit files, 4 bytes otherwise
  header_size = is64 ? 8 : 4

  level = data[1].ord
  case level
  when 0 # SLBLOCK
    entry_size = is64 ? 24 : 12
    count = data[2, 2].unpack("v").first
    count.times do |i|
      entry = data[header_size + entry_size * i, entry_size]
      sl_node_id, _sl_block_id, sl_sub_block_id = is64 ? Pst.unpack(entry, "T3") : entry.unpack("V3")

      list << (sl_node_id & 0xffffffff)

      get_local_node_list_of_sub_block_to sl_sub_block_id, list
    end
  when 1 # SIBLOCK
    entry_size = is64 ? 16 : 8
    count = data[2, 2].unpack("v").first
    count.times do |i|
      entry = data[header_size + entry_size * i, entry_size]
      _si_node_id, si_block_id = is64 ? Pst.unpack(entry, "T2") : entry.unpack("V2")

      # an SIBLOCK entry carries the first key of a child SLBLOCK and that
      # block's id. listing only the key missed every other id in the child,
      # so descend into it instead (the key is listed again by the child).
      get_local_node_list_of_sub_block_to si_block_id, list
    end
  else
    raise 'level unk'
  end
end
|
827
|
+
|
828
|
+
# @param sub_block_id [Integer]
|
829
|
+
# @param local_node_id [Integer]
|
830
|
+
# @param list [Array<String>]
|
831
|
+
# @private
|
832
|
+
# Appends the data blocks of the sub node +local_node_id+ to +list+, searching
# the sub node tree rooted at +sub_block_id+.
#
# The tree consists of SLBLOCKs (leaf entries: node id, data block id, nested
# sub block id) optionally indexed by a SIBLOCK (entries: key node id, block id
# of an SLBLOCK). SIBLOCK entries are followed down to their SLBLOCKs; they
# never point at data blocks themselves (see MS-PST, SIENTRY).
def load_sub_block_to sub_block_id, local_node_id, list
  raise 'sub_block_id must be Integer' unless Integer === sub_block_id
  return if sub_block_id == 0

  sub_block = block_from_id sub_block_id
  raise 'must not be data' if sub_block.data?

  # SLBLOCK or SIBLOCK
  data = sub_block.read
  raise 'btype != 2' if data[0].ord != 2

  wide = is64
  # entries start 8 bytes into the block in the 64 bit format, 4 bytes otherwise
  header_size = wide ? 8 : 4

  case data[1].ord
  when 0 # SLBLOCK
    count = data[2, 2].unpack("v").first
    count.times do |i|
      sl_node_id, sl_block_id, sl_sub_block_id =
        if wide
          Pst.unpack(data[header_size + 24 * i, 24], "T3")
        else
          data[header_size + 12 * i, 12].unpack("V3")
        end

      # only the low 32 bits carry the node id
      load_main_block_to sl_block_id, list if (sl_node_id & 0xffffffff) == local_node_id

      load_sub_block_to sl_sub_block_id, local_node_id, list
    end
  when 1 # SIBLOCK
    count = data[2, 2].unpack("v").first
    count.times do |i|
      _si_node_id, si_block_id =
        if wide
          Pst.unpack(data[header_size + 16 * i, 16], "T2")
        else
          data[header_size + 8 * i, 8].unpack("V2")
        end

      # each entry references an SLBLOCK; the wanted node may live in any of
      # them, so search every referenced leaf block
      load_sub_block_to si_block_id, local_node_id, list
    end
  else
    raise 'level unk'
  end
end
|
881
|
+
|
882
|
+
# @param block_id [Integer]
|
883
|
+
# @param list [Array<String>]
|
884
|
+
# @private
|
885
|
+
def load_main_block_to block_id, list
  return if block_id == 0

  block = block_from_id block_id

  if block.data?
    # a plain data block: this is the payload we are after
    list << block.read.force_encoding("BINARY")
    return
  end

  # otherwise it is an XBLOCK or XXBLOCK listing child block ids
  tree = block.read
  raise 'btype must be 1' unless tree[0].ord == 1

  depth = tree[1].ord
  raise 'level unk' unless depth == 1 || depth == 2

  entry_count, _total_bytes = tree[2, 6].unpack("vV")
  child_ids =
    if is64
      Pst.unpack(tree[8, 8 * entry_count], "T#{entry_count}")
    else
      tree[8, 4 * entry_count].unpack("V#{entry_count}")
    end

  child_ids.each do |child_id|
    load_main_block_to child_id, list
  end
end
|
917
|
+
|
918
|
+
#
|
919
|
+
# id2
|
920
|
+
# ----------------------------------------------------------------------------
|
921
|
+
#
|
922
|
+
|
923
|
+
#
|
924
|
+
# main block parsing code. gets raw properties
|
925
|
+
# ----------------------------------------------------------------------------
|
926
|
+
#
|
927
|
+
|
928
|
+
# the job of this class, is to take a desc record, and be able to enumerate through the
|
929
|
+
# mapi properties of the associated thing.
|
930
|
+
#
|
931
|
+
# corresponds to
|
932
|
+
# * _pst_parse_block
|
933
|
+
# * _pst_process (in some ways. although perhaps thats more the Item::Properties#add_property)
|
934
|
+
#
|
935
|
+
# @private
|
936
|
+
class BlockParser
|
937
|
+
include Mapi::Types::Constants
|
938
|
+
|
939
|
+
# @private
|
940
|
+
TYPES = {
|
941
|
+
0xbc => 1,
|
942
|
+
0x7c => 2,
|
943
|
+
# type 3 is removed. an artifact of not handling the indirect blocks properly in libpst.
|
944
|
+
}
|
945
|
+
|
946
|
+
# @private
|
947
|
+
PR_SUBJECT = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_SUBJECT' }.first.hex
|
948
|
+
# @private
|
949
|
+
PR_BODY_HTML = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_BODY_HTML' }.first.hex
|
950
|
+
|
951
|
+
# this stuff could maybe be moved to Ole::Types? or leverage it somehow?
|
952
|
+
# whether or not a type is immediate is more a property of the pst encoding though i expect.
|
953
|
+
# what i probably can add is a generic concept of whether a type is of variadic length or not.
|
954
|
+
|
955
|
+
# these lists are very incomplete. think they are largely copied from libpst
|
956
|
+
|
957
|
+
# @private
|
958
|
+
IMMEDIATE_TYPES = [
|
959
|
+
PT_SHORT, PT_LONG, PT_BOOLEAN
|
960
|
+
]
|
961
|
+
|
962
|
+
# @private
|
963
|
+
INDIRECT_TYPES = [
|
964
|
+
PT_DOUBLE, PT_OBJECT,
|
965
|
+
0x0014, # whats this? probably something like PT_LONGLONG, given the correspondence with the
|
966
|
+
# ole variant types. (= VT_I8)
|
967
|
+
PT_STRING8, PT_UNICODE, # unicode isn't in libpst, but added here for outlook 2003 down the track
|
968
|
+
PT_SYSTIME,
|
969
|
+
0x0048, # another unknown
|
970
|
+
0x0102, # this is PT_BINARY vs PT_CLSID
|
971
|
+
#0x1003, # these are vector types, but they're commented out for now because i'd expect that
|
972
|
+
#0x1014, # there's extra decoding needed that i'm not doing. (probably just need a simple
|
973
|
+
# # PT_* => unpack string mapping for the immediate types, and just do unpack('V*') etc
|
974
|
+
#0x101e,
|
975
|
+
#0x1102
|
976
|
+
]
|
977
|
+
|
978
|
+
# the attachment and recipient arrays appear to be always stored with these fixed
|
979
|
+
# id2 values. seems strange. are there other extra streams? can find out by making higher
|
980
|
+
# level IO wrapper, which has the id2 value, and doing the diff of available id2 values versus
|
981
|
+
# used id2 values in properties of an item.
|
982
|
+
|
983
|
+
# @private
|
984
|
+
ID2_ATTACHMENTS = 0x671
|
985
|
+
|
986
|
+
# @private
|
987
|
+
ID2_RECIPIENTS = 0x692
|
988
|
+
|
989
|
+
# Targeting main data, not sub
|
990
|
+
USE_MAIN_DATA = -1
|
991
|
+
|
992
|
+
# @return [NodePtr]
|
993
|
+
# @private
|
994
|
+
attr_reader :node
|
995
|
+
|
996
|
+
# @return [Hash<Integer, String>] HID to data block
|
997
|
+
# @private
|
998
|
+
attr_reader :data_chunks
|
999
|
+
|
1000
|
+
# @param node [NodePtr]
|
1001
|
+
# @param local_node_id [Integer]
|
1002
|
+
def initialize node, local_node_id = USE_MAIN_DATA
  @node = node
  @data_chunks = {}

  # either the node's own data pages, or those of one of its sub nodes
  pages =
    if local_node_id == USE_MAIN_DATA
      node.read_main_array
    else
      node.read_sub_array(local_node_id)
    end

  # see https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-pst/a3fa280c-eba3-434f-86e4-b95141b3c7b1
  # the first page carries the root header, every following page a page header
  pages.each_with_index do |page, page_index|
    if page_index == 0
      load_root_header page
    else
      load_page_header page, page_index
    end
  end
end
|
1020
|
+
|
1021
|
+
# Parse HNPAGEHDR / HNBITMAPHDR
|
1022
|
+
#
|
1023
|
+
# @see https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-pst/9c34ecf8-36bc-45a1-a2df-ee35c6dc840a
|
1024
|
+
#
|
1025
|
+
# @param data [String]
|
1026
|
+
# @param page_index [Integer]
|
1027
|
+
# @private
|
1028
|
+
def load_page_header data, page_index
  # the page starts with the 16 bit offset of its HNPAGEMAP
  load_heap_page_map data, data.unpack('v').first, page_index
end

# Parse HNHDR (header of the first heap page)
#
# Stores the client signature in @heap_type, the user root HID in @offset1 and
# registers the allocations of the first page.
#
# @see https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-pst/8e4ae05c-3c24-4103-b7e5-ffef6f244834
# @param data [String]
# @raise [FormatError] on a bad heap signature or an unknown client signature
# @private
def load_root_header data
  page_map, sig, @heap_type, @offset1 = data.unpack 'vCCVV'
  raise FormatError, 'invalid signature 0x%02x' % sig unless sig == 0xec
  raise FormatError, 'unknown block type signature 0x%02x' % @heap_type unless TYPES[@heap_type]
  @type = TYPES[@heap_type]

  load_heap_page_map data, page_map, 0
end

# Read the HNPAGEMAP found at offset +page_map+ of a heap page and register
# every allocation of that page in @data_chunks, keyed by its HID.
#
# @param data [String] the whole heap page
# @param page_map [Integer] offset of the HNPAGEMAP inside +data+
# @param page_index [Integer] zero based index of the page
# @private
def load_heap_page_map data, page_map, page_index
  # one more offset than allocations: the last one marks the end of the data
  offsets_count = data[page_map, 2].unpack("v").first + 1
  offset_tables = data[page_map + 4, 2 * offsets_count].unpack("v#{offsets_count}")

  offset_tables.each_cons(2).each_with_index do |(from, to), index|
    # convert to HID: 1 based allocation index in steps of 0x20, page index in the upper 16 bits
    @data_chunks[0x20 * (1 + index) + 65536 * page_index] = data[from, to - from]
  end
end
|
1060
|
+
|
1061
|
+
# based on the value of offset, return either some data from buf, or some data from the
|
1062
|
+
# id2 chain id2, where offset is some key into a lookup table that is stored as the id2
|
1063
|
+
# chain. i think i may need to create a BlockParser class that wraps up all this mess.
|
1064
|
+
#
|
1065
|
+
# corresponds to:
|
1066
|
+
# * _pst_getBlockOffsetPointer
|
1067
|
+
# * _pst_getBlockOffset
|
1068
|
+
#
|
1069
|
+
# @param offset [Integer]
|
1070
|
+
# @return [String]
|
1071
|
+
# @private
|
1072
|
+
def get_data_indirect offset
  raise "offset must be Integer" unless offset.is_a?(Integer)

  # resolve the HNID to a stream, then slurp it
  stream = get_data_indirect_io(offset)
  stream.read
end
|
1077
|
+
|
1078
|
+
# Resolve data pointed by HNID
|
1079
|
+
#
|
1080
|
+
# @see https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-pst/7ac490ce-31af-4a75-97df-eb9d07a003fd
|
1081
|
+
# @param offset [Integer]
|
1082
|
+
# @return [StringIO]
|
1083
|
+
# @private
|
1084
|
+
def get_data_indirect_io offset
  raise "offset must be Integer" unless offset.is_a?(Integer)

  return nil if offset == 0

  # low 5 bits clear: this is HID (heap), served from the parsed heap pages
  return StringIO.new(data_chunks[offset]) if (offset & 0x1f) == 0

  # otherwise this is NID (node), stored in one of the node's sub nodes
  pages = node.read_sub_array(offset)
  if pages.count >= 2
    raise "local node id #{offset} points multi page count #{pages.count}, use get_data_array() instead"
  end

  StringIO.new(pages.empty? ? "" : pages.first)
end
|
1103
|
+
|
1104
|
+
# @param offset [Integer]
|
1105
|
+
# @return [Array<String>]
|
1106
|
+
# @private
|
1107
|
+
def get_data_array offset
  raise "offset must be Integer" unless offset.is_a?(Integer)

  return nil if offset == 0

  if (offset & 0x1f) == 0
    # this is HID (heap): a single chunk, wrapped to look like a page list
    [data_chunks[offset]]
  else
    # this is NID (node): possibly several pages
    node.read_sub_array(offset)
  end
end
|
1120
|
+
|
1121
|
+
# Decode one raw property tuple.
#
# +value+ is either the 32 bit integer stored next to the property (the value
# itself for immediate types, otherwise a HID/NID pointing at the data) or a
# String when the caller already holds the raw bytes (cells wider than 4 bytes).
#
# @param key [Integer] property id
# @param type [Integer] property type
# @param value [Integer, String]
# @return [Array(Integer, Integer, Object)] +[key, type, decoded_value]+
# @raise [NotImplementedError] when the property type is not supported
# @private
def handle_indirect_values key, type, value
  case type
  when PT_BOOLEAN
    value = value != 0
  when *IMMEDIATE_TYPES # not including PT_BOOLEAN which we just did above
    # no processing currently applied (needed?).
  when *INDIRECT_TYPES
    # the value is a pointer
    if String === value # ie, value size > 4 above
      value = StringIO.new value
    else
      value = get_data_array(value)
      value = StringIO.new value.join("") if value
    end
    # keep strings as immediate values for now, for compatibility with how i set up
    # Msg::Properties::ENCODINGS
    if value
      if type == PT_STRING8
        value = node.pst.helper.convert_ansi_str value.read
      elsif type == PT_UNICODE
        value = Ole::Types::FROM_UTF16.iconv value.read
      end
    end
    # special body_html handling
    if key == PR_BODY_HTML && value
      # to keep the msg code happy, which thinks body_html will be an io
      # although, in 2003 version, they are 0102 already
      value = StringIO.new value unless value.respond_to?(:read)
    end
    # special subject handling
    if key == PR_SUBJECT && String === value && value.length >= 2
      # a leading \001 byte marks a 2 char header describing the subject prefix
      # (`Re: `, `Undeliverable: `, ...), eg "\001\005RE: blah blah". only the
      # header is stripped here, the prefix itself stays part of the subject.
      #
      # idea: value[offset..-1] probably is what PR_CONVERSATION_TOPIC holds, so
      # this could yield that as a second property.
      value = value[2..-1] if value[0].ord == 1
    end

    # special handling for embedded objects
    # used for attach_data for attached messages. in which case attach_method should == 5,
    # for embedded object.
    if type == PT_OBJECT && value
      value = value.read if value.respond_to?(:read)
      id2, _unknown = value.unpack 'V2'
      # FIXME: for an attached message this should become a nested Item, and for
      # an embedded ole object (eg "Picture (Metafile)", data starts with the
      # d0cf11e0 docfile signature) an ole storage. until that is properly
      # fixed just the raw stream is handed out, which breaks nested messages.
      value = get_data_indirect_io id2
    end
  # this is PT_MV_STRING8, i guess.
  # should probably have the 0x1000 flag, and do the or-ring.
  # example of 0x1102 is PR_OUTLOOK_2003_ENTRYIDS. less sure about that one.
  when 0x101e, 0x1102
    # example data:
    # 0x802b "\003\000\000\000\020\000\000\000\030\000\000\000#\000\000\000BusinessCompetitionFavorites"
    # this 0x802b would be an extended attribute for categories / keywords.
    value = split_multi_value(multi_value_data(value))
    value.map! { |str| StringIO.new str } if type == 0x1102
  when 0x101f
    value = split_multi_value(multi_value_data(value))
    value.map! { |str| Ole::Types::FROM_UTF16.iconv str }
  when 0x1003 # uint32 array
    data = multi_value_data(value)
    # there is no count field
    value = data ? data.unpack("V#{data.length / 4}") : []
  else
    name = Mapi::Types::DATA[type].first rescue nil
    # the dump below is only a diagnostic aid: failing to resolve the raw data
    # must not replace the NotImplementedError raised afterwards
    raw =
      begin
        String === value ? value : get_data_indirect_io(value).read
      rescue StandardError
        value
      end
    warn '0x%04x %p' % [key, raw]
    raise NotImplementedError, 'unsupported mapi property type - 0x%04x (%p)' % [type, name]
  end
  [key, type, value]
end

# Raw bytes of a multi value property.
#
# @param value [Integer, String] HNID of the data, or the data itself
# @return [String, nil] nil when the HNID is 0, ie there is no data
# @private
def multi_value_data value
  return value if String === value

  io = get_data_indirect_io(value)
  io && io.read
end

# Split a variable size multi value blob (count, start offsets, items) into
# its items.
#
# @param data [String, nil]
# @return [Array<String>] empty when there is no data
# @private
def split_multi_value data
  return [] unless data

  num = data.unpack('V')[0]
  offsets = data[4, 4 * num].unpack("V#{num}")
  # every item ends where the next one starts, the last one at the end of the data
  (offsets + [data.length]).each_cons(2).map { |from, to| data[from...to] }
end
|
1262
|
+
end
|
1263
|
+
|
1264
|
+
=begin
|
1265
|
+
* recipients:
|
1266
|
+
|
1267
|
+
affects: ["0x200764", "0x2011c4", "0x201b24", "0x201b44", "0x201ba4", "0x201c24", "0x201cc4", "0x202504"]
|
1268
|
+
|
1269
|
+
after adding the rawpropertystoretable fix, all except the second parse properly, and satisfy:
|
1270
|
+
|
1271
|
+
item.props.display_to == item.recipients.map { |r| r.props.display_name if r.props.recipient_type == 1 }.compact * '; '
|
1272
|
+
|
1273
|
+
only the second still has a problem
|
1274
|
+
|
1275
|
+
#[#<struct Pst::Desc desc_id=0x2011c4, idx_id=0x397c, idx2_id=0x398a, parent_desc_id=0x8082>]
|
1276
|
+
|
1277
|
+
think this is related to a multi block #data3. ie, when you use @x * rec_size, and it
|
1278
|
+
goes > 8190, or there abouts, then it stuffs up. probably there is header gunk, or something,
|
1279
|
+
similar to when #data is multi block.
|
1280
|
+
|
1281
|
+
same problem affects the attachment table in test4.
|
1282
|
+
|
1283
|
+
fixed that issue. round data3 ranges to rec_size.
|
1284
|
+
|
1285
|
+
fix other issue with attached objects.
|
1286
|
+
|
1287
|
+
all recipients and attachments in test2 are fine.
|
1288
|
+
|
1289
|
+
only remaining issue is test4 recipients of 200044. strange.
|
1290
|
+
|
1291
|
+
=end
|
1292
|
+
|
1293
|
+
# RawPropertyStore is used to iterate through the properties of an item, or the auxiliary
|
1294
|
+
# data for an attachment. its just a parser for the way the properties are serialized, when the
|
1295
|
+
# properties don't have to conform to a column structure.
|
1296
|
+
#
|
1297
|
+
# structure of this chunk of data is often
|
1298
|
+
# header, property keys, data values, and then indexes.
|
1299
|
+
# the property keys has value in it. value can be the actual value if its a short type,
|
1300
|
+
# otherwise you lookup the value in the indicies, where you get the offsets to use in the
|
1301
|
+
# main data body. due to the indirect thing though, any of these parts could actually come
|
1302
|
+
# from a separate stream.
|
1303
|
+
#
|
1304
|
+
# @private
|
1305
|
+
class RawPropertyStore < BlockParser
  include Enumerable

  # @return [Integer] number of property tuples
  attr_reader :length

  # Will read Property Context (PC)
  #
  # @see https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-pst/294c83c6-ff92-42f5-b6b6-876c29fa9737
  # @param node [NodePtr]
  # @param local_node_id [Integer]
  # @raise [FormatError] when the heap does not hold a property context
  def initialize node, local_node_id = USE_MAIN_DATA
    super
    bTypePC = 0xbc
    raise FormatError, "expected type 188 - got #{@heap_type}" unless @heap_type == bTypePC

    # the way that offset works, data1 may be a subset of buf, or something from id2. if its from buf,
    # it will be offset based on index_offset and offset. so it could be some random chunk of data anywhere
    # in the thing.
    header_data = get_data_indirect @offset1
    raise FormatError if header_data.length < 8
    signature, offset2 = header_data.unpack 'V2'
    # report the value that was actually checked
    raise FormatError, 'invalid Property Context signature 0x%08x' % signature if signature != 0x000602b5
    # this is actually a big chunk of tag tuples.
    @index_data = get_data_indirect offset2
    # every tuple takes 8 bytes: key, type and a 32 bit value
    @length = @index_data.length / 8
  end

  # iterate through the property tuples
  #
  # @yield [key, type, value]
  # @yieldparam key [Integer]
  # @yieldparam type [Integer]
  # @yieldparam value [Object]
  # @return [Enumerator] when called without a block
  def each
    return to_enum(:each) unless block_given?

    length.times do |i|
      key, type, value = handle_indirect_values(*@index_data[8 * i, 8].unpack('vvV'))
      yield key, type, value
    end
  end
end
|
1346
|
+
|
1347
|
+
# RawPropertyStoreTable is kind of like a database table.
|
1348
|
+
# it has a fixed set of columns.
|
1349
|
+
# #[] is kind of like getting a row from the table.
|
1350
|
+
# those rows are currently encapsulated by Row, which has #each like
|
1351
|
+
# RawPropertyStore.
|
1352
|
+
# only used for the recipients array, and the attachments array. completely lazy, doesn't
|
1353
|
+
# load any of the properties upon creation.
|
1354
|
+
#
|
1355
|
+
# @private
|
1356
|
+
class RawPropertyStoreTable < BlockParser
|
1357
|
+
# TCOLDESC
|
1358
|
+
# @private
|
1359
|
+
class Column < Struct.new(:ref_type, :type, :ind2_off, :size, :slot)
  # @param data [String] the 8 bytes of one column descriptor
  def initialize data
    super(*data.unpack('v3CC'))
  end

  # @return [String] short name of the column's data type, or its hex code when unknown
  def nice_type_name
    Mapi::Types::DATA[ref_type].first[/_(.*)/, 1].downcase rescue '0x%04x' % ref_type
  end

  # @return [String] short name of the column's property, or its hex code when unknown
  def nice_prop_name
    # tags are keyed by the 4 digit hex code, values start with the PR_* name
    entry = PropertySet::TAGS['%04x' % type]
    short_name = entry && entry.first.to_s[/_(.*)/, 1]
    short_name ? short_name.downcase : '0x%04x' % type
  end

  def inspect
    "#<#{self.class} name=#{nice_prop_name.inspect}, type=#{nice_type_name.inspect}>"
  end
end
|
1376
|
+
|
1377
|
+
include Enumerable
|
1378
|
+
|
1379
|
+
# @return [Integer] record count
|
1380
|
+
attr_reader :length
|
1381
|
+
# @return [String] Array of TCOLDESC
|
1382
|
+
attr_reader :index_data
|
1383
|
+
# @return [String] 2.3.2 BTree-on-Heap (BTH)
|
1384
|
+
attr_reader :data2
|
1385
|
+
# @return [Array<String>] 2.3.4.4 Row Matrix
|
1386
|
+
attr_reader :rows_pages
|
1387
|
+
# @return [Integer] TCI_bm
|
1388
|
+
attr_reader :rec_size
|
1389
|
+
# @return [Integer]
|
1390
|
+
attr_reader :rows_per_page
|
1391
|
+
|
1392
|
+
# @param node [NodePtr]
|
1393
|
+
# @param local_node_id [Integer]
|
1394
|
+
def initialize node, local_node_id
  super
  bTypeTC = 0x7c
  raise FormatError, "expected type 124 - got #{@heap_type}" unless @heap_type == bTypeTC

  header_data = get_data_indirect @offset1
  # TCINFO
  # often: u1 == u2 and u3 == u2 + 2, then rec_size == u3 + 4. wtf
  seven_c, @num_list, _u1, _u2, _u3, @rec_size, b_five_offset,
    rows_offset, _u7, _u8 = header_data[0, 22].unpack('CCv4V2v2')
  # the column descriptors follow the fixed size header
  @index_data = header_data[22..-1]

  raise FormatError if @num_list != schema.length || seven_c != 0x7c
  # the row counts below divide by the record size
  raise FormatError, 'invalid record size 0' if @rec_size == 0
  # another check
  min_size = schema.inject(0) { |total, col| total + col.size }
  # seem to have at max, 8 padding bytes on the end of the record. not sure if it means
  # anything. maybe its just space that hasn't been reclaimed due to columns being
  # removed or something. probably should just check lower bound.
  range = (min_size..min_size + 8)
  warn "rec_size seems wrong (#{range} !=== #{rec_size})" unless range === rec_size

  header_data2 = get_data_indirect b_five_offset
  raise FormatError if header_data2.length < 8
  signature, offset2 = header_data2.unpack 'V2'
  # ??? seems a bit iffy
  # there's probably more to the differences than this, and the data2 difference below
  expect = node.pst.header.version_2003? ? 0x000404b5 : 0x000204b5
  raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != expect

  # this holds all the row data, possibly spread over several pages
  @rows_pages = rows_offset != 0 ? get_data_array(rows_offset) : nil
  # no row data at all (no pointer, or a sub node without pages): the table is
  # empty. keep a single empty page so the arithmetic below stays simple
  @rows_pages = [""] if @rows_pages.nil? || @rows_pages.empty?

  # there must be something to the data in data2. i think data2 is the array of objects essentially.
  # currently it is not used for anything but kept around for debugging. in one case its
  # values turned out to be identical to the PR_ATTACHMENT_ID2 values:
  #   id2_values = ie, data2.unpack('v*').to_enum(:each_slice, 3).transpose[0]
  #   table[i].assoc(PR_ATTACHMENT_ID2).last == id2_values[i], for all i.
  @data2 = get_data_indirect(offset2) rescue nil

  # the record size of data2 differs between 97 and 2003 files, so the row
  # count is derived from the row data instead
  @rows_per_page = @rows_pages.first.length / @rec_size

  @length = @rows_pages.map { |data| data.length / @rec_size }.sum
end
|
1467
|
+
|
1468
|
+
# for debug
|
1469
|
+
#
|
1470
|
+
# @return [Array<Column>]
|
1471
|
+
# @private
|
1472
|
+
def schema
  return @schema if @schema

  # one Column per 8 byte descriptor
  @schema = Pst.split_per(index_data, 8, -1).map { |descriptor| Column.new(descriptor) }
end
|
1475
|
+
|
1476
|
+
# return grid row
|
1477
|
+
#
|
1478
|
+
# @param idx [Integer]
|
1479
|
+
# @return [Row]
|
1480
|
+
def [](idx)
  # rows are thin wrappers, the record data is fetched by the Row itself
  Row.new(self, idx)
end
|
1484
|
+
|
1485
|
+
# @yield [row]
|
1486
|
+
# @yieldparam row [Row]
|
1487
|
+
def each
  length.times do |row_index|
    yield self[row_index]
  end
end
|
1490
|
+
|
1491
|
+
# get record data
|
1492
|
+
#
|
1493
|
+
# @param record_index [Integer]
|
1494
|
+
# @return [String]
|
1495
|
+
# @private
|
1496
|
+
# @raise [IndexError] when there is no record with that index
def get_record record_index
  unless Integer === record_index && record_index >= 0 && record_index < @length
    raise IndexError, "record index #{record_index.inspect} out of range (#{@length} records)"
  end

  # records never span pages: locate the page first, then the slot inside it
  page_index, heap_index = record_index.divmod(@rows_per_page)
  @rows_pages[page_index][@rec_size * heap_index, @rec_size]
end
|
1501
|
+
|
1502
|
+
# One record of a RawPropertyStoreTable, enumerable as property tuples.
class Row
  include Enumerable

  # @param array_parser [RawPropertyStoreTable]
  # @param index [Integer]
  def initialize array_parser, index
    @array_parser = array_parser
    @index = index
    @data = @array_parser.get_record(index)
  end

  # iterate through the property tuples
  #
  # @yield [key, type, value]
  # @yieldparam key [Integer]
  # @yieldparam type [Integer]
  # @yieldparam value [Object]
  # @return [Enumerator] when called without a block
  def each
    return to_enum(:each) unless block_given?

    index_data = @array_parser.index_data
    (index_data.length / 8).times do |i|
      ref_type, type, ind2_off, size, _slot = index_data[8 * i, 8].unpack 'v3CC'
      value = @data[ind2_off, size]
      if size <= 4
        # cells of up to 4 bytes hold a little endian integer (or a pointer).
        # unpack('V') yields nil for fewer than 4 bytes, so 1 and 2 byte cells
        # are zero padded first
        value = value.ljust(4, "\0").unpack('V')[0]
      end
      key, type, value = @array_parser.handle_indirect_values type, ref_type, value
      yield key, type, value
    end
  end
end
|
1535
|
+
end
|
1536
|
+
|
1537
|
+
# @private
|
1538
|
+
class AttachmentTable < BlockParser
  # a "fake" MAPI property name for this constant. if you get a mapi property with
  # this value, it is the id2 value to use to get attachment data.
  #
  # @private
  PR_ATTACHMENT_ID2 = 0x67f2

  # @return [NodePtr]
  # @private
  attr_reader :node
  # @return [RawPropertyStoreTable]
  # @private
  attr_reader :table

  # @param node [NodePtr]
  def initialize node
    # BlockParser#initialize is deliberately not called, only the table of the
    # attachments sub node is wanted
    @node = node
    @table =
      if @node.has_sub(ID2_ATTACHMENTS)
        RawPropertyStoreTable.new(@node, ID2_ATTACHMENTS)
      else
        []
      end
  end

  # @return [Array<Array<Array(Integer, Integer, Object)>>]
  def to_a
    return [] unless table

    table.map do |row|
      props = row.to_a
      # potentially merge with yet more properties
      # this still seems pretty broken - especially the property overlap
      id2_entry = props.assoc(PR_ATTACHMENT_ID2)
      if id2_entry
        # verify existence of this record
        if @node.has_sub(id2_entry.last)
          RawPropertyStore.new(@node, id2_entry.last).each do |key, type, value|
            # overwrite the tuple of an already known key, append otherwise
            slot = props.assoc(key)
            unless slot
              slot = []
              props << slot
            end
            slot.replace [key, type, value]
          end
        else
          warn "attachment record is missing"
        end
      end
      props
    end
  end
end
|
1591
|
+
|
1592
|
+
# there is no equivalent to this in libpst. ID2_RECIPIENTS was just guessed given the above
|
1593
|
+
# AttachmentTable.
|
1594
|
+
#
|
1595
|
+
# @private
|
1596
|
+
class RecipientTable < BlockParser
  # @return [NodePtr]
  # @private
  attr_reader :node
  # @return [RawPropertyStoreTable]
  # @private
  attr_reader :table

  # @param node [NodePtr]
  def initialize node
    # BlockParser#initialize is deliberately not called, only the table of the
    # recipients sub node is wanted
    @node = node
    @table =
      if @node.has_sub(ID2_RECIPIENTS)
        RawPropertyStoreTable.new(@node, ID2_RECIPIENTS)
      else
        []
      end
  end

  # @return [Array<Array<Array(Integer, Integer, Object)>>]
  def to_a
    return [] unless table

    table.map(&:to_a)
  end
end
|
1627
|
+
|
1628
|
+
#
|
1629
|
+
# higher level item code. wraps up the raw properties above, and gives nice
|
1630
|
+
# objects to work with. handles item relationships too.
|
1631
|
+
# ----------------------------------------------------------------------------
|
1632
|
+
#
|
1633
|
+
|
1634
|
+
# Turn a raw list of (key, type, value) triples into a PropertySet.
#
# Each key is wrapped in a PropertySet::Key; the type element is ignored.
# When the same key occurs more than once, the last value wins.
#
# @param property_list [Array<Array(Integer, Integer, Object)>]
# @return [PropertySet]
# @private
def self.make_property_set(property_list)
  values_by_key = {}
  property_list.each do |key, _type, value|
    values_by_key[PropertySet::Key.new(key)] = value
  end
  PropertySet.new values_by_key
end
|
1643
|
+
|
1644
|
+
# An attachment whose properties come from a raw PST property list.
class Attachment < Mapi::Attachment
  # @param list [Array<Array(Integer, Integer, Object)>] raw property triples,
  #   converted with Pst.make_property_set
  def initialize(list)
    super(Pst.make_property_set(list))

    # when the attachment payload is itself an Item, expose it as the embedded message
    payload = props.attach_data
    @embedded_msg = payload if Item === payload
  end
end
|
1651
|
+
|
1652
|
+
# A recipient whose properties come from a raw PST property list.
class Recipient < Mapi::Recipient
  # @param list [Array<Array(Integer, Integer, Object)>] raw property triples,
  #   converted with Pst.make_property_set
  def initialize(list)
    property_set = Pst.make_property_set(list)
    super(property_set)
  end
end
|
1657
|
+
|
1658
|
+
class Item < Mapi::Message
|
1659
|
+
# Decoded entry id record with three fields, in on-disk order:
#
# - +u1+       32-bit little-endian unsigned integer
# - +entry_id+ 16 raw bytes (presumably the store/provider UID - not verified here)
# - +id+       32-bit little-endian unsigned integer
#
# @private
class EntryID < Struct.new(:u1, :entry_id, :id)
  # +a16+ (not +A16+) keeps all 16 bytes of the binary field intact; +A+
  # would strip trailing NUL and space bytes and silently truncate it.
  UNPACK_STR = 'Va16V'.freeze

  # @param data [String, Array] packed binary record, or the already
  #   unpacked field values in the order (u1, entry_id, id)
  def initialize data
    data = data.unpack(UNPACK_STR) if String === data
    super(*data)
  end
end
|
1668
|
+
|
1669
|
+
include RecursivelyEnumerable
|
1670
|
+
|
1671
|
+
# Obtain item type
|
1672
|
+
#
|
1673
|
+
# - `:folder`
|
1674
|
+
# - `:message`
|
1675
|
+
# - `:wastebasket`
|
1676
|
+
#
|
1677
|
+
# @return [Symbol]
|
1678
|
+
attr_accessor :type
|
1679
|
+
|
1680
|
+
# @return [Item]
|
1681
|
+
attr_accessor :parent
|
1682
|
+
|
1683
|
+
# Build an item from a node and its raw property list, and work out its type.
#
# @param node [NodePtr]
# @param list [Array] raw property triples, converted with Pst.make_property_set
# @param type [Symbol, nil] explicit item type; guessed from the properties when omitted
def initialize(node, list, type = nil)
  @node = node
  super Pst.make_property_set(list)

  # this is kind of weird, but the ids of the special folders are stored in a hash
  # (on the pst object) when the item that names them - the root item - is loaded
  wastebasket_id = ipm_wastebasket_entryid
  node.pst.special_folder_ids[wastebasket_id] = :wastebasket if wastebasket_id

  finder_id = finder_entryid
  node.pst.special_folder_ids[finder_id] = :finder if finder_id

  # and then here, those are used, along with a crappy heuristic, to determine what kind
  # of item we are.
  #
  # i think the low bits of the desc_id can give some info on the type:
  # it seems that 0x4 is for regular messages (and maybe contacts etc),
  # 0x2 is for folders, and 0x8 is for special things like rules etc, that aren't visible.
  @type = type || begin
    looks_like_folder = props.valid_folder_mask || ipm_subtree_entryid ||
                        props.content_count || props.subfolders
    if looks_like_folder
      # a folder registered as special reports its special kind instead of :folder
      node.pst.special_folder_ids[node.node_id] || :folder
    else
      :message
    end
  end
end
|
1717
|
+
|
1718
|
+
# Yield every direct child of this item, parsed into an Item whose +parent+
# is set to this item.
#
# An item that carries an ipm_subtree_entryid takes its children from the node
# that id refers to, followed by the wastebasket and finder nodes when they
# are not already among those children. Any other item uses the children of
# its own node.
#
# @yield [item]
# @yieldparam item [Item]
# @return [Enumerator] when called without a block
# @raise [RuntimeError] if the subtree node cannot be found, or if the item has
#   both a subtree id and children on its own node
def each_child
  return to_enum(:each_child) unless block_given?

  pst = @node.pst
  subtree_id = ipm_subtree_entryid
  nodes =
    if subtree_id
      subtree = pst.node_from_id subtree_id
      raise "couldn't find root" unless subtree
      raise 'both kinds of children' unless @node.children.empty?
      kids = subtree.children
      # lets look up the other ids we have.
      # typically the wastebasket one "deleted items" is in the children already, but
      # the search folder isn't.
      extras = [ipm_wastebasket_entryid, finder_entryid].compact.map do |extra_id|
        found = pst.node_from_id extra_id
        warn "couldn't find root for id #{extra_id}" unless found
        found
      end.compact
      # appended instead of using a union, so as not to mess with the order of the
      # existing children.
      kids + (extras - kids)
    else
      @node.children
    end

  nodes.each do |node|
    item = pst.pst_parse_item(node)
    item.parent = self
    yield item
  end
end
|
1748
|
+
|
1749
|
+
# Slash-separated display names of this item's ancestors, outermost first.
# The top-most ancestor (the root) and the item itself are not included.
#
# @return [String]
# @raise [RuntimeError] if an included ancestor has no display name
def path
  ancestors = []
  cursor = parent
  while cursor
    ancestors.unshift cursor
    cursor = cursor.parent
  end
  # remove root
  ancestors.shift

  names = ancestors.map do |ancestor|
    ancestor.props.display_name || raise('unable to construct path')
  end
  names.join('/')
end
|
1757
|
+
|
1758
|
+
# Collect the direct children yielded by #each_child.
#
# @return [Array<Item>]
def children
  collected = []
  each_child { |child| collected << child }
  collected
end
|
1764
|
+
|
1765
|
+
# these are still around because they do different stuff
|
1766
|
+
|
1767
|
+
# Top of Personal Folder Record
#
# Id decoded (via EntryID) from the ipm_subtree_entryid property. Any
# StandardError raised while reading or decoding the property yields nil;
# only a non-nil result is cached.
#
# @private
def ipm_subtree_entryid
  return @ipm_subtree_entryid if @ipm_subtree_entryid

  @ipm_subtree_entryid =
    begin
      EntryID.new(props.ipm_subtree_entryid.read).id
    rescue StandardError
      nil
    end
end
|
1773
|
+
|
1774
|
+
# Deleted Items Folder Record
#
# Id decoded (via EntryID) from the ipm_wastebasket_entryid property. Any
# StandardError raised while reading or decoding the property yields nil;
# only a non-nil result is cached.
#
# @private
def ipm_wastebasket_entryid
  return @ipm_wastebasket_entryid if @ipm_wastebasket_entryid

  @ipm_wastebasket_entryid =
    begin
      EntryID.new(props.ipm_wastebasket_entryid.read).id
    rescue StandardError
      nil
    end
end
|
1780
|
+
|
1781
|
+
# Search Root Record
#
# Id decoded (via EntryID) from the finder_entryid property. Any
# StandardError raised while reading or decoding the property yields nil;
# only a non-nil result is cached.
#
# @private
def finder_entryid
  return @finder_entryid if @finder_entryid

  @finder_entryid =
    begin
      EntryID.new(props.finder_entryid.read).id
    rescue StandardError
      nil
    end
end
|
1787
|
+
|
1788
|
+
# all these have been replaced with the method_missing below
|
1789
|
+
=begin
|
1790
|
+
# States which folders are valid for this message store
|
1791
|
+
#def valid_folder_mask
|
1792
|
+
# props[0x35df]
|
1793
|
+
#end
|
1794
|
+
|
1795
|
+
# Number of emails stored in a folder
|
1796
|
+
def content_count
|
1797
|
+
props[0x3602]
|
1798
|
+
end
|
1799
|
+
|
1800
|
+
# Has children
|
1801
|
+
def subfolders
|
1802
|
+
props[0x360a]
|
1803
|
+
end
|
1804
|
+
=end
|
1805
|
+
|
1806
|
+
# i think i will change these, so they can inherit the lazyness from RawPropertyStoreTable.
|
1807
|
+
# so if you want the last attachment, you can get it without creating the others perhaps.
|
1808
|
+
# it just has to handle the no table at all case a bit more gracefully.
|
1809
|
+
|
1810
|
+
# Attachments of this item, built from the node's AttachmentTable on first
# use and cached afterwards.
#
# @return [Array<Attachment>]
def attachments
  return @attachments if @attachments

  rows = AttachmentTable.new(@node).to_a
  @attachments = rows.map { |row| Attachment.new(row) }
end
|
1814
|
+
|
1815
|
+
# Recipients of this item, built from the node's RecipientTable on first use
# and cached afterwards.
#
# @return [Array<Recipient>]
def recipients
  return @recipients if @recipients

  rows = RecipientTable.new(@node).to_a
  @recipients = rows.map { |row| Recipient.new(row) }
end
|
1820
|
+
|
1821
|
+
# Visit every descendant of this item depth-first: each child is passed to
# the block, then that child's own descendants. The item itself is not
# yielded.
#
# @yield [item]
# @yieldparam item [Item]
# @return [void]
def each_recursive(&block)
  children.each do |child|
    block.call(child)
    child.each_recursive(&block)
  end
end
|
1834
|
+
|
1835
|
+
# Short description: the item kind, its node id in hex, and whichever of
# display_name / subject / sender_name / subfolders are set. When none of
# those is set, the whole property set is shown instead.
#
# @return [String]
def inspect
  parts = []
  %w[display_name subject sender_name subfolders].each do |attr|
    value = props.send attr
    parts << " #{attr}=#{value.inspect}" if value
  end
  shown = parts.join(',')

  kind =
    case type
    when :message then 'Message'
    when :folder then 'Folder'
    else type.to_s.capitalize + 'Folder'
    end
  ident = 'node_id=0x%x' % @node.node_id

  if shown.empty?
    "#<Pst::#{kind} #{ident} props=#{props.inspect}>"
  else
    "#<Pst::#{kind} #{ident}#{shown}>"
  end
end
|
1845
|
+
end
|
1846
|
+
|
1847
|
+
# Read the raw properties of a node and wrap them in an Item.
#
# corresponds to
# * _pst_parse_item
#
# @param node [NodePtr]
# @return [Item]
# @private
def pst_parse_item(node)
  raw_properties = RawPropertyStore.new(node).to_a
  Item.new(node, raw_properties)
end
|
1856
|
+
|
1857
|
+
#
|
1858
|
+
# other random code
|
1859
|
+
# ----------------------------------------------------------------------------
|
1860
|
+
#
|
1861
|
+
|
1862
|
+
# Print low-level diagnostics to standard output: the header, a map of which
# file ranges are used (with gaps, overlaps and non-zero padding flagged),
# every idx entry, the raw node tree and finally the item tree.
#
# @private
def dump_debug_info
  puts "* pst header"
  p header

  # Looking at the output of this, for blank-o1997.pst, i see this part:
  #   ...
  #   - (26624,516) desc block data (overlap of 4 bytes)
  #   - (27136,516) desc block data (gap of 508 bytes)
  #   - (28160,516) desc block data (gap of 2620 bytes)
  #   ...
  # which confirms my belief that the block size for idx and desc is more likely 512
  puts '* file range usage'
  # these things should account for most of the data in the file.
  file_ranges =
    [[0, Header::SIZE, 'pst file header']] +
    @block_offsets.map { |offset| [offset, BlockPtr::BLOCK_SIZE, 'block data'] } +
    @node_offsets.map { |offset| [offset, NodePtr::BLOCK_SIZE, 'node data'] } +
    @blocks.map { |idx| [idx.offset, idx.size, 'idx id=0x%x (%s)' % [idx.id, idx.type]] }

  # i think there is a padding of the size out to 64 bytes
  # which is equivalent to padding out the final offset, because i think the offset is
  # similarly oriented
  pad_amount = 64
  # a trailing nil gives the last range a (missing) successor, so it is reported too
  sorted_ranges = file_ranges.sort_by { |range| range.first } + [nil]
  sorted_ranges.each_cons(2) do |(offset, size, name), next_record|
    warn 'i am wrong about the offset padding' if offset % pad_amount != 0
    # so, assuming i'm not wrong about that, then we can calculate how much padding is needed.
    pad = (pad_amount - size % pad_amount) % pad_amount
    gap = next_record ? next_record.first - (offset + size + pad) : 0
    extra =
      if gap < 0
        ["overlap of #{gap.abs} bytes"]
      elsif gap > 0
        ["gap of #{gap} bytes"]
      else
        []
      end
    # how about we check that padding
    @io.pos = offset + size
    pad_bytes = @io.read(pad)
    extra += ["padding not all zero"] unless pad_bytes == 0.chr * pad
    puts "- #{offset}:#{size}+#{pad} #{name.inspect}" + (extra.empty? ? '' : ' [' + extra * ', ' + ']')
  end

  # i think the idea of the idx, and indeed the idx2, is just to be able to
  # refer to data indirectly, which means it can get moved around, and you just update
  # the idx table. it is simply a list of file offsets and sizes.
  # not sure i get how id2 plays into it though....
  # the sizes seem to be all even. is that a co-incidence? and the ids are all even. that
  # seems to be related to something else (see the (id & 2) == 1 stuff)
  puts '* idx entries'
  @blocks.each { |idx| puts "- #{idx.inspect}" }

  # if you look at the desc tree, you notice a few things:
  # 1. there is a desc that seems to be the parent of all the folders, messages etc.
  #    it is the one whose parent is itself.
  #    one of its children is referenced as the subtree_entryid of the first desc item,
  #    the root.
  # 2. typically only 2 types of desc records have idx2_id != 0. messages themselves,
  #    and the desc with id = 0x61 - the xattrib container. everything else uses the
  #    regular ids to find its data. i think it should be reframed as small blocks and
  #    big blocks, but i'll look into it more.
  #
  # idx_id and idx2_id are for getting to the data. desc_id and parent_desc_id just define
  # the parent <-> child relationship, and the desc_ids are how the items are referred to in
  # entryids.
  # note that these aren't unique! eg for 0, 4 etc. i expect these'd never change, as the ids
  # are stored in entryids. whereas the idx and idx2 could be a bit more volatile.
  puts '* node tree'
  # make a dummy root hold everything just for convenience
  root = NodePtr.new ''
  def root.inspect; "#<Pst::Root>"; end
  root.children.replace @orphans
  # this still loads the whole thing as a string for gsub. should use the direct output io
  # version.
  puts root.to_tree.gsub(/, (parent_node_id|idx2_id)=0x0(?!\d)/, '')

  # this is fairly easy to understand, its just an attempt to display the pst items in a tree form
  # which resembles what you'd see in outlook.
  puts '* item tree'
  # now streams directly
  root_item.to_tree STDOUT
end
|
1947
|
+
|
1948
|
+
# The first entry of the loaded node list, used as the root node.
#
# @return [NodePtr]
# @private
def root_desc
  first_node = @nodes.first
  first_node
end
|
1953
|
+
|
1954
|
+
# Parse the root node into an Item and mark it with type :root.
# A new Item is built on every call.
#
# @return [Item]
# @private
def root_item
  pst_parse_item(root_desc).tap { |item| item.type = :root }
end
|
1961
|
+
|
1962
|
+
# Obtain the root item (public entry point; delegates to #root_item).
#
# @return [Item]
def root
  root_item()
end
|
1968
|
+
|
1969
|
+
# depth first search of all items
|
1970
|
+
include Enumerable
|
1971
|
+
|
1972
|
+
# Iterate over every item in this store: the root item first, then all of
# its descendants via Item#each_recursive.
#
# @yield [message]
# @yieldparam message [Item]
# @return [void]
def each(&block)
  top = root
  block.call(top)
  top.each_recursive(&block)
end
|
1982
|
+
|
1983
|
+
# Display name of this store, taken from the root item's properties and
# cached once a value has been found.
#
# @return [String]
def name
  return @name if @name

  @name = root_item.props.display_name
end
|
1989
|
+
|
1990
|
+
# Short description showing the store's display name and its underlying io.
#
# @return [String]
def inspect
  ['#<Pst name=', name.inspect, ' io=', io.inspect, '>'].join
end
|
1993
|
+
end
|
1994
|
+
end
|
1995
|
+
|