ruby-msg 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,269 @@
1
+ require 'yaml'
2
+ require 'mapi/types'
3
+ require 'mapi/rtf'
4
+ require 'rtf'
5
+
6
+ module Mapi
7
+ #
8
+ # The Mapi::PropertySet class is used to wrap the lower level Msg or Pst property stores,
9
+ # and provide a consistent and more friendly interface. It allows you to just say:
10
+ #
11
+ # properties.subject
12
+ #
13
+ # instead of:
14
+ #
15
+ # properties.raw[0x0037, PS_MAPI]
16
+ #
17
+ # The underlying store can be just a hash, or lazily loading directly from the file. A good
18
+ # compromise is to cache all the available keys, and just return the values on demand, rather
19
+ # than load up many possibly unwanted values.
20
+ #
21
+ class PropertySet
22
+ # the property set guid constants
23
+ # these guids are all defined with the macro DEFINE_OLEGUID in mapiguid.h.
24
+ # see http://doc.ddart.net/msdn/header/include/mapiguid.h.html
25
# Expands the variable 8-hex-digit prefix of a standard OLE guid into a full
# Clsid. Every guid declared with DEFINE_OLEGUID shares the same fixed tail.
oleguid = proc { |prefix| Ole::Types::Clsid.parse("{#{prefix}-0000-0000-c000-000000000046}") }
28
+
29
# Maps each known property set guid to its symbolic name. Built from a list
# of [guid prefix, name] pairs, in declaration order.
NAMES = {}
[
  ['00020328', 'PS_MAPI'],
  ['00020329', 'PS_PUBLIC_STRINGS'],
  ['00020380', 'PS_ROUTING_EMAIL_ADDRESSES'],
  ['00020381', 'PS_ROUTING_ADDRTYPE'],
  ['00020382', 'PS_ROUTING_DISPLAY_NAME'],
  ['00020383', 'PS_ROUTING_ENTRYID'],
  ['00020384', 'PS_ROUTING_SEARCH_KEY'],
  # string properties in this namespace automatically get added to the internet headers
  ['00020386', 'PS_INTERNET_HEADERS'],
  # there are a bunch of outlook ones, i think
  # http://blogs.msdn.com/stephen_griffin/archive/2006/05/10/outlook-2007-beta-documentation-notification-based-indexing-support.aspx
  # IPM.Appointment
  ['00062002', 'PSETID_Appointment'],
  # IPM.Task
  ['00062003', 'PSETID_Task'],
  # used for IPM.Contact
  ['00062004', 'PSETID_Address'],
  ['00062008', 'PSETID_Common'],
  # didn't find a source for this name. it is for IPM.StickyNote
  ['0006200e', 'PSETID_Note'],
  # for IPM.Activity. also called the journal?
  ['0006200a', 'PSETID_Log']
].each { |prefix, name| NAMES[oleguid[prefix]] = name }
53
+
54
# Exposes every entry of NAMES as a constant (e.g. PS_MAPI) whose value is
# the corresponding guid, so the names can be mixed into other classes.
module Constants
  NAMES.each do |guid, name|
    const_set(name, guid)
  end
end

include Constants
59
+
60
+ # +Properties+ are accessed by <tt>Key</tt>s, which are coerced to this class.
61
+ # Includes a bunch of methods (hash, ==, eql?) to allow it to work as a key in
62
+ # a +Hash+.
63
+ #
64
+ # Also contains the code that maps keys to symbolic names.
65
# +Properties+ are accessed by <tt>Key</tt>s, which are coerced to this class.
# A key is a (code, guid) pair: +code+ is either an Integer tag or a String
# name, and +guid+ identifies the property set (PS_MAPI by default).
#
# Implements hash, == and eql? so that it can be used as a +Hash+ key, and
# contains the code that maps keys to symbolic names.
class Key
  include Constants

  attr_reader :code, :guid

  def initialize code, guid=PS_MAPI
    @code, @guid = code, guid
  end

  # Returns the symbolic name for this key, or the raw integer code when no
  # name can be found. Returns nil if +code+ is neither Integer nor String.
  #
  # hmmm, for some stuff, like, eg, the message class specific range, sym-ification
  # of the key depends on knowing our message class. i don't want to store anything else
  # here though, so if that kind of thing is needed, it can be passed to this function.
  # worry about that when some examples arise.
  def to_sym
    case code
    when Integer
      if guid == PS_MAPI # and < 0x8000 ?
        # the hash should be updated now that i've changed the process.
        # any lookup failure (unknown tag, odd entry) falls back to the code.
        TAGS['%04x' % code].first[/_(.*)/, 1].downcase.to_sym rescue code
      else
        # handle other guids here, like mapping names to outlook properties, based on the
        # outlook object model. unmapped keys fall back to the code.
        NAMED_MAP[self].to_sym rescue code
      end
    when String
      # note that named properties don't go through the map at the moment. so #categories
      # doesn't work yet
      code.downcase.to_sym
    end
  end

  def to_s
    to_sym.to_s
  end

  # FIXME implement these
  def transmittable?
    # etc, can go here too
  end

  # this stuff is to allow it to be a useful key
  def hash
    [code, guid].hash
  end

  # Two keys are equal when both their code and their guid are equal.
  # Compares the fields themselves rather than the hash values, so that a
  # hash collision, or an unrelated object that happens to share the hash,
  # is never mistaken for the same key.
  def == other
    Key === other and code == other.code and guid == other.guid
  end

  alias eql? :==

  # Plain hex for PS_MAPI integer codes, otherwise "#<Key guid/code>" using
  # the symbolic guid name when one is known.
  def inspect
    # maybe the way to do this, would be to be able to register guids
    # in a global lookup, which are used by Clsid#inspect itself, to
    # provide symbolic names...
    guid_str = NAMES[guid] || "{#{guid.format}}"
    if Integer === code
      hex = '0x%04x' % code
      if guid == PS_MAPI
        # just display as plain hex number
        hex
      else
        "#<Key #{guid_str}/#{hex}>"
      end
    else
      # display full guid and code
      "#<Key #{guid_str}/#{code.inspect}>"
    end
  end
end
135
+
136
+ # duplicated here for now
137
SUPPORT_DIR = File.dirname(__FILE__) + '/../..'

# data files that provide for the code to symbolic name mapping.
# TAGS is the parsed mapitags.yaml. NAMED_MAP is keyed by Key objects: each
# named_map.yaml key is a [code, guid constant name] pair, and the guid name
# is resolved to the matching constant defined above.
TAGS = YAML.load_file "#{SUPPORT_DIR}/data/mapitags.yaml"
NAMED_MAP = {}
YAML.load_file("#{SUPPORT_DIR}/data/named_map.yaml").each do |(code, guid_name), name|
  NAMED_MAP[Key.new(code, const_get(guid_name))] = name
end
145
+
146
+ attr_reader :raw
147
+
148
+ # +raw+ should be an hash-like object that maps <tt>Key</tt>s to values. Should respond_to?
149
+ # [], keys, values, each, and optionally []=, and delete.
150
# Wraps +raw+, the underlying hash-like property store, keeping a reference
# to it without copying.
def initialize(raw)
  @raw = raw
end
153
+
154
+ # resolve +arg+ (could be key, code, string, or symbol), and possible +guid+ to a key.
155
+ # returns nil on failure
156
# Turns +arg+ (a Key, an integer code, a string or a symbol) and an optional
# +guid+ into a Key. With a guid the key is built directly; symbolic names
# are looked up in sym_to_key, giving nil when the name is unknown.
def resolve arg, guid=nil
  return Key.new(arg, guid) if guid
  return arg if Key === arg
  return Key.new(arg) if Integer === arg
  sym_to_key[arg.to_sym]
end
166
+
167
+ # this is the function that creates a symbol to key mapping. currently this works by making a
168
+ # pass through the raw properties, but conceivably you could map symbols to keys using the
169
+ # mapitags directly. problem with that would be that named properties wouldn't map automatically,
170
+ # but maybe thats not too important.
171
# Builds (once) and returns the symbol => key mapping by walking the keys of
# the raw store. Keys without a symbolic name are skipped with a debug
# message. When two keys share a name, a warning is logged and the PS_MAPI
# key wins; otherwise the first key seen is kept.
def sym_to_key
  @sym_to_key || begin
    table = @sym_to_key = {}
    raw.keys.each do |key|
      name = key.to_sym
      if !(Symbol === name)
        Log.debug "couldn't find symbolic name for key #{key.inspect}"
      elsif !table[name]
        # first time we see this name, just assign
        table[name] = key
      else
        Log.warn "duplicate key #{key.inspect}"
        # we give preference to PS_MAPI keys
        table[name] = key if key.guid == PS_MAPI
      end
    end
    table
  end
end
191
+
192
# The symbolic names of all properties that could be named.
def keys
  sym_to_key.map { |sym, _key| sym }
end
195
+
196
# The values of all named properties, in the same order as #keys.
def values
  sym_to_key.values.collect do |key|
    raw[key]
  end
end
199
+
200
# Looks up a property value. +arg+ and +guid+ are resolved to a key as in
# #resolve, and that key is then read from the raw store.
def [] arg, guid=nil
  key = resolve(arg, guid)
  raw[key]
end
203
+
204
# Stores a property value. Accepts either props[arg] = value or
# props[arg, guid] = value; when only the value is given the guid is nil.
def []= arg, *args
  guid, value = args.length == 1 ? [nil, args[0]] : args
  # FIXME this won't really work properly. it would need to go
  # to TAGS to resolve, as it often won't be there already...
  raw[resolve(arg, guid)] = value
end
211
+
212
# Provides attribute style access to properties: props.subject reads
# self[:subject], and props.subject = x writes self['subject']. Any other
# call shape is passed on to the default handler.
def method_missing name, *args
  if name.to_s =~ /(.*)\=$/
    return self[$1] = args[0] if args.length == 1
  elsif args.empty?
    return self[name]
  end
  super
end
221
+
222
# Returns a plain hash of symbolic name => value for every named property.
def to_h
  result = {}
  sym_to_key.each { |sym, key| result[sym] = raw[key] }
  result
end
225
+
226
+ def inspect
227
+ "#<#{self.class} " + to_h.sort_by { |k, v| k.to_s }.map do |k, v|
228
+ v = v.inspect
229
+ "#{k}=#{v.length > 32 ? v[0..29] + '..."' : v}"
230
+ end.join(' ') + '>'
231
+ end
232
+
233
+ # -----
234
+
235
+ # temporary pseudo tags
236
+
237
+ # for providing rtf to plain text conversion. later, html to text too.
238
# Plain text body (memoized). Uses the :body property when it is present and
# not blank; otherwise, as a last resort, converts the rtf body to text.
# Failures in either step are swallowed and yield nil.
def body
  return @body if defined?(@body)
  @body = begin
    self[:body]
  rescue StandardError
    nil
  end
  # last resort
  if !@body || @body.strip.empty?
    Log.warn 'creating text body from rtf'
    @body = begin
      ::RTF::Converter.rtf2text(body_rtf)
    rescue StandardError
      nil
    end
  end
  @body
end
248
+
249
+ # for providing rtf decompression
250
# Decompressed rtf body (memoized). Reads the rtf_compressed property and
# decompresses it; any failure yields nil.
def body_rtf
  return @body_rtf if defined?(@body_rtf)
  @body_rtf = begin
    RTF.rtfdecompr(rtf_compressed.read)
  rescue StandardError
    nil
  end
end
254
+
255
+ # for providing rtf to html conversion
256
# Html body (memoized). Tries, in order: the :body_html property, html
# embedded in the rtf body, and finally a conversion of the rtf body to
# html. Each step only runs while the result is still missing or blank, and
# each swallows its own failures.
def body_html
  return @body_html if defined?(@body_html)
  blank = lambda { !@body_html || @body_html.strip.empty? }
  @body_html = begin
    self[:body_html].read
  rescue StandardError
    nil
  end
  if blank.call
    @body_html = begin
      RTF.rtf2html(body_rtf)
    rescue StandardError
      nil
    end
  end
  # last resort
  if blank.call
    Log.warn 'creating html body from rtf'
    @body_html = begin
      ::RTF::Converter.rtf2text(body_rtf, :html)
    rescue StandardError
      nil
    end
  end
  @body_html
end
267
+ end
268
+ end
269
+
@@ -0,0 +1,1806 @@
1
+ #
2
+ # = Introduction
3
+ #
4
+ # This file is mostly an attempt to port libpst to ruby, and simplify it in the process. It
5
+ # will leverage much of the existing MAPI => MIME conversion developed for Msg files, and as
6
+ # such is purely concerned with the file structure details.
7
+ #
8
+ # = TODO
9
+ #
10
+ # 1. solve recipient table problem (test4).
11
+ # this is done. turns out it was due to id2 clashes. find better solution
12
+ # 2. check parse consistency. an initial conversion of a 30M file to pst, shows
13
+ # a number of messages converting badly. compare with libpst too.
14
+ # 3. xattribs
15
+ # 4. generalise the Mapi stuff better
16
+ # 5. refactor index load
17
+ # 6. msg serialization?
18
+ #
19
+
20
+ =begin
21
+
22
+ quick plan for cleanup.
23
+
24
+ have working tests for 97 and 03 file formats, so safe.
25
+
26
+ want to fix up:
27
+
28
+ 64 bit unpacks scattered around. its ugly. not sure how best to handle it, but am slightly tempted
29
+ to override String#unpack to support a 64 bit little endian unpack (like L vs N/V, for Q). one way or
30
+ another need to fix it. Could really slow everything else down if its parsing the unpack strings twice,
31
+ once in ruby, for every single unpack i do :/
32
+
33
+ the index loading process, and the lack of shared code between normal vs 64 bit variants, and Index vs Desc.
34
+ should be able to reduce code by factor of 4. also think I should move load code into the class too. then
35
+ maybe have something like:
36
+
37
+ class Header
38
+ def index_class
39
+ version_2003 ? Index64 : Index
40
+ end
41
+ end
42
+
43
+ def load_idx
44
+ header.index_class.load_index
45
+ end
46
+
47
+ OR
48
+
49
+ def initialize
50
+ @header = ...
51
+ extend @header.index_class::Load
52
+ load_idx
53
+ end
54
+
55
+ need to think about the role of the mapi code, and Pst::Item etc, but that layer can come later.
56
+
57
+ =end
58
+
59
+ require 'mapi'
60
+ require 'enumerator'
61
+ require 'ostruct'
62
+ require 'ole/ranges_io'
63
+
64
+ module Mapi
65
+ class Pst
66
# Raised when the pst data doesn't have the expected structure.
class FormatError < StandardError; end
68
+
69
+ # unfortunately there is no Q analogue which is little endian only.
70
+ # this translates T as an unsigned quad word, little endian byte order, to
71
+ # not pollute the rest of the code.
72
+ #
73
+ # didn't want to override String#unpack, cause its too hacky, and incomplete.
74
# Like String#unpack, with one extra directive: T, an unsigned 64 bit little
# endian integer (there is no little-endian-only Q analogue). Each T is
# unpacked as two V values which are then merged into one number; a T for
# which the string is too short yields nil.
#
# didn't want to override String#unpack, cause its too hacky, and incomplete.
#
# To find the T values in the result, the number of elements produced by
# each preceding directive is counted: string-like directives (a A Z b B h H
# u M m) produce one element whatever their count, x X @ produce none, and
# whitespace is ignored. The translated spec is cached per +unpack_spec+.
#
# Raises ArgumentError when a T itself, or any multi-element directive
# before a T, uses a '*' count, as the position of the T values can't be
# known then.
def self.unpack str, unpack_spec
  return str.unpack(unpack_spec) unless unpack_spec['T']
  @unpack_cache ||= {}
  t_offsets, new_spec = @unpack_cache[unpack_spec]
  unless t_offsets
    t_offsets = []
    offset = 0
    new_spec = ''.dup
    # set once a directive with an unknowable element count has been seen
    open_ended = false
    unpack_spec.scan(/([^\d\s])([_!<>]*)(\*|\d+)?/) do |directive, modifiers, count|
      if directive == 'T'
        if open_ended or count == '*'
          raise ArgumentError, "can't locate T values in #{unpack_spec.inspect} ('*' is only supported after the last T)"
        end
        num_elems = (count || 1).to_i
        num_elems.times { |i| t_offsets << offset + i }
        new_spec << "V#{num_elems * 2}"
      else
        if 'aAZbBhHuMm'.include?(directive)
          num_elems = 1
        elsif 'xX@'.include?(directive)
          num_elems = 0
        elsif count == '*'
          open_ended = true
          num_elems = 0
        else
          num_elems = (count || 1).to_i
        end
        new_spec << directive << modifiers << count.to_s
      end
      offset += num_elems
    end
    @unpack_cache[unpack_spec] = [t_offsets, new_spec]
  end
  a = str.unpack(new_spec)
  # offsets are positions in the final result, so merging in ascending order
  # keeps the later offsets valid as the array shrinks by one per T.
  t_offsets.each do |t_offset|
    low, high = a[t_offset, 2]
    a[t_offset, 2] = [low && high ? low + (high << 32) : nil]
  end
  a
end
101
+
102
+ #
103
+ # this is the header and encryption encapsulation code
104
+ # ----------------------------------------------------------------------------
105
+ #
106
+
107
+ # class which encapsulates the pst header
108
# class which encapsulates the pst header.
#
# Parses the fixed size header block found at the start of a pst file:
# signature, index type (which determines the format version), encryption
# type, the two index pointers with their counts, and the file size field.
class Header
  SIZE = 512
  MAGIC = 0x2142444e

  # these are the constants defined in libpst.c, that
  # are referenced in pst_open()
  INDEX_TYPE_OFFSET = 0x0A
  FILE_SIZE_POINTER = 0xA8
  FILE_SIZE_POINTER_64 = 0xB8
  SECOND_POINTER = 0xBC
  INDEX_POINTER = 0xC4
  SECOND_POINTER_64 = 0xE0
  INDEX_POINTER_64 = 0xF0
  ENC_OFFSET = 0x1CD

  attr_reader :magic, :index_type, :encrypt_type, :size
  attr_reader :index1_count, :index1, :index2_count, :index2
  attr_reader :version

  # +data+ is the first SIZE bytes of the file.
  # Raises FormatError if the data is too short or fails #validate!.
  def initialize data
    unless data and data.length >= SIZE
      raise FormatError, "pst header too short (#{data ? data.length : 0} bytes, expected #{SIZE})"
    end

    @magic = data.unpack('N')[0]
    # single bytes are read through unpack, as String#[] with an integer
    # only returns a number on ruby 1.8.
    @index_type = data[INDEX_TYPE_OFFSET, 1].unpack('C')[0]
    @version = {0x0e => 1997, 0x17 => 2003}[@index_type]

    if version_2003?
      # don't know where the encryption type lives in the 2003 format. there
      # were only 2 psts to base a guess on, and so much of the header isn't
      # understood, that nothing reasonable came out of comparing them. so it
      # is simply assumed to be 1.
      @encrypt_type = 1

      @index2_count, @index2 = data[SECOND_POINTER_64 - 4, 8].unpack('V2')
      @index1_count, @index1 = data[INDEX_POINTER_64 - 4, 8].unpack('V2')

      @size = data[FILE_SIZE_POINTER_64, 4].unpack('V')[0]
    else
      @encrypt_type = data[ENC_OFFSET, 1].unpack('C')[0]

      @index2_count, @index2 = data[SECOND_POINTER - 4, 8].unpack('V2')
      @index1_count, @index1 = data[INDEX_POINTER - 4, 8].unpack('V2')

      @size = data[FILE_SIZE_POINTER, 4].unpack('V')[0]
    end

    validate!
  end

  def version_2003?
    version == 2003
  end

  def encrypted?
    encrypt_type != 0
  end

  # Raises FormatError unless the signature, index type and encryption type
  # are all ones that are handled.
  def validate!
    raise FormatError, "bad signature on pst file (#{'0x%x' % magic})" unless magic == MAGIC
    raise FormatError, "only index types 0x0e and 0x17 are handled (#{'0x%x' % index_type})" unless [0x0e, 0x17].include?(index_type)
    raise FormatError, "only encrytion types 0 and 1 are handled (#{encrypt_type.inspect})" unless [0, 1].include?(encrypt_type)
  end
end
171
+
172
+ # compressible encryption! :D
173
+ #
174
+ # simple substitution. see libpst.c
175
+ # maybe test switch to using a String#tr!
176
# compressible encryption! :D
#
# simple byte for byte substitution. see libpst.c
#
# DECRYPT_TABLE[b] is the plain byte for encrypted byte b, and ENCRYPT_TABLE
# is its inverse. All methods work on bytes and don't modify their argument.
class CompressibleEncryption
  DECRYPT_TABLE = [
    0x47, 0xf1, 0xb4, 0xe6, 0x0b, 0x6a, 0x72, 0x48,
    0x85, 0x4e, 0x9e, 0xeb, 0xe2, 0xf8, 0x94, 0x53, # 0x0f
    0xe0, 0xbb, 0xa0, 0x02, 0xe8, 0x5a, 0x09, 0xab,
    0xdb, 0xe3, 0xba, 0xc6, 0x7c, 0xc3, 0x10, 0xdd, # 0x1f
    0x39, 0x05, 0x96, 0x30, 0xf5, 0x37, 0x60, 0x82,
    0x8c, 0xc9, 0x13, 0x4a, 0x6b, 0x1d, 0xf3, 0xfb, # 0x2f
    0x8f, 0x26, 0x97, 0xca, 0x91, 0x17, 0x01, 0xc4,
    0x32, 0x2d, 0x6e, 0x31, 0x95, 0xff, 0xd9, 0x23, # 0x3f
    0xd1, 0x00, 0x5e, 0x79, 0xdc, 0x44, 0x3b, 0x1a,
    0x28, 0xc5, 0x61, 0x57, 0x20, 0x90, 0x3d, 0x83, # 0x4f
    0xb9, 0x43, 0xbe, 0x67, 0xd2, 0x46, 0x42, 0x76,
    0xc0, 0x6d, 0x5b, 0x7e, 0xb2, 0x0f, 0x16, 0x29, # 0x5f
    0x3c, 0xa9, 0x03, 0x54, 0x0d, 0xda, 0x5d, 0xdf,
    0xf6, 0xb7, 0xc7, 0x62, 0xcd, 0x8d, 0x06, 0xd3, # 0x6f
    0x69, 0x5c, 0x86, 0xd6, 0x14, 0xf7, 0xa5, 0x66,
    0x75, 0xac, 0xb1, 0xe9, 0x45, 0x21, 0x70, 0x0c, # 0x7f
    0x87, 0x9f, 0x74, 0xa4, 0x22, 0x4c, 0x6f, 0xbf,
    0x1f, 0x56, 0xaa, 0x2e, 0xb3, 0x78, 0x33, 0x50, # 0x8f
    0xb0, 0xa3, 0x92, 0xbc, 0xcf, 0x19, 0x1c, 0xa7,
    0x63, 0xcb, 0x1e, 0x4d, 0x3e, 0x4b, 0x1b, 0x9b, # 0x9f
    0x4f, 0xe7, 0xf0, 0xee, 0xad, 0x3a, 0xb5, 0x59,
    0x04, 0xea, 0x40, 0x55, 0x25, 0x51, 0xe5, 0x7a, # 0xaf
    0x89, 0x38, 0x68, 0x52, 0x7b, 0xfc, 0x27, 0xae,
    0xd7, 0xbd, 0xfa, 0x07, 0xf4, 0xcc, 0x8e, 0x5f, # 0xbf
    0xef, 0x35, 0x9c, 0x84, 0x2b, 0x15, 0xd5, 0x77,
    0x34, 0x49, 0xb6, 0x12, 0x0a, 0x7f, 0x71, 0x88, # 0xcf
    0xfd, 0x9d, 0x18, 0x41, 0x7d, 0x93, 0xd8, 0x58,
    0x2c, 0xce, 0xfe, 0x24, 0xaf, 0xde, 0xb8, 0x36, # 0xdf
    0xc8, 0xa1, 0x80, 0xa6, 0x99, 0x98, 0xa8, 0x2f,
    0x0e, 0x81, 0x65, 0x73, 0xe4, 0xc2, 0xa2, 0x8a, # 0xef
    0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2,
    0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec  # 0xff
  ]

  ENCRYPT_TABLE = [nil] * 256
  DECRYPT_TABLE.each_with_index { |i, j| ENCRYPT_TABLE[i] = j }

  # Table driven decryption, one byte at a time. Goes through unpack/pack
  # rather than String#[], which only yields a byte value on ruby 1.8.
  def self.decrypt_alt encrypted
    encrypted.unpack('C*').map { |byte| DECRYPT_TABLE[byte] }.pack('C*')
  end

  # Table driven encryption, the inverse of decrypt_alt.
  def self.encrypt_alt decrypted
    decrypted.unpack('C*').map { |byte| ENCRYPT_TABLE[byte] }.pack('C*')
  end

  # an alternate implementation that is possibly faster....
  # TODO - bench
  #
  # the tables as byte strings for String#tr, with the characters that are
  # special to tr (^ - \) escaped. built with pack so that they are binary
  # strings.
  DECRYPT_STR, ENCRYPT_STR = [DECRYPT_TABLE, (0...256)].map do |values|
    values.to_a.pack('C*').gsub(/([\^\-\\])/n, "\\\\\\1")
  end

  # Returns the decrypted copy of +encrypted+.
  def self.decrypt encrypted
    binary(encrypted).tr ENCRYPT_STR, DECRYPT_STR
  end

  # Returns the encrypted copy of +decrypted+.
  def self.encrypt decrypted
    binary(decrypted).tr DECRYPT_STR, ENCRYPT_STR
  end

  # On rubies with string encodings, returns a binary copy of +str+, as tr
  # refuses to mix the binary tables with e.g. a utf-8 string containing
  # high bytes. On ruby 1.8 the string is returned as is.
  def self.binary str
    str.respond_to?(:force_encoding) ? str.dup.force_encoding('BINARY') : str
  end
  private_class_method :binary
end
241
+
242
# A RangesIO whose reads are optionally passed through
# CompressibleEncryption.decrypt, selected with the :decrypt param.
class RangesIOEncryptable < RangesIO
  # +mode+ may be omitted, in which case the second argument is the params
  # hash and the mode is 'r'.
  def initialize io, mode='r', params={}
    if Hash === mode
      params = mode
      mode = 'r'
    end
    @decrypt = params[:decrypt] ? true : false
    super
  end

  # true if data read through this io is decrypted on the way out.
  def encrypted?
    @decrypt
  end

  def read limit=nil
    data = super
    encrypted? ? CompressibleEncryption.decrypt(data) : data
  end
end
259
+
260
+ attr_reader :io, :header, :idx, :desc, :special_folder_ids
261
+
262
+ # corresponds to
263
+ # * pst_open
264
+ # * pst_load_index
265
+ def initialize io
266
+ @io = io
267
+ io.pos = 0
268
+ @header = Header.new io.read(Header::SIZE)
269
+
270
+ # would prefer this to be in Header#validate, but it doesn't have the io size.
271
+ # should perhaps downgrade this to just be a warning...
272
+ raise FormatError, "header size field invalid (#{header.size} != #{io.size}}" unless header.size == io.size
273
+
274
+ load_idx
275
+ load_desc
276
+ load_xattrib
277
+
278
+ @special_folder_ids = {}
279
+ end
280
+
281
# Whether the file is encrypted, as reported by its header.
def encrypted?
  hdr = @header
  hdr.encrypted?
end
284
+
285
+ # until i properly fix logging...
286
# until i properly fix logging... warnings are forwarded to the Mapi logger.
def warn(s)
  Mapi::Log.warn(s)
end
289
+
290
+ #
291
+ # this is the index and desc record loading code
292
+ # ----------------------------------------------------------------------------
293
+ #
294
+
295
+ ToTree = Module.new
296
+
297
# Skeleton for a planned restructuring of the index loading code: shared
# behaviour in mixins, one record class per file format version. The
# method bodies are placeholders.
module Index2
  BLOCK_SIZE = 512

  module RecursiveLoad
    def load_chain; end
  end

  module Base
    def read; end
  end

  # placeholder record layout
  class Version1997 < Struct.new(:a)
    SIZE = 12

    include RecursiveLoad
    include Base
  end

  # placeholder record layout
  class Version2003 < Struct.new(:a)
    SIZE = 24

    include RecursiveLoad
    include Base
  end
end
325
+
326
# Skeleton for a planned restructuring of the desc loading code, with one
# record class per file format version. The method body is a placeholder.
module Desc2
  module Base
    def desc; end
  end

  # placeholder record layout
  class Version1997 < Struct.new(:a)
    #include Index::RecursiveLoad
    include Base
  end

  # placeholder record layout
  class Version2003 < Struct.new(:a)
    #include Index::RecursiveLoad
    include Base
  end
end
343
+
344
+ # more constants from libpst.c
345
+ # these relate to the index block
346
+ ITEM_COUNT_OFFSET = 0x1f0 # count byte
347
+ LEVEL_INDICATOR_OFFSET = 0x1f3 # node or leaf
348
+ BACKLINK_OFFSET = 0x1f8 # backlink u1 value
349
+
350
+ # these 3 classes are used to hold various file records
351
+
352
+ # pst_index
353
# pst_index
#
# An index record, giving the +offset+ and +size+ in the file of the block
# with the given +id+. Can be created from the packed 12 byte record, or from
# an array of already unpacked field values. +pst+ must be assigned before
# the block can be read.
class Index < Struct.new(:id, :offset, :size, :u1)
  UNPACK_STR = 'VVvv'
  SIZE = 12
  BLOCK_SIZE = 512 # index blocks was 516 but bogus
  COUNT_MAX = 41 # max active items (ITEM_COUNT_OFFSET / Index::SIZE = 41)

  attr_accessor :pst
  def initialize data
    data = Pst.unpack data, UNPACK_STR if String === data
    super(*data)
  end

  # Classifies the block (memoized): :data when bit 0x2 of the id is clear,
  # otherwise :data_chain_header or :id2_assoc depending on the first two
  # bytes of the block, which requires reading it.
  #
  # Raises FormatError if those bytes aren't a known combination.
  def type
    @type ||= begin
      if data?
        :data
      else
        first_byte, second_byte = read.unpack('CC')
        if first_byte == 1
          raise FormatError, 'unexpected second byte for data chain header block - %p' % second_byte unless second_byte == 1
          :data_chain_header
        elsif first_byte == 2
          raise FormatError, 'unexpected second byte for id2 assoc block - %p' % second_byte unless second_byte == 0
          :id2_assoc
        else
          raise FormatError, 'unknown first byte for block - %p' % first_byte
        end
      end
    end
  end

  def data?
    (id & 0x2) == 0
  end

  # Reads the block through the owning pst.
  def read decrypt=true
    # only data blocks are ever encrypted
    decrypt = false unless data?
    pst.pst_read_block_size offset, size, decrypt
  end

  # show all numbers in hex
  def inspect
    super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }.sub(/Index /, "Index type=#{type.inspect}, ")
  end
end
399
+
400
+ # mostly guesses.
401
+ ITEM_COUNT_OFFSET_64 = 0x1e8
402
+ LEVEL_INDICATOR_OFFSET_64 = 0x1eb # diff of 3 between these 2 as above...
403
+
404
+ # will maybe inherit from Index64, in order to get the same #type function.
405
# The index record of the 2003 file format. Has 64 bit id and offset fields,
# and one extra trailing field, +u2+.
class Index64 < Index
  UNPACK_STR = 'TTvvV'
  SIZE = 24
  BLOCK_SIZE = 512
  COUNT_MAX = 20 # bit of a guess really. 512 / 24 = 21, but doesn't leave enough header room

  # this is the extra item on the end of the UNPACK_STR above
  attr_accessor :u2

  # +data+ is the packed record, or an array of the unpacked field values
  # with u2 last. A passed array is copied, not modified.
  def initialize data
    data = String === data ? Pst.unpack(data, UNPACK_STR) : data.dup
    @u2 = data.pop
    super data
  end

  def inspect
    super.sub(/>$/, ', u2=%p>' % u2)
  end

  # Loads all the index records, starting from the header's index1 pointer.
  def self.load_chain io, header
    load_idx_rec io, header.index1, 0, 0
  end

  # Recursive helper. Reads the block at +offset+ and returns the index
  # records found in it, if it is a leaf, or in the blocks it points to.
  # +start_val+, when non zero, is the id the first item is expected to have.
  #
  # almost identical to load code for Index, just different offsets and unpack strings.
  # can probably merge them, or write a generic load_tree function or something.
  def self.load_idx_rec io, offset, linku1, start_val
    io.seek offset
    buf = io.read BLOCK_SIZE
    idxs = []

    # single bytes are read through unpack, as String#[] with an integer only
    # returns a number on ruby 1.8.
    item_count = buf[ITEM_COUNT_OFFSET_64, 1].unpack('C')[0]
    raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX

    #idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
    #raise 'blah 1' unless idx.id == linku1

    if buf[LEVEL_INDICATOR_OFFSET_64, 1].unpack('C')[0] == 0
      # leaf pointers
      # split the data into item_count index objects
      buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
        idx = new data
        # first entry
        raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
        #idx.pst = self
        break if idx.id == 0
        idxs << idx
      end
    else
      # node pointers
      # split the data into item_count table pointers
      buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
        start, u1, child_offset = Pst.unpack data, 'T3'
        # for the first value, we expect the start to be equal
        raise 'blah 3' if i == 0 and start_val != 0 and start != start_val
        break if start == 0
        idxs += load_idx_rec io, child_offset, u1, start
      end
    end

    idxs
  end
end
467
+
468
+ # pst_desc
469
# pst_desc
#
# The desc record of the 2003 file format. Has 64 bit desc_id, idx_id and
# idx2_id fields. +pst+ must be assigned before #desc or #list_index are used.
class Desc64 < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id, :u2)
  UNPACK_STR = 'T3VV'
  SIZE = 32
  BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
  COUNT_MAX = 15 # guess as per Index64

  include RecursivelyEnumerable

  attr_accessor :pst
  attr_reader :children
  def initialize data
    super(*Pst.unpack(data, UNPACK_STR))
    @children = []
  end

  # the index record referenced by idx_id
  def desc
    pst.idx_from_id idx_id
  end

  # the index record referenced by idx2_id
  def list_index
    pst.idx_from_id idx2_id
  end

  # Loads all the desc records, starting from the header's index2 pointer.
  def self.load_chain io, header
    load_desc_rec io, header.index2, 0, 0x21
  end

  # Recursive helper. Reads the block at +offset+ and returns the desc
  # records found in it, if it is a leaf, or in the blocks it points to.
  # +start_val+ is the id the first item is expected to have.
  def self.load_desc_rec io, offset, linku1, start_val
    io.seek offset
    buf = io.read BLOCK_SIZE
    descs = []
    # single bytes are read through unpack, as String#[] with an integer only
    # returns a number on ruby 1.8.
    item_count = buf[ITEM_COUNT_OFFSET_64, 1].unpack('C')[0]

    # not real desc
    #desc = Desc.new buf[BACKLINK_OFFSET, 4]
    #raise 'blah 1' unless desc.desc_id == linku1

    if buf[LEVEL_INDICATOR_OFFSET_64, 1].unpack('C')[0] == 0
      # leaf pointers
      raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX
      # split the data into item_count desc objects
      buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
        desc = new data
        # first entry
        raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
        break if desc.desc_id == 0
        descs << desc
      end
    else
      # node pointers
      raise "have too many active items in index (#{item_count})" if item_count > Index64::COUNT_MAX
      # split the data into item_count table pointers
      buf[0, Index64::SIZE * item_count].scan(/.{#{Index64::SIZE}}/mo).each_with_index do |data, i|
        start, u1, child_offset = Pst.unpack data, 'T3'
        # for the first value, we expect the start to be equal note that ids -1, so even for the
        # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
        # that the first desc record is always 33...
        # thats because 0x21 is the pst root itself...
        raise 'blah 3' if i == 0 and start_val != -1 and start != start_val
        # this shouldn't really happen i'd imagine
        break if start == 0
        descs += load_desc_rec io, child_offset, u1, start
      end
    end

    descs
  end

  def each_child(&block)
    @children.each(&block)
  end
end
541
+
542
+ # _pst_table_ptr_struct
543
# _pst_table_ptr_struct
#
# A pointer from a node block to a child block. Built from the packed 12
# byte record, or from an array of the three unpacked values.
class TablePtr < Struct.new(:start, :u1, :offset)
  UNPACK_STR = 'V3'
  SIZE = 12

  def initialize data
    fields = data.is_a?(String) ? data.unpack(UNPACK_STR) : data
    super(*fields)
  end
end
552
+
553
+ # pst_desc
554
+ # idx_id is a pointer to an idx record which gets the primary data stream for the Desc record.
555
+ # idx2_id gets you an idx record, that when read gives you an ID2 association list, which just maps
556
+ # another set of ids to index values
557
# pst_desc
#
# A desc record, built from its packed form (four 32 bit values).
# idx_id is a pointer to an idx record which gets the primary data stream for the Desc record.
# idx2_id gets you an idx record, that when read gives you an ID2 association list, which just maps
# another set of ids to index values.
# +pst+ must be assigned before #desc or #list_index are used.
class Desc < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id)
  UNPACK_STR = 'V4'
  SIZE = 16
  BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
  COUNT_MAX = 31 # max active desc records (ITEM_COUNT_OFFSET / Desc::SIZE = 31)

  include ToTree

  attr_accessor :pst
  attr_reader :children

  def initialize data
    fields = data.unpack(UNPACK_STR)
    super(*fields)
    @children = []
  end

  # the index record for the primary data stream
  def desc
    pst.idx_from_id(idx_id)
  end

  # the index record for the id2 association list
  def list_index
    pst.idx_from_id(idx2_id)
  end

  # show all numbers in hex
  def inspect
    super.gsub(/=(\d+)/) { '=0x%x' % $~[1].to_i }
  end
end
585
+
586
+ # corresponds to
587
+ # * _pst_build_id_ptr
588
# Loads all index records into @idx, using the loader matching the file
# format version, and builds the id => record lookup used by idx_from_id.
# Warns about ids that occur more than once; the last record wins.
#
# corresponds to
# * _pst_build_id_ptr
def load_idx
  @idx = []
  @idx_offsets = []
  if !header.version_2003?
    load_idx_rec header.index1, header.index1_count, 0
  else
    @idx = Index64.load_chain io, header
    @idx.each { |rec| rec.pst = self }
  end

  # we'll typically be accessing by id, so create a hash as a lookup cache
  @idx_from_id = {}
  @idx.each do |rec|
    rec_id = rec.id
    warn "there are duplicate idx records with id #{rec_id}" if @idx_from_id[rec_id]
    @idx_from_id[rec_id] = rec
  end
end
605
+
606
+ # load the flat idx table, which maps ids to file ranges. this is the recursive helper
607
+ #
608
+ # corresponds to
609
+ # * _pst_build_id_ptr
610
# load the flat idx table, which maps ids to file ranges. this is the recursive helper
#
# Reads the index block at +offset+. A leaf block's records are appended to
# @idx; for a node block, each block it points to is loaded in turn.
# +linku1+ is the value the block's backlink is expected to have, and
# +start_val+, when non zero, the id expected for the first item.
#
# corresponds to
# * _pst_build_id_ptr
def load_idx_rec offset, linku1, start_val
  @idx_offsets << offset

  #_pst_read_block_size(pf, offset, BLOCK_SIZE, &buf, 0, 0) < BLOCK_SIZE)
  buf = pst_read_block_size offset, Index::BLOCK_SIZE, false

  # single bytes are read through unpack, as String#[] with an integer only
  # returns a number on ruby 1.8.
  item_count = buf[ITEM_COUNT_OFFSET, 1].unpack('C')[0]
  raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX

  backlink = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
  raise 'blah 1' unless backlink.id == linku1

  if buf[LEVEL_INDICATOR_OFFSET, 1].unpack('C')[0] == 0
    # leaf pointers
    # split the data into item_count index objects
    buf[0, Index::SIZE * item_count].scan(/.{#{Index::SIZE}}/mo).each_with_index do |data, i|
      idx = Index.new data
      # first entry
      raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
      idx.pst = self
      # this shouldn't really happen i'd imagine
      break if idx.id == 0
      @idx << idx
    end
  else
    # node pointers
    # split the data into item_count table pointers
    buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
      table = TablePtr.new data
      # for the first value, we expect the start to be equal
      raise 'blah 3' if i == 0 and start_val != 0 and table.start != start_val
      # this shouldn't really happen i'd imagine
      break if table.start == 0
      load_idx_rec table.offset, table.u1, table.start
    end
  end
end
647
+
648
+ # most access to idx objects will use this function
649
+ #
650
+ # corresponds to
651
+ # * _pst_getID
652
# most access to idx objects will use this function. Returns the index
# record with the given id, or nil if there isn't one.
#
# corresponds to
# * _pst_getID
def idx_from_id(id)
  @idx_from_id[id]
end
655
+
656
+ # corresponds to
657
+ # * _pst_build_desc_ptr
658
+ # * record_descriptor
659
# Loads all desc records into @desc, using the loader matching the file
# format version, builds the id => record lookup used by desc_from_id, and
# then links the records into a tree through their children arrays.
#
# corresponds to
# * _pst_build_desc_ptr
# * record_descriptor
def load_desc
  @desc = []
  @desc_offsets = []
  if !header.version_2003?
    load_desc_rec header.index2, header.index2_count, 0x21
  else
    @desc = Desc64.load_chain io, header
    @desc.each { |rec| rec.pst = self }
  end

  # first create a lookup cache
  @desc_from_id = {}
  @desc.each do |rec|
    rec.pst = self
    rec_id = rec.desc_id
    warn "there are duplicate desc records with id #{rec_id}" if @desc_from_id[rec_id]
    @desc_from_id[rec_id] = rec
  end

  # now turn the flat list of loaded desc records into a tree.
  # each record is added to the child array of its parent. those without a
  # known parent, or which are their own parent (this actually happens
  # usually, for the root_item it appears), end up in @orphans. well, they
  # have no parent, so they're more like, the toplevel descs.
  # note, besides a record being its own parent, its possible to create other
  # circular structures. maybe add some more checks in here for those.
  @orphans = []
  @desc.each do |rec|
    parent = @desc_from_id[rec.parent_desc_id]
    if parent and parent != rec
      parent.children << rec
    else
      @orphans << rec
    end
  end

  # maybe change this to some sort of sane-ness check. orphans are expected
  # warn "have #{@orphans.length} orphan desc record(s)." unless @orphans.empty?
end
699
+
700
+ # load the flat list of desc records recursively
701
+ #
702
+ # corresponds to
703
+ # * _pst_build_desc_ptr
704
+ # * record_descriptor
705
# load the flat list of desc records recursively
#
# Reads the desc block at +offset+. A leaf block's records are appended to
# @desc; for a node block, each block it points to is loaded in turn.
# +linku1+ is the value the block's backlink is expected to have, and
# +start_val+ the id expected for the first item.
#
# corresponds to
# * _pst_build_desc_ptr
# * record_descriptor
def load_desc_rec offset, linku1, start_val
  @desc_offsets << offset

  buf = pst_read_block_size offset, Desc::BLOCK_SIZE, false
  # single bytes are read through unpack, as String#[] with an integer only
  # returns a number on ruby 1.8.
  item_count = buf[ITEM_COUNT_OFFSET, 1].unpack('C')[0]

  # not real desc
  backlink = Desc.new buf[BACKLINK_OFFSET, 4]
  raise 'blah 1' unless backlink.desc_id == linku1

  if buf[LEVEL_INDICATOR_OFFSET, 1].unpack('C')[0] == 0
    # leaf pointers
    raise "have too many active items in index (#{item_count})" if item_count > Desc::COUNT_MAX
    # split the data into item_count desc objects
    buf[0, Desc::SIZE * item_count].scan(/.{#{Desc::SIZE}}/mo).each_with_index do |data, i|
      desc = Desc.new data
      # first entry
      raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
      # this shouldn't really happen i'd imagine
      break if desc.desc_id == 0
      @desc << desc
    end
  else
    # node pointers
    raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX
    # split the data into item_count table pointers
    buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
      table = TablePtr.new data
      # for the first value, we expect the start to be equal note that ids -1, so even for the
      # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
      # that the first desc record is always 33...
      raise 'blah 3' if i == 0 and start_val != -1 and table.start != start_val
      # this shouldn't really happen i'd imagine
      break if table.start == 0
      load_desc_rec table.offset, table.u1, table.start
    end
  end
end
743
+
744
+ # as for idx
745
+ #
746
+ # corresponds to:
747
+ # * _pst_getDptr
748
# Looks up a desc record by its id in the cache built while loading the desc
# index. Returns nil when no record with that id is known.
#
# corresponds to:
# * _pst_getDptr
def desc_from_id id
  lookup = @desc_from_id
  lookup[id]
end
751
+
752
+ # corresponds to
753
+ # * pst_load_extended_attributes
754
# Placeholder for loading the extended attribute map, which is looked up as
# desc record 0x61. Currently it only warns when that record, or its index
# record, is missing; nothing is loaded. Always returns nil.
#
# corresponds to
# * pst_load_extended_attributes
def load_xattrib
  desc = desc_from_id(0x61)
  unless desc
    warn "no extended attributes desc record found"
    return
  end

  if desc.desc
    # FIXME implement loading xattribs. for now the list index is only touched.
    desc.list_index
    nil
  else
    warn "no desc idx for extended attributes"
    nil
  end
end
768
+
769
+ # corresponds to:
770
+ # * _pst_read_block_size
771
+ # * _pst_read_block ??
772
+ # * _pst_ff_getIDblock_dec ??
773
+ # * _pst_ff_getIDblock ??
774
# Reads +size+ bytes from the underlying io starting at +offset+.
#
# offset::  absolute position to seek to.
# size::    number of bytes wanted. a warning is logged on a short read.
# decrypt:: when true (the default) and the file is encrypted, the bytes are
#           passed through CompressibleEncryption.decrypt before being returned.
#
# returns the (possibly shorter than requested, possibly empty) string read.
#
# corresponds to:
# * _pst_read_block_size
# * _pst_read_block ??
# * _pst_ff_getIDblock_dec ??
# * _pst_ff_getIDblock ??
def pst_read_block_size offset, size, decrypt=true
  io.seek offset
  # read returns nil rather than '' when positioned at or past the end of the
  # data. treat that as an empty read so it is reported as a short read below
  # instead of failing with a NoMethodError.
  buf = io.read(size) || ''
  warn "tried to read #{size} bytes but only got #{buf.length}" if buf.length != size
  encrypted? && decrypt ? CompressibleEncryption.decrypt(buf) : buf
end
780
+
781
+ #
782
+ # id2
783
+ # ----------------------------------------------------------------------------
784
+ #
785
+
786
# One record of an id2 association table: maps an id2 value to an index id,
# optionally pointing at a further table (table2) of associations.
class ID2Assoc < Struct.new(:id2, :id, :table2)
  # three little endian 32 bit values
  UNPACK_STR = 'V3'
  SIZE = 12

  # data is either the raw record string, or an already decoded list of the
  # three member values.
  def initialize data
    fields = String === data ? data.unpack(UNPACK_STR) : data
    super(*fields)
  end
end
795
+
796
# The wider variant of ID2Assoc, used by load_idx2 for version 2003 files.
# Records are 24 bytes, and are decoded through Pst.unpack because of the
# non-standard T directive in the unpack string.
class ID2Assoc64 < Struct.new(:id2, :u1, :id, :table2)
  UNPACK_STR = 'VVT2'
  SIZE = 24

  # data is either the raw record string, or an already decoded list of the
  # four member values.
  def initialize data
    fields = String === data ? Pst.unpack(data, UNPACK_STR) : data
    super(*fields)
  end

  # Reads the association block behind idx and returns all of its records as a
  # flat array. Whenever a record has a non-zero table2, the records of that
  # nested table are inserted directly after it.
  #
  # raises RuntimeError if the block doesn't carry the 0x0002 type marker.
  def self.load_chain idx
    buf = idx.read
    type, count = buf.unpack 'v2'
    raise 'unknown id2 type 0x%04x' % type unless type == 0x0002

    chain = []
    0.upto(count - 1) do |n|
      # records start after an 8 byte header
      assoc = new buf[8 + SIZE * n, SIZE]
      chain << assoc
      next if assoc.table2 == 0
      chain.concat load_chain(idx.pst.idx_from_id(assoc.table2))
    end
    chain
  end
end
825
+
826
# Lookup from id2 values to index records, built from a flat list of id2
# association records.
class ID2Mapping
  attr_reader :list

  # pst::  the object used to resolve ids, via #idx_from_id.
  # list:: association records responding to #id2 and #id.
  def initialize pst, list
    @pst, @list = pst, list
    # create a lookup.
    @id_from_id2 = {}
    @list.each do |assoc|
      if @id_from_id2[assoc.id2]
        # NOTE when there are duplicates, the first value seen is kept and the
        # later ones are dropped with a warning. this "fixes" test4-o1997.pst
        # for the time being.
        warn "there are duplicate id2 records with id #{assoc.id2}"
      else
        @id_from_id2[assoc.id2] = assoc.id
      end
    end
  end

  # TODO: fix logging
  def warn s
    Mapi::Log.warn s
  end

  # Returns the index record for the given id2 value, or nil if the id2 value
  # isn't part of this mapping.
  #
  # corresponds to:
  # * _pst_getID2
  def [] id
    real_id = @id_from_id2[id]
    real_id ? @pst.idx_from_id(real_id) : real_id
  end
end
855
+
856
# Loads the id2 association records reachable from idx and wraps them in an
# ID2Mapping. Version 2003 files use the ID2Assoc64 record layout, everything
# else goes through load_idx2_rec.
def load_idx2 idx
  assocs = header.version_2003? ? ID2Assoc64.load_chain(idx) : load_idx2_rec(idx)
  ID2Mapping.new self, assocs
end
864
+
865
+ # corresponds to
866
+ # * _pst_build_id2
867
# Reads the id2 association block described by idx (without decryption) and
# returns its ID2Assoc records as a flat array. When a record has a non-zero
# table2, the records of that nested table are inserted directly after it.
#
# raises RuntimeError if the block doesn't carry the 0x0002 type marker.
#
# corresponds to
# * _pst_build_id2
def load_idx2_rec idx
  # i should perhaps use a idx chain style read here?
  buf = pst_read_block_size idx.offset, idx.size, false
  type, count = buf.unpack 'v2'
  raise 'unknown id2 type 0x%04x' % type unless type == 0x0002

  records = []
  0.upto(count - 1) do |n|
    # records start after a 4 byte header
    assoc = ID2Assoc.new buf[4 + ID2Assoc::SIZE * n, ID2Assoc::SIZE]
    records << assoc
    next if assoc.table2 == 0
    records.concat load_idx2_rec(idx_from_id(assoc.table2))
  end
  records
end
885
+
886
# An io over the concatenated data of all the index records in the chain
# starting at idx_head, as resolved by pst.id2_block_idx_chain.
class RangesIOIdxChain < RangesIOEncryptable
  def initialize pst, idx_head
    @idxs = pst.id2_block_idx_chain idx_head
    # whether or not each idx needs decrypting. records with bit 0x2 set in
    # their id are never decrypted, the rest follow the file's encryption flag.
    flags = @idxs.map { |idx| (idx.id & 2) == 0 ? pst.encrypted? : false }.uniq
    # a single decrypt flag is handed to the superclass, so a mix can't be expressed
    raise NotImplementedError, 'partial encryption in RangesIOID2' if flags.length > 1
    # convert idxs to ranges
    ranges = @idxs.map { |idx| [idx.offset, idx.size] }
    super pst.io, :ranges => ranges, :decrypt => flags.first
  end
end
900
+
901
# Convenience constructor: resolves id2 through the idx2 mapping, and returns a
# RangesIOIdxChain (not a RangesIOID2 instance) over the resulting index record.
class RangesIOID2 < RangesIOIdxChain
  def self.new pst, id2, idx2
    idx_head = idx2[id2]
    RangesIOIdxChain.new pst, idx_head
  end
end
906
+
907
+ # corresponds to:
908
+ # * _pst_ff_getID2block
909
+ # * _pst_ff_getID2data
910
+ # * _pst_ff_compile_ID
911
# Expands an index record into the flat list of index records that hold its data.
#
# A record whose id doesn't have bit 0x2 set is a plain data block and is
# returned as is (in a 1 element array). Otherwise its data is a table of ids
# of further index records: for depth 1 those are returned directly, for
# deeper tables each one is expanded recursively.
#
# If the table doesn't carry the expected type marker (1), a warning is logged
# and the record is treated as a plain data block.
#
# corresponds to:
# * _pst_ff_getID2block
# * _pst_ff_getID2data
# * _pst_ff_compile_ID
def id2_block_idx_chain idx
  return [idx] if (idx.id & 0x2) == 0

  buf = idx.read
  type, fdepth, count = buf[0, 4].unpack 'CCv'
  unless type == 1 # libpst.c:3958
    warn 'Error in idx_chain - %p, %p, %p - attempting to ignore' % [type, fdepth, count]
    return [idx]
  end

  # there are 4 unaccounted for bytes here, 4...8
  ids = if header.version_2003?
    # T (64 bit little endian) is not a String#unpack directive, it is only
    # understood by Pst.unpack, which ID2Assoc64 also uses for its T fields.
    Pst.unpack buf[8, count * 8], "T#{count}"
  else
    buf[8, count * 4].unpack('V*')
  end

  if fdepth == 1
    ids.map { |id| idx_from_id id }
  else
    ids.map { |id| id2_block_idx_chain idx_from_id(id) }.flatten
  end
end
934
+
935
+ #
936
+ # main block parsing code. gets raw properties
937
+ # ----------------------------------------------------------------------------
938
+ #
939
+
940
+ # the job of this class, is to take a desc record, and be able to enumerate through the
941
+ # mapi properties of the associated thing.
942
+ #
943
+ # corresponds to
944
+ # * _pst_parse_block
945
+ # * _pst_process (in some ways. although perhaps thats more the Item::Properties#add_property)
946
+ class BlockParser
947
+ include Mapi::Types::Constants
948
+
949
+ TYPES = {
950
+ 0xbcec => 1,
951
+ 0x7cec => 2,
952
+ # type 3 is removed. an artifact of not handling the indirect blocks properly in libpst.
953
+ }
954
+
955
+ PR_SUBJECT = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_SUBJECT' }.first.hex
956
+ PR_BODY_HTML = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_BODY_HTML' }.first.hex
957
+
958
+ # this stuff could maybe be moved to Ole::Types? or leverage it somehow?
959
+ # whether or not a type is immediate is more a property of the pst encoding though i expect.
960
+ # what i probably can add is a generic concept of whether a type is of variadic length or not.
961
+
962
+ # these lists are very incomplete. think they are largely copied from libpst
963
+
964
+ IMMEDIATE_TYPES = [
965
+ PT_SHORT, PT_LONG, PT_BOOLEAN
966
+ ]
967
+
968
+ INDIRECT_TYPES = [
969
+ PT_DOUBLE, PT_OBJECT,
970
+ 0x0014, # whats this? probably something like PT_LONGLONG, given the correspondence with the
971
+ # ole variant types. (= VT_I8)
972
+ PT_STRING8, PT_UNICODE, # unicode isn't in libpst, but added here for outlook 2003 down the track
973
+ PT_SYSTIME,
974
+ 0x0048, # another unknown
975
+ 0x0102, # this is PT_BINARY vs PT_CLSID
976
+ #0x1003, # these are vector types, but they're commented out for now because i'd expect that
977
+ #0x1014, # there's extra decoding needed that i'm not doing. (probably just need a simple
978
+ # # PT_* => unpack string mapping for the immediate types, and just do unpack('V*') etc
979
+ #0x101e,
980
+ #0x1102
981
+ ]
982
+
983
+ # the attachment and recipient arrays appear to be always stored with these fixed
984
+ # id2 values. seems strange. are there other extra streams? can find out by making higher
985
+ # level IO wrapper, which has the id2 value, and doing the diff of available id2 values versus
986
+ # used id2 values in properties of an item.
987
+ ID2_ATTACHMENTS = 0x671
988
+ ID2_RECIPIENTS = 0x692
989
+
990
+ attr_reader :desc, :data, :data_chunks, :offset_tables
991
# Reads all the data blocks behind desc, parses the block header, and builds
# one table of [from, to] byte ranges per data block.
#
# desc:: a desc record, or a stand-in responding to #desc and #pst.
#
# raises FormatError when desc has no index record, a block has an index
# offset of 0, or a range has its start after its end.
def initialize desc
  raise FormatError, "unable to get associated index record for #{desc.inspect}" unless desc.desc
  @desc = desc
  # a real index record is expanded into its plain chain of index records.
  # anything else ("fake desc") is used as the single data source.
  idxs = Pst::Index === desc.desc ? desc.pst.id2_block_idx_chain(desc.desc) : [desc.desc]

  @data_chunks = idxs.map { |idx| idx.read }
  @data = @data_chunks.first

  load_header

  # the first block's index offset comes from the header, for the following
  # blocks it is the leading 16 bit value of the block.
  later_offsets = @data_chunks[1..-1].map { |chunk| chunk.unpack('v')[0] }
  @index_offsets = [@index_offset] + later_offsets
  @offset_tables = []
  @ignored = []
  @data_chunks.each_with_index do |chunk, n|
    index_offset = @index_offsets[n]
    count = chunk[index_offset, 2].unpack('v')[0]
    @ignored << count
    table = []
    @offset_tables.push table
    # maybe its ok if there aren't to be any values ?
    raise FormatError if index_offset == 0
    boundaries = chunk[index_offset + 2..-1].unpack('v*')[0, count + 2]
    # each pair of neighbouring boundaries delimits one entry
    boundaries.each_cons 2 do |from, to|
      raise FormatError, [from, to].inspect if from > to
      table << [from, to]
    end
  end

  @offset_table = @offset_tables.first
  @idxs = idxs
end
1034
+
1035
+ # a given desc record may or may not have associated idx2 data. we lazily load it here, so it will never
1036
+ # actually be requested unless get_data_indirect actually needs to use it.
1037
# The id2 mapping for this desc record, loaded on first use and cached.
#
# raises FormatError if the desc record has no list index to load it from.
def idx2
  @idx2 ||= begin
    raise FormatError, 'idx2 requested but no idx2 available' unless desc.list_index
    # should check this can't return nil
    desc.pst.load_idx2 desc.list_index
  end
end
1043
+
1044
# Decodes the 8 byte block header at the start of the data: the index offset,
# the block type signature, and the first offset. Sets @index_offset, @offset1
# and @type (the TYPES entry for the signature).
#
# raises FormatError for a signature that is not listed in TYPES.
def load_header
  @index_offset, signature, @offset1 = data.unpack 'vvV'
  known = TYPES[signature]
  raise FormatError, 'unknown block type signature 0x%04x' % signature unless known
  @type = known
end
1049
+
1050
+ # based on the value of offset, return either some data from buf, or some data from the
1051
+ # id2 chain id2, where offset is some key into a lookup table that is stored as the id2
1052
+ # chain. i think i may need to create a BlockParser class that wraps up all this mess.
1053
+ #
1054
+ # corresponds to:
1055
+ # * _pst_getBlockOffsetPointer
1056
+ # * _pst_getBlockOffset
1057
# Returns the data that offset refers to, as a string. offset is resolved by
# get_data_indirect_io, this just reads the resulting io completely.
#
# Returns nil when get_data_indirect_io yields no io (which it does for
# offset 0), rather than failing with a NoMethodError on nil.
#
# corresponds to:
# * _pst_getBlockOffsetPointer
# * _pst_getBlockOffset
def get_data_indirect offset
  io = get_data_indirect_io offset
  io && io.read
end
1071
+
1072
# Resolves offset to an io over the data it refers to.
#
# * 0 means there is no data: returns nil.
# * a value whose low 4 bits are all set is an id2 value, which is looked up
#   through idx2.
# * anything else must have its low 4 bits clear. the remaining bits select a
#   data block (high / 4096) and an entry in that block's offset table
#   ((high % 4096) / 2).
#
# A reference that can't be resolved is logged, and an empty StringIO is
# returned in its place.
def get_data_indirect_io offset
  return nil if offset == 0

  if (offset & 0xf) == 0xf
    return RangesIOID2.new(desc.pst, offset, idx2) if idx2[offset]
    warn "tried to get idx2 record for #{offset} but failed"
    return StringIO.new('')
  end

  low, high = offset & 0xf, offset >> 4
  unless low == 0 and (high & 0x1) == 0
    # raise FormatError,
    warn "bad - #{low} #{high} (1)"
    return StringIO.new('')
  end

  # lets see which block it should come from.
  block_idx, i = high.divmod 4096
  if block_idx >= @data_chunks.length
    warn "bad - block_idx to high (not #{block_idx} < #{@data_chunks.length})"
    return StringIO.new('')
  end

  offset_table = @offset_tables[block_idx]
  entry = i / 2
  if entry >= offset_table.length
    warn "bad - #{low} #{high} - #{entry} >= #{offset_table.length} (2)"
    return StringIO.new('')
  end

  from, to = offset_table[entry]
  StringIO.new @data_chunks[block_idx][from...to]
end
1105
+
1106
# Turns a raw property tuple into its final [key, type, value] form.
#
# key::   the property tag.
# type::  the property type (PT_*).
# value:: either the data itself as a String (when it was stored inline and is
#         wider than 4 bytes), or a number. for indirect types a number is a
#         reference that is resolved through get_data_indirect_io.
#
# * PT_BOOLEAN becomes true / false.
# * the other immediate types are passed through untouched.
# * indirect types become an io over their data, except PT_STRING8 and
#   PT_UNICODE which are read into strings.
# * 0x101e / 0x1102 (multi-value string / binary) become an array of strings,
#   respectively of ios.
#
# raises NotImplementedError for any other type.
def handle_indirect_values key, type, value
  case type
  when PT_BOOLEAN
    value = value != 0
  when *IMMEDIATE_TYPES # not including PT_BOOLEAN which we just did above
    # no processing current applied (needed?).
  when *INDIRECT_TYPES
    # the value is a pointer, unless the data was already given to us (ie, value size > 4)
    value = String === value ? StringIO.new(value) : get_data_indirect_io(value)
    # keep strings as immediate values for now, for compatability with how i set up
    # Msg::Properties::ENCODINGS
    if value
      if type == PT_STRING8
        value = value.read
      elsif type == PT_UNICODE
        value = Ole::Types::FROM_UTF16.iconv value.read
      end
    end
    if key == PR_BODY_HTML and value
      # to keep the msg code happy, which thinks body_html will be an io
      # although, in 2003 version, they are 0102 already
      value = StringIO.new value unless value.respond_to?(:read)
    end
    # special subject handling.
    # subjects have been seen prefixed with 2 bytes: \001 and then a number that
    # appears to relate to the placement of the ':' / ' ' ending the prefix added
    # by mailers. eg:
    #
    #   subject="\001\005RE: blah blah"
    #   subject="\001\001blah blah"
    #   subject="\001\032Out of Office AutoReply: blah blah"
    #   subject="\001\020Undeliverable: blah blah"
    #
    # perhaps the remainder after the prefix should be yielded as a second
    # property (probably PR_CONVERSATION_TOPIC). for now the marker is just dropped.
    #
    # the 2 bytes are only removed when the \001 marker is actually there, so
    # that empty subjects no longer raise, and subjects without the marker keep
    # their first 2 characters.
    if key == PR_SUBJECT and String === value
      marker, _prefix_info = value.unpack 'C2'
      value = value[2..-1] if marker == 1 and value.length >= 2
    end

    # special handling for embedded objects
    # used for attach_data for attached messages. in which case attach_method should == 5,
    # for embedded object.
    if type == PT_OBJECT and value
      value = value.read if value.respond_to?(:read)
      id2, _unknown = value.unpack 'V2'
      # FIXME for an attached message this should really become a nested item
      # (Item.new on a fake desc wrapping this io, taking care that the attachment
      # doesn't get attached to itself ad infinitum). for an embedded ole object
      # (eg display_name "Picture (Metafile)") the data is a regular serialized ole
      # storage, recognizable by the d0cf11e0a1b11ae1 docfile signature.
      # until properly fixed, nested messages are not parsed here, and the raw io
      # is returned for both cases.
      value = RangesIOID2.new desc.pst, id2, idx2
    end
  # this is PT_MV_STRING8, i guess.
  # should probably have the 0x1000 flag, and do the or-ring.
  # example of 0x1102 is PR_OUTLOOK_2003_ENTRYIDS. less sure about that one.
  when 0x101e, 0x1102
    # example data:
    # 0x802b "\003\000\000\000\020\000\000\000\030\000\000\000#\000\000\000BusinessCompetitionFavorites"
    # this 0x802b would be an extended attribute for categories / keywords.
    # ie, a count, then that many start offsets, then the values back to back.
    value = get_data_indirect_io(value).read unless String === value
    num = value.unpack('V')[0]
    offsets = value[4, 4 * num].unpack("V#{num}")
    value = (offsets + [value.length]).to_enum(:each_cons, 2).map { |from, to| value[from...to] }
    value.map! { |str| StringIO.new str } if type == 0x1102
  else
    name = Mapi::Types::DATA[type].first rescue nil
    warn '0x%04x %p' % [key, get_data_indirect_io(value).read]
    raise NotImplementedError, 'unsupported mapi property type - 0x%04x (%p)' % [type, name]
  end
  [key, type, value]
end
1232
+ end
1233
+
1234
+ =begin
1235
+ * recipients:
1236
+
1237
+ affects: ["0x200764", "0x2011c4", "0x201b24", "0x201b44", "0x201ba4", "0x201c24", "0x201cc4", "0x202504"]
1238
+
1239
+ after adding the rawpropertystoretable fix, all except the second parse properly, and satisfy:
1240
+
1241
+ item.props.display_to == item.recipients.map { |r| r.props.display_name if r.props.recipient_type == 1 }.compact * '; '
1242
+
1243
+ only the second still has a problem
1244
+
1245
+ #[#<struct Pst::Desc desc_id=0x2011c4, idx_id=0x397c, idx2_id=0x398a, parent_desc_id=0x8082>]
1246
+
1247
+ think this is related to a multi block #data3. ie, when you use @x * rec_size, and it
1248
+ goes > 8190, or there abouts, then it stuffs up. probably there is header gunk, or something,
1249
+ similar to when #data is multi block.
1250
+
1251
+ same problem affects the attachment table in test4.
1252
+
1253
+ fixed that issue. round data3 ranges to rec_size.
1254
+
1255
+ fix other issue with attached objects.
1256
+
1257
+ all recipients and attachments in test2 are fine.
1258
+
1259
+ only remaining issue is test4 recipients of 200044. strange.
1260
+
1261
+ =end
1262
+
1263
+ # RawPropertyStore is used to iterate through the properties of an item, or the auxiliary
1264
+ # data for an attachment. its just a parser for the way the properties are serialized, when the
1265
+ # properties don't have to conform to a column structure.
1266
+ #
1267
+ # structure of this chunk of data is often
1268
+ # header, property keys, data values, and then indexes.
1269
+ # the property keys has value in it. value can be the actual value if its a short type,
1270
+ # otherwise you lookup the value in the indicies, where you get the offsets to use in the
1271
+ # main data body. due to the indirect thing though, any of these parts could actually come
1272
+ # from a separate stream.
1273
# RawPropertyStore is used to iterate through the properties of an item, or the auxiliary
# data for an attachment. its just a parser for the way the properties are serialized, when the
# properties don't have to conform to a column structure.
#
# the header block names a second block, which is an array of 8 byte tuples
# (tag, type, value). each tuple is passed through handle_indirect_values to
# get the real value.
class RawPropertyStore < BlockParser
  include Enumerable

  # the number of property tuples
  attr_reader :length

  # raises FormatError if the block is not of type 1, its header is missing or
  # too short, or the header signature is not 0x000602b5.
  def initialize desc
    super
    raise FormatError, "expected type 1 - got #{@type}" unless @type == 1

    # the way that offset works, data1 may be a subset of buf, or something from id2. if its from buf,
    # it will be offset based on index_offset and offset. so it could be some random chunk of data anywhere
    # in the thing.
    header_data = get_data_indirect @offset1
    raise FormatError if header_data.nil? or header_data.length < 8
    signature, offset2 = header_data.unpack 'V2'
    # report the signature that was actually read (not the block type)
    raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != 0x000602b5
    # this is actually a big chunk of tag tuples.
    @index_data = get_data_indirect offset2
    @length = @index_data.length / 8
  end

  # iterate through the property tuples, yielding key, type, value for each.
  def each
    length.times do |i|
      key, type, value = handle_indirect_values(*@index_data[8 * i, 8].unpack('vvV'))
      yield key, type, value
    end
  end
end
1302
+
1303
+ # RawPropertyStoreTable is kind of like a database table.
1304
+ # it has a fixed set of columns.
1305
+ # #[] is kind of like getting a row from the table.
1306
+ # those rows are currently encapsulated by Row, which has #each like
1307
+ # RawPropertyStore.
1308
+ # only used for the recipients array, and the attachments array. completely lazy, doesn't
1309
+ # load any of the properties upon creation.
1310
+ class RawPropertyStoreTable < BlockParser
1311
# One column of the table schema, decoded from an 8 byte record: the column's
# value type (ref_type), its property tag (type), the offset and size of its
# value within a row, and a slot number.
class Column < Struct.new(:ref_type, :type, :ind2_off, :size, :slot)
  def initialize data
    super(*data.unpack('v3CC'))
  end

  # short name of the value type as found in Mapi::Types::DATA, falling back to
  # the type in hex when it can't be looked up.
  def nice_type_name
    Mapi::Types::DATA[ref_type].first[/_(.*)/, 1].downcase rescue '0x%04x' % ref_type
  end

  # short name of the property as found in the tag table, falling back to the
  # tag in hex when it can't be looked up. the table is Mapi::PropertySet::TAGS,
  # the same one the PR_SUBJECT / PR_BODY_HTML constants are looked up in.
  def nice_prop_name
    Mapi::PropertySet::TAGS['%04x' % type].first[/_(.*)/, 1].downcase rescue '0x%04x' % type
  end

  def inspect
    "#<#{self.class} name=#{nice_prop_name.inspect}, type=#{nice_type_name.inspect}>"
  end
end
1328
+
1329
+ include Enumerable
1330
+
1331
+ attr_reader :length, :index_data, :data2, :data3, :rec_size
1332
# Parses the table header (the column schema and record size) and reads all of
# the row data. Rows themselves are only decoded on demand.
#
# raises FormatError if the block is not of type 2, the header is inconsistent
# with the schema, or the second header has an unexpected signature.
def initialize desc
  super
  raise FormatError, "expected type 2 - got #{@type}" unless @type == 2

  header_data = get_data_indirect @offset1
  # seven_c_blk
  # often: u1 == u2 and u3 == u2 + 2, then rec_size == u3 + 4. wtf
  fields = header_data[0, 22].unpack('CCv4V2v2')
  seven_c, @num_list = fields[0], fields[1]
  @rec_size, b_five_offset, ind2_offset = fields[5], fields[6], fields[7]
  # everything after the 22 byte header is the column schema
  @index_data = header_data[22..-1]

  raise FormatError if @num_list != schema.length or seven_c != 0x7c
  # another check
  # seem to have at max, 8 padding bytes on the end of the record. not sure if it means
  # anything. maybe its just space that hasn't been reclaimed due to columns being
  # removed or something. probably should just check lower bound.
  smallest = schema.inject(0) { |sum, col| sum + col.size }
  plausible = (smallest..smallest + 8)
  warn "rec_size seems wrong (#{plausible} !=== #{rec_size})" unless plausible === rec_size

  header_data2 = get_data_indirect b_five_offset
  raise FormatError if header_data2.length < 8
  signature, offset2 = header_data2.unpack 'V2'
  # ??? seems a bit iffy
  # there's probably more to the differences than this, and the data2 difference below
  expect = desc.pst.header.version_2003? ? 0x000404b5 : 0x000204b5
  raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != expect

  # this holds all the row data
  @data3_io = get_data_indirect_io ind2_offset
  if RangesIOIdxChain === @data3_io
    # handle multiple block issue: trim each range down to a whole number of records
    ranges = @data3_io.ranges.map { |pos, len| [pos, len / @rec_size * @rec_size] }
    # NOTE despite its name, this ends up holding the trimmed ranges
    @data3_idxs = ranges
    @data3_io.instance_variable_set :@ranges, ranges
  end
  @data3 = @data3_io.read

  # data2 holds one small record per row. the leading values were seen to be
  # identical to the rows' PR_ATTACHMENT_ID2 values. it is only used as a sanity
  # check below, and failing to read it is not an error.
  @data2 = begin
    get_data_indirect(offset2)
  rescue
    nil
  end
  # the number of rows is implied by the amount of row data
  @length = @data3.length / @rec_size

  # lets try and at least use data2 for a warning for now
  if data2
    data2_rec_size = desc.pst.header.version_2003? ? 8 : 6
    warn 'somthing seems wrong with data3' unless @length == (data2.length / data2_rec_size)
  end
end
1396
+
1397
# The table's columns, decoded from consecutive 8 byte records of index_data
# on first use and cached.
def schema
  return @schema if @schema
  @schema = index_data.scan(/.{8}/m).map { |record| Column.new record }
end
1400
+
1401
# Returns the Row with the given index. Nothing is decoded yet, the row just
# remembers where its record starts within the row data.
def [] idx
  record_start = idx * @rec_size
  Row.new self, record_start
end
1405
+
1406
# Yields every row of the table, in order.
def each
  length.times do |row_number|
    yield self[row_number]
  end
end
1409
+
1410
# A single row of the table: the parser owning the data, plus the position x
# at which the row's record starts within the parser's data3.
class Row
  include Enumerable

  def initialize array_parser, x
    @array_parser = array_parser
    @x = x
  end

  # iterate through the property tuples, yielding key, type, value for each
  # column of the row.
  def each
    column_count = @array_parser.index_data.length / 8
    column_count.times do |n|
      ref_type, type, ind2_off, size, _slot = @array_parser.index_data[8 * n, 8].unpack 'v3CC'
      raw = @array_parser.data3[@x + ind2_off, size]
      # values of up to 4 bytes are decoded as a number here. wider ones are
      # handed on as the raw string.
      raw = raw.unpack('V')[0] if size <= 4
      key, value_type, value = @array_parser.handle_indirect_values type, ref_type, raw
      yield key, value_type, value
    end
  end
end
1434
+ end
1435
+
1436
# The attachments of an item, which are stored as a table under the fixed id2
# value ID2_ATTACHMENTS of the item's id2 mapping.
class AttachmentTable < BlockParser
  # a "fake" MAPI property name for this constant. if you get a mapi property with
  # this value, it is the id2 value to use to get attachment data.
  PR_ATTACHMENT_ID2 = 0x67f2

  attr_reader :desc, :table

  # table is left as nil when the item has no id2 mapping, or no attachment table in it.
  def initialize desc
    @desc = desc
    # no super, we only actually want BlockParser#idx2
    @table = nil
    return unless desc.list_index
    idx = idx2[ID2_ATTACHMENTS]
    return unless idx
    # FIXME make a fake desc.
    @desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
    @table = RawPropertyStoreTable.new @desc2
  end

  # Returns one list of [key, type, value] property tuples per attachment.
  # The properties from the table row are merged with those of the property
  # store the row's PR_ATTACHMENT_ID2 value refers to, the latter taking
  # precedence where both define a property.
  def to_a
    return [] unless table
    table.map do |row|
      props = row.to_a
      # potentially merge with yet more properties
      # this still seems pretty broken - especially the property overlap
      id2_record = props.assoc(PR_ATTACHMENT_ID2)
      if id2_record
        @desc2.desc = idx2[id2_record.last]
        RawPropertyStore.new(@desc2).each do |key, type, value|
          existing = props.assoc key
          if existing
            existing.replace [key, type, value]
          else
            props << [key, type, value]
          end
        end
      end
      props
    end
  end
end
1474
+
1475
+ # there is no equivalent to this in libpst. ID2_RECIPIENTS was just guessed given the above
1476
+ # AttachmentTable.
1477
# The recipients of an item, which are stored as a table under the fixed id2
# value ID2_RECIPIENTS of the item's id2 mapping.
class RecipientTable < BlockParser
  attr_reader :desc, :table

  # table is left as nil when the item has no id2 mapping, or no recipient table in it.
  def initialize desc
    @desc = desc
    # no super, we only actually want BlockParser#idx2
    @table = nil
    return unless desc.list_index
    idx = idx2[ID2_RECIPIENTS]
    return unless idx
    # FIXME make a fake desc.
    fake_desc = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
    @table = RawPropertyStoreTable.new fake_desc
  end

  # Returns one list of [key, type, value] property tuples per recipient.
  def to_a
    table ? table.map { |row| row.to_a } : []
  end
end
1495
+
1496
+ #
1497
+ # higher level item code. wraps up the raw properties above, and gives nice
1498
+ # objects to work with. handles item relationships too.
1499
+ # ----------------------------------------------------------------------------
1500
+ #
1501
+
1502
# Builds a PropertySet from a list of [key, type, value] tuples. The type is
# dropped, and when a key occurs more than once, the last value wins.
def self.make_property_set property_list
  values = {}
  property_list.each do |key, _type, value|
    values[PropertySet::Key.new(key)] = value
  end
  PropertySet.new values
end
1508
+
1509
# An attachment built from a list of [key, type, value] property tuples.
class Attachment < Mapi::Attachment
  def initialize list
    super Pst.make_property_set(list)

    # attach_data holding a parsed item means this is an embedded message
    data = props.attach_data
    @embedded_msg = props.attach_data if Item === data
  end
end
1516
+
1517
# A recipient built from a list of [key, type, value] property tuples.
class Recipient < Mapi::Recipient
  def initialize list
    property_set = Pst.make_property_set(list)
    super property_set
  end
end
1522
+
1523
+ class Item < Mapi::Message
1524
# A decoded entry id: an unknown leading value, the 16 byte entry id itself,
# and the id of the record it refers to.
class EntryID < Struct.new(:u1, :entry_id, :id)
  UNPACK_STR = 'VA16V'

  # data is either the raw entry id string, or an already decoded list of the
  # three member values.
  def initialize data
    fields = String === data ? data.unpack(UNPACK_STR) : data
    super(*fields)
  end
end
1532
+
1533
+ include RecursivelyEnumerable
1534
+
1535
+ attr_accessor :type, :parent
1536
+
1537
# desc:: the desc record the item was loaded from.
# list:: the item's properties, as a list of [key, type, value] tuples.
# type:: the kind of item. when not given it is guessed from the properties.
def initialize desc, list, type=nil
  @desc = desc
  super Pst.make_property_set(list)

  # this is kind of weird, but the ids of the special folders are stored in a hash
  # when the root item is loaded
  [[:wastebasket, ipm_wastebasket_entryid], [:finder, finder_entryid]].each do |name, id|
    desc.pst.special_folder_ids[id] = name if id
  end

  # and then here, those are used, along with a crappy heuristic to determine if we are an
  # item.
  #
  # i think the low bits of the desc_id can give some info on the type.
  # it seems that 0x4 is for regular messages (and maybe contacts etc)
  # 0x2 is for folders, and 0x8 is for special things like rules etc, that aren't visible.
  @type = type || begin
    folderish = props.valid_folder_mask || ipm_subtree_entryid || props.content_count || props.subfolders
    if folderish
      # a folder may turn out to be one of the special folders recorded above
      desc.pst.special_folder_ids[desc.desc_id] || :folder
    else
      :message
    end
  end
end
1568
+
1569
# Parses and yields each child item, with its parent set to this item.
#
# For an item with an ipm_subtree_entryid (the root item), the children are
# those of the desc record that id refers to, followed by the wastebasket and
# search root records if they are not among them already. For any other item
# they are simply the children of its own desc record.
#
# raises RuntimeError if the subtree record can't be found, or the item also
# has children of its own.
def each_child
  subtree_id = ipm_subtree_entryid
  if subtree_id
    root = @desc.pst.desc_from_id subtree_id
    raise "couldn't find root" unless root
    raise 'both kinds of children' unless @desc.children.empty?
    # lets look up the other ids we have.
    # typically the wastebasket one "deleted items" is in the children already, but
    # the search folder isn't.
    extras = [ipm_wastebasket_entryid, finder_entryid].compact.map do |extra_id|
      found = @desc.pst.desc_from_id extra_id
      warn "couldn't find root for id #{extra_id}" unless found
      found
    end.compact
    # i do this instead of union, so as not to mess with the order of the
    # existing children.
    descs = root.children + (extras - root.children)
  else
    descs = @desc.children
  end

  descs.each do |child_desc|
    item = @desc.pst.pst_parse_item(child_desc)
    item.parent = self
    yield item
  end
end
1596
+
1597
+ def path
1598
+ parents, item = [], self
1599
+ parents.unshift item while item = item.parent
1600
+ # remove root
1601
+ parents.shift
1602
+ parents.map { |item| item.props.display_name or raise 'unable to construct path' } * '/'
1603
+ end
1604
+
1605
+ def children
1606
+ to_enum(:each_child).to_a
1607
+ end
1608
+
1609
+ # these are still around because they do different stuff
1610
+
1611
+ # Top of Personal Folder Record
1612
+ def ipm_subtree_entryid
1613
+ @ipm_subtree_entryid ||= EntryID.new(props.ipm_subtree_entryid.read).id rescue nil
1614
+ end
1615
+
1616
+ # Deleted Items Folder Record
1617
+ def ipm_wastebasket_entryid
1618
+ @ipm_wastebasket_entryid ||= EntryID.new(props.ipm_wastebasket_entryid.read).id rescue nil
1619
+ end
1620
+
1621
+ # Search Root Record
1622
+ def finder_entryid
1623
+ @finder_entryid ||= EntryID.new(props.finder_entryid.read).id rescue nil
1624
+ end
1625
+
1626
+ # all these have been replaced with the method_missing below
1627
+ =begin
1628
+ # States which folders are valid for this message store
1629
+ #def valid_folder_mask
1630
+ # props[0x35df]
1631
+ #end
1632
+
1633
+ # Number of emails stored in a folder
1634
+ def content_count
1635
+ props[0x3602]
1636
+ end
1637
+
1638
+ # Has children
1639
+ def subfolders
1640
+ props[0x360a]
1641
+ end
1642
+ =end
1643
+
1644
+ # i think i will change these, so they can inherit the lazyness from RawPropertyStoreTable.
1645
+ # so if you want the last attachment, you can get it without creating the others perhaps.
1646
+ # it just has to handle the no table at all case a bit more gracefully.
1647
+
1648
+ def attachments
1649
+ @attachments ||= AttachmentTable.new(@desc).to_a.map { |list| Attachment.new list }
1650
+ end
1651
+
1652
+ def recipients
1653
+ #[]
1654
+ @recipients ||= RecipientTable.new(@desc).to_a.map { |list| Recipient.new list }
1655
+ end
1656
+
1657
+ def each_recursive(&block)
1658
+ #p :self => self
1659
+ children.each do |child|
1660
+ #p :child => child
1661
+ block[child]
1662
+ child.each_recursive(&block)
1663
+ end
1664
+ end
1665
+
1666
+ def inspect
1667
+ attrs = %w[display_name subject sender_name subfolders]
1668
+ # attrs = %w[display_name valid_folder_mask ipm_wastebasket_entryid finder_entryid content_count subfolders]
1669
+ str = attrs.map { |a| b = props.send a; " #{a}=#{b.inspect}" if b }.compact * ','
1670
+
1671
+ type_s = type == :message ? 'Message' : type == :folder ? 'Folder' : type.to_s.capitalize + 'Folder'
1672
+ str2 = 'desc_id=0x%x' % @desc.desc_id
1673
+
1674
+ !str.empty? ? "#<Pst::#{type_s} #{str2}#{str}>" : "#<Pst::#{type_s} #{str2} props=#{props.inspect}>" #\n" + props.transport_message_headers + ">"
1675
+ end
1676
+ end
1677
+
1678
+ # corresponds to
1679
+ # * _pst_parse_item
1680
# Builds the Item for +desc+ from the property list that RawPropertyStore
# reads for that desc.
def pst_parse_item(desc)
  property_list = RawPropertyStore.new(desc).to_a
  Item.new(desc, property_list)
end
1683
+
1684
+ #
1685
+ # other random code
1686
+ # ----------------------------------------------------------------------------
1687
+ #
1688
+
1689
# Prints a dump of the low level pst structures to stdout, in five sections:
# the header, how the known records cover the file, the idx entries, the desc
# tree and the item tree. Emits a warning when a record offset is not a
# multiple of 64.
def dump_debug_info
  puts "* pst header"
  p header

  # Looking at the output of this, for blank-o1997.pst, i see this part:
  # ...
  # - (26624,516) desc block data (overlap of 4 bytes)
  # - (27136,516) desc block data (gap of 508 bytes)
  # - (28160,516) desc block data (gap of 2620 bytes)
  # ...
  #
  # which confirms my belief that the block size for idx and desc is more likely 512
  #
  # (those sample lines are not in the "- offset:size+pad name [notes]" layout
  # that is printed below)
  puts '* file range usage'
  # these things should account for most of the data in the file.
  file_ranges =
    [[0, Header::SIZE, 'pst file header']] +
    @idx_offsets.map { |offset| [offset, Index::BLOCK_SIZE, 'idx block data'] } +
    @desc_offsets.map { |offset| [offset, Desc::BLOCK_SIZE, 'desc block data'] } +
    @idx.map { |idx| [idx.offset, idx.size, 'idx id=0x%x (%s)' % [idx.id, idx.type]] }
  # i think there is a padding of the size out to 64 bytes
  # which is equivalent to padding out the final offset, because i think the offset is
  # similarly oriented
  pad_amount = 64
  # the trailing nil pairs the last record with "no next record"
  (file_ranges.sort_by { |range| range.first } + [nil]).to_enum(:each_cons, 2).each do |(offset, size, name), next_record|
    warn 'i am wrong about the offset padding' if offset % pad_amount != 0
    # so, assuming i'm not wrong about that, then we can calculate how much padding is needed.
    pad = (pad_amount - size % pad_amount) % pad_amount
    gap = next_record ? next_record.first - (offset + size + pad) : 0
    extra = []
    extra << "overlap of #{gap.abs} bytes" if gap < 0
    extra << "gap of #{gap} bytes" if gap > 0
    # how about we check that padding
    @io.pos = offset + size
    pad_bytes = @io.read(pad)
    extra << "padding not all zero" unless pad_bytes == 0.chr * pad
    puts "- #{offset}:#{size}+#{pad} #{name.inspect}" + (extra.empty? ? '' : ' [' + extra * ', ' + ']')
  end

  # i think the idea of the idx, and indeed the idx2, is just to be able to
  # refer to data indirectly, which means it can get moved around, and you just update
  # the idx table. it is simply a list of file offsets and sizes.
  # not sure i get how id2 plays into it though....
  # the sizes seem to be all even. is that a co-incidence? and the ids are all even. that
  # seems to be related to something else (see the (id & 2) == 1 stuff)
  puts '* idx entries'
  @idx.each { |idx| puts "- #{idx.inspect}" }

  # if you look at the desc tree, you notice a few things:
  # 1. there is a desc that seems to be the parent of all the folders, messages etc.
  #    it is the one whose parent is itself.
  #    one of its children is referenced as the subtree_entryid of the first desc item,
  #    the root.
  # 2. typically only 2 types of desc records have idx2_id != 0. messages themselves,
  #    and the desc with id = 0x61 - the xattrib container. everything else uses the
  #    regular ids to find its data. i think it should be reframed as small blocks and
  #    big blocks, but i'll look into it more.
  #
  # idx_id and idx2_id are for getting to the data. desc_id and parent_desc_id just define
  # the parent <-> child relationship, and the desc_ids are how the items are referred to in
  # entryids.
  # note that these aren't unique! eg for 0, 4 etc. i expect these'd never change, as the ids
  # are stored in entryids. whereas the idx and idx2 could be a bit more volatile.
  puts '* desc tree'
  # make a dummy root hold everything just for convenience
  dummy_root = Desc.new ''
  def dummy_root.inspect; "#<Pst::Root>"; end
  dummy_root.children.replace @orphans
  # this still loads the whole thing as a string for gsub. should use direct output io
  # version.
  puts dummy_root.to_tree.gsub(/, (parent_desc_id|idx2_id)=0x0(?!\d)/, '')

  # this is fairly easy to understand, its just an attempt to display the pst items in a tree form
  # which resembles what you'd see in outlook.
  puts '* item tree'
  # now streams directly
  root_item.to_tree STDOUT
end
1773
+
1774
# The first entry of the desc list, which is used as the desc of the root item.
def root_desc
  descs = @desc
  descs.first
end
1777
+
1778
# Parses the root desc into an item and marks it with the :root type.
# A new item is parsed on every call.
def root_item
  parsed = pst_parse_item(root_desc)
  parsed.type = :root
  parsed
end
1783
+
1784
# Shorthand for #root_item.
def root
  root_item
end
1787
+
1788
+ # depth first search of all items
1789
+ include Enumerable
1790
+
1791
# Calls +block+ with the root item, and then with every item below it in the
# order given by the root item's each_recursive.
#
# Returns an enumerator when called without a block, instead of failing on
# the missing block.
def each(&block)
  return to_enum(:each) unless block

  top = root
  block[top]
  top.each_recursive(&block)
end
1796
+
1797
# The display name of the root item. Once a non-nil name has been read it is
# kept in @name and reused.
def name
  return @name if @name
  @name = root_item.props.display_name
end
1800
+
1801
# Short description of the pst, showing its name and the io it reads from.
def inspect
  name_s = name.inspect
  io_s = io.inspect
  "#<Pst name=#{name_s} io=#{io_s}>"
end
1804
+ end
1805
+ end
1806
+