ruby-msg 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,269 @@
1
+ require 'yaml'
2
+ require 'mapi/types'
3
+ require 'mapi/rtf'
4
+ require 'rtf'
5
+
6
+ module Mapi
7
+ #
8
+ # The Mapi::PropertySet class is used to wrap the lower level Msg or Pst property stores,
9
+ # and provide a consistent and more friendly interface. It allows you to just say:
10
+ #
11
+ # properties.subject
12
+ #
13
+ # instead of:
14
+ #
15
+ # properties.raw[0x0037, PS_MAPI]
16
+ #
17
+ # The underlying store can be just a hash, or lazily loading directly from the file. A good
18
+ # compromise is to cache all the available keys, and just return the values on demand, rather
19
+ # than load up many possibly unwanted values.
20
+ #
21
+ class PropertySet
22
+ # the property set guid constants
23
+ # these guids are all defined with the macro DEFINE_OLEGUID in mapiguid.h.
24
+ # see http://doc.ddart.net/msdn/header/include/mapiguid.h.html
25
# builds the Clsid for an ole guid from its leading 8 hex digits. all of the guids
# below share the same "-0000-0000-c000-000000000046" tail.
oleguid = lambda do |prefix|
  Ole::Types::Clsid.parse("{#{prefix}-0000-0000-c000-000000000046}")
end

# maps each known property set guid to its symbolic name.
NAMES = {
  oleguid.call('00020328') => 'PS_MAPI',
  oleguid.call('00020329') => 'PS_PUBLIC_STRINGS',
  oleguid.call('00020380') => 'PS_ROUTING_EMAIL_ADDRESSES',
  oleguid.call('00020381') => 'PS_ROUTING_ADDRTYPE',
  oleguid.call('00020382') => 'PS_ROUTING_DISPLAY_NAME',
  oleguid.call('00020383') => 'PS_ROUTING_ENTRYID',
  oleguid.call('00020384') => 'PS_ROUTING_SEARCH_KEY',
  # string properties in this namespace automatically get added to the internet headers
  oleguid.call('00020386') => 'PS_INTERNET_HEADERS',
  # there are a bunch of outlook ones i think
  # http://blogs.msdn.com/stephen_griffin/archive/2006/05/10/outlook-2007-beta-documentation-notification-based-indexing-support.aspx
  # IPM.Appointment
  oleguid.call('00062002') => 'PSETID_Appointment',
  # IPM.Task
  oleguid.call('00062003') => 'PSETID_Task',
  # used for IPM.Contact
  oleguid.call('00062004') => 'PSETID_Address',
  oleguid.call('00062008') => 'PSETID_Common',
  # didn't find a source for this name. it is for IPM.StickyNote
  oleguid.call('0006200e') => 'PSETID_Note',
  # for IPM.Activity. also called the journal?
  oleguid.call('0006200a') => 'PSETID_Log',
}
53
+
54
# holds one constant per entry of NAMES (eg Constants::PS_MAPI is the guid whose
# name is 'PS_MAPI'), so that the guids can be pulled into a class with an include.
module Constants
  NAMES.each_pair do |guid, name|
    const_set(name, guid)
  end
end

include Constants
59
+
60
+ # +Properties+ are accessed by <tt>Key</tt>s, which are coerced to this class.
61
+ # Includes a bunch of methods (hash, ==, eql?) to allow it to work as a key in
62
+ # a +Hash+.
63
+ #
64
+ # Also contains the code that maps keys to symbolic names.
65
# a property key: a +code+ (an Integer tag, or a String name) qualified by the +guid+ of
# the property set it lives in. defines hash, == and eql? so it can be used as a +Hash+
# key, and knows how to turn itself into a symbolic name.
class Key
  include Constants

  attr_reader :code, :guid

  # +guid+ defaults to PS_MAPI.
  def initialize(code, guid = PS_MAPI)
    @code = code
    @guid = guid
  end

  # the symbolic name for this key.
  #
  # * Integer codes in PS_MAPI are looked up in TAGS, using the part of the tag name
  #   after the first underscore, downcased.
  # * Integer codes in other property sets are looked up in NAMED_MAP.
  # * String codes are simply downcased.
  #
  # when an Integer code can't be mapped, the code itself is returned rather than a symbol.
  # returns nil for any other kind of code.
  #
  # for some stuff, like the message class specific range, the name would depend on knowing
  # the message class. that isn't stored here; if needed it could be passed in. worry
  # about that when some examples arise.
  def to_sym
    if code.is_a?(Integer)
      in_mapi = guid == PS_MAPI # and < 0x8000 ?
      begin
        if in_mapi
          # the hash should be updated now that i've changed the process
          TAGS[format('%04x', code)].first[/_(.*)/, 1].downcase.to_sym
        else
          # other guids, like mapping names to outlook properties, based on the
          # outlook object model.
          NAMED_MAP[self].to_sym
        end
      rescue StandardError
        code
      end
    elsif code.is_a?(String)
      # note that named properties don't go through the map at the moment. so #categories
      # doesn't work yet
      code.downcase.to_sym
    end
  end

  def to_s
    to_sym.to_s
  end

  # FIXME implement these
  def transmittable?
    # etc, can go here too
  end

  # this stuff is to allow it to be a useful key

  # derived from the code / guid pair.
  def hash
    [code, guid].hash
  end

  # equality is defined purely by comparing hash values.
  def ==(other)
    other.hash == hash
  end

  alias_method :eql?, :==

  # PS_MAPI integer keys show as a plain hex number; everything else shows the property
  # set (by name if it is in NAMES, otherwise the formatted guid) along with the code.
  #
  # maybe the way to do this, would be to be able to register guids in a global lookup,
  # which are used by Clsid#inspect itself, to provide symbolic names...
  def inspect
    set_name = NAMES[guid] || "{#{guid.format}}"
    return "#<Key #{set_name}/#{code.inspect}>" unless code.is_a?(Integer)

    hex = format('0x%04x', code)
    guid == PS_MAPI ? hex : "#<Key #{set_name}/#{hex}>"
  end
end
135
+
136
+ # duplicated here for now
137
SUPPORT_DIR = File.dirname(__FILE__) + '/../..'

# data files that provide for the code to symbolic name mapping.
# TAGS is used as loaded. the entries of named_map.yaml are keyed by [code, guid name]
# pairs, where the guid names are really references to the constants above, so they are
# turned into proper Keys here.
TAGS = YAML.load_file("#{SUPPORT_DIR}/data/mapitags.yaml")
NAMED_MAP = YAML.load_file("#{SUPPORT_DIR}/data/named_map.yaml").each_with_object({}) do |(pair, name), map|
  map[Key.new(pair[0], const_get(pair[1]))] = name
end
145
+
146
+ attr_reader :raw
147
+
148
+ # +raw+ should be an hash-like object that maps <tt>Key</tt>s to values. Should respond_to?
149
+ # [], keys, values, each, and optionally []=, and delete.
150
# wraps +raw+, the underlying hash-like property store.
def initialize(raw)
  @raw = raw
end
153
+
154
+ # resolve +arg+ (could be key, code, string, or symbol), and possible +guid+ to a key.
155
+ # returns nil on failure
156
# turns +arg+ into a Key.
#
# * with a +guid+, a new Key is built from +arg+ and +guid+.
# * a Key is returned as is.
# * an Integer becomes a Key in the default property set.
# * anything else is converted with to_sym and looked up in sym_to_key, which gives nil
#   when the symbol is unknown.
def resolve(arg, guid = nil)
  return Key.new(arg, guid) if guid
  return arg if arg.is_a?(Key)
  return Key.new(arg) if arg.is_a?(Integer)

  sym_to_key[arg.to_sym]
end
166
+
167
+ # this is the function that creates a symbol to key mapping. currently this works by making a
168
+ # pass through the raw properties, but conceivably you could map symbols to keys using the
169
+ # mapitags directly. problem with that would be that named properties wouldn't map automatically,
170
+ # but maybe thats not too important.
171
# the symbol to key mapping, built on first use and then cached. it is made by a pass
# through the keys of the raw properties. conceivably symbols could be mapped to keys
# using the mapitags directly, but then named properties wouldn't map automatically.
#
# keys without a symbolic name are skipped. when two keys share a name, the first one
# wins unless a later one is in PS_MAPI.
def sym_to_key
  return @sym_to_key if @sym_to_key

  @sym_to_key = {}
  raw.keys.each do |key|
    name = key.to_sym
    if !name.is_a?(Symbol)
      Log.debug "couldn't find symbolic name for key #{key.inspect}"
    elsif !@sym_to_key[name]
      # just assign
      @sym_to_key[name] = key
    else
      Log.warn "duplicate key #{key.inspect}"
      # we give preference to PS_MAPI keys
      @sym_to_key[name] = key if key.guid == PS_MAPI
    end
  end
  @sym_to_key
end
191
+
192
# the symbolic names of all the properties that have one.
def keys
  sym_to_key.each_key.to_a
end
195
+
196
# the raw values of the named properties, in the same order as #keys.
def values
  sym_to_key.each_value.map { |key| raw[key] }
end
199
+
200
# looks a property up in the raw store, after resolving +arg+ (and the optional
# +guid+) to a key with #resolve.
def [](arg, guid = nil)
  key = resolve(arg, guid)
  raw[key]
end
203
+
204
# stores a property. can be called as <tt>props[arg] = value</tt> or
# <tt>props[arg, guid] = value</tt>.
def []=(arg, *args)
  guid, value = args.length == 1 ? [nil, args.first] : args
  # FIXME this won't really work properly. it would need to go
  # to TAGS to resolve, as it often won't be there already...
  key = resolve(arg, guid)
  raw[key] = value
end
211
+
212
# provides the friendly accessors: <tt>props.subject</tt> reads self[:subject], and
# <tt>props.subject = x</tt> writes self['subject']. any other combination of name and
# arguments is passed up as usual.
def method_missing(name, *args)
  # the part before the trailing "=", or nil if this isn't a setter style name
  setter_target = name.to_s[/(.*)\=$/, 1]
  if setter_target.nil? && args.empty?
    self[name]
  elsif setter_target && args.length == 1
    self[setter_target] = args[0]
  else
    super
  end
end
221
+
222
# a plain hash of symbolic name => raw value, for every named property.
def to_h
  result = {}
  sym_to_key.each { |sym, key| result[sym] = raw[key] }
  result
end
225
+
226
# lists the properties as name=value pairs sorted by name. values whose inspect string
# is longer than 32 characters are cut down to their first 30, followed by '..."'.
def inspect
  fields = to_h.sort_by { |name, _value| name.to_s }.map do |name, value|
    shown = value.inspect
    shown = shown[0..29] + '..."' if shown.length > 32
    "#{name}=#{shown}"
  end
  "#<#{self.class} " + fields.join(' ') + '>'
end
232
+
233
+ # -----
234
+
235
+ # temporary pseudo tags
236
+
237
+ # for providing rtf to plain text conversion. later, html to text too.
238
# the plain text body, cached after the first call. uses the :body property when it
# has some non-whitespace content, and otherwise falls back to converting body_rtf to
# text. errors in either step are swallowed, leaving nil.
def body
  return @body if defined?(@body)

  @body = begin
    self[:body]
  rescue StandardError
    nil
  end
  # last resort
  if !@body || @body.strip.empty?
    Log.warn 'creating text body from rtf'
    @body = begin
      ::RTF::Converter.rtf2text(body_rtf)
    rescue StandardError
      nil
    end
  end
  @body
end
248
+
249
+ # for providing rtf decompression
250
# the rtf body, obtained by reading rtf_compressed and passing it through
# RTF.rtfdecompr. cached after the first call; nil if any of that fails.
def body_rtf
  return @body_rtf if defined?(@body_rtf)

  @body_rtf = begin
    RTF.rtfdecompr(rtf_compressed.read)
  rescue StandardError
    nil
  end
end
254
+
255
+ # for providing rtf to html conversion
256
# the html body, cached after the first call. tries, in order, until one gives some
# non-whitespace content:
#
# 1. reading the :body_html property
# 2. RTF.rtf2html on body_rtf
# 3. ::RTF::Converter.rtf2text on body_rtf, in :html mode
#
# errors in any step are swallowed, and count as that step giving nothing.
def body_html
  return @body_html if defined?(@body_html)

  @body_html = begin
    self[:body_html].read
  rescue StandardError
    nil
  end
  if !@body_html || @body_html.strip.empty?
    @body_html = begin
      RTF.rtf2html(body_rtf)
    rescue StandardError
      nil
    end
  end
  # last resort
  if !@body_html || @body_html.strip.empty?
    Log.warn 'creating html body from rtf'
    @body_html = begin
      ::RTF::Converter.rtf2text(body_rtf, :html)
    rescue StandardError
      nil
    end
  end
  @body_html
end
267
+ end
268
+ end
269
+
@@ -0,0 +1,1806 @@
1
+ #
2
+ # = Introduction
3
+ #
4
+ # This file is mostly an attempt to port libpst to ruby, and simplify it in the process. It
5
+ # will leverage much of the existing MAPI => MIME conversion developed for Msg files, and as
6
+ # such is purely concerned with the file structure details.
7
+ #
8
+ # = TODO
9
+ #
10
+ # 1. solve recipient table problem (test4).
11
+ # this is done. turns out it was due to id2 clashes. find better solution
12
+ # 2. check parse consistency. an initial conversion of a 30M file to pst, shows
13
+ # a number of messages converting badly. compare with libpst too.
14
+ # 3. xattribs
15
+ # 4. generalise the Mapi stuff better
16
+ # 5. refactor index load
17
+ # 6. msg serialization?
18
+ #
19
+
20
+ =begin
21
+
22
+ quick plan for cleanup.
23
+
24
+ have working tests for 97 and 03 file formats, so safe.
25
+
26
+ want to fix up:
27
+
28
+ 64 bit unpacks scattered around. its ugly. not sure how best to handle it, but am slightly tempted
29
+ to override String#unpack to support a 64 bit little endian unpack (like L vs N/V, for Q). one way or
30
+ another need to fix it. Could really slow everything else down if its parsing the unpack strings twice,
31
+ once in ruby, for every single unpack i do :/
32
+
33
+ the index loading process, and the lack of shared code between normal vs 64 bit variants, and Index vs Desc.
34
+ should be able to reduce code by factor of 4. also think I should move load code into the class too. then
35
+ maybe have something like:
36
+
37
+ class Header
38
+ def index_class
39
+ version_2003 ? Index64 : Index
40
+ end
41
+ end
42
+
43
+ def load_idx
44
+ header.index_class.load_index
45
+ end
46
+
47
+ OR
48
+
49
+ def initialize
50
+ @header = ...
51
+ extend @header.index_class::Load
52
+ load_idx
53
+ end
54
+
55
+ need to think about the role of the mapi code, and Pst::Item etc, but that layer can come later.
56
+
57
+ =end
58
+
59
+ require 'mapi'
60
+ require 'enumerator'
61
+ require 'ostruct'
62
+ require 'ole/ranges_io'
63
+
64
+ module Mapi
65
+ class Pst
66
# raised when the data being parsed doesn't look like a pst file this code can handle.
class FormatError < StandardError; end
68
+
69
+ # unfortunately there is no Q analogue which is little endian only.
70
+ # this translates T as an unsigned quad word, little endian byte order, to
71
+ # not pollute the rest of the code.
72
+ #
73
+ # didn't want to override String#unpack, cause its too hacky, and incomplete.
74
# unpacks +str+ according to +unpack_spec+, like String#unpack, with one extension: the
# directive T stands for an unsigned quad word in little endian byte order (optionally
# followed by a count, eg T3).
#
# each T is rewritten as a pair of V (32 bit little endian) directives, and the low and
# high halves are combined after unpacking. the rewritten spec and the positions of the
# T values are cached per spec string, so each spec is only parsed once.
#
# returns the array of unpacked values. a T for which +str+ didn't hold both halves
# comes back as nil. specs without a T are handed straight to String#unpack.
def self.unpack(str, unpack_spec)
  return str.unpack(unpack_spec) unless unpack_spec['T']

  @unpack_cache ||= {}
  t_offsets, new_spec = @unpack_cache[unpack_spec]
  unless t_offsets
    t_offsets = []
    new_spec = ''
    offset = 0
    unpack_spec.scan(/([^\d])_?(\*|\d+)?/) do |directive, count|
      # a/A give a single string whatever their count
      num_elems = directive.downcase == 'a' ? 1 : (count || 1).to_i
      if directive == 'T'
        num_elems.times { |i| t_offsets << offset + i }
        new_spec << "V#{num_elems * 2}"
      else
        new_spec << Regexp.last_match(0)
      end
      offset += num_elems
    end
    @unpack_cache[unpack_spec] = [t_offsets, new_spec]
  end

  values = str.unpack(new_spec)
  # offsets are positions in the final array. replacing each low/high pair by one value
  # in ascending order shrinks the array by one each time, keeping later offsets valid.
  t_offsets.each do |index|
    low, high = values[index, 2]
    # always assign a one element array: assigning a bare nil to a slice deletes the
    # elements on ruby 1.8, which would shift all of the following fields.
    values[index, 2] = [low && high ? low + (high << 32) : nil]
  end
  values
end
101
+
102
+ #
103
+ # this is the header and encryption encapsulation code
104
+ # ----------------------------------------------------------------------------
105
+ #
106
+
107
+ # class which encapsulates the pst header
108
# class which encapsulates the pst header. it is built from the first SIZE bytes of the
# file, and picks out the index type (from which the file version follows), the
# encryption type, the root offsets and counts of the two indexes, and the file size.
class Header
  SIZE = 512
  MAGIC = 0x2142444e

  # these are the constants defined in libpst.c, that
  # are referenced in pst_open()
  INDEX_TYPE_OFFSET = 0x0A
  FILE_SIZE_POINTER = 0xA8
  FILE_SIZE_POINTER_64 = 0xB8
  SECOND_POINTER = 0xBC
  INDEX_POINTER = 0xC4
  SECOND_POINTER_64 = 0xE0
  INDEX_POINTER_64 = 0xF0
  ENC_OFFSET = 0x1CD

  attr_reader :magic, :index_type, :encrypt_type, :size
  attr_reader :index1_count, :index1, :index2_count, :index2
  attr_reader :version

  # +data+ is the raw header string. raises FormatError (through #validate!) if the
  # signature, index type or encryption type isn't one that is handled.
  def initialize(data)
    @magic = data.unpack('N')[0]
    @index_type = byte_at(data, INDEX_TYPE_OFFSET)
    @version = { 0x0e => 1997, 0x17 => 2003 }[@index_type]

    if version_2003?
      # the offset of the encryption type in the 2003 format isn't known. there is so
      # much in the header that isn't understood, and only 2 psts were available to base
      # a guess on, so for now these files are simply assumed to be encrypted.
      @encrypt_type = 1

      @index2_count, @index2 = data[SECOND_POINTER_64 - 4, 8].unpack('V2')
      @index1_count, @index1 = data[INDEX_POINTER_64 - 4, 8].unpack('V2')

      @size = data[FILE_SIZE_POINTER_64, 4].unpack('V')[0]
    else
      @encrypt_type = byte_at(data, ENC_OFFSET)

      @index2_count, @index2 = data[SECOND_POINTER - 4, 8].unpack('V2')
      @index1_count, @index1 = data[INDEX_POINTER - 4, 8].unpack('V2')

      @size = data[FILE_SIZE_POINTER, 4].unpack('V')[0]
    end

    validate!
  end

  def version_2003?
    version == 2003
  end

  def encrypted?
    encrypt_type != 0
  end

  # raises FormatError unless the magic, index type and encryption type are all ones
  # that this code handles.
  def validate!
    raise FormatError, "bad signature on pst file (#{'0x%x' % magic})" unless magic == MAGIC
    raise FormatError, "only index types 0x0e and 0x17 are handled (#{'0x%x' % index_type})" unless [0x0e, 0x17].include?(index_type)
    raise FormatError, "only encrytion types 0 and 1 are handled (#{encrypt_type.inspect})" unless [0, 1].include?(encrypt_type)
  end

  private

  # the byte at +offset+ of +data+ as an Integer, or nil if +data+ is too short.
  # String#[] with a single index only gives an Integer on ruby 1.8, and a one character
  # String since 1.9, so the byte is unpacked explicitly to work on both.
  def byte_at(data, offset)
    byte = data[offset, 1]
    byte && byte.unpack('C')[0]
  end
end
171
+
172
+ # compressible encryption! :D
173
+ #
174
+ # simple substitution. see libpst.c
175
+ # maybe test switch to using a String#tr!
176
# compressible encryption! :D
#
# a simple byte for byte substitution. see libpst.c
#
# all of the methods take and return strings of raw bytes; the results are always
# binary strings of the same length as the input.
class CompressibleEncryption
  # DECRYPT_TABLE[b] is the decrypted value of the encrypted byte b.
  DECRYPT_TABLE = [
    0x47, 0xf1, 0xb4, 0xe6, 0x0b, 0x6a, 0x72, 0x48,
    0x85, 0x4e, 0x9e, 0xeb, 0xe2, 0xf8, 0x94, 0x53, # 0x0f
    0xe0, 0xbb, 0xa0, 0x02, 0xe8, 0x5a, 0x09, 0xab,
    0xdb, 0xe3, 0xba, 0xc6, 0x7c, 0xc3, 0x10, 0xdd, # 0x1f
    0x39, 0x05, 0x96, 0x30, 0xf5, 0x37, 0x60, 0x82,
    0x8c, 0xc9, 0x13, 0x4a, 0x6b, 0x1d, 0xf3, 0xfb, # 0x2f
    0x8f, 0x26, 0x97, 0xca, 0x91, 0x17, 0x01, 0xc4,
    0x32, 0x2d, 0x6e, 0x31, 0x95, 0xff, 0xd9, 0x23, # 0x3f
    0xd1, 0x00, 0x5e, 0x79, 0xdc, 0x44, 0x3b, 0x1a,
    0x28, 0xc5, 0x61, 0x57, 0x20, 0x90, 0x3d, 0x83, # 0x4f
    0xb9, 0x43, 0xbe, 0x67, 0xd2, 0x46, 0x42, 0x76,
    0xc0, 0x6d, 0x5b, 0x7e, 0xb2, 0x0f, 0x16, 0x29, # 0x5f
    0x3c, 0xa9, 0x03, 0x54, 0x0d, 0xda, 0x5d, 0xdf,
    0xf6, 0xb7, 0xc7, 0x62, 0xcd, 0x8d, 0x06, 0xd3, # 0x6f
    0x69, 0x5c, 0x86, 0xd6, 0x14, 0xf7, 0xa5, 0x66,
    0x75, 0xac, 0xb1, 0xe9, 0x45, 0x21, 0x70, 0x0c, # 0x7f
    0x87, 0x9f, 0x74, 0xa4, 0x22, 0x4c, 0x6f, 0xbf,
    0x1f, 0x56, 0xaa, 0x2e, 0xb3, 0x78, 0x33, 0x50, # 0x8f
    0xb0, 0xa3, 0x92, 0xbc, 0xcf, 0x19, 0x1c, 0xa7,
    0x63, 0xcb, 0x1e, 0x4d, 0x3e, 0x4b, 0x1b, 0x9b, # 0x9f
    0x4f, 0xe7, 0xf0, 0xee, 0xad, 0x3a, 0xb5, 0x59,
    0x04, 0xea, 0x40, 0x55, 0x25, 0x51, 0xe5, 0x7a, # 0xaf
    0x89, 0x38, 0x68, 0x52, 0x7b, 0xfc, 0x27, 0xae,
    0xd7, 0xbd, 0xfa, 0x07, 0xf4, 0xcc, 0x8e, 0x5f, # 0xbf
    0xef, 0x35, 0x9c, 0x84, 0x2b, 0x15, 0xd5, 0x77,
    0x34, 0x49, 0xb6, 0x12, 0x0a, 0x7f, 0x71, 0x88, # 0xcf
    0xfd, 0x9d, 0x18, 0x41, 0x7d, 0x93, 0xd8, 0x58,
    0x2c, 0xce, 0xfe, 0x24, 0xaf, 0xde, 0xb8, 0x36, # 0xdf
    0xc8, 0xa1, 0x80, 0xa6, 0x99, 0x98, 0xa8, 0x2f,
    0x0e, 0x81, 0x65, 0x73, 0xe4, 0xc2, 0xa2, 0x8a, # 0xef
    0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2,
    0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec  # 0xff
  ]

  # the inverse of DECRYPT_TABLE
  ENCRYPT_TABLE = [nil] * 256
  DECRYPT_TABLE.each_with_index { |i, j| ENCRYPT_TABLE[i] = j }

  # table lookup implementation of decrypt. works on the byte values, rather than on
  # String#[] (which stopped returning integers in ruby 1.9).
  def self.decrypt_alt(encrypted)
    encrypted.unpack('C*').map { |byte| DECRYPT_TABLE[byte] }.pack('C*')
  end

  # table lookup implementation of encrypt.
  def self.encrypt_alt(decrypted)
    decrypted.unpack('C*').map { |byte| ENCRYPT_TABLE[byte] }.pack('C*')
  end

  # an alternate implementation that is possibly faster....
  # TODO - bench
  #
  # the two tables as strings suitable for String#tr, ie with the characters that tr
  # treats specially (^, - and \) escaped. ENCRYPT_STR is just the bytes 0 to 255 in
  # order, and DECRYPT_STR has the decrypted byte for each at the same position.
  DECRYPT_STR, ENCRYPT_STR = [DECRYPT_TABLE, (0...256)].map do |values|
    values.to_a.pack('C*').gsub(/([\^\-\\])/, "\\\\\\1")
  end

  def self.decrypt(encrypted)
    binary(encrypted).tr(ENCRYPT_STR, DECRYPT_STR)
  end

  def self.encrypt(decrypted)
    binary(decrypted).tr(DECRYPT_STR, ENCRYPT_STR)
  end

  # +str+ as a string of raw bytes. the tr tables are binary strings, so a receiver
  # in another encoding holding bytes above 0x7f would otherwise be rejected as
  # incompatible. a no-op on rubies without string encodings.
  def self.binary(str)
    return str unless str.respond_to?(:force_encoding)

    str.dup.force_encoding('BINARY')
  end
  private_class_method :binary
end
241
+
242
# a RangesIO which can pass whatever it reads through CompressibleEncryption.decrypt.
class RangesIOEncryptable < RangesIO
  # takes the same arguments as RangesIO, where +mode+ may be left out. decryption is
  # switched on by a true <tt>:decrypt</tt> entry in +params+.
  def initialize(io, mode = 'r', params = {})
    if mode.is_a?(Hash)
      params = mode
      mode = 'r'
    end
    @decrypt = params[:decrypt] ? true : false
    super
  end

  # whether reads get decrypted
  def encrypted?
    @decrypt
  end

  def read(limit = nil)
    data = super
    encrypted? ? CompressibleEncryption.decrypt(data) : data
  end
end
259
+
260
+ attr_reader :io, :header, :idx, :desc, :special_folder_ids
261
+
262
+ # corresponds to
263
+ # * pst_open
264
+ # * pst_load_index
265
+ def initialize io
266
+ @io = io
267
+ io.pos = 0
268
+ @header = Header.new io.read(Header::SIZE)
269
+
270
+ # would prefer this to be in Header#validate, but it doesn't have the io size.
271
+ # should perhaps downgrade this to just be a warning...
272
+ raise FormatError, "header size field invalid (#{header.size} != #{io.size}}" unless header.size == io.size
273
+
274
+ load_idx
275
+ load_desc
276
+ load_xattrib
277
+
278
+ @special_folder_ids = {}
279
+ end
280
+
281
# whether the header says this file is encrypted
def encrypted?
  @header.encrypted?
end
284
+
285
+ # until i properly fix logging...
286
# until i properly fix logging, warnings are just passed on to Mapi::Log
def warn(s)
  Mapi::Log.warn(s)
end
289
+
290
+ #
291
+ # this is the index and desc record loading code
292
+ # ----------------------------------------------------------------------------
293
+ #
294
+
295
+ ToTree = Module.new
296
+
297
# unfinished skeleton for a restructured index record, with one class per file
# version sharing their behaviour through mixins. nothing is implemented yet: the
# methods are empty and the structs only have a placeholder member.
module Index2
  BLOCK_SIZE = 512

  module RecursiveLoad
    # placeholder
    def load_chain
      nil
    end
  end

  module Base
    # placeholder
    def read
      nil
    end
  end

  class Version1997 < Struct.new(:a) # ...)
    SIZE = 12

    include Base, RecursiveLoad
  end

  class Version2003 < Struct.new(:a) # ...)
    SIZE = 24

    include Base, RecursiveLoad
  end
end
325
+
326
# unfinished skeleton for a restructured desc record, along the same lines as Index2.
# nothing is implemented yet.
module Desc2
  module Base
    # placeholder
    def desc
      nil
    end
  end

  class Version1997 < Struct.new(:a) # ...)
    # include Index::RecursiveLoad
    include Base
  end

  class Version2003 < Struct.new(:a) # ...)
    # include Index::RecursiveLoad
    include Base
  end
end
343
+
344
+ # more constants from libpst.c
345
+ # these relate to the index block
346
+ ITEM_COUNT_OFFSET = 0x1f0 # count byte
347
+ LEVEL_INDICATOR_OFFSET = 0x1f3 # node or leaf
348
+ BACKLINK_OFFSET = 0x1f8 # backlink u1 value
349
+
350
+ # these 3 classes are used to hold various file records
351
+
352
+ # pst_index
353
# pst_index
#
# an index record, which ties an +id+ to a block of the file (+offset+ and +size+).
# +pst+ must be set to the owning Pst before the block can be read.
class Index < Struct.new(:id, :offset, :size, :u1)
  UNPACK_STR = 'VVvv'
  SIZE = 12
  BLOCK_SIZE = 512 # index blocks was 516 but bogus
  COUNT_MAX = 41 # max active items (ITEM_COUNT_OFFSET / Index::SIZE = 41)

  attr_accessor :pst

  # +data+ is either the packed record, or an array of the field values.
  def initialize(data)
    data = Pst.unpack data, UNPACK_STR if String === data
    super(*data)
  end

  # the kind of block this record points to. :data, when bit 1 of the id is clear.
  # otherwise the block is read, and its first 2 bytes say whether it is a
  # :data_chain_header or an :id2_assoc. raises FormatError if they are anything else.
  # the answer is cached.
  def type
    @type ||= data? ? :data : block_type
  end

  def data?
    (id & 0x2) == 0
  end

  # reads the block through the pst.
  def read(decrypt = true)
    # only data blocks are ever encrypted
    decrypt = false unless data?
    pst.pst_read_block_size offset, size, decrypt
  end

  # show all numbers in hex
  def inspect
    super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }.sub(/Index /, "Index type=#{type.inspect}, ")
  end

  private

  # works out the type of a non data block from its first 2 bytes
  def block_type
    first_byte, second_byte = read.unpack('CC')
    case first_byte
    when 1
      raise FormatError, 'unexpected second byte for data chain header block - %p' % second_byte unless second_byte == 1
      :data_chain_header
    when 2
      raise FormatError, 'unexpected second byte for id2 assoc block - %p' % second_byte unless second_byte == 0
      :id2_assoc
    else
      raise FormatError, 'unknown first byte for block - %p' % first_byte
    end
  end
end
399
+
400
+ # mostly guesses.
401
+ ITEM_COUNT_OFFSET_64 = 0x1e8
402
+ LEVEL_INDICATOR_OFFSET_64 = 0x1eb # diff of 3 between these 2 as above...
403
+
404
+ # inherits from Index, in order to get the same #type function.
405
# the index record of the 2003 file format: as Index, but with quad word id and offset,
# and an extra trailing field (+u2+).
class Index64 < Index
  UNPACK_STR = 'TTvvV'
  SIZE = 24
  BLOCK_SIZE = 512
  COUNT_MAX = 20 # bit of a guess really. 512 / 24 = 21, but doesn't leave enough header room

  # this is the extra item on the end of the UNPACK_STR above
  attr_accessor :u2

  # +data+ is either the packed record, or an array of the field values with u2 last.
  # an array that is passed in is left untouched.
  def initialize(data)
    data = Pst.unpack data, UNPACK_STR if String === data
    @u2 = data.last
    super data[0...-1]
  end

  def inspect
    super.sub(/>$/, ', u2=%p>' % u2)
  end

  # loads every index record reachable from the root index block given in +header+,
  # reading from +io+. returns them as a flat array.
  def self.load_chain(io, header)
    load_idx_rec io, header.index1, 0, 0
  end

  # recursive helper for load_chain. reads the block at +offset+: a leaf block holds
  # index records, and a node block holds pointers to further blocks, which are
  # followed in turn. +start_val+, if non zero, is the id that the first item of the
  # block has to have. +linku1+ is accepted but not currently checked for this format.
  #
  # almost identical to load code for Index, just different offsets and unpack strings.
  # can probably merge them, or write a generic load_tree function or something.
  def self.load_idx_rec(io, offset, linku1, start_val)
    io.seek offset
    buf = io.read BLOCK_SIZE
    idxs = []

    item_count = byte_at(buf, ITEM_COUNT_OFFSET_64)
    raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX

    if byte_at(buf, LEVEL_INDICATOR_OFFSET_64) == 0
      # leaf pointers
      # split the data into item_count index objects
      buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
        idx = new data
        # first entry
        raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
        break if idx.id == 0
        idxs << idx
      end
    else
      # node pointers
      # split the data into item_count table pointers
      buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
        start, u1, offset = Pst.unpack data, 'T3'
        # for the first value, we expect the start to be equal
        raise 'blah 3' if i == 0 and start_val != 0 and start != start_val
        break if start == 0
        idxs += load_idx_rec io, offset, u1, start
      end
    end

    idxs
  end

  # the byte at +offset+ of +buf+, as an Integer. String#[] with a single index only
  # gives an Integer on ruby 1.8 (it is a one character String since 1.9), so the byte
  # is unpacked explicitly to work on both.
  def self.byte_at(buf, offset)
    buf[offset, 1].unpack('C')[0]
  end
  private_class_method :byte_at
end
467
+
468
+ # pst_desc
469
# pst_desc
#
# the desc record of the 2003 file format. +pst+ must be set to the owning Pst before
# #desc or #list_index can be used.
class Desc64 < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id, :u2)
  UNPACK_STR = 'T3VV'
  SIZE = 32
  BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
  COUNT_MAX = 15 # guess as per Index64

  include RecursivelyEnumerable

  attr_accessor :pst
  attr_reader :children

  # +data+ is the packed record.
  def initialize(data)
    super(*Pst.unpack(data, UNPACK_STR))
    @children = []
  end

  # the idx record for idx_id
  def desc
    pst.idx_from_id idx_id
  end

  # the idx record for idx2_id
  def list_index
    pst.idx_from_id idx2_id
  end

  # loads every desc record reachable from the root desc block given in +header+,
  # reading from +io+. returns them as a flat array.
  def self.load_chain(io, header)
    load_desc_rec io, header.index2, 0, 0x21
  end

  # recursive helper for load_chain. reads the block at +offset+: a leaf block holds
  # desc records, and a node block holds pointers (laid out like those of the index)
  # to further blocks, which are followed in turn. +start_val+ is the id that the first
  # item of the block is expected to have. +linku1+ is accepted but not currently
  # checked for this format.
  def self.load_desc_rec(io, offset, linku1, start_val)
    io.seek offset
    buf = io.read BLOCK_SIZE
    descs = []
    item_count = byte_at(buf, ITEM_COUNT_OFFSET_64)

    if byte_at(buf, LEVEL_INDICATOR_OFFSET_64) == 0
      # leaf pointers
      raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX
      # split the data into item_count desc objects
      buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
        desc = new data
        # first entry
        raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
        break if desc.desc_id == 0
        descs << desc
      end
    else
      # node pointers
      raise "have too many active items in index (#{item_count})" if item_count > Index64::COUNT_MAX
      # split the data into item_count table pointers
      buf[0, Index64::SIZE * item_count].scan(/.{#{Index64::SIZE}}/mo).each_with_index do |data, i|
        start, u1, offset = Pst.unpack data, 'T3'
        # for the first value, we expect the start to be equal note that ids -1, so even for the
        # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
        # that the first desc record is always 33...
        # thats because 0x21 is the pst root itself...
        raise 'blah 3' if i == 0 and start_val != -1 and start != start_val
        # this shouldn't really happen i'd imagine
        break if start == 0
        descs += load_desc_rec io, offset, u1, start
      end
    end

    descs
  end

  # the byte at +offset+ of +buf+, as an Integer. String#[] with a single index only
  # gives an Integer on ruby 1.8 (it is a one character String since 1.9), so the byte
  # is unpacked explicitly to work on both.
  def self.byte_at(buf, offset)
    buf[offset, 1].unpack('C')[0]
  end
  private_class_method :byte_at

  def each_child(&block)
    @children.each(&block)
  end
end
541
+
542
+ # _pst_table_ptr_struct
543
# _pst_table_ptr_struct
#
# an entry of a node block, pointing at a block further down the tree.
class TablePtr < Struct.new(:start, :u1, :offset)
  UNPACK_STR = 'V3'
  SIZE = 12

  # +data+ is either the packed entry, or an array of the field values.
  def initialize(data)
    fields = data.is_a?(String) ? data.unpack(UNPACK_STR) : data
    super(*fields)
  end
end
552
+
553
+ # pst_desc
554
+ # idx_id is a pointer to an idx record which gets the primary data stream for the Desc record.
555
+ # idx2_id gets you an idx record, that when read gives you an ID2 association list, which just maps
556
+ # another set of ids to index values
557
# pst_desc
#
# a desc record. idx_id refers to the idx record for the record's primary data stream,
# and idx2_id to the idx record of its ID2 association list. +pst+ must be set to the
# owning Pst before #desc or #list_index can be used.
class Desc < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id)
  UNPACK_STR = 'V4'
  SIZE = 16
  BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
  COUNT_MAX = 31 # max active desc records (ITEM_COUNT_OFFSET / Desc::SIZE = 31)

  include ToTree

  attr_accessor :pst
  attr_reader :children

  # +data+ is the packed record. the record starts out with no children.
  def initialize(data)
    fields = data.unpack(UNPACK_STR)
    super(*fields)
    @children = []
  end

  # the idx record for idx_id
  def desc
    pst.idx_from_id(idx_id)
  end

  # the idx record for idx2_id
  def list_index
    pst.idx_from_id(idx2_id)
  end

  # show all numbers in hex
  def inspect
    super.gsub(/=(\d+)/) { format('=0x%x', Regexp.last_match(1).to_i) }
  end
end
585
+
586
+ # corresponds to
587
+ # * _pst_build_id_ptr
588
# loads all of the idx records into @idx, using the loader that matches the file
# version, and then builds the id lookup that #idx_from_id uses. duplicate ids are
# warned about, with the later record winning.
#
# corresponds to
# * _pst_build_id_ptr
def load_idx
  @idx = []
  @idx_offsets = []
  if header.version_2003?
    @idx = Index64.load_chain(io, header)
    @idx.each { |record| record.pst = self }
  else
    load_idx_rec(header.index1, header.index1_count, 0)
  end

  # we'll typically be accessing by id, so create a hash as a lookup cache
  @idx_from_id = {}
  @idx.each do |record|
    warn "there are duplicate idx records with id #{record.id}" if @idx_from_id[record.id]
    @idx_from_id[record.id] = record
  end
end
605
+
606
+ # load the flat idx table, which maps ids to file ranges. this is the recursive helper
607
+ #
608
+ # corresponds to
609
+ # * _pst_build_id_ptr
610
# load the flat idx table, which maps ids to file ranges. this is the recursive helper.
#
# reads the index block at +offset+. a leaf block holds idx records, which are appended
# to @idx, and a node block holds table pointers, which are followed in turn. +linku1+
# is the value the backlink of the block has to have, and +start_val+, if non zero, the
# id that its first item has to have.
#
# corresponds to
# * _pst_build_id_ptr
def load_idx_rec(offset, linku1, start_val)
  @idx_offsets << offset

  #_pst_read_block_size(pf, offset, BLOCK_SIZE, &buf, 0, 0) < BLOCK_SIZE)
  buf = pst_read_block_size offset, Index::BLOCK_SIZE, false

  # the single bytes are unpacked explicitly, as String#[] with a single index only
  # gives an Integer on ruby 1.8 (it is a one character String since 1.9).
  item_count = buf[ITEM_COUNT_OFFSET, 1].unpack('C')[0]
  raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX

  idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
  raise 'blah 1' unless idx.id == linku1

  if buf[LEVEL_INDICATOR_OFFSET, 1].unpack('C')[0] == 0
    # leaf pointers
    # split the data into item_count index objects
    buf[0, Index::SIZE * item_count].scan(/.{#{Index::SIZE}}/mo).each_with_index do |data, i|
      idx = Index.new data
      # first entry
      raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
      idx.pst = self
      # this shouldn't really happen i'd imagine
      break if idx.id == 0
      @idx << idx
    end
  else
    # node pointers
    # split the data into item_count table pointers
    buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
      table = TablePtr.new data
      # for the first value, we expect the start to be equal
      raise 'blah 3' if i == 0 and start_val != 0 and table.start != start_val
      # this shouldn't really happen i'd imagine
      break if table.start == 0
      load_idx_rec table.offset, table.u1, table.start
    end
  end
end
647
+
648
+ # most access to idx objects will use this function
649
+ #
650
+ # corresponds to
651
+ # * _pst_getID
652
# most access to idx objects will use this function. gives the idx record with the
# given +id+, or nil if there is none.
#
# corresponds to
# * _pst_getID
def idx_from_id(id)
  @idx_from_id[id]
end
655
+
656
+ # corresponds to
657
+ # * _pst_build_desc_ptr
658
+ # * record_descriptor
659
# loads all of the desc records into @desc, using the loader that matches the file
# version, builds the id lookup that #desc_from_id uses (warning about duplicate ids,
# with the later record winning), and then links the flat list into a tree.
#
# corresponds to
# * _pst_build_desc_ptr
# * record_descriptor
def load_desc
  @desc = []
  @desc_offsets = []
  if header.version_2003?
    @desc = Desc64.load_chain(io, header)
    @desc.each { |record| record.pst = self }
  else
    load_desc_rec(header.index2, header.index2_count, 0x21)
  end

  # first create a lookup cache
  @desc_from_id = {}
  @desc.each do |record|
    record.pst = self
    warn "there are duplicate desc records with id #{record.desc_id}" if @desc_from_id[record.desc_id]
    @desc_from_id[record.desc_id] = record
  end

  # now turn the flat list of loaded desc records into a tree, by adding each record to
  # the children of its parent. records whose parent can't be found, or whose parent is
  # the record itself (this actually happens usually, for the root_item it appears), go
  # in @orphans. well, they have no parent, so they're more like the toplevel descs.
  #
  # note, besides a record being its own parent, its possible to create other circular
  # structures. maybe add some more checks in here for those.
  @orphans = []
  @desc.each do |record|
    parent = @desc_from_id[record.parent_desc_id]
    if parent && parent != record
      parent.children << record
    else
      @orphans << record
    end
  end

  # maybe change this to some sort of sane-ness check. orphans are expected
  # warn "have #{@orphans.length} orphan desc record(s)." unless @orphans.empty?
end
699
+
700
+ # load the flat list of desc records recursively
701
+ #
702
+ # corresponds to
703
+ # * _pst_build_desc_ptr
704
+ # * record_descriptor
705
# load the flat list of desc records recursively.
#
# reads the desc block at +offset+. a leaf block holds desc records, which are appended
# to @desc, and a node block holds table pointers, which are followed in turn. +linku1+
# is the value the backlink of the block has to have, and +start_val+ the id that its
# first item is expected to have.
#
# corresponds to
# * _pst_build_desc_ptr
# * record_descriptor
def load_desc_rec(offset, linku1, start_val)
  @desc_offsets << offset

  buf = pst_read_block_size offset, Desc::BLOCK_SIZE, false
  # the single bytes are unpacked explicitly, as String#[] with a single index only
  # gives an Integer on ruby 1.8 (it is a one character String since 1.9).
  item_count = buf[ITEM_COUNT_OFFSET, 1].unpack('C')[0]

  # not real desc
  desc = Desc.new buf[BACKLINK_OFFSET, 4]
  raise 'blah 1' unless desc.desc_id == linku1

  if buf[LEVEL_INDICATOR_OFFSET, 1].unpack('C')[0] == 0
    # leaf pointers
    raise "have too many active items in index (#{item_count})" if item_count > Desc::COUNT_MAX
    # split the data into item_count desc objects
    buf[0, Desc::SIZE * item_count].scan(/.{#{Desc::SIZE}}/mo).each_with_index do |data, i|
      desc = Desc.new data
      # first entry
      raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
      # this shouldn't really happen i'd imagine
      break if desc.desc_id == 0
      @desc << desc
    end
  else
    # node pointers
    raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX
    # split the data into item_count table pointers
    buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
      table = TablePtr.new data
      # for the first value, we expect the start to be equal note that ids -1, so even for the
      # first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
      # that the first desc record is always 33...
      raise 'blah 3' if i == 0 and start_val != -1 and table.start != start_val
      # this shouldn't really happen i'd imagine
      break if table.start == 0
      load_desc_rec table.offset, table.u1, table.start
    end
  end
end
743
+
744
+ # as for idx
745
+ #
746
+ # corresponds to:
747
+ # * _pst_getDptr
748
# Looks up a desc record by its desc_id in the cache built by load_desc.
# Gives nil for an id that has no record.
def desc_from_id id
  @desc_from_id.fetch id, nil
end
751
+
752
+ # corresponds to
753
+ # * pst_load_extended_attributes
754
# Stub for loading the extended attributes, which are looked for under desc
# id 0x61. For now it only warns when the desc record, or its associated idx,
# is missing. Always returns nil.
def load_xattrib
  xattr = desc_from_id 0x61
  if !xattr
    warn "no extended attributes desc record found"
    return
  end
  if !xattr.desc
    warn "no desc idx for extended attributes"
    return
  end
  # FIXME implement loading xattribs. the list index is fetched but not used yet.
  xattr.list_index
  nil
end
768
+
769
+ # corresponds to:
770
+ # * _pst_read_block_size
771
+ # * _pst_read_block ??
772
+ # * _pst_ff_getIDblock_dec ??
773
+ # * _pst_ff_getIDblock ??
774
# Reads +size+ bytes starting at file +offset+.
#
# offset::  absolute position in the underlying io
# size::    number of bytes wanted
# decrypt:: when true (the default) and the file is encrypted, the data is run
#           through CompressibleEncryption.decrypt before being returned
#
# A short read only produces a warning; whatever was read is returned. Reading
# at or past the end of the file gives an empty string.
def pst_read_block_size offset, size, decrypt=true
  io.seek offset
  # IO#read gives nil, not '', once positioned at the end of the file. treat
  # that as a zero length read so that the warning below is what gets reported.
  buf = io.read(size) || ''
  warn "tried to read #{size} bytes but only got #{buf.length}" if buf.length != size
  encrypted? && decrypt ? CompressibleEncryption.decrypt(buf) : buf
end
780
+
781
+ #
782
+ # id2
783
+ # ----------------------------------------------------------------------------
784
+ #
785
+
786
# A single id2 association record, as read by load_idx2_rec: three 32 bit
# little endian values, 12 bytes in total.
class ID2Assoc < Struct.new(:id2, :id, :table2)
  UNPACK_STR = 'V3'
  SIZE = 12

  # +data+ is either the packed record as a String, or the already decoded
  # list of member values.
  def initialize data
    members = case data
              when String then data.unpack(UNPACK_STR)
              else data
              end
    super(*members)
  end
end
795
+
796
# The id2 association record used for 2003 format files (see load_idx2).
# Records are 24 bytes and are decoded with Pst.unpack, as the unpack string
# uses the non standard 'T' directive.
class ID2Assoc64 < Struct.new(:id2, :u1, :id, :table2)
  UNPACK_STR = 'VVT2'
  SIZE = 24

  # +data+ is either the packed record as a String, or the already decoded
  # list of member values.
  def initialize data
    members = String === data ? Pst.unpack(data, UNPACK_STR) : data
    super(*members)
  end

  # Reads all the records of the id2 block behind +idx+. Whenever a record has
  # a non zero table2, the block that refers to is loaded too and its records
  # follow directly after the referring record.
  #
  # Raises RuntimeError if the block doesn't have the id2 type marker (0x0002).
  def self.load_chain idx
    buf = idx.read
    type, count = buf.unpack 'v2'
    raise 'unknown id2 type 0x%04x' % type unless type == 0x0002
    # the records start after an 8 byte header
    (0...count).inject([]) do |chain, n|
      assoc = new buf[8 + SIZE * n, SIZE]
      chain << assoc
      chain.concat load_chain(idx.pst.idx_from_id(assoc.table2)) unless assoc.table2 == 0
      chain
    end
  end
end
825
+
826
# Maps id2 values to idx records, by way of a list of id2 association records.
class ID2Mapping
  attr_reader :list

  # pst::  the object that is asked for idx records (through idx_from_id)
  # list:: the association records; each responds to id2 and id
  def initialize pst, list
    @pst = pst
    @list = list
    # create a lookup.
    @id_from_id2 = {}
    @list.each do |assoc|
      if @id_from_id2[assoc.id2]
        # NOTE when there are duplicates, the first record seen is the one
        # that is kept. this "fixes" test4-o1997.pst for the time being.
        warn "there are duplicate id2 records with id #{assoc.id2}"
      else
        @id_from_id2[assoc.id2] = assoc.id
      end
    end
  end

  # TODO: fix logging
  def warn s
    Mapi::Log.warn s
  end

  # Returns the idx record for the given id2 value, or nil if the id2 value
  # is not in the list.
  #
  # corresponds to:
  # * _pst_getID2
  def [] id
    real_id = @id_from_id2[id]
    real_id && @pst.idx_from_id(real_id)
  end
end
855
+
856
# Builds the ID2Mapping for the id2 block chain starting at +idx+, using the
# record layout that matches the file version.
def load_idx2 idx
  assocs = header.version_2003? ? ID2Assoc64.load_chain(idx) : load_idx2_rec(idx)
  ID2Mapping.new self, assocs
end
864
+
865
+ # corresponds to
866
+ # * _pst_build_id2
867
# Reads the ID2Assoc records of the id2 block behind +idx+. Whenever a record
# has a non zero table2, the block that refers to is read recursively and its
# records follow directly after the referring record.
#
# Raises RuntimeError if the block doesn't have the id2 type marker (0x0002).
def load_idx2_rec idx
  # i should perhaps use a idx chain style read here?
  buf = pst_read_block_size idx.offset, idx.size, false
  type, count = buf.unpack 'v2'
  raise 'unknown id2 type 0x%04x' % type unless type == 0x0002
  # the records start straight after the 4 byte type / count header
  (0...count).inject([]) do |found, n|
    assoc = ID2Assoc.new buf[4 + ID2Assoc::SIZE * n, ID2Assoc::SIZE]
    found << assoc
    found.concat load_idx2_rec(idx_from_id(assoc.table2)) unless assoc.table2 == 0
    found
  end
end
885
+
886
# An io over the data of all the idx records in the chain starting at
# idx_head, presented as one contiguous stream.
class RangesIOIdxChain < RangesIOEncryptable
  def initialize pst, idx_head
    @idxs = pst.id2_block_idx_chain idx_head
    # whether or not a given idx needs decrypting: never when bit 0x2 of its
    # id is set, otherwise whatever applies to the file as a whole.
    modes = @idxs.map { |idx| (idx.id & 2) == 0 && pst.encrypted? }.uniq
    raise NotImplementedError, 'partial encryption in RangesIOID2' if modes.length > 1
    # convert idxs to ranges
    ranges = @idxs.map { |idx| [idx.offset, idx.size] }
    super pst.io, :ranges => ranges, :decrypt => modes.first
  end
end
900
+
901
# Convenience constructor only: RangesIOID2.new resolves the id2 value through
# the given mapping, and hands back a RangesIOIdxChain for the result.
class RangesIOID2 < RangesIOIdxChain
  def self.new pst, id2, idx2
    head = idx2[id2]
    RangesIOIdxChain.new pst, head
  end
end
906
+
907
+ # corresponds to:
908
+ # * _pst_ff_getID2block
909
+ # * _pst_ff_getID2data
910
+ # * _pst_ff_compile_ID
911
# Expands +idx+ into the list of idx records that hold its actual data.
#
# An idx without bit 0x2 set in its id holds data directly and is returned as
# a single element list. Otherwise its block is a table of ids, which are
# looked up with idx_from_id, and expanded recursively while the depth field
# is not 1.
#
# A table with an unexpected type byte produces a warning and is returned as
# is ([idx]).
def id2_block_idx_chain idx
  return [idx] if (idx.id & 0x2) == 0

  buf = idx.read
  type, fdepth, count = buf[0, 4].unpack 'CCv'
  unless type == 1 # libpst.c:3958
    warn 'Error in idx_chain - %p, %p, %p - attempting to ignore' % [type, fdepth, count]
    return [idx]
  end
  # there are 4 unaccounted for bytes here, 4...8
  if header.version_2003?
    # 'T' is not a directive String#unpack knows about, it is only understood
    # by Pst.unpack (as used by ID2Assoc64), so the 8 byte ids must be decoded
    # through that helper.
    ids = Pst.unpack buf[8, count * 8], "T#{count}"
  else
    ids = buf[8, count * 4].unpack('V*')
  end
  if fdepth == 1
    ids.map { |id| idx_from_id id }
  else
    ids.map { |id| id2_block_idx_chain idx_from_id(id) }.flatten
  end
end
934
+
935
+ #
936
+ # main block parsing code. gets raw properties
937
+ # ----------------------------------------------------------------------------
938
+ #
939
+
940
+ # the job of this class, is to take a desc record, and be able to enumerate through the
941
+ # mapi properties of the associated thing.
942
+ #
943
+ # corresponds to
944
+ # * _pst_parse_block
945
+ # * _pst_process (in some ways. although perhaps thats more the Item::Properties#add_property)
946
# Takes a desc record, and gives access to the raw data blocks behind it and
# to the values they reference. The subclasses use this to enumerate the mapi
# properties of the associated thing.
#
# corresponds to
# * _pst_parse_block
# * _pst_process (in some ways. although perhaps thats more the Item::Properties#add_property)
class BlockParser
  include Mapi::Types::Constants

  # block type signature => block type
  TYPES = {
    0xbcec => 1,
    0x7cec => 2,
    # type 3 is removed. an artifact of not handling the indirect blocks properly in libpst.
  }

  PR_SUBJECT = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_SUBJECT' }.first.hex
  PR_BODY_HTML = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_BODY_HTML' }.first.hex

  # this stuff could maybe be moved to Ole::Types? or leverage it somehow?
  # whether or not a type is immediate is more a property of the pst encoding though i expect.
  # what i probably can add is a generic concept of whether a type is of variadic length or not.

  # these lists are very incomplete. think they are largely copied from libpst

  # types whose value is stored directly
  IMMEDIATE_TYPES = [
    PT_SHORT, PT_LONG, PT_BOOLEAN
  ]

  # types whose stored value is a reference to the actual data
  INDIRECT_TYPES = [
    PT_DOUBLE, PT_OBJECT,
    0x0014, # whats this? probably something like PT_LONGLONG, given the correspondence with the
            # ole variant types. (= VT_I8)
    PT_STRING8, PT_UNICODE, # unicode isn't in libpst, but added here for outlook 2003 down the track
    PT_SYSTIME,
    0x0048, # another unknown
    0x0102, # this is PT_BINARY vs PT_CLSID
    #0x1003, # these are vector types, but they're commented out for now because i'd expect that
    #0x1014, # there's extra decoding needed that i'm not doing. (probably just need a simple
    #        # PT_* => unpack string mapping for the immediate types, and just do unpack('V*') etc
    #0x101e,
    #0x1102
  ]

  # the attachment and recipient arrays appear to be always stored with these fixed
  # id2 values. seems strange. are there other extra streams? can find out by making higher
  # level IO wrapper, which has the id2 value, and doing the diff of available id2 values versus
  # used id2 values in properties of an item.
  ID2_ATTACHMENTS = 0x671
  ID2_RECIPIENTS = 0x692

  attr_reader :desc, :data, :data_chunks, :offset_tables

  # Reads all the data blocks behind +desc+ and decodes the offset table of each.
  #
  # Raises FormatError if desc has no associated index record, if there is no
  # data, or if the header or an offset table is malformed.
  def initialize desc
    raise FormatError, "unable to get associated index record for #{desc.inspect}" unless desc.desc
    @desc = desc
    if Pst::Index === desc.desc
      # this gets me the plain index chain.
      idxs = desc.pst.id2_block_idx_chain desc.desc
    else
      # fake desc
      idxs = [desc.desc]
    end

    @data_chunks = idxs.map { |idx| idx.read }
    @data = @data_chunks.first
    # an empty chain would otherwise surface as a NoMethodError on nil in load_header
    raise FormatError, "no data available for #{desc.inspect}" unless @data

    load_header

    # the first chunk's index offset comes from the header. every later chunk
    # starts with just its own index offset.
    @index_offsets = [@index_offset] + @data_chunks[1..-1].map { |chunk| chunk.unpack('v')[0] }
    @offset_tables = []
    @ignored = []
    @data_chunks.zip(@index_offsets).each do |chunk, offset|
      # maybe its ok if there aren't to be any values ?
      raise FormatError if offset.nil? or offset == 0
      count_field = chunk[offset, 2]
      if count_field.nil? or count_field.length < 2
        raise FormatError, "offset table at #{offset} lies outside of its block"
      end
      ignore = count_field.unpack('v')[0]
      @ignored << ignore
      @offset_tables.push offset_table = []
      offsets = chunk[offset + 2..-1].unpack('v*')
      # consecutive offsets delimit the items: [from, to) pairs
      offsets[0, ignore + 2].each_cons 2 do |from, to|
        raise FormatError, [from, to].inspect if from > to
        offset_table << [from, to]
      end
    end

    @offset_table = @offset_tables.first
    @idxs = idxs

    # now, we may have multiple different blocks
  end

  # a given desc record may or may not have associated idx2 data. we lazily load it here, so it will never
  # actually be requested unless get_data_indirect actually needs to use it.
  #
  # Raises FormatError if the desc record has no list index.
  def idx2
    return @idx2 if @idx2
    raise FormatError, 'idx2 requested but no idx2 available' unless desc.list_index
    # should check this can't return nil
    @idx2 = desc.pst.load_idx2 desc.list_index
  end

  # Decodes the header of the first data chunk: index offset, block type
  # signature, and the reference to the type specific header.
  #
  # Raises FormatError for an unknown block type signature.
  def load_header
    @index_offset, type, @offset1 = data.unpack 'vvV'
    raise FormatError, 'unknown block type signature 0x%04x' % type unless TYPES[type]
    @type = TYPES[type]
  end

  # Returns the data that +offset+ refers to as a string, or nil when offset
  # is 0 (the null reference). See get_data_indirect_io for how the reference
  # is resolved.
  #
  # corresponds to:
  # * _pst_getBlockOffsetPointer
  # * _pst_getBlockOffset
  def get_data_indirect offset
    io = get_data_indirect_io offset
    io && io.read
  end

  # Resolves the reference +offset+ to an io on the data it refers to:
  #
  # * 0 is the null reference, and gives nil.
  # * a value with the low 4 bits all set is an id2 value, resolved through idx2.
  # * anything else selects an entry of the offset table of one of the data chunks.
  #
  # References that can't be resolved produce a warning and an empty StringIO.
  def get_data_indirect_io offset
    return nil if offset == 0

    if (offset & 0xf) == 0xf
      return RangesIOID2.new(desc.pst, offset, idx2) if idx2[offset]
      warn "tried to get idx2 record for #{offset} but failed"
      return StringIO.new('')
    end

    low, high = offset & 0xf, offset >> 4
    if low != 0 or (high & 0x1) != 0
      warn "bad - #{low} #{high} (1)"
      return StringIO.new('')
    end
    # lets see which block it should come from.
    block_idx, i = high.divmod 4096
    unless block_idx < @data_chunks.length
      warn "bad - block_idx to high (not #{block_idx} < #{@data_chunks.length})"
      return StringIO.new('')
    end
    data_chunk, offset_table = @data_chunks[block_idx], @offset_tables[block_idx]
    if i / 2 >= offset_table.length
      warn "bad - #{low} #{high} - #{i / 2} >= #{offset_table.length} (2)"
      return StringIO.new('')
    end
    from, to = offset_table[i / 2]
    StringIO.new data_chunk[from...to]
  end

  # Converts a raw property tuple to its usable form, following references for
  # the indirect types. Returns [key, type, value].
  #
  # key::   the property tag
  # type::  the property type (PT_*)
  # value:: an Integer (an immediate value or a reference), or a String
  #         holding the data itself
  #
  # Raises NotImplementedError for a type that isn't handled.
  def handle_indirect_values key, type, value
    case type
    when PT_BOOLEAN
      value = value != 0
    when *IMMEDIATE_TYPES # not including PT_BOOLEAN which we just did above
      # no processing current applied (needed?).
    when *INDIRECT_TYPES
      # the value is a pointer, unless the data itself was handed in already
      if String === value
        value = StringIO.new value
      else
        value = get_data_indirect_io(value)
      end
      # keep strings as immediate values for now, for compatability with how i set up
      # Msg::Properties::ENCODINGS
      if value
        if type == PT_STRING8
          value = value.read
        elsif type == PT_UNICODE
          value = Ole::Types::FROM_UTF16.iconv value.read
        end
      end
      if key == PR_BODY_HTML and value
        # to keep the msg code happy, which thinks body_html will be an io
        # although, in 2003 version, they are 0102 already
        value = StringIO.new value unless value.respond_to?(:read)
      end
      # special subject handling
      value = strip_subject_prefix value if key == PR_SUBJECT and value

      # special handling for embedded objects
      # used for attach_data for attached messages. in which case attach_method should == 5,
      # for embedded object.
      if type == PT_OBJECT and value
        value = value.read if value.respond_to?(:read)
        id2 = value.unpack('V')[0]
        # until properly fixed, building a nested Item here is disabled, so this
        # breaks nested messages temporarily. the plan was to wrap the io in a
        # fake desc and parse that:
        #
        #   desc2 = OpenStruct.new(:desc => io, :pst => desc.pst, :list_index => desc.list_index, :children => [])
        #   value = Item.new desc2, RawPropertyStore.new(desc2).to_a
        #
        # but with desc.list_index the attachment is attached to itself ad
        # infinitum (FIXME), and it shouldn't be done always: for an attached
        # message, yes, but an embedded ole object (eg display_name "Picture
        # (Metafile)") is a regular serialized ole document, starting with the
        # docfile signature d0cf11e0a1b11ae1, which needs an ole storage object
        # created on the rangesioidxchain instead.
        value = RangesIOID2.new desc.pst, id2, idx2
      end
    # this is PT_MV_STRING8, i guess.
    # should probably have the 0x1000 flag, and do the or-ring.
    # example of 0x1102 is PR_OUTLOOK_2003_ENTRYIDS. less sure about that one.
    when 0x101e, 0x1102
      # example data:
      # 0x802b "\003\000\000\000\020\000\000\000\030\000\000\000#\000\000\000BusinessCompetitionFavorites"
      # this 0x802b would be an extended attribute for categories / keywords.
      # ie a count, then that many start offsets, then the items back to back.
      value = get_data_indirect_io(value).read unless String === value
      num = value.unpack('V')[0]
      offsets = value[4, 4 * num].unpack("V#{num}")
      value = (offsets + [value.length]).to_enum(:each_cons, 2).map { |from, to| value[from...to] }
      value.map! { |str| StringIO.new str } if type == 0x1102
    else
      name = Mapi::Types::DATA[type].first rescue nil
      # the raw data is only logged to help debugging. failing to fetch it must
      # not replace the NotImplementedError raised below.
      preview = begin
        get_data_indirect_io(value).read
      rescue StandardError
        value
      end
      warn '0x%04x %p' % [key, preview]
      raise NotImplementedError, 'unsupported mapi property type - 0x%04x (%p)' % [type, name]
    end
    [key, type, value]
  end

  private

  # Removes the 2 byte prefix that subjects are stored with, eg:
  #
  #   "\001\005RE: blah blah"
  #   "\001\001blah blah"
  #   "\001\032Out of Office AutoReply: blah blah"
  #   "\001\020Undeliverable: blah blah"
  #
  # The second byte looks to be related to the placement of the ':' or ' ' that
  # ends the prefix added by mailers (thread topic is equal to the subject with
  # all that removed). Perhaps subject[offset..-1] should be yielded as a
  # separate PR_CONVERSATION_TOPIC property.
  #
  # Only strings that start with the \001 marker are touched. Empty subjects
  # and subjects without the marker (assumed to simply not have a prefix -
  # every sample seen has one) are returned as they are.
  def strip_subject_prefix subject
    return subject unless String === subject
    subject.unpack('C')[0] == 1 ? subject[2..-1].to_s : subject
  end
end
1233
+
1234
+ =begin
1235
+ * recipients:
1236
+
1237
+ affects: ["0x200764", "0x2011c4", "0x201b24", "0x201b44", "0x201ba4", "0x201c24", "0x201cc4", "0x202504"]
1238
+
1239
+ after adding the rawpropertystoretable fix, all except the second parse properly, and satisfy:
1240
+
1241
+ item.props.display_to == item.recipients.map { |r| r.props.display_name if r.props.recipient_type == 1 }.compact * '; '
1242
+
1243
+ only the second still has a problem
1244
+
1245
+ #[#<struct Pst::Desc desc_id=0x2011c4, idx_id=0x397c, idx2_id=0x398a, parent_desc_id=0x8082>]
1246
+
1247
+ think this is related to a multi block #data3. ie, when you use @x * rec_size, and it
1248
+ goes > 8190, or there abouts, then it stuffs up. probably there is header gunk, or something,
1249
+ similar to when #data is multi block.
1250
+
1251
+ same problem affects the attachment table in test4.
1252
+
1253
+ fixed that issue. round data3 ranges to rec_size.
1254
+
1255
+ fix other issue with attached objects.
1256
+
1257
+ all recipients and attachments in test2 are fine.
1258
+
1259
+ only remaining issue is test4 recipients of 200044. strange.
1260
+
1261
+ =end
1262
+
1263
+ # RawPropertyStore is used to iterate through the properties of an item, or the auxiliary
1264
+ # data for an attachment. its just a parser for the way the properties are serialized, when the
1265
+ # properties don't have to conform to a column structure.
1266
+ #
1267
+ # structure of this chunk of data is often
1268
+ # header, property keys, data values, and then indexes.
1269
+ # the property keys has value in it. value can be the actual value if its a short type,
1270
+ # otherwise you lookup the value in the indicies, where you get the offsets to use in the
1271
+ # main data body. due to the indirect thing though, any of these parts could actually come
1272
+ # from a separate stream.
1273
# Iterates through the properties stored in a type 1 block, as
# [key, type, value] tuples.
class RawPropertyStore < BlockParser
  include Enumerable

  # the number of properties
  attr_reader :length

  # Raises FormatError if the block is not a type 1 block, or if its header
  # is missing, too short, or has the wrong signature.
  def initialize desc
    super
    raise FormatError, "expected type 1 - got #{@type}" unless @type == 1

    # the way that offset works, data1 may be a subset of buf, or something from id2. if its from buf,
    # it will be offset based on index_offset and offset. so it could be some random chunk of data anywhere
    # in the thing.
    header_data = get_data_indirect @offset1
    raise FormatError if header_data.nil? or header_data.length < 8
    signature, offset2 = header_data.unpack 'V2'
    # report the value that was actually tested, not the block type
    raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != 0x000602b5
    # this is actually a big chunk of tag tuples, 8 bytes each.
    @index_data = get_data_indirect offset2
    raise FormatError, 'no property index data' unless @index_data
    @length = @index_data.length / 8
  end

  # iterate through the property tuples, yielding key, type and value for
  # each. returns an enumerator when no block is given.
  def each
    return to_enum(:each) unless block_given?
    length.times do |i|
      key, type, value = handle_indirect_values(*@index_data[8 * i, 8].unpack('vvV'))
      yield key, type, value
    end
  end
end
1302
+
1303
+ # RawPropertyStoreTable is kind of like a database table.
1304
+ # it has a fixed set of columns.
1305
+ # #[] is kind of like getting a row from the table.
1306
+ # those rows are currently encapsulated by Row, which has #each like
1307
+ # RawPropertyStore.
1308
+ # only used for the recipients array, and the attachments array. completely lazy, doesn't
1309
+ # load any of the properties upon creation.
1310
# A table of properties stored in a type 2 block: a fixed set of columns
# (the schema), and fixed size rows. #[] gives a Row, which has #each like
# RawPropertyStore. Rows are decoded lazily.
class RawPropertyStoreTable < BlockParser
  # The description of one column: 8 bytes of the table header.
  class Column < Struct.new(:ref_type, :type, :ind2_off, :size, :slot)
    def initialize data
      super(*data.unpack('v3CC'))
    end

    # the data type as a short lowercase name, or in hex when not known
    def nice_type_name
      Mapi::Types::DATA[ref_type].first[/_(.*)/, 1].downcase rescue '0x%04x' % ref_type
    end

    # the property tag as a short lowercase name, or in hex when not known.
    # the tag table lives in Mapi::PropertySet, as used by BlockParser.
    def nice_prop_name
      Mapi::PropertySet::TAGS['%04x' % type].first[/_(.*)/, 1].downcase rescue '0x%04x' % type
    end

    def inspect
      "#<#{self.class} name=#{nice_prop_name.inspect}, type=#{nice_type_name.inspect}>"
    end
  end

  include Enumerable

  attr_reader :length, :index_data, :data2, :data3, :rec_size

  # Raises FormatError if the block is not a type 2 block, or if one of its
  # headers is missing, truncated or fails a consistency check.
  def initialize desc
    super
    raise FormatError, "expected type 2 - got #{@type}" unless @type == 2

    header_data = get_data_indirect @offset1
    # the fixed part of the header is 22 bytes. the column descriptions follow.
    if header_data.nil? or header_data.length < 22
      raise FormatError, 'table header is missing or truncated'
    end
    # seven_c_blk
    # often: u1 == u2 and u3 == u2 + 2, then rec_size == u3 + 4. wtf
    seven_c, @num_list, u1, u2, u3, @rec_size, b_five_offset,
      ind2_offset, u7, u8 = header_data[0, 22].unpack('CCv4V2v2')
    @index_data = header_data[22..-1]

    raise FormatError if @num_list != schema.length or seven_c != 0x7c
    # the row arithmetic below divides by the record size
    raise FormatError, 'record size is zero' if @rec_size == 0
    # another check
    min_size = schema.inject(0) { |total, col| total + col.size }
    # seem to have at max, 8 padding bytes on the end of the record. not sure if it means
    # anything. maybe its just space that hasn't been reclaimed due to columns being
    # removed or something. probably should just check lower bound.
    range = (min_size..min_size + 8)
    warn "rec_size seems wrong (#{range} !=== #{rec_size})" unless range === rec_size

    header_data2 = get_data_indirect b_five_offset
    raise FormatError if header_data2.nil? or header_data2.length < 8
    signature, offset2 = header_data2.unpack 'V2'
    # ??? seems a bit iffy
    # there's probably more to the differences than this, and the data2 difference below
    expect = desc.pst.header.version_2003? ? 0x000404b5 : 0x000204b5
    raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != expect

    # this holds all the row data
    @data3_io = get_data_indirect_io ind2_offset
    if RangesIOIdxChain === @data3_io
      # handle multiple block issue: rows don't span blocks, so round each
      # range down to a whole number of records.
      ranges = @data3_io.ranges.map { |offset, size| [offset, size / @rec_size * @rec_size] }
      @data3_io.instance_variable_set :@ranges, ranges
    end
    # a null reference gives no io at all. presumably that means the table
    # has no rows - TODO confirm against a sample file.
    @data3 = @data3_io ? @data3_io.read : ''

    # there must be something to the data in data2. i think data2 is the array of objects essentially.
    # currently its only used to sanity check the length.
    # at size 6, its just some auxiliary data. i'm thinking either Vv/vV, for 97, and something
    # wider for 03. the second value is just the index (0...length), and the first value turned
    # out to be identical to the PR_ATTACHMENT_ID2 values:
    #   id2_values = ie, data2.unpack('v*').to_enum(:each_slice, 3).transpose[0]
    #   table[i].assoc(PR_ATTACHMENT_ID2).last == id2_values[i], for all i.
    @data2 = get_data_indirect(offset2) rescue nil
    # data2.length / 6 may have been ok as the length for 97 files, but the new 0x0004 style
    # block must have different size records. this works for both:
    @length = @data3.length / @rec_size

    # lets try and at least use data2 for a warning for now
    if data2
      data2_rec_size = desc.pst.header.version_2003? ? 8 : 6
      warn 'somthing seems wrong with data3' unless @length == (data2.length / data2_rec_size)
    end
  end

  # the columns of the table, as a list of Column objects
  def schema
    @schema ||= index_data.scan(/.{8}/m).map { |data| Column.new data }
  end

  # the row at index +idx+. no bounds checking is done.
  def [] idx
    Row.new self, idx * @rec_size
  end

  # yields each Row. returns an enumerator when no block is given.
  def each
    return to_enum(:each) unless block_given?
    length.times { |i| yield self[i] }
  end

  # One row of the table. +x+ is the offset of the row within the row data.
  class Row
    include Enumerable

    def initialize array_parser, x
      @array_parser, @x = array_parser, x
    end

    # iterate through the property tuples, yielding key, type and value for
    # every column. returns an enumerator when no block is given.
    def each
      return to_enum(:each) unless block_given?
      (@array_parser.index_data.length / 8).times do |i|
        ref_type, type, ind2_off, size, slot = @array_parser.index_data[8 * i, 8].unpack 'v3CC'
        value = @array_parser.data3[@x + ind2_off, size]
        if size <= 4
          # cells narrower than 4 bytes must be zero extended first, as
          # unpack('V') gives nil for short input, which would make every
          # such boolean come out as true.
          value = value.empty? ? nil : value.ljust(4, "\0").unpack('V')[0]
        end
        # note the argument order: the column's type is the property tag (the
        # key), and its ref_type is the data type.
        key, type, value = @array_parser.handle_indirect_values type, ref_type, value
        yield key, type, value
      end
    end
  end
end
1435
+
1436
# The attachments of an item, read from the table stored under the fixed id2
# value ID2_ATTACHMENTS.
class AttachmentTable < BlockParser
  # a "fake" MAPI property name for this constant. if you get a mapi property with
  # this value, it is the id2 value to use to get attachment data.
  PR_ATTACHMENT_ID2 = 0x67f2

  attr_reader :desc, :table

  # table is left as nil when the desc record has no list index, or when
  # there is no attachment table in it.
  def initialize desc
    @desc = desc
    # no super, we only actually want BlockParser#idx2
    @table = nil
    return unless desc.list_index
    idx = idx2[ID2_ATTACHMENTS]
    return unless idx
    # FIXME make a fake desc.
    @desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
    @table = RawPropertyStoreTable.new @desc2
  end

  # Returns one property list ([key, type, value] tuples) per attachment.
  def to_a
    return [] unless table
    table.map do |row|
      props = row.to_a
      # potentially merge with yet more properties
      # this still seems pretty broken - especially the property overlap
      id2_record = props.assoc PR_ATTACHMENT_ID2
      if id2_record
        # the fake desc is reused, pointed at this attachment's own block
        @desc2.desc = idx2[id2_record.last]
        RawPropertyStore.new(@desc2).each do |key, type, value|
          # replace the tuple for a key the row had already, otherwise append
          existing = props.assoc key
          unless existing
            existing = []
            props << existing
          end
          existing.replace [key, type, value]
        end
      end
      props
    end
  end
end
1474
+
1475
+ # there is no equivalent to this in libpst. ID2_RECIPIENTS was just guessed given the above
1476
+ # AttachmentTable.
1477
# The recipients of an item, read from the table stored under the fixed id2
# value ID2_RECIPIENTS.
class RecipientTable < BlockParser
  attr_reader :desc, :table

  # table is left as nil when the desc record has no list index, or when
  # there is no recipient table in it.
  def initialize desc
    @desc = desc
    # no super, we only actually want BlockParser#idx2
    @table = nil
    return unless desc.list_index
    idx = idx2[ID2_RECIPIENTS]
    return unless idx
    # FIXME make a fake desc.
    fake_desc = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
    @table = RawPropertyStoreTable.new fake_desc
  end

  # Returns one property list ([key, type, value] tuples) per recipient.
  def to_a
    return [] unless table
    table.map { |row| row.to_a }
  end
end
1495
+
1496
+ #
1497
+ # higher level item code. wraps up the raw properties above, and gives nice
1498
+ # objects to work with. handles item relationships too.
1499
+ # ----------------------------------------------------------------------------
1500
+ #
1501
+
1502
# Builds a PropertySet from a list of [key, type, value] tuples. The type is
# not used. When a key occurs more than once, the last value wins.
def self.make_property_set property_list
  values = {}
  property_list.each do |key, type, value|
    values[PropertySet::Key.new(key)] = value
  end
  PropertySet.new values
end
1508
+
1509
# An attachment, created from a list of [key, type, value] property tuples.
class Attachment < Mapi::Attachment
  def initialize list
    super Pst.make_property_set(list)

    # attach_data that is an Item means an embedded message
    payload = props.attach_data
    @embedded_msg = payload if Item === payload
  end
end
1516
+
1517
# A recipient, created from a list of [key, type, value] property tuples.
class Recipient < Mapi::Recipient
  def initialize list
    property_set = Pst.make_property_set list
    super property_set
  end
end
1522
+
1523
+ class Item < Mapi::Message
1524
+ class EntryID < Struct.new(:u1, :entry_id, :id)
1525
+ UNPACK_STR = 'VA16V'
1526
+
1527
+ def initialize data
1528
+ data = data.unpack(UNPACK_STR) if String === data
1529
+ super(*data)
1530
+ end
1531
+ end
1532
+
1533
+ include RecursivelyEnumerable
1534
+
1535
+ attr_accessor :type, :parent
1536
+
1537
+ def initialize desc, list, type=nil
1538
+ @desc = desc
1539
+ super Pst.make_property_set(list)
1540
+
1541
+ # this is kind of weird, but the ids of the special folders are stored in a hash
1542
+ # when the root item is loaded
1543
+ if ipm_wastebasket_entryid
1544
+ desc.pst.special_folder_ids[ipm_wastebasket_entryid] = :wastebasket
1545
+ end
1546
+
1547
+ if finder_entryid
1548
+ desc.pst.special_folder_ids[finder_entryid] = :finder
1549
+ end
1550
+
1551
+ # and then here, those are used, along with a crappy heuristic to determine if we are an
1552
+ # item
1553
+ =begin
1554
+ i think the low bits of the desc_id can give some info on the type.
1555
+
1556
+ it seems that 0x4 is for regular messages (and maybe contacts etc)
1557
+ 0x2 is for folders, and 0x8 is for special things like rules etc, that aren't visible.
1558
+ =end
1559
+ unless type
1560
+ type = props.valid_folder_mask || ipm_subtree_entryid || props.content_count || props.subfolders ? :folder : :message
1561
+ if type == :folder
1562
+ type = desc.pst.special_folder_ids[desc.desc_id] || type
1563
+ end
1564
+ end
1565
+
1566
+ @type = type
1567
+ end
1568
+
1569
# Yields each direct child of this item as a parsed item whose +parent+
# is set to this item. Without a block an enumerator is returned.
#
# When this item has an ipm_subtree_entryid, the children are those of the
# descriptor that id refers to, followed by the wastebasket and finder
# descriptors if they are not already among them. Otherwise the children
# of this item's own descriptor are used.
#
# Raises RuntimeError if the subtree descriptor can't be found, or if this
# item has a subtree id and its own descriptor also has children.
def each_child
  return to_enum(:each_child) unless block_given?

  subtree_id = ipm_subtree_entryid
  child_descs =
    if subtree_id
      subtree_root = @desc.pst.desc_from_id(subtree_id)
      raise "couldn't find root" unless subtree_root
      raise 'both kinds of children' unless @desc.children.empty?

      listed = subtree_root.children
      # lets look up the other ids we have.
      # typically the wastebasket one "deleted items" is in the children
      # already, but the search folder isn't.
      extras = [ipm_wastebasket_entryid, finder_entryid].compact.map do |extra_id|
        extra_desc = @desc.pst.desc_from_id(extra_id)
        warn "couldn't find root for id #{extra_id}" unless extra_desc
        extra_desc
      end.compact
      # appended instead of using a union, so as not to mess with the
      # order of the existing children.
      listed + (extras - listed)
    else
      @desc.children
    end

  child_descs.each do |child_desc|
    item = @desc.pst.pst_parse_item(child_desc)
    item.parent = self
    yield item
  end
end
1596
+
1597
# Returns the display names of this item's ancestors joined with '/'.
# The outermost ancestor (the root) is left out and the item's own name
# is not included. Raises RuntimeError if an ancestor has no display name.
def path
  ancestors = []
  node = parent
  while node
    ancestors.unshift(node)
    node = node.parent
  end

  # the first entry is the root, which is not part of the path
  names = ancestors.drop(1).map do |ancestor|
    ancestor.props.display_name || raise('unable to construct path')
  end
  names.join('/')
end
1604
+
1605
# Returns everything yielded by each_child, collected into an array.
def children
  collected = []
  each_child { |child| collected << child }
  collected
end
1608
+
1609
+ # these are still around because they do different stuff
1610
+
1611
+ # Top of Personal Folder Record
1612
# Returns the +id+ field decoded from the ipm_subtree_entryid property,
# or nil when the property is absent or can't be read/decoded.
# A successfully decoded id is cached.
def ipm_subtree_entryid
  @ipm_subtree_entryid ||= begin
    raw = props.ipm_subtree_entryid
    # an absent property is the normal case, so don't go through an exception for it
    raw ? EntryID.new(raw.read).id : nil
  rescue StandardError
    # best effort: anything unreadable counts as "no id"
    nil
  end
end
1615
+
1616
+ # Deleted Items Folder Record
1617
# Returns the +id+ field decoded from the ipm_wastebasket_entryid property,
# or nil when the property is absent or can't be read/decoded.
# A successfully decoded id is cached.
def ipm_wastebasket_entryid
  @ipm_wastebasket_entryid ||= begin
    raw = props.ipm_wastebasket_entryid
    # an absent property is the normal case, so don't go through an exception for it
    raw ? EntryID.new(raw.read).id : nil
  rescue StandardError
    # best effort: anything unreadable counts as "no id"
    nil
  end
end
1620
+
1621
+ # Search Root Record
1622
# Returns the +id+ field decoded from the finder_entryid property,
# or nil when the property is absent or can't be read/decoded.
# A successfully decoded id is cached.
def finder_entryid
  @finder_entryid ||= begin
    raw = props.finder_entryid
    # an absent property is the normal case, so don't go through an exception for it
    raw ? EntryID.new(raw.read).id : nil
  rescue StandardError
    # best effort: anything unreadable counts as "no id"
    nil
  end
end
1625
+
1626
+ # all these have been replaced with the method_missing below
1627
+ =begin
1628
+ # States which folders are valid for this message store
1629
+ #def valid_folder_mask
1630
+ # props[0x35df]
1631
+ #end
1632
+
1633
+ # Number of emails stored in a folder
1634
+ def content_count
1635
+ props[0x3602]
1636
+ end
1637
+
1638
+ # Has children
1639
+ def subfolders
1640
+ props[0x360a]
1641
+ end
1642
+ =end
1643
+
1644
+ # i think i will change these, so they can inherit the lazyness from RawPropertyStoreTable.
1645
+ # so if you want the last attachment, you can get it without creating the others perhaps.
1646
+ # it just has to handle the no table at all case a bit more gracefully.
1647
+
1648
# Returns one Attachment per entry of the AttachmentTable built for this
# item's descriptor. The array is built on first use and then reused.
def attachments
  @attachments ||= begin
    entries = AttachmentTable.new(@desc).to_a
    entries.map { |entry| Attachment.new(entry) }
  end
end
1651
+
1652
# Returns one Recipient per entry of the RecipientTable built for this
# item's descriptor. The array is built on first use and then reused.
def recipients
  @recipients ||= begin
    entries = RecipientTable.new(@desc).to_a
    entries.map { |entry| Recipient.new(entry) }
  end
end
1656
+
1657
# Calls the block with every descendant of this item, depth first:
# each child is passed to the block and then its own descendants are
# visited before moving on to the next child.
def each_recursive(&visitor)
  children.each do |descendant|
    visitor.call(descendant)
    descendant.each_recursive(&visitor)
  end
end
1665
+
1666
# Short description of the item: its kind, desc_id and whichever of
# display_name, subject, sender_name and subfolders are set. When none
# of those are set, the whole property set is shown instead.
def inspect
  shown = %w[display_name subject sender_name subfolders]
  parts = []
  shown.each do |attr_name|
    value = props.send(attr_name)
    parts << " #{attr_name}=#{value.inspect}" if value
  end
  summary = parts.join(',')

  kind =
    if type == :message
      'Message'
    elsif type == :folder
      'Folder'
    else
      # special folders, e.g. :wastebasket => "WastebasketFolder"
      type.to_s.capitalize + 'Folder'
    end
  ident = format('desc_id=0x%x', @desc.desc_id)

  if summary.empty?
    "#<Pst::#{kind} #{ident} props=#{props.inspect}>"
  else
    "#<Pst::#{kind} #{ident}#{summary}>"
  end
end
1676
+ end
1677
+
1678
+ # corresponds to
1679
+ # * _pst_parse_item
1680
# Builds an Item for +desc+ from the properties found in its
# RawPropertyStore.
def pst_parse_item(desc)
  raw_properties = RawPropertyStore.new(desc).to_a
  Item.new(desc, raw_properties)
end
1683
+
1684
+ #
1685
+ # other random code
1686
+ # ----------------------------------------------------------------------------
1687
+ #
1688
+
1689
# Prints diagnostic information about the pst to standard output: the
# header, how the byte ranges of the file are used (with gaps, overlaps
# and padding checks), the idx entries, the desc tree and the item tree.
# Moves the position of @io while checking padding.
def dump_debug_info
  puts "* pst header"
  p header

  # Looking at the output of this, for blank-o1997.pst, i see this part:
  #   ...
  #   - (26624,516) desc block data (overlap of 4 bytes)
  #   - (27136,516) desc block data (gap of 508 bytes)
  #   - (28160,516) desc block data (gap of 2620 bytes)
  #   ...
  # which confirms my belief that the block size for idx and desc is more
  # likely 512
  puts '* file range usage'
  file_ranges =
    # these 3 things, should account for most of the data in the file.
    [[0, Header::SIZE, 'pst file header']] +
    @idx_offsets.map { |offset| [offset, Index::BLOCK_SIZE, 'idx block data'] } +
    @desc_offsets.map { |offset| [offset, Desc::BLOCK_SIZE, 'desc block data'] } +
    @idx.map { |idx| [idx.offset, idx.size, 'idx id=0x%x (%s)' % [idx.id, idx.type]] }
  # a trailing nil is appended so that the last range is also visited by each_cons
  (file_ranges.sort_by { |range| range.first } + [nil]).to_enum(:each_cons, 2).each do |(offset, size, name), next_record|
    # i think there is a padding of the size out to 64 bytes
    # which is equivalent to padding out the final offset, because i think the offset is
    # similarly oriented
    pad_amount = 64
    warn 'i am wrong about the offset padding' if offset % pad_amount != 0
    # so, assuming i'm not wrong about that, then we can calculate how much padding is needed.
    pad = pad_amount - (size % pad_amount)
    pad = 0 if pad == pad_amount
    gap = next_record ? next_record.first - (offset + size + pad) : 0
    extra =
      if gap < 0
        ["overlap of #{gap.abs} bytes"]
      elsif gap > 0
        ["gap of #{gap} bytes"]
      else
        []
      end
    # how about we check that padding
    @io.pos = offset + size
    pad_bytes = @io.read(pad)
    extra += ["padding not all zero"] unless pad_bytes == 0.chr * pad
    puts "- #{offset}:#{size}+#{pad} #{name.inspect}" + (extra.empty? ? '' : ' [' + extra * ', ' + ']')
  end

  # i think the idea of the idx, and indeed the idx2, is just to be able to
  # refer to data indirectly, which means it can get moved around, and you just update
  # the idx table. it is simply a list of file offsets and sizes.
  # not sure i get how id2 plays into it though....
  # the sizes seem to be all even. is that a co-incidence? and the ids are all even. that
  # seems to be related to something else (see the (id & 2) == 1 stuff)
  puts '* idx entries'
  @idx.each { |idx| puts "- #{idx.inspect}" }

  # if you look at the desc tree, you notice a few things:
  # 1. there is a desc that seems to be the parent of all the folders, messages etc.
  #    it is the one whose parent is itself.
  #    one of its children is referenced as the subtree_entryid of the first desc item,
  #    the root.
  # 2. typically only 2 types of desc records have idx2_id != 0. messages themselves,
  #    and the desc with id = 0x61 - the xattrib container. everything else uses the
  #    regular ids to find its data. i think it should be reframed as small blocks and
  #    big blocks, but i'll look into it more.
  #
  # idx_id and idx2_id are for getting to the data. desc_id and parent_desc_id just define
  # the parent <-> child relationship, and the desc_ids are how the items are referred to in
  # entryids.
  # note that these aren't unique! eg for 0, 4 etc. i expect these'd never change, as the ids
  # are stored in entryids. whereas the idx and idx2 could be a bit more volatile.
  puts '* desc tree'
  # make a dummy root hold everything just for convenience
  root = Desc.new ''
  def root.inspect; "#<Pst::Root>"; end
  root.children.replace @orphans
  # this still loads the whole thing as a string for gsub. should use the direct output io
  # version.
  puts root.to_tree.gsub(/, (parent_desc_id|idx2_id)=0x0(?!\d)/, '')

  # this is fairly easy to understand, its just an attempt to display the pst items in a tree form
  # which resembles what you'd see in outlook.
  puts '* item tree'
  # now streams directly
  root_item.to_tree STDOUT
end
1773
+
1774
# Returns the first entry of the desc collection, which is used as the
# descriptor of the root item.
def root_desc
  descriptors = @desc
  descriptors.first
end
1777
+
1778
# Parses the root descriptor into an item and marks it with type :root.
# Nothing is cached here, so the descriptor is parsed on every call.
def root_item
  parsed = pst_parse_item(root_desc)
  parsed.type = :root
  parsed
end
1783
+
1784
# Shorthand for root_item.
def root
  root_item()
end
1787
+
1788
+ # depth first search of all items
1789
+ include Enumerable
1790
+
1791
# Passes the root item to the block, followed by everything the root
# item's each_recursive yields.
def each(&block)
  top = root
  block.call(top)
  top.each_recursive(&block)
end
1796
+
1797
# Display name of the root item. A non-nil name is remembered after
# the first lookup.
def name
  return @name if @name

  @name = root_item.props.display_name
end
1800
+
1801
# Short description of the pst showing its name and underlying io.
def inspect
  format('#<Pst name=%s io=%s>', name.inspect, io.inspect)
end
1804
+ end
1805
+ end
1806
+