pyonenote-parser 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyOneNote/FileNode.py +1046 -0
- pyOneNote/Header.py +129 -0
- pyOneNote/Main.py +112 -0
- pyOneNote/OneDocument.py +93 -0
- pyOneNote/__init__.py +1 -0
- pyonenote_parser-0.0.3.dist-info/METADATA +71 -0
- pyonenote_parser-0.0.3.dist-info/RECORD +11 -0
- pyonenote_parser-0.0.3.dist-info/WHEEL +5 -0
- pyonenote_parser-0.0.3.dist-info/entry_points.txt +2 -0
- pyonenote_parser-0.0.3.dist-info/licenses/license +201 -0
- pyonenote_parser-0.0.3.dist-info/top_level.txt +1 -0
pyOneNote/FileNode.py
ADDED
|
@@ -0,0 +1,1046 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
import struct
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
import locale
|
|
5
|
+
|
|
6
|
+
# Module-wide switch: when True, FileNode prints each parsed node (stream
# offset, node type name, base type) and a warning for node types that are
# recognised but not fully implemented.
DEBUG = False
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FileNodeListHeader:
    """Fixed 16-byte header read at the start of a FileNodeListFragment.

    Constructing the object consumes exactly 16 bytes from ``file`` and sets:

    * ``uint_magic`` - the first 8 bytes, kept as raw ``bytes``
    * ``file_node_list_id`` - unsigned 32-bit little-endian integer
    * ``n_fragment_sequence`` - unsigned 32-bit little-endian integer

    Raises ``struct.error`` if fewer than 16 bytes are available.
    """

    def __init__(self, file):
        raw_header = file.read(16)
        magic, list_id, sequence = struct.unpack('<8sII', raw_header)
        self.uint_magic = magic
        self.file_node_list_id = list_id
        self.n_fragment_sequence = sequence
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FileNodeList:
    """A chain of FileNodeListFragment objects starting at a chunk reference.

    Parsing starts at ``file_chunk_reference.stp`` and follows each fragment's
    ``next_fragment`` reference until a nil reference is reached.

    Args:
        file: seekable binary stream positioned anywhere; it is re-seeked here.
        document: document object handed through to every fragment/node.
        file_chunk_reference: object with ``stp`` (offset) and ``cb`` (size)
            describing the first fragment.

    Attributes:
        end: end offset (``stp + cb``) of the *first* fragment.
        fragments: the parsed fragments, in chain order.
    """

    def __init__(self, file, document, file_chunk_reference):
        file.seek(file_chunk_reference.stp)
        self.end = file_chunk_reference.stp + file_chunk_reference.cb
        self.fragments = []

        # Offsets of the fragments parsed so far.  A well-formed chain never
        # returns to an offset it has already visited; a damaged or hostile
        # file that links its fragments in a loop would otherwise keep this
        # loop running (and ``fragments`` growing) forever.
        visited_offsets = {file_chunk_reference.stp}

        # FileNodeList can contain one or more FileNodeListFragment
        while True:
            section_end = file_chunk_reference.stp + file_chunk_reference.cb
            fragment = FileNodeListFragment(file, document, section_end)
            self.fragments.append(fragment)

            next_fragment = fragment.next_fragment
            if next_fragment.is_fcr_nil():
                break
            if next_fragment.stp in visited_offsets:
                # Cyclic chain: keep what was parsed and stop following it.
                break

            visited_offsets.add(next_fragment.stp)
            file_chunk_reference = next_fragment
            file.seek(next_fragment.stp)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class FileNodeListFragment:
    """One fragment of a FileNodeList: header, FileNodes, then a 20-byte trailer.

    ``end`` is the absolute offset just past the fragment.  The 20 bytes before
    ``end`` hold the reference to the next fragment (12 bytes, stored in
    ``next_fragment``) followed by an 8-byte value stored in ``footer``.

    Attributes:
        file_node_list_header: the FileNodeListHeader read first.
        file_nodes: the FileNode objects parsed from this fragment.
    """

    def __init__(self, file, document, end):
        self.file_nodes = []
        self.file_node_list_header = FileNodeListHeader(file)

        # FileNodeListFragment can have one or more FileNode.  Nodes are read
        # while there is still room before the trailer; a node id of 255
        # (ChunkTerminatorFND) or 0 ends the node stream early.
        stop_ids = (255, 0)
        while file.tell() + 24 < end:
            node = FileNode(file, document)
            self.file_nodes.append(node)
            if node.file_node_header.file_node_id in stop_ids:
                break

        trailer_offset = end - 20
        file.seek(trailer_offset)
        self.next_fragment = FileChunkReference64x32(file.read(12))
        (self.footer,) = struct.unpack('<Q', file.read(8))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class FileNodeHeader:
    """Decoded 4-byte FileNode header.

    The header is one little-endian unsigned 32-bit value split into bit
    fields:

    * bits 0-9   ``file_node_id``
    * bits 10-22 ``size``
    * bits 23-24 ``stp_format``
    * bits 25-26 ``cb_format``
    * bits 27-30 ``base_type``
    * bit  31    ``reserved``

    ``file_node_type`` is the name registered for ``file_node_id`` in
    ``_file_node_ids`` or the string ``"Invalid"`` when the id is unknown.
    """

    # file_node_id -> (id, base type, node type name)
    _file_node_ids = {
        0x004: (0x004, 0, "ObjectSpaceManifestRootFND"),
        0x008: (0x008, 2, "ObjectSpaceManifestListReferenceFND"),
        0x00C: (0x00C, 0, "ObjectSpaceManifestListStartFND"),
        0x010: (0x010, 2, "RevisionManifestListReferenceFND"),
        0x014: (0x014, 0, "RevisionManifestListStartFND"),
        0x01B: (0x01B, 0, "RevisionManifestStart4FND"),
        0x01C: (0x01C, 0, "RevisionManifestEndFND"),
        0x01E: (0x01E, 0, "RevisionManifestStart6FND"),
        0x01F: (0x01F, 0, "RevisionManifestStart7FND"),
        0x021: (0x021, 0, "GlobalIdTableStartFNDX"),
        0x022: (0x022, 0, "GlobalIdTableStart2FND"),
        0x024: (0x024, 0, "GlobalIdTableEntryFNDX"),
        0x025: (0x025, 0, "GlobalIdTableEntry2FNDX"),
        0x026: (0x026, 0, "GlobalIdTableEntry3FNDX"),
        0x028: (0x028, 0, "GlobalIdTableEndFNDX"),
        0x02D: (0x02D, 1, "ObjectDeclarationWithRefCountFNDX"),
        0x02E: (0x02E, 1, "ObjectDeclarationWithRefCount2FNDX"),
        0x041: (0x041, 1, "ObjectRevisionWithRefCountFNDX"),
        0x042: (0x042, 1, "ObjectRevisionWithRefCount2FNDX"),
        0x059: (0x059, 0, "RootObjectReference2FNDX"),
        0x05A: (0x05A, 0, "RootObjectReference3FND"),
        0x05C: (0x05C, 0, "RevisionRoleDeclarationFND"),
        0x05D: (0x05D, 0, "RevisionRoleAndContextDeclarationFND"),
        0x072: (0x072, 0, "ObjectDeclarationFileData3RefCountFND"),
        0x073: (0x073, 0, "ObjectDeclarationFileData3LargeRefCountFND"),
        0x07C: (0x07C, 1, "ObjectDataEncryptionKeyV2FNDX"),
        0x084: (0x084, 1, "ObjectInfoDependencyOverridesFND"),
        0x08C: (0x08C, 0, "DataSignatureGroupDefinitionFND"),
        0x090: (0x090, 2, "FileDataStoreListReferenceFND"),
        0x094: (0x094, 1, "FileDataStoreObjectReferenceFND"),
        0x0A4: (0x0A4, 1, "ObjectDeclaration2RefCountFND"),
        0x0A5: (0x0A5, 1, "ObjectDeclaration2LargeRefCountFND"),
        0x0B0: (0x0B0, 2, "ObjectGroupListReferenceFND"),
        0x0B4: (0x0B4, 0, "ObjectGroupStartFND"),
        0x0B8: (0x0B8, 0, "ObjectGroupEndFND"),
        0x0C2: (0x0C2, 1, "HashedChunkDescriptor2FND"),
        0x0C4: (0x0C4, 1, "ReadOnlyObjectDeclaration2RefCountFND"),
        0x0C5: (0x0C5, 1, "ReadOnlyObjectDeclaration2LargeRefCountFND"),
        0x0FF: (0x0FF, -1, "ChunkTerminatorFND")
    }

    def __init__(self, file):
        (packed,) = struct.unpack('<I', file.read(4))

        self.file_node_id = packed & 0x3ff
        registered = self._file_node_ids.get(self.file_node_id)
        self.file_node_type = "Invalid" if registered is None else registered[2]

        self.size = (packed >> 10) & 0x1fff
        self.stp_format = (packed >> 23) & 0x3
        self.cb_format = (packed >> 25) & 0x3
        self.base_type = (packed >> 27) & 0xf
        self.reserved = packed >> 31
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class FileNode:
    """A single FileNode: a 4-byte FileNodeHeader plus a type-specific payload.

    Args:
        file: seekable binary stream positioned at the start of the node.
        document: document object; its ``cur_revision`` and
            ``global_identification_table`` are updated while parsing.

    Attributes:
        document: the ``document`` argument.
        file_node_header: the decoded FileNodeHeader.
        children: FileNodeList objects reached through ``data.ref`` when the
            header's ``base_type`` is 2.
        data: parsed payload for node types that have a parser; ``None`` for
            node types handled without a payload parser.  Not set at all for
            any other node type (callers may rely on ``hasattr``).
        property_set: only set for object declarations whose JCID is a
            property set.

    ``count`` is a class-wide tally of every FileNode constructed.

    On return the stream is positioned at the end of this node (node start
    plus the size stored in the header), so the next FileNode is read from the
    right place even when this node's payload was not, or only partly, parsed.
    """
    count = 0

    def __init__(self, file, document):
        self.document = document
        node_start = file.tell()
        self.file_node_header = FileNodeHeader(file)
        header = self.file_node_header
        if DEBUG:
            print(f"{file.tell()} {self.file_node_header.file_node_type} {self.file_node_header.base_type}")
        self.children = []
        FileNode.count += 1

        self._parse_data(file, header)

        # base_type 2: the payload references another FileNodeList.  ``data``
        # may be missing for node types without a parser, hence getattr.
        if header.base_type == 2:
            ref = getattr(getattr(self, 'data', None), 'ref', None)
            if ref:
                resume_at = file.tell()
                self.children.append(FileNodeList(file, self.document, ref))
                file.seek(resume_at)

        # The header records the size of the whole node.  Jump to its end so
        # that unparsed or partly parsed payloads cannot misalign the stream.
        # Sizes below 4 cannot cover the header itself and are ignored.
        if header.size >= 4:
            file.seek(node_start + header.size)

    def _parse_object_declaration(self, file, declaration_cls):
        """Parse an object declaration and, if present, its property set."""
        self.data = declaration_cls(file, self.document, self.file_node_header)
        if self.data.body.jcid.is_property_set_jcid:
            # The property set lives at the referenced offset; read it there
            # and return to the position right after the declaration.
            resume_at = file.tell()
            file.seek(self.data.ref.stp)
            self.property_set = ObjectSpaceObjectPropSet(file, self.document)
            file.seek(resume_at)

    def _parse_data(self, file, header):
        """Parse the payload following the header for the known node types."""
        node_type = header.file_node_type
        document = self.document

        if node_type == "ObjectGroupStartFND":
            self.data = ObjectGroupStartFND(file)
        elif node_type == "ObjectSpaceManifestListReferenceFND":
            self.data = ObjectSpaceManifestListReferenceFND(file, header)
        elif node_type == "ObjectSpaceManifestListStartFND":
            self.data = ObjectSpaceManifestListStartFND(file)
        elif node_type == "RevisionManifestListReferenceFND":
            self.data = RevisionManifestListReferenceFND(file, header)
        elif node_type == "RevisionManifestListStartFND":
            self.data = RevisionManifestListStartFND(file)
        elif node_type == "RevisionManifestStart4FND":
            self.data = RevisionManifestStart4FND(file)
            document.cur_revision = self.data.rid
        elif node_type == "RevisionManifestStart6FND":
            self.data = RevisionManifestStart6FND(file)
            document.cur_revision = self.data.rid
        elif node_type == "RevisionManifestStart7FND":
            self.data = RevisionManifestStart7FND(file)
            document.cur_revision = self.data.base.rid
        elif node_type == "ObjectGroupListReferenceFND":
            self.data = ObjectGroupListReferenceFND(file, header)
        elif node_type == "GlobalIdTableEntryFNDX":
            self.data = GlobalIdTableEntryFNDX(file)
            # One index -> GUID table per revision, created on first use.
            if document.cur_revision not in document.global_identification_table:
                document.global_identification_table[document.cur_revision] = {}
            revision_table = document.global_identification_table[document.cur_revision]
            revision_table[self.data.index] = self.data.guid
        elif node_type == "DataSignatureGroupDefinitionFND":
            self.data = DataSignatureGroupDefinitionFND(file)
        elif node_type == "ObjectDeclaration2LargeRefCountFND":
            self._parse_object_declaration(file, ObjectDeclaration2LargeRefCountFND)
        elif node_type == "ObjectDeclaration2RefCountFND":
            self._parse_object_declaration(file, ObjectDeclaration2RefCountFND)
        elif node_type == "ReadOnlyObjectDeclaration2LargeRefCountFND":
            self._parse_object_declaration(file, ReadOnlyObjectDeclaration2LargeRefCountFND)
        elif node_type == "ReadOnlyObjectDeclaration2RefCountFND":
            self._parse_object_declaration(file, ReadOnlyObjectDeclaration2RefCountFND)
        elif node_type == "FileDataStoreListReferenceFND":
            self.data = FileDataStoreListReferenceFND(file, header)
        elif node_type == "FileDataStoreObjectReferenceFND":
            self.data = FileDataStoreObjectReferenceFND(file, header)
        elif node_type == "ObjectInfoDependencyOverridesFND":
            self.data = ObjectInfoDependencyOverridesFND(file, header, document)
        elif node_type == "RootObjectReference2FNDX":
            self.data = RootObjectReference2FNDX(file, document)
        elif node_type == "RootObjectReference3FND":
            self.data = RootObjectReference3FND(file)
        elif node_type == "ObjectSpaceManifestRootFND":
            self.data = ObjectSpaceManifestRootFND(file)
        elif node_type == "ObjectDeclarationFileData3RefCountFND":
            self.data = ObjectDeclarationFileData3RefCountFND(file, document)
        elif node_type == "RevisionRoleDeclarationFND":
            self.data = RevisionRoleDeclarationFND(file)
        elif node_type == "RevisionRoleAndContextDeclarationFND":
            self.data = RevisionRoleAndContextDeclarationFND(file)
        elif header.file_node_id in (0x02D, 0x02E, 0x041, 0x042):
            # These are ObjectDeclaration FNDX types that were not specifically handled
            # They use similar structures to ObjectDeclaration2RefCountFND
            self.data = None  # Placeholder or implement a generic FNDX parser
            if DEBUG:
                print(f"Warning: {self.file_node_header.file_node_type} not fully implemented")
        elif node_type in ("RevisionManifestEndFND", "ObjectGroupEndFND"):
            # no data part
            self.data = None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class ExtendedGUID:
    """A 20-byte identifier: 16 GUID bytes followed by an unsigned 32-bit ``n``.

    The GUID bytes are interpreted with little-endian field order and exposed
    as a ``uuid.UUID`` in ``guid``.
    """

    def __init__(self, file):
        guid_bytes, counter = struct.unpack('<16sI', file.read(20))
        self.guid = uuid.UUID(bytes_le=guid_bytes)
        self.n = counter

    def __repr__(self):
        return 'ExtendedGUID:(guid:{}, n:{})'.format(self.guid, self.n)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class FileNodeChunkReference:
    """Variable-width (offset, size) reference laid out per a FileNodeHeader.

    ``stp_format`` selects how the offset ``stp`` is stored:

    * 0 - 8 bytes, 1 - 4 bytes, 2 - 2 bytes scaled, 3 - 4 bytes scaled

    ``cb_format`` selects how the byte count ``cb`` is stored:

    * 0 - 4 bytes, 1 - 8 bytes, 2 - 1 byte scaled, 3 - 2 bytes scaled

    "Scaled" values are multiplied by 8 after reading.  ``invalid`` is the
    offset value that, together with ``cb == 0``, marks a nil reference for
    the chosen ``stp_format``.

    Raises ``KeyError`` for a format outside 0-3.
    """

    # stp_format -> (struct code, width in bytes, multiply by 8?, nil offset)
    _STP_LAYOUTS = {
        0: ('Q', 8, False, 0xffffffffffffffff),
        1: ('I', 4, False, 0xffffffff),
        2: ('H', 2, True, 0x7fff8),
        3: ('I', 4, True, 0x7fffffff8),
    }
    # cb_format -> (struct code, width in bytes, multiply by 8?)
    _CB_LAYOUTS = {
        0: ('I', 4, False),
        1: ('Q', 8, False),
        2: ('B', 1, True),
        3: ('H', 2, True),
    }

    def __init__(self, file, stp_format, cb_format):
        stp_code, stp_width, stp_scaled, self.invalid = self._STP_LAYOUTS[stp_format]
        cb_code, cb_width, cb_scaled = self._CB_LAYOUTS[cb_format]

        layout = '<' + stp_code + cb_code
        offset, length = struct.unpack(layout, file.read(stp_width + cb_width))

        self.stp = offset * 8 if stp_scaled else offset
        self.cb = length * 8 if cb_scaled else length

    def is_fcr_nil(self):
        """Return True when this reference is the nil reference."""
        if self.cb != 0:
            return False
        return (self.stp & self.invalid) == self.invalid

    def __repr__(self):
        return 'FileChunkReference:(stp:{}, cb:{})'.format(self.stp, self.cb)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class FileChunkReference64x32:
    """12-byte reference: 64-bit offset ``stp`` followed by 32-bit size ``cb``.

    ``my_bytes`` must be exactly 12 bytes (little-endian); otherwise
    ``struct.error`` is raised.
    """

    def __init__(self, my_bytes):
        offset, length = struct.unpack('<QI', my_bytes)
        self.stp = offset
        self.cb = length
        # Offset value that marks a nil reference.
        self.invalid = 0xffffffffffffffff

    def is_fcr_nil(self):
        """Return True when the offset is all ones and the size is zero."""
        if self.cb != 0:
            return False
        return (self.stp & self.invalid) == self.invalid

    def __repr__(self):
        return 'FileChunkReference64x32:(stp:{}, cb:{})'.format(self.stp, self.cb)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class FileChunkReference32:
    """8-byte reference: 32-bit offset ``stp`` followed by 32-bit size ``cb``.

    ``my_bytes`` must be exactly 8 bytes (little-endian); otherwise
    ``struct.error`` is raised.
    """

    def __init__(self, my_bytes):
        offset, length = struct.unpack('<II', my_bytes)
        self.stp = offset
        self.cb = length
        # Offset value that marks a nil reference.
        self.invalid = 0xffffffff

    def is_fcr_nil(self):
        """Return True when the offset is all ones and the size is zero."""
        if self.cb != 0:
            return False
        return (self.stp & self.invalid) == self.invalid

    def __repr__(self):
        return 'FileChunkReference32:(stp:{}, cb:{})'.format(self.stp, self.cb)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
class ObjectGroupStartFND:
    """Payload of an ObjectGroupStartFND node: one ExtendedGUID in ``oid``."""

    def __init__(self, file):
        group_id = ExtendedGUID(file)
        self.oid = group_id
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
class ObjectSpaceManifestRootFND:
    """Payload of an ObjectSpaceManifestRootFND node.

    Reads one ExtendedGUID and stores it in ``gosid_root``.
    """

    def __init__(self, file):
        root_id = ExtendedGUID(file)
        self.gosid_root = root_id
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
class ObjectSpaceManifestListStartFND:
    """Payload of an ObjectSpaceManifestListStartFND node.

    Reads one ExtendedGUID and stores it in ``gosid``.
    """

    def __init__(self, file):
        space_id = ExtendedGUID(file)
        self.gosid = space_id
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
class ObjectSpaceManifestListReferenceFND:
    """Payload of an ObjectSpaceManifestListReferenceFND node.

    Reads a chunk reference (``ref``), laid out according to the formats in
    ``file_node_header``, followed by an ExtendedGUID (``gosid``).
    """

    def __init__(self, file, file_node_header):
        stp_format = file_node_header.stp_format
        cb_format = file_node_header.cb_format
        self.ref = FileNodeChunkReference(file, stp_format, cb_format)
        self.gosid = ExtendedGUID(file)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
class RevisionManifestListReferenceFND:
    """Payload of a RevisionManifestListReferenceFND node.

    Reads a single chunk reference (``ref``) laid out according to the
    formats in ``file_node_header``.
    """

    def __init__(self, file, file_node_header):
        stp_format = file_node_header.stp_format
        cb_format = file_node_header.cb_format
        self.ref = FileNodeChunkReference(file, stp_format, cb_format)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class RevisionManifestListStartFND:
    """Payload of a RevisionManifestListStartFND node.

    Reads an ExtendedGUID (``gosid``) followed by 4 bytes that are stored
    undecoded, as raw ``bytes``, in ``n_instance``.
    """

    def __init__(self, file):
        self.gosid = ExtendedGUID(file)
        raw_instance = file.read(4)
        self.n_instance = raw_instance
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
class RevisionManifestStart4FND:
    """Payload of a RevisionManifestStart4FND node.

    Reads, in order: ``rid`` and ``rid_dependent`` (ExtendedGUIDs), then 14
    bytes holding ``time_creation`` (8 raw bytes), ``revision_role``
    (unsigned 32-bit) and ``odcs_default`` (unsigned 16-bit).
    """

    def __init__(self, file):
        self.rid = ExtendedGUID(file)
        self.rid_dependent = ExtendedGUID(file)

        created, role, odcs = struct.unpack('<8sIH', file.read(14))
        self.time_creation = created
        self.revision_role = role
        self.odcs_default = odcs
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
class RevisionManifestStart6FND:
    """Payload of a RevisionManifestStart6FND node.

    Reads ``rid`` and ``rid_dependent`` (ExtendedGUIDs), then 6 bytes holding
    ``revision_role`` (unsigned 32-bit) and ``odcs_default`` (unsigned 16-bit).
    """

    def __init__(self, file):
        self.rid = ExtendedGUID(file)
        self.rid_dependent = ExtendedGUID(file)

        role, odcs = struct.unpack('<IH', file.read(6))
        self.revision_role = role
        self.odcs_default = odcs
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
class ObjectGroupListReferenceFND:
    """Payload of an ObjectGroupListReferenceFND node.

    Reads a chunk reference (``ref``), laid out according to the formats in
    ``file_node_header``, followed by an ExtendedGUID (``object_group_id``).
    """

    def __init__(self, file, file_node_header):
        stp_format = file_node_header.stp_format
        cb_format = file_node_header.cb_format
        self.ref = FileNodeChunkReference(file, stp_format, cb_format)
        self.object_group_id = ExtendedGUID(file)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class GlobalIdTableEntryFNDX:
    """Payload of a GlobalIdTableEntryFNDX node: one (index, GUID) pair.

    Reads 20 bytes: an unsigned 32-bit ``index`` followed by 16 GUID bytes,
    exposed as a ``uuid.UUID`` in ``guid``.
    """

    def __init__(self, file):
        entry_index, guid_bytes = struct.unpack('<I16s', file.read(20))
        self.index = entry_index
        self.guid = uuid.UUID(bytes_le=guid_bytes)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
class DataSignatureGroupDefinitionFND:
    """Payload of a DataSignatureGroupDefinitionFND node.

    Reads one ExtendedGUID and stores it in ``data_signature_group``.
    """

    def __init__(self, file):
        signature_group = ExtendedGUID(file)
        self.data_signature_group = signature_group
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class ObjectDeclaration2LargeRefCountFND:
    """Object declaration with a 32-bit reference count.

    Reads a chunk reference (``ref``), an ObjectDeclaration2Body (``body``)
    and an unsigned 32-bit little-endian count (``c_ref``).
    """

    def __init__(self, file, document, file_node_header):
        stp_format = file_node_header.stp_format
        cb_format = file_node_header.cb_format
        self.ref = FileNodeChunkReference(file, stp_format, cb_format)
        self.body = ObjectDeclaration2Body(file, document)
        (self.c_ref,) = struct.unpack('<I', file.read(4))
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
class ObjectDeclaration2RefCountFND:
    """Object declaration with an 8-bit reference count.

    Reads a chunk reference (``ref``), an ObjectDeclaration2Body (``body``)
    and a single unsigned byte (``c_ref``).
    """

    def __init__(self, file, document, file_node_header):
        stp_format = file_node_header.stp_format
        cb_format = file_node_header.cb_format
        self.ref = FileNodeChunkReference(file, stp_format, cb_format)
        self.body = ObjectDeclaration2Body(file, document)
        (self.c_ref,) = struct.unpack('<B', file.read(1))
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
class ReadOnlyObjectDeclaration2LargeRefCountFND(ObjectDeclaration2LargeRefCountFND):
    """ObjectDeclaration2LargeRefCountFND followed by 16 raw bytes.

    The trailing 16 bytes are stored undecoded in ``md5_hash``.
    """

    def __init__(self, file, document, file_node_header):
        super().__init__(file, document, file_node_header)
        (digest,) = struct.unpack('16s', file.read(16))
        self.md5_hash = digest
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
class ReadOnlyObjectDeclaration2RefCountFND(ObjectDeclaration2RefCountFND):
    """ObjectDeclaration2RefCountFND followed by 16 raw bytes.

    The trailing 16 bytes are stored undecoded in ``md5_hash``.
    """

    def __init__(self, file, document, file_node_header):
        super().__init__(file, document, file_node_header)
        (digest,) = struct.unpack('16s', file.read(16))
        self.md5_hash = digest
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
class ObjectDeclaration2Body:
    """Common body of the ObjectDeclaration2* nodes.

    Reads a CompactID (``oid``), a JCID (``jcid``) and one flag byte whose
    bit 0 becomes ``f_has_oid_references`` and bit 1
    ``f_has_osid_references``.
    """

    def __init__(self, file, document):
        self.oid = CompactID(file, document)
        self.jcid = JCID(file)

        (flags,) = struct.unpack('B', file.read(1))
        self.f_has_oid_references = bool(flags & 0x1)
        self.f_has_osid_references = bool(flags & 0x2)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
class ObjectInfoDependencyOverridesFND:
    """Payload of an ObjectInfoDependencyOverridesFND node.

    Reads a chunk reference (``ref``) and the override data (``data``).  When
    ``ref`` is nil the data is stored inline, directly after the reference;
    otherwise it is read from the offset ``ref`` points to and the stream is
    returned to the position right after the reference.

    Args:
        file: seekable binary stream positioned at the node payload.
        file_node_header: header providing ``stp_format`` / ``cb_format``.
        document: document object passed on to the override entries.
    """

    def __init__(self, file, file_node_header, document):
        self.ref = FileNodeChunkReference(
            file, file_node_header.stp_format, file_node_header.cb_format)
        if self.ref.is_fcr_nil():
            # Nil reference: the override data follows inline.
            self.data = ObjectInfoDependencyOverrideData(file, document)
        else:
            # The data lives elsewhere in the file.  Read it there so that
            # ``data`` is always available, then come back so the caller
            # continues right after this node's own bytes.
            resume_at = file.tell()
            file.seek(self.ref.stp)
            self.data = ObjectInfoDependencyOverrideData(file, document)
            file.seek(resume_at)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
class FileDataStoreListReferenceFND:
    """Payload of a FileDataStoreListReferenceFND node.

    Reads a single chunk reference (``ref``) laid out according to the
    formats in ``file_node_header``.
    """

    def __init__(self, file, file_node_header):
        stp_format = file_node_header.stp_format
        cb_format = file_node_header.cb_format
        self.ref = FileNodeChunkReference(file, stp_format, cb_format)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
class FileDataStoreObjectReferenceFND:
    """Payload of a FileDataStoreObjectReferenceFND node.

    Reads a chunk reference (``ref``) and a 16-byte GUID (``guid_reference``),
    then loads the FileDataStoreObject found at ``ref.stp`` into
    ``file_data_store_object``.  The stream position is restored to just
    after the GUID before returning.
    """

    def __init__(self, file, file_node_header):
        stp_format = file_node_header.stp_format
        cb_format = file_node_header.cb_format
        self.ref = FileNodeChunkReference(file, stp_format, cb_format)

        (guid_bytes,) = struct.unpack('<16s', file.read(16))
        self.guid_reference = uuid.UUID(bytes_le=guid_bytes)

        # Visit the referenced object, then return to where this node ended.
        resume_at = file.tell()
        file.seek(self.ref.stp)
        self.file_data_store_object = FileDataStoreObject(file, self.ref)
        file.seek(resume_at)

    def __str__(self):
        store_object_text = str(self.file_data_store_object)
        return 'FileDataStoreObjectReferenceFND: (guid_reference:{},file_data_store_object:{}'.format(
            self.guid_reference,
            store_object_text
        )
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
class ObjectInfoDependencyOverrideData:
    """Counts plus the list of reference-count override entries.

    Reads three unsigned 32-bit values (``c_8bit_overrides``,
    ``c_32bit_overrides``, ``crc``), then ``c_8bit_overrides`` entries of
    ObjectInfoDependencyOverride8 followed by ``c_32bit_overrides`` entries of
    ObjectInfoDependencyOverride32.
    """

    def __init__(self, file, document):
        counts = struct.unpack('<III', file.read(12))
        self.c_8bit_overrides, self.c_32bit_overrides, self.crc = counts

        # NOTE(review): both the 8-bit and the 32-bit entries are collected in
        # ``overrides_1`` (8-bit entries first); confirm callers expect that.
        self.overrides_1 = []
        self.overrides_1.extend(
            ObjectInfoDependencyOverride8(file, document)
            for _ in range(self.c_8bit_overrides))
        self.overrides_1.extend(
            ObjectInfoDependencyOverride32(file, document)
            for _ in range(self.c_32bit_overrides))
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
class ObjectInfoDependencyOverride8:
    """Override entry: a CompactID (``oid``) and a one-byte count (``c_ref``)."""

    def __init__(self, file, document):
        self.oid = CompactID(file, document)
        (count,) = struct.unpack('B', file.read(1))
        self.c_ref = count
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
class ObjectInfoDependencyOverride32:
    """Override entry: a CompactID (``oid``) and a 32-bit count (``c_ref``)."""

    def __init__(self, file, document):
        self.oid = CompactID(file, document)
        (count,) = struct.unpack('<I', file.read(4))
        self.c_ref = count
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
class RootObjectReference2FNDX:
    """Payload of a RootObjectReference2FNDX node.

    Reads a CompactID (``oid_root``) and an unsigned 32-bit ``root_role``.
    """

    def __init__(self, file, document):
        self.oid_root = CompactID(file, document)
        (role,) = struct.unpack('<I', file.read(4))
        self.root_role = role
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
class RootObjectReference3FND:
    """Payload of a RootObjectReference3FND node.

    Reads an ExtendedGUID (``oid_root_guid``) and an unsigned 32-bit value
    stored in ``root_role_guid_ver``.
    """

    def __init__(self, file):
        self.oid_root_guid = ExtendedGUID(file)
        (role,) = struct.unpack('<I', file.read(4))
        self.root_role_guid_ver = role
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
class ObjectDeclarationFileData3RefCountFND:
    """Payload of an ObjectDeclarationFileData3RefCountFND node.

    Reads, in order: a CompactID (``oid``), a JCID (``jcid``), a one-byte
    count (``c_ref``) and two length-prefixed strings
    (``file_data_reference`` then ``extension_storage_buffer``).
    """

    def __init__(self, file, document):
        self.oid = CompactID(file, document)
        self.jcid = JCID(file)
        (count,) = struct.unpack('<B', file.read(1))
        self.c_ref = count
        self.file_data_reference = StringInStorageBuffer(file)
        self.extension_storage_buffer = StringInStorageBuffer(file)

    def __str__(self):
        fields = (
            self.jcid,
            self.extension_storage_buffer,
            self.file_data_reference,
        )
        return 'ObjectDeclarationFileData3RefCountFND: (jcid:{}, extension_storage_buffer:{}, file_data_reference:{}'.format(
            *fields
        )
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
class RevisionRoleDeclarationFND:
    """Payload of a RevisionRoleDeclarationFND node.

    Reads an ExtendedGUID (``rid``) and an unsigned 32-bit value stored in
    ``revision_role_decl``.
    """

    def __init__(self, file):
        self.rid = ExtendedGUID(file)
        (role,) = struct.unpack('<I', file.read(4))
        self.revision_role_decl = role
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
class RevisionRoleAndContextDeclarationFND:
    """A RevisionRoleDeclarationFND (``base``) followed by an ExtendedGUID.

    The trailing ExtendedGUID is stored in ``gctxid``.
    """

    def __init__(self, file):
        role_declaration = RevisionRoleDeclarationFND(file)
        self.base = role_declaration
        self.gctxid = ExtendedGUID(file)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
class RevisionManifestStart7FND:
    """A RevisionManifestStart6FND (``base``) followed by an ExtendedGUID.

    The trailing ExtendedGUID is stored in ``gctxid``.
    """

    def __init__(self, file):
        start6 = RevisionManifestStart6FND(file)
        self.base = start6
        self.gctxid = ExtendedGUID(file)
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
class CompactID:
    """4-byte compact identifier resolved through the document's GUID table.

    The packed little-endian value holds ``n`` in its low 8 bits and
    ``guidIndex`` in the remaining 24 bits.  The revision that was current on
    ``document`` when the id was read is remembered in ``current_revision`` so
    the GUID can be looked up later in
    ``document.global_identification_table[current_revision][guidIndex]``.

    Args:
        file: binary stream positioned at the 4-byte value.
        document: object exposing ``cur_revision`` and
            ``global_identification_table``.
    """

    def __init__(self, file, document):
        (packed,) = struct.unpack('<I', file.read(4))
        self.n = packed & 0xff
        self.guidIndex = packed >> 8
        self.document = document
        self.current_revision = self.document.cur_revision

    def __str__(self):
        """Return ``'<ExtendedGUID> (guid, n)'``.

        The GUID is looked up at call time.  If the revision or index is not
        in the table, a placeholder naming the index is used instead of
        raising, so printing or logging the object can never fail.
        """
        table = self.document.global_identification_table
        try:
            guid = table[self.current_revision][self.guidIndex]
        except KeyError:
            guid = '<unresolved guidIndex {}>'.format(self.guidIndex)
        return '<ExtendedGUID> ({}, {})'.format(guid, self.n)

    # Both representations are identical by design.
    __repr__ = __str__
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
class JCID:
    """32-bit JCID value with its flag bits decoded.

    The low 16 bits are ``index``; bits 16 to 20 are exposed, in order, as
    ``is_binary``, ``is_property_set_jcid``, ``is_graph_node_jcid``,
    ``is_file_data_jcid`` and ``is_read_only_jcid_flag``.  The string form is
    the name registered for the full value, or ``'Unknown'``.
    """

    # full JCID value -> symbolic name
    _jcid_name_mapping = {
        0x00120001: "jcidReadOnlyPersistablePropertyContainerForAuthor",
        0x00020001: "jcidPersistablePropertyContainer",
        0x00060007: "jcidSectionNode",
        0x00060008: "jcidPageSeriesNode",
        0x0006000B: "jcidPageNode",
        0x0006000C: "jcidOutlineNode",
        0x0006000D: "jcidOutlineElementNode",
        0x0006000E: "jcidRichTextOENode",
        0x00060011: "jcidImageNode",
        0x00060012: "jcidNumberListNode",
        0x00060019: "jcidOutlineGroup",
        0x00060022: "jcidTableNode",
        0x00060023: "jcidTableRowNode",
        0x00060024: "jcidTableCellNode",
        0x0006002C: "jcidTitleNode",
        0x00020030: "jcidPageMetaData",
        0x00020031: "jcidSectionMetaData",
        0x00060035: "jcidEmbeddedFileNode",
        0x00060037: "jcidPageManifestNode",
        0x00020038: "jcidConflictPageMetaData",
        0x0006003C: "jcidVersionHistoryContent",
        0x0006003D: "jcidVersionProxy",
        0x00120043: "jcidNoteTagSharedDefinitionContainer",
        0x00020044: "jcidRevisionMetaData",
        0x00020046: "jcidVersionHistoryMetaData",
        0x0012004D: "jcidParagraphStyleObject",
    }

    def __init__(self, file):
        (value,) = struct.unpack('<I', file.read(4))
        self.jcid = value
        self.index = value & 0xffff

        flag_bits = value >> 16
        self.is_binary = bool(flag_bits & 0x01)
        self.is_property_set_jcid = bool(flag_bits & 0x02)
        self.is_graph_node_jcid = bool(flag_bits & 0x04)
        self.is_file_data_jcid = bool(flag_bits & 0x08)
        self.is_read_only_jcid_flag = bool(flag_bits & 0x10)

    def get_jcid_name(self):
        """Return the registered name for this JCID, or ``'Unknown'``."""
        return self._jcid_name_mapping.get(self.jcid, 'Unknown')

    def __str__(self):
        return self.get_jcid_name()

    def __repr__(self):
        return self.get_jcid_name()
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
class StringInStorageBuffer:
    """Length-prefixed UTF-16 string.

    Reads an unsigned 32-bit little-endian character count (``cch``) followed
    by ``cch * 2`` bytes (``length_in_bytes``) of text, which is decoded into
    ``string_data_buffer``.

    Raises:
        struct.error: if the 4-byte count cannot be read.
        UnicodeDecodeError: if the text bytes are not valid UTF-16.
    """

    def __init__(self, file):
        (self.cch,) = struct.unpack('<I', file.read(4))
        self.length_in_bytes = self.cch * 2
        raw_text = file.read(self.length_in_bytes)
        # Decode explicitly as little-endian, like every other field in this
        # file.  The generic 'utf-16' codec would treat leading FF FE / FE FF
        # bytes as a byte-order mark (dropping them, or switching to
        # big-endian) and otherwise fall back to the host's native byte order.
        self.string_data_buffer = raw_text.decode('utf-16-le')

    def __str__(self):
        return self.string_data_buffer
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
class FileDataStoreObject:
    """Embedded file data wrapped between a header GUID and a footer GUID.

    Layout read from ``file`` (already positioned at the object):

    * 36-byte header: 16-byte GUID, unsigned 64-bit data length
      (``cb_length_file_data``), 4 bytes (``unused``), 8 bytes (``reserved``)
    * ``cb_length_file_data`` bytes of data (``file_data_bytes``)
    * a 16-byte GUID occupying the last 16 bytes of the chunk described by
      ``file_node_chunk_reference`` (``stp + cb - 16``)

    Both GUIDs are exposed as ``uuid.UUID`` objects.
    """

    def __init__(self, file, file_node_chunk_reference):
        header_guid, payload_length, unused, reserved = struct.unpack(
            '<16sQ4s8s', file.read(36))
        self.cb_length_file_data = payload_length
        self.unused = unused
        self.reserved = reserved
        self.file_data_bytes = file.read(payload_length)

        # The footer sits at the very end of the referenced chunk.
        chunk_end = file_node_chunk_reference.stp + file_node_chunk_reference.cb
        file.seek(chunk_end - 16)
        (footer_guid,) = struct.unpack('16s', file.read(16))

        self.guid_header_file_data = uuid.UUID(bytes_le=header_guid)
        self.guid_footer_file_data = uuid.UUID(bytes_le=footer_guid)

    def __str__(self):
        """Return the first 128 data bytes as a hex string."""
        preview = self.file_data_bytes[:128]
        return preview.hex()
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
class ObjectSpaceObjectPropSet:
    """Id streams followed by the PropertySet that consumes them.

    Always reads the first id stream (``oids_stream_set``).  Its header then
    decides which optional streams follow:

    * ``osids_stream_set`` is read unless ``osid_stream_not_present`` is set
    * ``context_ids_stream_set_obj`` is read when ``extended_streams_present``
      is set

    Streams that are not read are ``None``.  Finally the PropertySet is parsed
    into ``body``.
    """

    def __init__(self, file, document):
        oid_stream = ObjectSpaceObjectStreamOfIDs(file, document)
        stream_flags = oid_stream.header
        self.oids_stream_set = oid_stream

        if stream_flags.osid_stream_not_present:
            self.osids_stream_set = None
        else:
            self.osids_stream_set = ObjectSpaceObjectStreamOfIDs(file, document)

        if stream_flags.extended_streams_present:
            self.context_ids_stream_set_obj = ObjectSpaceObjectStreamOfIDs(file, document)
        else:
            self.context_ids_stream_set_obj = None

        self.body = PropertySet(
            file,
            self.oids_stream_set,
            self.osids_stream_set,
            self.context_ids_stream_set_obj,
            document)
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
class ObjectSpaceObjectStreamOfIDs:
    """A counted list of CompactIDs with a simple read cursor.

    The stream header gives the number of CompactIDs that follow; they are
    stored in ``body``.  ``head`` is the index of the next id returned by
    ``read()``.
    """

    def __init__(self, file, document):
        self.header = ObjectSpaceObjectStreamHeader(file)
        self.head = 0
        self.body = [CompactID(file, document) for _ in range(self.header.count)]

    def read(self):
        """Return the next id and advance, or ``None`` once exhausted."""
        if self.head >= len(self.body):
            return None
        item = self.body[self.head]
        self.head += 1
        return item

    def reset(self):
        """Rewind the cursor to the first id."""
        self.head = 0
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
class ObjectSpaceObjectStreamHeader:
    """4-byte header of an id stream.

    Decoded from one little-endian unsigned 32-bit value:

    * bits 0-23 ``count``
    * bit 30    ``extended_streams_present``
    * bit 31    ``osid_stream_not_present``
    """

    def __init__(self, file):
        (packed,) = struct.unpack('<I', file.read(4))
        self.count = packed & 0xffffff
        self.extended_streams_present = bool((packed >> 30) & 1)
        self.osid_stream_not_present = bool((packed >> 31) & 1)
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
class PropertySet:
    """A parsed property set: property identifiers plus one value for each.

    The constructor reads a 16-bit property count, that many ``PropertyID``
    entries, and then one value per entry whose encoding is selected by the
    entry's ``type`` field.  ``get_properties`` turns the raw values into a
    ``{name: printable value}`` dictionary.
    """

    # Width in bytes of the fixed-size value types (type code -> byte count).
    _FIXED_WIDTHS = {0x3: 1, 0x4: 2, 0x5: 4, 0x6: 8}

    # A property whose name contains one of these is rendered as a pixel size.
    _SIZE_KEYWORDS = ('height', 'width', 'offset', 'margin')

    def __init__(self, file, oids, osids, context_ids, document):
        """Read a property set from ``file`` at its current position.

        Args:
            file: Binary file-like object supporting ``read`` and ``tell``.
            oids: ID stream consumed by value types 0x8 and 0x9.
            osids: ID stream consumed by value types 0xA and 0xB.
            context_ids: ID stream consumed by value types 0xC and 0xD.
            document: Owning document; its ``cur_revision`` is recorded.

        Raises:
            ValueError: If a property declares an unsupported type code.
            struct.error: If the file ends inside a fixed-size field.
        """
        self.current = file.tell()
        self.c_properties_count_set, = struct.unpack('<H', file.read(2))
        self.rg_prids_list = []
        self.indent = ''
        self.document = document
        self.current_revision = document.cur_revision
        self._formated_properties = None
        for _ in range(self.c_properties_count_set):
            self.rg_prids_list.append(PropertyID(file))

        # All identifiers come first; the values follow in the same order.
        self.rg_data_list = []
        for prid in self.rg_prids_list:
            self.rg_data_list.append(
                self._read_value(file, prid, oids, osids, context_ids, document))

    def _read_value(self, file, prid, oids, osids, context_ids, document):
        """Read the value that belongs to ``prid`` from ``file``."""
        property_type = prid.type
        if property_type == 0x1:
            # The property carries no data at all.
            return None
        if property_type == 0x2:
            # Booleans are stored inside the PropertyID itself.
            return prid.bool_value_prop_id

        width = self._FIXED_WIDTHS.get(property_type)
        if width is not None:
            # struct (rather than a bare read) so a short read raises
            # struct.error instead of silently yielding truncated bytes.
            return struct.unpack('{}s'.format(width), file.read(width))[0]

        if property_type == 0x7:
            return PrtFourBytesOfLengthFollowedByData(file)

        if 0x8 <= property_type <= 0xD:
            # 0x8/0x9 -> oids, 0xA/0xB -> osids, 0xC/0xD -> context_ids.
            stream = (oids, osids, context_ids)[(property_type - 0x8) // 2]
            count = 1
            if property_type % 2:
                # Odd codes are arrays prefixed with a 32-bit element count.
                count, = struct.unpack('<I', file.read(4))
            return self.get_compact_ids(stream, count)

        if property_type == 0x10:
            return ArrayOfPropertyValues(file, oids, osids, context_ids, document)
        if property_type == 0x11:
            return PropertySet(file, oids, osids, context_ids, document)
        raise ValueError('rg_prids_list[i].type is not valid')

    @staticmethod
    def get_compact_ids(stream_of_context_ids, count):
        """Return the next ``count`` entries read from the given ID stream."""
        return [stream_of_context_ids.read() for _ in range(count)]

    def get_properties(self):
        """Return ``{property name: printable value}`` for the named properties.

        Properties whose identifier renders as ``'Unknown'`` are skipped.  The
        result is cached; the cache is only stored once the whole dictionary
        has been built, so a failure cannot leave a partial result behind.
        """
        if self._formated_properties is not None:
            return self._formated_properties

        formatted = {}
        for prid, value in zip(self.rg_prids_list, self.rg_data_list):
            name = str(prid)
            if name == 'Unknown':
                continue
            formatted[name] = self._format_value(name.lower(), value)

        self._formated_properties = formatted
        return formatted

    @staticmethod
    def _format_value(lowered, value):
        """Render one raw value; ``lowered`` is the lower-cased property name."""
        if isinstance(value, bytes):
            return PropertySet._format_bytes(lowered, value)

        if isinstance(value, PrtFourBytesOfLengthFollowedByData):
            data = value.data
            if 'guid' in lowered:
                try:
                    return uuid.UUID(bytes_le=data).hex
                except ValueError:
                    # Not exactly 16 bytes: show the raw payload instead.
                    return data.hex()
            try:
                return data.decode('utf-16')
            except (UnicodeDecodeError, AttributeError):
                return data.hex()

        if isinstance(value, ArrayOfPropertyValues):
            return value.get_properties()

        has_keyword = (
            'time' in lowered
            or any(word in lowered for word in PropertySet._SIZE_KEYWORDS)
            or 'langid' in lowered
            or 'languageid' in lowered
        )
        if isinstance(value, list) and not has_keyword:
            return [str(item) for item in value]
        return str(value)

    @staticmethod
    def _format_bytes(lowered, raw):
        """Render a fixed-size ``bytes`` value according to its property name.

        Values that cannot be decoded (unexpected length, timestamp outside
        the ``datetime`` range, non-finite float) are shown as hex.
        """
        try:
            if 'time' in lowered:
                if len(raw) == 8:
                    ticks, = struct.unpack('<Q', raw)
                    return str(PropertySet.parse_filetime(ticks))
                if len(raw) == 4:
                    seconds, = struct.unpack('<I', raw)
                    return str(PropertySet.time32_to_datetime(seconds))
                return raw.hex()
            if any(word in lowered for word in PropertySet._SIZE_KEYWORDS):
                size, = struct.unpack('<f', raw)
                return PropertySet.half_inch_size_to_pixels(size)
            if 'langid' in lowered:
                lcid, = struct.unpack('<H', raw)
                return '{}({})'.format(PropertySet.lcid_to_string(lcid), lcid)
            if 'languageid' in lowered:
                lcid, = struct.unpack('<I', raw)
                return '{}({})'.format(PropertySet.lcid_to_string(lcid), lcid)
        except (struct.error, ValueError, OverflowError):
            return raw.hex()
        return str(raw)

    def __str__(self):
        """Return one ``<indent><name>: <value>`` line per formatted property."""
        return ''.join(
            '{}{}: {}\n'.format(self.indent, name, value)
            for name, value in self.get_properties().items())

    @staticmethod
    def half_inch_size_to_pixels(picture_width, dpi=96):
        """Convert a size given in half-inch units to whole pixels.

        Raises:
            ValueError, OverflowError: If ``picture_width`` is NaN or infinite.
        """
        pixels_per_half_inch = dpi / 2
        return int(picture_width * pixels_per_half_inch)

    @staticmethod
    def time32_to_datetime(time32):
        """Return the naive datetime ``time32`` seconds after 1980-01-01 00:00."""
        return datetime(1980, 1, 1) + timedelta(seconds=time32)

    @staticmethod
    def parse_filetime(filetime):
        """Convert a FILETIME (100 ns ticks since 1601-01-01) to a datetime.

        Integer arithmetic is used so the result is exact to the microsecond;
        going through float seconds loses about a microsecond at this scale.

        Raises:
            OverflowError: If the result falls outside the ``datetime`` range.
        """
        return datetime(1601, 1, 1) + timedelta(microseconds=filetime // 10)

    @staticmethod
    def lcid_to_string(lcid):
        """Return the locale name for a Windows LCID, or ``'Unknown LCID'``."""
        return locale.windows_locale.get(lcid, 'Unknown LCID')
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
class ArrayOfPropertyValues:
    """A counted array of ``PropertySet`` structures (property type 0x10).

    Layout per the MS-ONESTORE ``prtArrayOfPropertyValues`` structure:

    * ``cProperties`` -- 4-byte element count;
    * ``prid`` -- a ``PropertyID`` present only when the count is non-zero,
      whose ``type`` must be 0x11 (PropertySet);
    * ``cProperties`` ``PropertySet`` structures.

    Attributes:
        c_properties: Element count announced by the file.
        prid: The element-type ``PropertyID``, or ``None`` when the array is
            empty or the file ended before it could be read.
        rg_data: The elements that were parsed successfully.
    """

    def __init__(self, file, oids, osids, context_ids, document):
        """Read the array from ``file``; parsing is best-effort.

        The arguments after ``file`` are passed unchanged to every element's
        ``PropertySet`` constructor.
        """
        self.c_properties, = struct.unpack('<I', file.read(4))
        self.prid = None
        self.rg_data = []
        if self.c_properties == 0:
            # An empty array carries neither prid nor data.
            return

        # The element-type PropertyID precedes the data and must be consumed,
        # otherwise every element would be parsed four bytes too early.
        try:
            self.prid = PropertyID(file)
        except struct.error:
            return
        if self.prid.type != 0x11:
            # Element layout is unknown, so there is nothing safe to parse.
            return

        for _ in range(self.c_properties):
            try:
                self.rg_data.append(
                    PropertySet(file, oids, osids, context_ids, document))
            except (struct.error, ValueError):
                # The size of a broken element is unknown, so the remaining
                # elements cannot be located; keep what was parsed so far.
                break

    def get_properties(self):
        """Return the formatted properties of every parsed element."""
        return [prop.get_properties() for prop in self.rg_data
                if hasattr(prop, 'get_properties')]

    def __str__(self):
        return str(self.get_properties())
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
class PrtFourBytesOfLengthFollowedByData:
    """Length-prefixed blob: a 32-bit little-endian size, then that many bytes.

    Attributes:
        cb: The size read from the prefix.
        data: Whatever ``file.read(cb)`` returned.
    """

    def __init__(self, file):
        """Read the size prefix and then the payload from ``file``."""
        (length,) = struct.unpack('<I', file.read(4))
        self.cb = length
        self.data = file.read(length)

    def __str__(self):
        """Return the payload as a hex string."""
        payload = self.data
        return payload.hex()
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
class PropertyID:
    """A 32-bit property identifier read from the file.

    Bit layout of ``value`` as decoded by the constructor:

    * bits 0-25  -> ``id``
    * bits 26-30 -> ``type`` (selects how the property's value is stored)
    * bit 31     -> ``bool_value_prop_id`` (the value of a Boolean property)
    """

    # Bit 31 carries a Boolean property's *value*, not its identity, so it is
    # masked off before the name lookup.
    _IDENTITY_MASK = 0x7FFFFFFF

    # Keys are the identifier with bit 31 clear.
    _property_id_name_mapping = {
        0x08001C00: "LayoutTightLayout",
        0x14001C01: "PageWidth",
        0x14001C02: "PageHeight",
        0x0C001C03: "OutlineElementChildLevel",
        0x08001C04: "Bold",
        0x08001C05: "Italic",
        0x08001C06: "Underline",
        0x08001C07: "Strikethrough",
        0x08001C08: "Superscript",
        0x08001C09: "Subscript",
        0x1C001C0A: "Font",
        0x10001C0B: "FontSize",
        0x14001C0C: "FontColor",
        0x14001C0D: "Highlight",
        0x1C001C12: "RgOutlineIndentDistance",
        0x0C001C13: "BodyTextAlignment",
        0x14001C14: "OffsetFromParentHoriz",
        0x14001C15: "OffsetFromParentVert",
        0x1C001C1A: "NumberListFormat",
        0x14001C1B: "LayoutMaxWidth",
        0x14001C1C: "LayoutMaxHeight",
        0x24001C1F: "ContentChildNodes",
        0x24001C20: "ElementChildNodes",
        0x08001E1E: "EnableHistory",
        0x1C001C22: "RichEditTextUnicode",
        0x24001C26: "ListNodes",
        0x1C001C30: "NotebookManagementEntityGuid",
        0x08001C34: "OutlineElementRTL",
        0x14001C3B: "LanguageID",
        0x14001C3E: "LayoutAlignmentInParent",
        0x20001C3F: "PictureContainer",
        0x14001C4C: "PageMarginTop",
        0x14001C4D: "PageMarginBottom",
        0x14001C4E: "PageMarginLeft",
        0x14001C4F: "PageMarginRight",
        0x1C001C52: "ListFont",
        0x18001C65: "TopologyCreationTimeStamp",
        0x14001C84: "LayoutAlignmentSelf",
        0x08001C87: "IsTitleTime",
        0x08001C88: "IsBoilerText",
        0x14001C8B: "PageSize",
        0x08001C8E: "PortraitPage",
        0x08001C91: "EnforceOutlineStructure",
        0x08001C92: "EditRootRTL",
        0x08001CB2: "CannotBeSelected",
        0x08001CB4: "IsTitleText",
        0x08001CB5: "IsTitleDate",
        0x14001CB7: "ListRestart",
        0x08001CBD: "IsLayoutSizeSetByUser",
        0x14001CCB: "ListSpacingMu",
        0x14001CDB: "LayoutOutlineReservedWidth",
        0x08001CDC: "LayoutResolveChildCollisions",
        0x08001CDE: "is_read_only_jcid_flag",
        0x14001CEC: "LayoutMinimumOutlineWidth",
        0x14001CF1: "LayoutCollisionPriority",
        0x1C001CF3: "CachedTitleString",
        0x08001CF9: "DescendantsCannotBeMoved",
        0x10001CFE: "RichEditTextLangID",
        0x08001CFF: "LayoutTightAlignment",
        0x0C001D01: "Charset",
        0x14001D09: "CreationTimeStamp",
        0x08001D0C: "Deletable",
        0x10001D0E: "ListMSAAIndex",
        0x08001D13: "IsBackground",
        0x14001D24: "IRecordMedia",
        0x1C001D3C: "CachedTitleStringFromPage",
        0x14001D57: "RowCount",
        0x14001D58: "ColumnCount",
        0x08001D5E: "TableBordersVisible",
        0x24001D5F: "StructureElementChildNodes",
        0x2C001D63: "ChildGraphSpaceElementNodes",
        0x1C001D66: "TableColumnWidths",
        0x1C001D75: "Author",
        0x18001D77: "LastModifiedTimeStamp",
        0x20001D78: "AuthorOriginal",
        0x20001D79: "AuthorMostRecent",
        0x14001D7A: "LastModifiedTime",
        0x08001D7C: "IsConflictPage",
        0x1C001D7D: "TableColumnsLocked",
        0x14001D82: "SchemaRevisionInOrderToRead",
        0x08001D96: "IsConflictObjectForRender",
        0x20001D9B: "EmbeddedFileContainer",
        0x1C001D9C: "EmbeddedFileName",
        0x1C001D9D: "SourceFilepath",
        0x1C001D9E: "ConflictingUserName",
        0x1C001DD7: "ImageFilename",
        0x08001DDB: "IsConflictObjectForSelection",
        0x14001DFF: "PageLevel",
        0x1C001E12: "TextRunIndex",
        0x24001E13: "TextRunFormatting",
        0x08001E14: "Hyperlink",
        0x0C001E15: "UnderlineType",
        0x08001E16: "Hidden",
        0x08001E19: "HyperlinkProtected",
        0x08001E22: "TextRunIsEmbeddedObject",
        0x14001e26: "CellShadingColor",
        0x1C001E58: "ImageAltText",
        0x08003401: "MathFormatting",
        0x2000342C: "ParagraphStyle",
        0x1400342E: "ParagraphSpaceBefore",
        0x1400342F: "ParagraphSpaceAfter",
        0x14003430: "ParagraphLineSpacingExact",
        0x24003442: "MetaDataObjectsAboveGraphSpace",
        0x24003458: "TextRunDataObject",
        0x40003499: "TextRunData",
        0x1C00345A: "ParagraphStyleId",
        0x08003462: "HasVersionPages",
        0x10003463: "ActionItemType",
        0x10003464: "NoteTagShape",
        0x14003465: "NoteTagHighlightColor",
        0x14003466: "NoteTagTextColor",
        0x14003467: "NoteTagPropertyStatus",
        0x1C003468: "NoteTagLabel",
        0x1400346E: "NoteTagCreated",
        0x1400346F: "NoteTagCompleted",
        0x20003488: "NoteTagDefinitionOid",
        0x04003489: "NoteTagStates",
        0x10003470: "ActionItemStatus",
        0x0C003473: "ActionItemSchemaVersion",
        0x08003476: "ReadingOrderRTL",
        0x0C003477: "ParagraphAlignment",
        0x3400347B: "VersionHistoryGraphSpaceContextNodes",
        0x14003480: "DisplayedPageNumber",
        0x1C00349B: "SectionDisplayName",
        0x1C00348A: "NextStyle",
        0x200034C8: "WebPictureContainer14",
        0x140034CB: "ImageUploadState",
        0x1C003498: "TextExtendedAscii",
        0x140034CD: "PictureWidth",
        0x140034CE: "PictureHeight",
        0x14001D0F: "PageMarginOriginX",
        0x14001D10: "PageMarginOriginY",
        0x1C001E20: "WzHyperlinkUrl",
        0x1400346B: "TaskTagDueDate",
        0x1C001DE9: "IsDeletedGraphSpaceContent",
    }

    def __init__(self, file):
        """Read one little-endian 32-bit identifier from ``file`` and split it.

        Raises:
            struct.error: If fewer than four bytes are available.
        """
        self.value, = struct.unpack('<I', file.read(4))
        self.id = self.value & 0x3ffffff
        self.type = (self.value >> 26) & 0x1f
        self.bool_value_prop_id = (self.value >> 31) & 1 == 1

    def get_property_name(self):
        """Return the property's name, or ``'Unknown'`` if it is not mapped.

        The Boolean value bit is ignored for the lookup, so a Boolean
        property resolves to the same name whether it is true or false.
        """
        return self._property_id_name_mapping.get(
            self.value & self._IDENTITY_MASK, 'Unknown')

    def __str__(self):
        return self.get_property_name()
|