contextbase-plugin-microsoft-mail 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_plugin_microsoft_mail-0.2.6.dist-info/METADATA +14 -0
- contextbase_plugin_microsoft_mail-0.2.6.dist-info/RECORD +18 -0
- contextbase_plugin_microsoft_mail-0.2.6.dist-info/WHEEL +4 -0
- plugin_microsoft_mail/__init__.py +1 -0
- plugin_microsoft_mail/binding_config.py +14 -0
- plugin_microsoft_mail/component.py +189 -0
- plugin_microsoft_mail/defs/__init__.py +0 -0
- plugin_microsoft_mail/defs/defs.yaml +1 -0
- plugin_microsoft_mail/models/__init__.py +1 -0
- plugin_microsoft_mail/models/ctx.py +378 -0
- plugin_microsoft_mail/models/translators.py +193 -0
- plugin_microsoft_mail/plugin.json +7 -0
- plugin_microsoft_mail/sources/__init__.py +1 -0
- plugin_microsoft_mail/sources/attachments.py +407 -0
- plugin_microsoft_mail/sources/sync.py +375 -0
- plugin_microsoft_mail/utils/__init__.py +1 -0
- plugin_microsoft_mail/utils/attachments.py +107 -0
- plugin_microsoft_mail/utils/client.py +245 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Iterator
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any
|
|
6
|
+
from urllib.parse import parse_qs, urlparse
|
|
7
|
+
|
|
8
|
+
import dlt
|
|
9
|
+
from shared_plugins.naming import (
|
|
10
|
+
dlt_resource_name,
|
|
11
|
+
dlt_source_name,
|
|
12
|
+
plugin_id_from_module,
|
|
13
|
+
)
|
|
14
|
+
from shared_plugins.resources import ctx_dlt_resource
|
|
15
|
+
|
|
16
|
+
from ..models.ctx import (
|
|
17
|
+
MAIL_FOLDER_COLUMN_DESCRIPTIONS,
|
|
18
|
+
MESSAGE_COLUMN_DESCRIPTIONS,
|
|
19
|
+
MailFolderRow,
|
|
20
|
+
MessageRow,
|
|
21
|
+
)
|
|
22
|
+
from ..models.translators import (
|
|
23
|
+
mail_folder_rows_to_ctx_models,
|
|
24
|
+
message_rows_to_ctx_models,
|
|
25
|
+
)
|
|
26
|
+
from ..utils.client import (
|
|
27
|
+
SyncGraphMailClient,
|
|
28
|
+
graph_object_to_payload,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Known unhandled correctness gaps:
|
|
32
|
+
#
|
|
33
|
+
# 1. Stale messages from removed/hidden folders. `apply_mail_folder_delta_rows`
|
|
34
|
+
# drops folder IDs from the active set when a folder is @removed or
|
|
35
|
+
# transitions to isHidden, and `messages` only iterates the active set.
|
|
36
|
+
# Existing message rows (and their attachment_content rows) under the dropped
|
|
37
|
+
# folder remain in the destination forever — no tombstone path covers this
|
|
38
|
+
# transition.
|
|
39
|
+
#
|
|
40
|
+
# 2. Attachment same-count replacements (lives in sources/attachments.py). The
|
|
41
|
+
# candidate query compares counts of materializable attachments vs. existing
|
|
42
|
+
# attachment_content rows; a message changing [att-1] -> [att-2] looks like
|
|
43
|
+
# 1 == 1 and never re-materializes. The stale file row persists, the new
|
|
44
|
+
# attachment is missing.
|
|
45
|
+
|
|
46
|
+
# Keys under which this module stores its delta-sync bookkeeping in the dlt
# source state: the mail-folder delta cursor, the set of active folders, and
# the per-folder message delta cursors.
MAIL_FOLDER_DELTA_CURSOR_URL_KEY = "cursor_url"
ACTIVE_MAIL_FOLDERS_KEY = "active_folders_by_id"
MESSAGE_DELTA_CURSOR_URLS_BY_FOLDER_ID_KEY = "cursor_urls_by_folder_id"

# PLUGIN_ID and JOB are combined by `dlt_source_name` to name the dlt source.
PLUGIN_ID = plugin_id_from_module(__file__)
JOB = "sync"
# Page size advertised through `odata.maxpagesize` in the Prefer header below.
DELTA_PAGE_SIZE = 100
# Passed as `prefer_header` on both folder and message delta page requests.
DELTA_PREFER_HEADER = f'IdType="ImmutableId", odata.maxpagesize={DELTA_PAGE_SIZE}'
# Query parameters passed on mail-folder delta page requests.
MAIL_FOLDER_DELTA_QUERY_PARAMS = {
    "$select": [
        "id",
        "displayName",
        "parentFolderId",
        "childFolderCount",
        "totalItemCount",
        "unreadItemCount",
        "isHidden",
    ],
}
# Passed as the `$orderby` query parameter on message delta page requests.
MESSAGE_DELTA_ORDERBY = ["receivedDateTime desc"]
# When False, folder payloads with a truthy `isHidden` are treated as inactive
# (see `should_include_mail_folder_payload`).
INCLUDE_HIDDEN_MAIL_FOLDERS = False
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass(frozen=True)
class MailFolderDeltaDrainResult:
    """Outcome of draining the mail-folder delta feed once."""

    # Every folder payload collected across the drained pages, in page order.
    mail_folder_rows: list[dict[str, Any]]
    # Active-folder map after applying `mail_folder_rows` to the previous map.
    active_mail_folders_by_id: dict[str, dict[str, Any]]
    # Cursor URL of the last drained page; validated as a delta cursor before
    # this result is constructed.
    cursor_url: str
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass(frozen=True)
class DeltaPage:
    """One page yielded while draining a delta feed."""

    # Rows extracted from the page by the caller-supplied `rows_from_page`.
    rows: list[Any]
    # The page's delta link when it is the final page, otherwise its next link.
    cursor_url: str
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def is_delta_cursor_url(cursor_url: str) -> bool:
    """Return True when the URL's query string carries a delta token.

    Both the ``$deltatoken`` and ``deltatoken`` parameter spellings are
    accepted. Parameters with an empty value are ignored, because
    ``parse_qs`` drops blank values by default.
    """
    query_parameters = parse_qs(urlparse(cursor_url).query)
    return any(
        token_name in query_parameters
        for token_name in ("$deltatoken", "deltatoken")
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def drain_delta_pages(
    *,
    initial_cursor_url: str | None,
    fetch_page: Callable[[str | None], Any],
    rows_from_page: Callable[[Any], list[Any]],
) -> Iterator[DeltaPage]:
    """Yield delta pages until a response carries a delta link.

    Args:
        initial_cursor_url: Cursor handed to the first ``fetch_page`` call;
            ``None`` is passed through unchanged.
        fetch_page: Fetches one response for the given cursor URL.
        rows_from_page: Extracts the rows from a fetched response.

    Yields:
        One ``DeltaPage`` per response. Intermediate pages carry the
        response's ``odata_next_link`` as their cursor; the final page
        carries its ``odata_delta_link``.

    Raises:
        RuntimeError: If a response has neither a next link nor a delta link.
    """
    pending_url = initial_cursor_url

    while True:
        page_response = fetch_page(pending_url)
        page_rows = rows_from_page(page_response)

        # A delta link marks the end of this round; emit it and stop.
        if final_link := getattr(page_response, "odata_delta_link", None):
            yield DeltaPage(rows=page_rows, cursor_url=final_link)
            break

        pending_url = getattr(page_response, "odata_next_link", None)
        if not pending_url:
            raise RuntimeError(
                "Graph delta response did not include @odata.nextLink or "
                "@odata.deltaLink"
            )

        yield DeltaPage(rows=page_rows, cursor_url=pending_url)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def message_delta_cursor_urls_for_active_mail_folders(
    previous_message_cursor_urls_by_folder_id: dict[str, str],
    active_mail_folder_ids: list[str],
) -> dict[str, str]:
    """Return the previous message cursors restricted to active folders.

    A new dict is returned and the input is not modified. Entries keep the
    order they had in ``previous_message_cursor_urls_by_folder_id``.
    """
    still_active = frozenset(active_mail_folder_ids)
    retained: dict[str, str] = {}
    for folder_id, cursor_url in previous_message_cursor_urls_by_folder_id.items():
        if folder_id not in still_active:
            continue
        retained[folder_id] = cursor_url
    return retained
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def message_delta_cursor_urls_with_cursor_for_folder(
    message_cursor_urls_by_folder_id: dict[str, str],
    *,
    folder_id: str,
    cursor_url: str,
) -> dict[str, str]:
    """Return a copy of the cursor map with ``folder_id`` set to ``cursor_url``.

    The input map is left untouched. An existing entry for ``folder_id`` is
    overwritten in place; a new entry is appended.
    """
    updated = dict(message_cursor_urls_by_folder_id)
    updated[folder_id] = cursor_url
    return updated
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def should_include_mail_folder_payload(payload: dict[str, Any]) -> bool:
    """Decide whether a folder delta payload counts as an active folder.

    A payload is excluded when it carries an ``@removed`` marker, or when
    hidden folders are not being included and its ``isHidden`` is truthy.
    """
    if payload.get("@removed") is not None:
        return False

    # `isHidden` is only consulted while hidden folders are being excluded.
    hidden_and_excluded = not INCLUDE_HIDDEN_MAIL_FOLDERS and payload.get("isHidden")
    return not hidden_and_excluded
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def mail_folder_delta_page_rows(response: Any) -> list[dict[str, Any]]:
    """Convert a folder delta response into a list of payload dicts.

    Each entry of ``response.value`` is passed through
    ``graph_object_to_payload``. A missing or empty ``value`` gives an
    empty list.

    Raises:
        RuntimeError: If a converted payload has no truthy ``id``.
    """

    def checked_payload(folder: Any) -> dict[str, Any]:
        payload = graph_object_to_payload(folder)
        if not payload.get("id"):
            raise RuntimeError(
                f"Graph mail-folder delta row missing id: keys={sorted(payload)}"
            )
        return payload

    folders = getattr(response, "value", None) or []
    return [checked_payload(folder) for folder in folders]
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def apply_mail_folder_delta_rows(
    *,
    previous_active_mail_folders_by_id: dict[str, dict[str, Any]],
    mail_folder_rows: list[dict[str, Any]],
) -> dict[str, dict[str, Any]]:
    """Apply folder delta rows to the previous active-folder map.

    Rows are applied in order on a copy of the previous map. A row that
    passes ``should_include_mail_folder_payload`` inserts or replaces its
    folder's summary entry; any other row drops that folder from the map.

    Returns:
        The updated map. The input map is not modified.
    """
    folders_by_id = {**previous_active_mail_folders_by_id}

    for delta_row in mail_folder_rows:
        key = str(delta_row["id"])

        if not should_include_mail_folder_payload(delta_row):
            # Removed or excluded folder: forget it if it was tracked.
            folders_by_id.pop(key, None)
            continue

        folders_by_id[key] = {
            "id": key,
            "display_name": delta_row.get("displayName"),
            "parent_folder_id": delta_row.get("parentFolderId"),
            "is_hidden": delta_row.get("isHidden"),
        }

    return folders_by_id
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def drain_mail_folder_delta(
    *,
    client: SyncGraphMailClient,
    previous_cursor_url: str | None,
    previous_active_mail_folders_by_id: dict[str, dict[str, Any]],
) -> MailFolderDeltaDrainResult:
    """Drain the mail-folder delta feed and compute the new active set.

    Args:
        client: Client used to fetch folder delta pages.
        previous_cursor_url: Cursor to resume from, or ``None``.
        previous_active_mail_folders_by_id: Active-folder map from the
            previous run; the drained rows are applied on top of it.

    Returns:
        The collected rows, the updated active-folder map and the final
        delta cursor URL.

    Raises:
        RuntimeError: If the drain does not end on a delta cursor URL.
    """

    def fetch_folder_page(url: str | None) -> Any:
        return client.get_folder_delta_page(
            delta_url=url,
            query_params=MAIL_FOLDER_DELTA_QUERY_PARAMS,
            prefer_header=DELTA_PREFER_HEADER,
        )

    collected_rows: list[dict[str, Any]] = []
    last_cursor_url: str | None = None

    pages = drain_delta_pages(
        initial_cursor_url=previous_cursor_url,
        fetch_page=fetch_folder_page,
        rows_from_page=mail_folder_delta_page_rows,
    )
    for page in pages:
        collected_rows.extend(page.rows)
        last_cursor_url = page.cursor_url

    ended_on_delta_cursor = last_cursor_url is not None and is_delta_cursor_url(
        last_cursor_url
    )
    if not ended_on_delta_cursor:
        raise RuntimeError("mail folder delta drain did not finish with a delta cursor")

    active_folders = apply_mail_folder_delta_rows(
        previous_active_mail_folders_by_id=previous_active_mail_folders_by_id,
        mail_folder_rows=collected_rows,
    )
    return MailFolderDeltaDrainResult(
        mail_folder_rows=collected_rows,
        active_mail_folders_by_id=active_folders,
        cursor_url=last_cursor_url,
    )
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def message_delta_page_rows(
    *,
    response: Any,
    folder_id: str,
) -> list[Any]:
    """Return the message objects of a delta response, checking each has an id.

    The objects are returned unconverted. ``folder_id`` is only used in the
    error message.

    Raises:
        RuntimeError: If any message has no truthy ``id`` attribute.
    """
    # parent_folder_id is deliberately not validated here: @removed rows carry
    # only id + @removed, and the translator recognises them through
    # additional_data and emits a tombstone using the folder_id from context.
    # Live rows lacking parent_folder_id fail pydantic validation at
    # MessageRow.parent_folder_id instead.

    def require_id(message: Any) -> Any:
        if getattr(message, "id", None):
            return message
        raise RuntimeError(
            f"Graph message delta row missing id (folder_id={folder_id!r})"
        )

    return [require_id(message) for message in getattr(response, "value", None) or []]
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def drain_message_delta_pages_for_folder(
    *,
    client: SyncGraphMailClient,
    folder_id: str,
    previous_cursor_url: str | None,
    initial_message_delta_top: int | None,
) -> Iterator[DeltaPage]:
    """Yield the message delta pages for a single mail folder.

    Args:
        client: Client used to fetch message delta pages.
        folder_id: Folder whose messages are drained.
        previous_cursor_url: Cursor to resume from, or ``None``.
        initial_message_delta_top: Value passed as the ``$top`` query
            parameter on each page request; may be ``None``.

    Yields:
        The pages produced by ``drain_delta_pages`` for this folder.
    """

    def fetch_message_page(url: str | None) -> Any:
        # A fresh parameter dict is built for every request.
        request_params = {
            "$orderby": MESSAGE_DELTA_ORDERBY,
            "$top": initial_message_delta_top,
            "$expand": [
                "attachments($select=id,name,contentType,size,isInline,lastModifiedDateTime)"
            ],
        }
        return client.get_message_delta_page(
            folder_id=folder_id,
            delta_url=url,
            query_params=request_params,
            prefer_header=DELTA_PREFER_HEADER,
        )

    def rows_of(page_response: Any) -> list[Any]:
        return message_delta_page_rows(response=page_response, folder_id=folder_id)

    yield from drain_delta_pages(
        initial_cursor_url=previous_cursor_url,
        fetch_page=fetch_message_page,
        rows_from_page=rows_of,
    )
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@dlt.source(name=dlt_source_name(PLUGIN_ID, JOB))
def microsoft_mail_source(
    binding_id: str,
    *,
    client: SyncGraphMailClient,
    initial_message_delta_top: int | None = None,
) -> tuple[Any, ...]:
    """Build the `mail_folders` and `messages` resources for one binding.

    Args:
        binding_id: Identifier passed to the row translators for every
            emitted row.
        client: Client used for all folder and message delta requests.
        initial_message_delta_top: Forwarded to the per-folder message drain,
            where it is sent as the `$top` query parameter.

    Returns:
        The two resources, `(mail_folders, messages)`.
    """
    # Both resources need the folder delta result; it is drained at most once
    # per source instance and memoized in this closure variable.
    folder_drain_cache: MailFolderDeltaDrainResult | None = None

    def get_mail_folder_delta_snapshot() -> MailFolderDeltaDrainResult:
        """Drain the folder delta once, persist its state, and cache the result."""
        nonlocal folder_drain_cache
        if folder_drain_cache is not None:
            return folder_drain_cache

        source_state = dlt.current.source_state()
        previous_cursor_url = source_state.get(MAIL_FOLDER_DELTA_CURSOR_URL_KEY)
        # Copy so the drain works on its own dict rather than the stored one.
        previous_active_mail_folders_by_id = dict(
            source_state.get(ACTIVE_MAIL_FOLDERS_KEY) or {}
        )

        result = drain_mail_folder_delta(
            client=client,
            previous_cursor_url=previous_cursor_url,
            previous_active_mail_folders_by_id=previous_active_mail_folders_by_id,
        )

        # Record the new folder cursor and active set in the source state.
        source_state[MAIL_FOLDER_DELTA_CURSOR_URL_KEY] = result.cursor_url
        source_state[ACTIVE_MAIL_FOLDERS_KEY] = result.active_mail_folders_by_id

        folder_drain_cache = result
        return result

    @ctx_dlt_resource(
        name=dlt_resource_name("mail_folders"),
        write_disposition="merge",
        primary_key=("_ctx_binding_id", "id"),
        columns={
            **MAIL_FOLDER_COLUMN_DESCRIPTIONS,
            "_ctx_deleted": {"hard_delete": True},
        },
    )
    def mail_folders() -> Iterator[MailFolderRow]:
        """Yield one translated row per folder payload in the drained delta."""
        snapshot = get_mail_folder_delta_snapshot()
        yield from mail_folder_rows_to_ctx_models(
            binding_id,
            snapshot.mail_folder_rows,
        )

    @ctx_dlt_resource(
        name=dlt_resource_name("messages"),
        write_disposition="merge",
        primary_key=("_ctx_binding_id", "id", "parent_folder_id"),
        columns={
            **MESSAGE_COLUMN_DESCRIPTIONS,
            "_ctx_deleted": {"hard_delete": True},
        },
    )
    def messages() -> Iterator[MessageRow]:
        """Drain the message delta of every active folder and yield its rows.

        Raises:
            RuntimeError: If a folder's drain does not end on a delta cursor.
        """
        snapshot = get_mail_folder_delta_snapshot()
        # Sorted so folders are processed in a stable order.
        active_mail_folder_ids = sorted(snapshot.active_mail_folders_by_id)

        source_state = dlt.current.source_state()
        # Stored cursors for folders that are no longer active are dropped here.
        previous_message_cursor_urls_by_folder_id = (
            message_delta_cursor_urls_for_active_mail_folders(
                dict(
                    source_state.get(MESSAGE_DELTA_CURSOR_URLS_BY_FOLDER_ID_KEY) or {}
                ),
                active_mail_folder_ids,
            )
        )
        updated_message_cursor_urls_by_folder_id = dict(
            previous_message_cursor_urls_by_folder_id,
        )

        for folder_id in active_mail_folder_ids:
            # None when the folder has no stored cursor yet.
            previous_message_cursor_url = previous_message_cursor_urls_by_folder_id.get(
                folder_id
            )
            message_cursor_url: str | None = previous_message_cursor_url

            for page in drain_message_delta_pages_for_folder(
                client=client,
                folder_id=folder_id,
                previous_cursor_url=previous_message_cursor_url,
                initial_message_delta_top=initial_message_delta_top,
            ):
                message_cursor_url = page.cursor_url
                yield from message_rows_to_ctx_models(
                    binding_id,
                    page.rows,
                    folder_id=folder_id,
                )

            # Only a delta cursor may be stored for the folder.
            if message_cursor_url is None or not is_delta_cursor_url(
                message_cursor_url
            ):
                raise RuntimeError(
                    "message delta drain did not finish with a delta cursor"
                )

            updated_message_cursor_urls_by_folder_id = (
                message_delta_cursor_urls_with_cursor_for_folder(
                    updated_message_cursor_urls_by_folder_id,
                    folder_id=folder_id,
                    cursor_url=message_cursor_url,
                )
            )

        # NOTE(review): written once after all folders are drained — confirm
        # this placement against the upstream file's indentation.
        source_state[MESSAGE_DELTA_CURSOR_URLS_BY_FOLDER_ID_KEY] = (
            updated_message_cursor_urls_by_folder_id
        )

    return (mail_folders, messages)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utilities for Microsoft Mail Graph and DLT spikes."""
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from base64 import b64decode
|
|
5
|
+
from collections.abc import Mapping, Sequence
|
|
6
|
+
from pathlib import PurePosixPath
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from shared_plugins.scratch import replace_scratch_dir_files
|
|
10
|
+
|
|
11
|
+
from ..models.ctx import AttachmentContentRow
|
|
12
|
+
from ..models.translators import attachment_content_row_from_graph_payload
|
|
13
|
+
|
|
14
|
+
# Attachment type identifier strings, one constant per attachment kind.
REFERENCE_ATTACHMENT_ODATA_TYPE = "#microsoft.graph.referenceAttachment"
ITEM_ATTACHMENT_ODATA_TYPE = "#microsoft.graph.itemAttachment"
FILE_ATTACHMENT_ODATA_TYPE = "#microsoft.graph.fileAttachment"
# Immutable set of the three attachment types named above.
KNOWN_ATTACHMENT_ODATA_TYPES = frozenset(
    {
        FILE_ATTACHMENT_ODATA_TYPE,
        REFERENCE_ATTACHMENT_ODATA_TYPE,
        ITEM_ATTACHMENT_ODATA_TYPE,
    }
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _hash_path_segment(*parts: str) -> str:
    """Return the SHA-256 hex digest of ``parts`` joined with newlines."""
    hash_input = "\n".join(parts)
    return hashlib.sha256(hash_input.encode("utf-8")).hexdigest()


# Limits for the extension copied from an attachment name onto the scratch
# file name. The name comes from the email and is therefore untrusted.
_MAX_SUFFIX_PART_LENGTH = 16
_MAX_SUFFIX_LENGTH = 64


def _is_safe_suffix_part(part: str) -> bool:
    """Return True if ``part`` (".ext") is a short, filename-safe extension."""
    extension = part[1:]  # drop the leading dot
    return 0 < len(extension) <= _MAX_SUFFIX_PART_LENGTH and all(
        char.isascii() and (char.isalnum() or char in "_-") for char in extension
    )


def _safe_suffix(name: str) -> str:
    """Return the trailing run of safe extensions of ``name`` (may be empty)."""
    kept: list[str] = []
    total_length = 0
    # Walk from the last extension backwards and stop at the first part that
    # is unsafe or would push the suffix over the length cap.
    for part in reversed(PurePosixPath(name.strip()).suffixes):
        if not _is_safe_suffix_part(part):
            break
        if total_length + len(part) > _MAX_SUFFIX_LENGTH:
            break
        kept.append(part)
        total_length += len(part)
    return "".join(reversed(kept))


def _build_deterministic_file_name(
    *,
    attachment_id: str,
    name: str | None,
) -> str:
    """Build a stable scratch file name for an attachment.

    The result is the SHA-256 digest of ``attachment_id`` and ``name``
    followed by the attachment's file extension(s), e.g. ``<digest>.tar.gz``.

    Only trailing extension parts made of ASCII letters, digits, ``_`` or
    ``-`` (at most 16 characters each, 64 in total) are kept. Anything else
    in the name, such as spaces, path separators, control characters or
    very long dotted tails, is dropped from the suffix so the result is
    always a valid, bounded-length file name. The digest still covers the
    full original name.

    Args:
        attachment_id: Attachment identifier; part of the digest input.
        name: Attachment display name, or ``None`` when absent.

    Returns:
        ``<hex digest><suffix>``, where the suffix may be empty.
    """
    digest = _hash_path_segment(attachment_id, name or "")
    suffix = _safe_suffix(name) if name else ""
    return f"{digest}{suffix}"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def materialize_attachment_payloads(
    *,
    binding_id: str,
    message_id: str,
    attachment_payloads: Sequence[Mapping[str, Any]],
) -> list[AttachmentContentRow]:
    """Write a message's file attachments to scratch and describe them as rows.

    Every payload must provide a string ``id`` and base64 ``contentBytes``.
    Reference and item attachments are filtered out upstream; only
    materializable file attachments are expected here.

    All decoded files are handed to ``replace_scratch_dir_files`` in one
    call. That helper atomically REPLACES the whole ``relative_dir``, so
    writing attachments one at a time would make each call wipe the files
    written by the previous one for the same message.

    Returns:
        One ``AttachmentContentRow`` per payload, in input order. An empty
        input returns an empty list without touching the scratch directory.

    Raises:
        RuntimeError: If a payload has no string ``id`` or ``contentBytes``.
    """
    if not attachment_payloads:
        return []

    scratch_name_by_id: dict[str, str] = {}
    decoded_by_scratch_name: dict[str, bytes] = {}

    for item in attachment_payloads:
        item_id = item.get("id")
        if not isinstance(item_id, str):
            raise RuntimeError(
                "attachment payload missing id "
                f"message_id={message_id} keys={sorted(item)}"
            )

        encoded_content = item.get("contentBytes")
        if not isinstance(encoded_content, str):
            raise RuntimeError(
                "attachment payload missing contentBytes "
                f"message_id={message_id} attachment_id={item_id}"
            )

        # A non-string name is treated the same as no name at all.
        display_name = item.get("name")
        if not isinstance(display_name, str):
            display_name = None

        scratch_name = _build_deterministic_file_name(
            attachment_id=item_id,
            name=display_name,
        )
        scratch_name_by_id[item_id] = scratch_name
        decoded_by_scratch_name[scratch_name] = b64decode(encoded_content)

    written_path_by_name = replace_scratch_dir_files(
        binding_id=binding_id,
        relative_dir=f"attachments/{_hash_path_segment(message_id)}",
        files=decoded_by_scratch_name,
    )

    return [
        attachment_content_row_from_graph_payload(
            binding_id=binding_id,
            message_id=message_id,
            attachment_payload=item,
            file_path=written_path_by_name[scratch_name_by_id[item["id"]]],
        )
        for item in attachment_payloads
    ]
|