contextbase-plugin-microsoft-mail 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable, Iterator, Mapping
4
+ from typing import Any
5
+
6
+ from ..utils.client import graph_object_to_payload
7
+ from .ctx import (
8
+ AttachmentContentRow,
9
+ MailFolderRow,
10
+ MessageRow,
11
+ )
12
+
13
+
14
def _payload(item: object) -> dict[str, Any]:
    """Return the plain-dict form of a single Graph row.

    Delegates to ``graph_object_to_payload`` so every translator in this
    module converts rows through one entry point.
    """
    converted = graph_object_to_payload(item)
    return converted
16
+
17
+
18
def _list_value(payload: Mapping[str, Any], key: str) -> list[Any]:
    """Read ``key`` from ``payload`` as a list.

    Returns a shallow copy of the stored list, or an empty list when the key
    is absent or maps to ``None``.

    Raises:
        TypeError: the stored value is neither ``None`` nor a ``list``.
    """
    raw = payload.get(key)
    if isinstance(raw, list):
        # Copy so callers never alias the payload's own list.
        return list(raw)
    if raw is not None:
        raise TypeError(
            f"translator: field {key!r} must be a list, got {type(raw).__name__}"
        )
    return []
27
+
28
+
29
def _dict_value(payload: Mapping[str, Any], key: str) -> dict[str, Any] | None:
    """Read ``key`` from ``payload`` as a dict.

    Returns a shallow copy of the stored dict, or ``None`` when the key is
    absent or maps to ``None``.

    Raises:
        TypeError: the stored value is neither ``None`` nor a ``dict``.
    """
    raw = payload.get(key)
    if isinstance(raw, dict):
        # Copy so callers never alias the payload's own dict.
        return dict(raw)
    if raw is None:
        return None
    raise TypeError(
        f"translator: field {key!r} must be a dict, got {type(raw).__name__}"
    )
38
+
39
+
40
def mail_folder_rows_to_ctx_models(
    binding_id: str,
    rows: Iterable[object],
) -> Iterator[MailFolderRow]:
    """Lazily convert Graph delta mail-folder rows into ``MailFolderRow`` models.

    A row counts as removed when the ``@removed`` marker appears either in its
    ``additional_data`` dict or at the top level of its payload. Removed rows
    are emitted as tombstones carrying only the binding id, the folder id and
    ``ctx_deleted=True`` (intended for dlt's ``hard_delete`` handling at merge
    time). Every other row is emitted with the full folder field set.

    Raises:
        TypeError: a list- or dict-typed field holds a value of another type
            (raised by ``_list_value`` / ``_dict_value``).
    """
    for graph_row in rows:
        data = _payload(graph_row)
        extras = _dict_value(data, "additional_data") or {}
        folder_id = data.get("id")

        if "@removed" in extras or "@removed" in data:
            # Tombstone: list fields are deliberately not read here.
            yield MailFolderRow(
                ctx_binding_id=binding_id,
                id=folder_id,
                ctx_deleted=True,
            )
        else:
            yield MailFolderRow(
                ctx_binding_id=binding_id,
                id=folder_id,
                odata_type=data.get("@odata.type"),
                additional_data=extras,
                child_folder_count=data.get("childFolderCount"),
                child_folders=_list_value(data, "childFolders"),
                display_name=data.get("displayName"),
                is_hidden=data.get("isHidden"),
                message_rules=_list_value(data, "messageRules"),
                messages=_list_value(data, "messages"),
                multi_value_extended_properties=_list_value(
                    data,
                    "multiValueExtendedProperties",
                ),
                parent_folder_id=data.get("parentFolderId"),
                single_value_extended_properties=_list_value(
                    data,
                    "singleValueExtendedProperties",
                ),
                total_item_count=data.get("totalItemCount"),
                unread_item_count=data.get("unreadItemCount"),
            )
84
+
85
+
86
def message_rows_to_ctx_models(
    binding_id: str,
    rows: Iterable[object],
    *,
    folder_id: str | None = None,
) -> Iterator[MessageRow]:
    """Lazily convert Graph delta message rows into ``MessageRow`` models.

    Args:
        binding_id: value stored in ``ctx_binding_id`` on every emitted row.
        rows: Graph objects to translate.
        folder_id: the folder whose delta produced ``rows``. Removed rows do
            not carry their folder, so this value fills ``parent_folder_id``
            on tombstones.

    Raises:
        RuntimeError: a removed row is encountered while ``folder_id`` is
            ``None``.
        TypeError: a list- or dict-typed field holds a value of another type.
    """
    for graph_row in rows:
        data = _payload(graph_row)
        extras = _dict_value(data, "additional_data") or {}
        message_id = data.get("id")

        if "@removed" in extras or "@removed" in data:
            if folder_id is None:
                raise RuntimeError(
                    "@removed message row received without folder_id context "
                    f"message_id={message_id!r}"
                )
            yield MessageRow(
                ctx_binding_id=binding_id,
                id=message_id,
                parent_folder_id=folder_id,
                ctx_deleted=True,
            )
            continue

        # The same Graph timestamp feeds both the ctx bookkeeping column and
        # the mirrored message field.
        modified_at = data.get("lastModifiedDateTime")
        yield MessageRow(
            ctx_binding_id=binding_id,
            ctx_source_updated_at=modified_at,
            id=message_id,
            odata_type=data.get("@odata.type"),
            etag=data.get("@odata.etag"),
            additional_data=extras,
            attachments=_list_value(data, "attachments"),
            bcc_recipients=_list_value(data, "bccRecipients"),
            body=_dict_value(data, "body"),
            body_preview=data.get("bodyPreview"),
            categories=_list_value(data, "categories"),
            cc_recipients=_list_value(data, "ccRecipients"),
            change_key=data.get("changeKey"),
            conversation_id=data.get("conversationId"),
            conversation_index=data.get("conversationIndex"),
            created_date_time=data.get("createdDateTime"),
            extensions=_list_value(data, "extensions"),
            flag=_dict_value(data, "flag"),
            from_=_dict_value(data, "from"),
            has_attachments=data.get("hasAttachments"),
            importance=data.get("importance"),
            inference_classification=data.get("inferenceClassification"),
            internet_message_headers=_list_value(
                data,
                "internetMessageHeaders",
            ),
            internet_message_id=data.get("internetMessageId"),
            is_delivery_receipt_requested=data.get("isDeliveryReceiptRequested"),
            is_draft=data.get("isDraft"),
            is_read=data.get("isRead"),
            is_read_receipt_requested=data.get("isReadReceiptRequested"),
            last_modified_date_time=modified_at,
            multi_value_extended_properties=_list_value(
                data,
                "multiValueExtendedProperties",
            ),
            parent_folder_id=data.get("parentFolderId"),
            received_date_time=data.get("receivedDateTime"),
            reply_to=_list_value(data, "replyTo"),
            sender=_dict_value(data, "sender"),
            sent_date_time=data.get("sentDateTime"),
            single_value_extended_properties=_list_value(
                data,
                "singleValueExtendedProperties",
            ),
            subject=data.get("subject"),
            to_recipients=_list_value(data, "toRecipients"),
            unique_body=_dict_value(data, "uniqueBody"),
            web_link=data.get("webLink"),
        )
166
+
167
+
168
def attachment_content_row_from_graph_payload(
    *,
    binding_id: str,
    message_id: str,
    attachment_payload: Mapping[str, Any],
    file_path: str,
) -> AttachmentContentRow:
    """Combine a Graph attachment payload with its local file path.

    Args:
        binding_id: value stored in ``ctx_binding_id``.
        message_id: id of the message that owns the attachment.
        attachment_payload: attachment fields keyed by their Graph JSON names;
            missing keys become ``None`` on the row.
        file_path: location of the locally materialized attachment content.

    Returns:
        The populated ``AttachmentContentRow``.
    """
    field = attachment_payload.get
    # One read of the timestamp serves both the ctx column and the mirror field.
    modified_at = field("lastModifiedDateTime")
    return AttachmentContentRow(
        ctx_binding_id=binding_id,
        ctx_source_updated_at=modified_at,
        message_id=message_id,
        attachment_id=field("id"),
        odata_type=field("@odata.type"),
        media_content_type=field("@odata.mediaContentType"),
        name=field("name"),
        content_type=field("contentType"),
        size=field("size"),
        is_inline=field("isInline"),
        content_id=field("contentId"),
        content_location=field("contentLocation"),
        last_modified_date_time=modified_at,
        file_path=file_path,
    )
@@ -0,0 +1,7 @@
1
+ {
2
+ "auth": {
3
+ "type": "client_credentials"
4
+ },
5
+ "mode": "dagster",
6
+ "plugin_id": "microsoft_mail"
7
+ }
@@ -0,0 +1 @@
1
+ """DLT sources for Microsoft Mail."""
@@ -0,0 +1,407 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import time
5
+ import uuid
6
+ from collections.abc import Iterator, Mapping
7
+ from dataclasses import dataclass
8
+ from typing import Any, Literal, Self
9
+
10
+ import dlt
11
+ from dlt.destinations.sql_client import SqlClientBase
12
+ from pydantic import model_validator
13
+ from shared_plugins.dlt import destination_has_table
14
+ from shared_plugins.models import IdStr, StrictModel
15
+ from shared_plugins.naming import (
16
+ dlt_resource_name,
17
+ dlt_source_name,
18
+ plugin_id_from_module,
19
+ )
20
+ from shared_plugins.resources import ctx_dlt_resource
21
+
22
+ from ..models.ctx import (
23
+ ATTACHMENT_CONTENT_COLUMN_DESCRIPTIONS,
24
+ AttachmentContentRow,
25
+ )
26
+ from ..utils.attachments import (
27
+ FILE_ATTACHMENT_ODATA_TYPE,
28
+ KNOWN_ATTACHMENT_ODATA_TYPES,
29
+ materialize_attachment_payloads,
30
+ )
31
+ from ..utils.client import (
32
+ SyncGraphMailClient,
33
+ graph_object_to_payload,
34
+ )
35
+
36
# Plugin identifier derived from this module's file location.
PLUGIN_ID = plugin_id_from_module(__file__)
# Job name; combined with PLUGIN_ID to form the dlt source name.
JOB = "attachment_content"
# Used as the dlt source name and as the schema name for the SQL client.
SOURCE_NAME = dlt_source_name(PLUGIN_ID, JOB)
LOGGER = logging.getLogger(__name__)
# Maximum number of candidate messages processed per resource run.
DEFAULT_CANDIDATE_LIMIT = 200
# Passed as `prefer_header` on every full-attachment fetch.
ATTACHMENT_PREFER_HEADER = 'IdType="ImmutableId"'
42
+
43
+
44
@dataclass(frozen=True)
class Candidate:
    """One message selected by the candidate query for attachment work.

    Built by ``AttachmentCandidateProjection.to_candidate`` and consumed by
    the ``attachment_content`` resource.
    """

    # 'materialize' (fetch and store the message's file attachments) or
    # 'orphan' (emit a tombstone for the message's attachment rows).
    action: str
    # Id of the message the action applies to.
    message_id: str
    # Attachment entries taken from the message row; only set for 'materialize'.
    attachments: list[dict[str, Any]] | None = None
49
+
50
+
51
class AttachmentCandidateProjection(StrictModel):
    """Validated shape of one row returned by the candidate query.

    The ``action`` column decides whether an ``attachments`` payload is
    required ('materialize') or forbidden ('orphan').
    """

    action: Literal["materialize", "orphan"]
    message_id: IdStr
    attachments: list[dict[str, Any]] | None = None

    @model_validator(mode="after")
    def _validate_payload_for_action(self) -> Self:
        """Reject rows whose payload presence contradicts their action."""
        has_payload = self.attachments is not None
        if self.action == "materialize" and not has_payload:
            raise ValueError(
                "materialize attachment candidate requires attachments payload"
            )
        if self.action == "orphan" and has_payload:
            raise ValueError(
                "orphan attachment candidate must not include attachments payload"
            )
        return self

    def to_candidate(self) -> Candidate:
        """Convert to the frozen ``Candidate``, copying each attachment dict."""
        payloads = self.attachments
        copied = None if payloads is None else [dict(entry) for entry in payloads]
        return Candidate(
            action=self.action,
            message_id=self.message_id,
            attachments=copied,
        )
78
+
79
+
80
def parse_attachment_candidate(row: Mapping[str, Any]) -> Candidate:
    """Validate one candidate-query row and return it as a ``Candidate``.

    The mapping is copied into a plain dict before validation. Validation
    errors from ``AttachmentCandidateProjection`` propagate to the caller.
    """
    projection = AttachmentCandidateProjection.model_validate(dict(row))
    return projection.to_candidate()
82
+
83
+
84
def build_materialize_arm_query() -> str:
    """Return the SQL selecting messages whose attachment rows are out of date.

    The query takes one ``%s`` parameter, the binding id. For each message id
    it keeps the row with the latest ``last_modified_date_time`` and selects
    it when the number of attachments that are neither reference nor item
    attachments differs from the number of ``attachment_content`` rows stored
    for that message.

    The projected ``attachments`` column is coalesced to ``'[]'::jsonb``, the
    same default the WHERE clause already applies. Without it, a message whose
    ``attachments`` is NULL but which still has leftover ``attachment_content``
    rows would be returned with a NULL payload. ``AttachmentCandidateProjection``
    rejects that for the 'materialize' action, which would abort the run
    instead of letting the caller tombstone the stale rows.
    """
    return """
    WITH latest_message AS (
        SELECT DISTINCT ON (_ctx_binding_id, id)
            _ctx_binding_id,
            id,
            attachments
        FROM messages
        WHERE _ctx_binding_id = %s
        ORDER BY _ctx_binding_id, id, last_modified_date_time DESC NULLS LAST
    )
    SELECT
        'materialize'::text AS action,
        m.id AS message_id,
        COALESCE(m.attachments, '[]'::jsonb) AS attachments
    FROM latest_message AS m
    WHERE (
        SELECT count(*)
        FROM jsonb_array_elements(COALESCE(m.attachments, '[]'::jsonb)) AS a
        WHERE COALESCE(a->>'@odata.type', '') NOT IN (
            '#microsoft.graph.referenceAttachment',
            '#microsoft.graph.itemAttachment'
        )
    ) <> (
        SELECT count(*)
        FROM attachment_content AS c
        WHERE c._ctx_binding_id = m._ctx_binding_id
          AND c.message_id = m.id
    )
    """.strip()
114
+
115
+
116
def build_orphan_arm_query() -> str:
    """Return the SQL selecting message ids that only exist in ``attachment_content``.

    The query takes one ``%s`` parameter, the binding id. It yields one
    'orphan' row, with a NULL ``attachments`` column, per message id that has
    ``attachment_content`` rows but no matching row in ``messages``.
    """
    orphan_sql = """
    SELECT
        'orphan'::text AS action,
        c.message_id,
        NULL::jsonb AS attachments
    FROM attachment_content AS c
    LEFT JOIN messages AS m
        ON m._ctx_binding_id = c._ctx_binding_id
       AND m.id = c.message_id
    WHERE c._ctx_binding_id = %s
      AND m.id IS NULL
    GROUP BY c.message_id
    """
    return orphan_sql.strip()
130
+
131
+
132
def build_bootstrap_materialize_arm_query() -> str:
    """Return the materialize query for when ``attachment_content`` is missing.

    Used on the first run, before the ``attachment_content`` table exists.
    Instead of comparing against a count read from that table, the query
    selects every latest message with at least one attachment that is
    neither a reference nor an item attachment, so it never touches the
    missing relation. Takes one ``%s`` parameter, the binding id.
    """
    bootstrap_sql = """
    WITH latest_message AS (
        SELECT DISTINCT ON (_ctx_binding_id, id)
            _ctx_binding_id,
            id,
            attachments
        FROM messages
        WHERE _ctx_binding_id = %s
        ORDER BY _ctx_binding_id, id, last_modified_date_time DESC NULLS LAST
    )
    SELECT
        'materialize'::text AS action,
        m.id AS message_id,
        m.attachments AS attachments
    FROM latest_message AS m
    WHERE (
        SELECT count(*)
        FROM jsonb_array_elements(m.attachments) AS a
        WHERE COALESCE(a->>'@odata.type', '') NOT IN (
            '#microsoft.graph.referenceAttachment',
            '#microsoft.graph.itemAttachment'
        )
    ) > 0
    """
    return bootstrap_sql.strip()
162
+
163
+
164
def build_combined_candidate_query(*, limit: int) -> str:
    """Return the SQL that unions the materialize and orphan arms.

    The result is ordered by ``message_id`` and capped by a ``%s`` LIMIT
    placeholder. It expects three parameters in order: the binding id for
    the materialize arm, the binding id for the orphan arm, and the limit.

    Args:
        limit: not interpolated into the SQL; the caller supplies the limit
            as the final query parameter.
    """
    materialize_sql = build_materialize_arm_query()
    orphan_sql = build_orphan_arm_query()
    combined_sql = f"""
    WITH candidates AS (
        {materialize_sql}
        UNION ALL
        {orphan_sql}
    )
    SELECT action, message_id, attachments
    FROM candidates
    ORDER BY message_id ASC
    LIMIT %s
    """
    return combined_sql.strip()
178
+
179
+
180
def iter_candidates(
    sql_client: SqlClientBase[Any],
    *,
    binding_id: str,
    limit: int,
) -> list[Candidate]:
    """Run the candidate query and return the parsed candidates.

    Args:
        sql_client: client used to probe for tables and execute the query.
        binding_id: binding whose messages and attachment rows are inspected.
        limit: maximum number of candidates to return.

    Returns:
        Candidates ordered by message id; empty when the cursor reports no
        result description.

    Raises:
        RuntimeError: the ``messages`` table does not exist yet.
    """
    if not destination_has_table(sql_client, "messages"):
        raise RuntimeError(
            "messages table does not exist yet; run microsoft-mail-dlt-sync first"
        )

    if destination_has_table(sql_client, "attachment_content"):
        sql = build_combined_candidate_query(limit=limit)
        args = (binding_id, binding_id, limit)
    else:
        # Bootstrap: nothing can be orphaned yet and the missing table must
        # not be referenced, so only the bootstrap materialize arm runs.
        sql = (
            build_bootstrap_materialize_arm_query()
            + "\nORDER BY message_id ASC\nLIMIT %s"
        )
        args = (binding_id, limit)

    with sql_client.execute_query(sql, *args) as cursor:
        description = cursor.description
        if description is None:
            return []
        names = [column[0] for column in description]
        return [
            parse_attachment_candidate(dict(zip(names, record)))
            for record in cursor.fetchall()
        ]
212
+
213
+
214
def _materializable_attachments(
    attachments: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Return only the file attachments, in their original order.

    Attachments of the other known types are dropped because they produce no
    row in ``attachment_content``. The original note cites spec §8: Graph's
    attachment subtype set is closed, so an unknown type fails loudly.

    Raises:
        RuntimeError: an attachment's ``@odata.type`` is not one of
            ``KNOWN_ATTACHMENT_ODATA_TYPES`` (reported for the first offender).
    """
    # Validate everything first; nothing is returned for a bad batch.
    for entry in attachments:
        kind = entry.get("@odata.type")
        if kind not in KNOWN_ATTACHMENT_ODATA_TYPES:
            raise RuntimeError(
                f"unknown attachment @odata.type {kind!r}; "
                f"expected one of {sorted(KNOWN_ATTACHMENT_ODATA_TYPES)}"
            )
    return [
        entry
        for entry in attachments
        if entry.get("@odata.type") == FILE_ATTACHMENT_ODATA_TYPE
    ]
231
+
232
+
233
def _full_attachment_payload(obj: Any) -> dict[str, Any]:
    """Build the materialization payload for a fetched Graph attachment object.

    Per the original author's note (checked against
    scratch/smoke_real_graph.py), Kiota's Python deserializer keeps
    ``FileAttachment.content_bytes`` as the ASCII bytes of the base64 text
    from the wire, not as decoded file bytes. Serializing through
    ``graph_object_to_payload`` base64-encodes those bytes a second time.
    ``contentBytes`` is therefore overwritten with the original base64 text,
    so the single ``b64decode`` in ``materialize_attachment_payloads`` yields
    the real file bytes.

    When ``content_bytes`` is absent or not bytes-like, the serialized
    payload is returned unchanged.
    """
    result = graph_object_to_payload(obj)
    wire_bytes = getattr(obj, "content_bytes", None)
    if not isinstance(wire_bytes, (bytes, bytearray)):
        return result
    result["contentBytes"] = bytes(wire_bytes).decode("ascii")
    return result
250
+
251
+
252
def fetch_and_emit_for_message(
    *,
    binding_id: str,
    client: SyncGraphMailClient,
    candidate: Candidate,
) -> Iterator[AttachmentContentRow]:
    """Fetch a message's file attachments and yield their content rows.

    Each file attachment listed on the candidate is fetched in full through
    ``client.get_attachment_full``. The collected payloads are then handed to
    ``materialize_attachment_payloads`` in one call and its rows are yielded.

    Raises:
        RuntimeError: the candidate has no attachments payload, or one of its
            file attachments has no string ``id``.
    """
    listed = candidate.attachments
    if listed is None:
        raise RuntimeError(
            f"materialize candidate without attachments payload message_id={candidate.message_id}"
        )

    fetched: list[dict[str, Any]] = []
    for entry in _materializable_attachments(listed):
        entry_id = entry.get("id")
        if isinstance(entry_id, str):
            graph_attachment = client.get_attachment_full(
                message_id=candidate.message_id,
                attachment_id=entry_id,
                prefer_header=ATTACHMENT_PREFER_HEADER,
            )
            fetched.append(_full_attachment_payload(graph_attachment))
            continue
        raise RuntimeError(
            "attachment in messages.attachments has no id "
            f"message_id={candidate.message_id} payload_keys={sorted(entry)}"
        )

    yield from materialize_attachment_payloads(
        binding_id=binding_id,
        message_id=candidate.message_id,
        attachment_payloads=fetched,
    )
284
+
285
+
286
# Marks an `attachment_id` as a tombstone sentinel rather than a Graph id.
TOMBSTONE_ATTACHMENT_ID_PREFIX = "_ctx_tombstone:"


def _generate_tombstone_attachment_id() -> str:
    """Return a fresh sentinel ``attachment_id`` for a tombstone row.

    The value is the tombstone prefix followed by a random uuid4. The prefix
    signals intent, and the random suffix keeps sentinels distinct from each
    other within one merge and from real Graph attachment ids.
    """
    suffix = str(uuid.uuid4())
    return TOMBSTONE_ATTACHMENT_ID_PREFIX + suffix
297
+
298
+
299
def emit_orphan_tombstone(
    *,
    binding_id: str,
    candidate: Candidate,
) -> AttachmentContentRow:
    """Build the tombstone row for all attachment rows of one message.

    The row is meant to make dlt delete every ``attachment_content`` row that
    shares the merge key ``(_ctx_binding_id, message_id)``. ``attachment_id``
    is part of the primary key and so cannot be NULL; the tombstone carries a
    ``_ctx_tombstone:<uuid4>`` sentinel instead. Per the original note, dlt's
    ``hard_delete`` on ``_ctx_deleted`` removes the sentinel row at the end of
    the merge, so it never persists in the destination.
    """
    sentinel_id = _generate_tombstone_attachment_id()
    return AttachmentContentRow(
        ctx_binding_id=binding_id,
        message_id=candidate.message_id,
        attachment_id=sentinel_id,
        ctx_deleted=True,
    )
318
+
319
+
320
@dlt.source(name=SOURCE_NAME)
def microsoft_mail_attachment_source(
    binding_id: str,
    *,
    client: SyncGraphMailClient,
) -> tuple[Any, ...]:
    """dlt source exposing the ``attachment_content`` resource for one binding.

    Args:
        binding_id: binding whose messages are scanned for attachment work.
        client: Graph mail client used to fetch full attachment payloads.

    Returns:
        A one-element tuple holding the ``attachment_content`` resource.
    """

    @ctx_dlt_resource(
        name=dlt_resource_name("attachment_content"),
        write_disposition={"disposition": "merge", "strategy": "delete-insert"},
        primary_key=("_ctx_binding_id", "message_id", "attachment_id"),
        merge_key=("_ctx_binding_id", "message_id"),
        columns={
            **ATTACHMENT_CONTENT_COLUMN_DESCRIPTIONS,
            # Live rows always carry file_path, but tombstones
            # (ctx_deleted=True) leave it NULL. dlt infers the schema from
            # the first data it sees, so nullability is declared upfront to
            # match the pydantic Optional type. attachment_id stays NOT NULL
            # because it is part of the PK; tombstones carry a sentinel id
            # built from TOMBSTONE_ATTACHMENT_ID_PREFIX plus a uuid4.
            "file_path": {"nullable": True},
            "_ctx_deleted": {"data_type": "bool", "hard_delete": True},
        },
    )
    def attachment_content() -> Iterator[AttachmentContentRow]:
        """Yield content rows and tombstones for the current candidate batch."""
        # Passing `schema_name=SOURCE_NAME` sidesteps dlt's
        # `Schema(pipeline_name)` fallback in `_get_schema_or_create`. The
        # pipeline name of a per-binding attachment_content pipeline
        # routinely exceeds dlt's 64-char schema-name limit, and the source
        # schema is the one wanted here anyway. `run_dlt_pipeline`
        # pre-registers an empty schema under this name on cold start so
        # the lookup succeeds.
        pipeline = dlt.current.pipeline()
        with pipeline.sql_client(schema_name=SOURCE_NAME) as sql_client:
            candidates = iter_candidates(
                sql_client,
                binding_id=binding_id,
                limit=DEFAULT_CANDIDATE_LIMIT,
            )

        run_started = time.monotonic()
        materialized_messages = 0
        materialized_rows = 0
        orphaned_messages = 0

        for candidate in candidates:
            action = candidate.action
            if action not in ("orphan", "materialize"):
                raise RuntimeError(f"unknown candidate action {action!r}")

            # A tombstone is needed for true orphans and also for
            # materialize candidates with nothing left to fetch (all file
            # attachments removed, or the attachments array empty). dlt's
            # delete-insert merge does nothing when no row is emitted for a
            # merge_key, so without the tombstone leftover rows would stay.
            # The `or` short-circuit keeps the attachment check off orphans.
            needs_tombstone = action == "orphan" or not _materializable_attachments(
                candidate.attachments or []
            )
            if needs_tombstone:
                orphaned_messages += 1
                yield emit_orphan_tombstone(binding_id=binding_id, candidate=candidate)
                continue

            # Materialize eagerly so the row count is known before yielding.
            rows = list(
                fetch_and_emit_for_message(
                    binding_id=binding_id,
                    client=client,
                    candidate=candidate,
                )
            )
            materialized_messages += 1
            materialized_rows += len(rows)
            yield from rows

        elapsed = time.monotonic() - run_started
        LOGGER.info(
            "microsoft_mail.attachment_content.run_complete "
            "candidates=%d materialized_messages=%d rows=%d orphaned_messages=%d elapsed=%.3fs",
            len(candidates),
            materialized_messages,
            materialized_rows,
            orphaned_messages,
            elapsed,
        )

    return (attachment_content,)