contextbase-plugin-gmail 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,251 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from collections.abc import Mapping, Sequence
5
+ from dataclasses import dataclass
6
+ from pathlib import PurePosixPath
7
+
8
+ from pydantic import ValidationError
9
+ from shared_plugins.scratch import replace_scratch_dir_files
10
+
11
+ from ..models.ctx import AttachmentRow
12
+ from ..models.ingress import GmailAttachmentIngress, GmailMessageAttachmentIngress
13
+ from ..models.translators import Base64UrlDecodeError, decode_base64url_bytes
14
+
15
+
16
@dataclass(unsafe_hash=True)
class AttachmentMaterializationError(RuntimeError):
    """Raised when an attachment cannot be decoded, written, or validated.

    ``str()`` of the error is its ``reason``; the remaining fields identify
    the message (and, when known, the part / attachment) that failed.

    The dataclass is deliberately *not* frozen. A frozen dataclass rejects
    every attribute assignment, including ``__traceback__`` and
    ``__notes__``, so Python-level code that touches an in-flight exception
    (for example ``contextlib.contextmanager`` or ``add_note``) would raise
    ``FrozenInstanceError`` and hide the real failure. ``unsafe_hash=True``
    keeps the field-based ``__eq__`` / ``__hash__`` pair, so instances stay
    comparable and hashable.
    """

    message_id: str
    reason: str
    part_id: str | None = None
    attachment_id: str | None = None

    def __str__(self) -> str:
        # Only the human-readable reason is shown; identifiers stay on fields.
        return self.reason
25
+
26
+
27
# Fetched attachment payloads, looked up by ``(message_id, attachment_id)``.
FetchedAttachmentMap = Mapping[tuple[str, str], GmailMessageAttachmentIngress]
28
+
29
+
30
@dataclass(frozen=True)
class _PreparedAttachment:
    """Attachment metadata paired with its decoded bytes and target file name.

    Instances are produced by ``_prepare_attachment_for_materialization`` and
    consumed when writing files and building ``AttachmentRow`` objects.
    """

    # Identifiers and descriptive fields copied from the attachment metadata.
    part_id: str
    attachment_id: str | None
    filename: str | None
    mime_type: str | None
    # Metadata size; falls back to the fetched attachment's size when the
    # metadata has none.
    size: int | None
    content_disposition: str | None
    content_id: str | None
    # Hash-based file name (with an occurrence suffix for repeats) used as the
    # key when writing the content.
    materialized_file_name: str
    # Decoded attachment bytes.
    content: bytes
41
+
42
+
43
def _hash_path_segment(*parts: str) -> str:
    """Return the hex SHA-256 digest of *parts* joined with newlines.

    The joined text is UTF-8 encoded before hashing, so the result is a
    64-character lowercase hex string regardless of the input.
    """
    hasher = hashlib.sha256()
    hasher.update("\n".join(parts).encode("utf-8"))
    return hasher.hexdigest()
46
+
47
+
48
def _build_deterministic_file_name(*, part_id: str, filename: str | None) -> str:
    """Build a stable file name from the part id and original filename.

    The name is the hash of ``part_id`` and ``filename`` (empty string when
    the filename is missing), followed by every suffix of the stripped
    original filename, so ``"a.tar.gz"`` keeps ``".tar.gz"``.
    """
    name_hash = _hash_path_segment(part_id, filename or "")
    if not filename:
        # No original name, hence no extension to carry over.
        return name_hash

    extension = "".join(PurePosixPath(filename.strip()).suffixes)
    return f"{name_hash}{extension}"
55
+
56
+
57
def _append_stable_suffix(file_name: str, occurrence: int) -> str:
    """Insert ``-<occurrence>`` before the extension of a repeated file name.

    The first occurrence (``occurrence <= 1``) keeps *file_name* unchanged.
    Later occurrences get the counter placed between the stem and the full
    (possibly multi-part) extension, e.g. ``"x.tar.gz"`` -> ``"x-2.tar.gz"``.
    """
    if occurrence > 1:
        extension = "".join(PurePosixPath(file_name).suffixes)
        # Slicing by length also covers the "no extension" case (length 0).
        base = file_name[: len(file_name) - len(extension)]
        return f"{base}-{occurrence}{extension}"
    return file_name
65
+
66
+
67
def _decode_attachment_content(
    *,
    message_id: str,
    attachment_metadata: GmailAttachmentIngress,
    fetched_map: FetchedAttachmentMap,
) -> tuple[bytes, GmailMessageAttachmentIngress | None]:
    """Decode the bytes of one attachment.

    When the metadata carries an ``attachment_id``, the payload is taken from
    ``fetched_map`` under ``(message_id, attachment_id)`` and decoded from its
    ``data``. Otherwise the metadata's own ``inline_data_b64url`` is decoded.

    Returns:
        ``(content, fetched_attachment)``; the second item is ``None`` for
        inline attachments.

    Raises:
        AttachmentMaterializationError: the fetched attachment is absent from
            ``fetched_map``, decoding raises ``Base64UrlDecodeError``, or the
            decoder returns ``None``.
    """
    part_id = attachment_metadata.part_id
    attachment_id = attachment_metadata.attachment_id

    def failure(reason: str) -> AttachmentMaterializationError:
        # ``attachment_id`` is ``None`` on the inline path, matching the
        # identifiers reported for each branch.
        return AttachmentMaterializationError(
            message_id=message_id,
            part_id=part_id,
            attachment_id=attachment_id,
            reason=reason,
        )

    if attachment_id is None:
        # Inline attachment: the payload travels with the metadata itself.
        try:
            payload = decode_base64url_bytes(attachment_metadata.inline_data_b64url)
        except Base64UrlDecodeError as exc:
            raise failure(
                f"Inline attachment base64url decode failed: {exc}"
            ) from exc

        if payload is None:
            raise failure(
                "Inline attachment metadata is missing decodable base64url content."
            )
        return payload, None

    # Fetched attachment: the payload must be present in the batch result map.
    fetched = fetched_map.get((message_id, attachment_id))
    if fetched is None:
        raise failure("Fetched attachment missing from batch result map.")

    try:
        payload = decode_base64url_bytes(fetched.data)
    except Base64UrlDecodeError as exc:
        raise failure(
            f"Fetched attachment base64url decode failed: {exc}"
        ) from exc

    if payload is None:
        raise failure("Fetched attachment is missing decodable base64url content.")
    return payload, fetched
123
+
124
+
125
def _prepare_attachment_for_materialization(
    *,
    message_id: str,
    attachment_metadata: GmailAttachmentIngress,
    fetched_map: FetchedAttachmentMap,
    file_name_counts: dict[str, int],
) -> _PreparedAttachment:
    """Decode one attachment and choose the file name it will be written to.

    ``file_name_counts`` is updated in place: it records how many times each
    deterministic file name has been seen so far, so repeated names receive
    an occurrence suffix.

    Raises:
        AttachmentMaterializationError: propagated from content decoding.
    """
    meta = attachment_metadata
    part_id = meta.part_id
    attachment_id = meta.attachment_id

    # Decode first so a failure leaves ``file_name_counts`` untouched.
    payload, fetched = _decode_attachment_content(
        message_id=message_id,
        attachment_metadata=meta,
        fetched_map=fetched_map,
    )

    # Prefer the size from the metadata; fall back to the fetched payload.
    declared_size = meta.size
    resolved_size = (
        fetched.size
        if declared_size is None and fetched is not None
        else declared_size
    )

    base_name = _build_deterministic_file_name(
        part_id=part_id,
        filename=meta.filename,
    )
    occurrence = file_name_counts.get(base_name, 0) + 1
    file_name_counts[base_name] = occurrence

    return _PreparedAttachment(
        part_id=part_id,
        attachment_id=attachment_id,
        filename=meta.filename,
        mime_type=meta.mime_type,
        size=resolved_size,
        content_disposition=meta.content_disposition,
        content_id=meta.content_id,
        materialized_file_name=_append_stable_suffix(base_name, occurrence),
        content=payload,
    )
166
+
167
+
168
def _build_attachment_row(
    *,
    binding_id: str,
    message_id: str,
    prepared_attachment: _PreparedAttachment,
    file_path: str,
) -> AttachmentRow:
    """Validate and return the ``AttachmentRow`` for a written attachment.

    ``ctx_source_updated_at`` is always set to ``None``.

    Raises:
        AttachmentMaterializationError: the assembled row fails
            ``AttachmentRow`` validation.
    """
    prepared = prepared_attachment
    row_fields = dict(
        ctx_binding_id=binding_id,
        ctx_source_updated_at=None,
        message_id=message_id,
        part_id=prepared.part_id,
        attachment_id=prepared.attachment_id,
        filename=prepared.filename,
        mime_type=prepared.mime_type,
        size=prepared.size,
        content_disposition=prepared.content_disposition,
        content_id=prepared.content_id,
        file_path=file_path,
    )

    try:
        row = AttachmentRow.model_validate(row_fields)
    except ValidationError as exc:
        # Re-raise as the module's error type, keeping the attachment identity.
        raise AttachmentMaterializationError(
            message_id=message_id,
            part_id=prepared.part_id,
            attachment_id=prepared.attachment_id,
            reason=f"Attachment row validation failed: {exc}",
        ) from exc
    return row
198
+
199
+
200
def build_attachment_rows_for_message(
    *,
    binding_id: str,
    message_id: str,
    attachments: Sequence[GmailAttachmentIngress],
    fetched_map: FetchedAttachmentMap,
) -> list[AttachmentRow]:
    """Write a message's attachments to scratch storage and return their rows.

    All attachments are decoded first, then written in a single
    ``replace_scratch_dir_files`` call under ``attachments/<hash of message_id>``,
    and finally one ``AttachmentRow`` per attachment is built from the returned
    path map. An empty ``attachments`` sequence returns ``[]`` without writing.

    Raises:
        AttachmentMaterializationError: decoding, the file write, or row
            validation fails.
    """
    if not attachments:
        return []

    scratch_dir = f"attachments/{_hash_path_segment(message_id)}"

    # Shared across attachments so repeated file names get distinct suffixes.
    name_counts: dict[str, int] = {}
    prepared_items: list[_PreparedAttachment] = []
    for metadata in attachments:
        prepared_items.append(
            _prepare_attachment_for_materialization(
                message_id=message_id,
                attachment_metadata=metadata,
                fetched_map=fetched_map,
                file_name_counts=name_counts,
            )
        )

    payload_by_name = {
        item.materialized_file_name: item.content for item in prepared_items
    }

    try:
        written_paths = replace_scratch_dir_files(
            binding_id=binding_id,
            relative_dir=scratch_dir,
            files=payload_by_name,
        )
    except Exception as exc:
        # Any write failure is reported as a message-level error.
        raise AttachmentMaterializationError(
            message_id=message_id,
            reason=f"Attachment file write failed: {exc}",
        ) from exc

    return [
        _build_attachment_row(
            binding_id=binding_id,
            message_id=message_id,
            prepared_attachment=item,
            file_path=written_paths[item.materialized_file_name],
        )
        for item in prepared_items
    ]