contextbase-plugin-gmail 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_plugin_gmail-0.2.6.dist-info/METADATA +13 -0
- contextbase_plugin_gmail-0.2.6.dist-info/RECORD +21 -0
- contextbase_plugin_gmail-0.2.6.dist-info/WHEEL +4 -0
- plugin_gmail/__init__.py +0 -0
- plugin_gmail/binding_config.py +13 -0
- plugin_gmail/component.py +269 -0
- plugin_gmail/defs/__init__.py +0 -0
- plugin_gmail/defs/defs.yaml +1 -0
- plugin_gmail/models/__init__.py +0 -0
- plugin_gmail/models/ctx.py +132 -0
- plugin_gmail/models/ingress.py +185 -0
- plugin_gmail/models/translators.py +470 -0
- plugin_gmail/models/types.py +12 -0
- plugin_gmail/plugin.json +9 -0
- plugin_gmail/sources/__init__.py +0 -0
- plugin_gmail/sources/attachments.py +307 -0
- plugin_gmail/sources/backfill.py +129 -0
- plugin_gmail/sources/history.py +160 -0
- plugin_gmail/utils/__init__.py +0 -0
- plugin_gmail/utils/attachments.py +251 -0
- plugin_gmail/utils/client.py +494 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from collections.abc import Mapping, Sequence
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import PurePosixPath
|
|
7
|
+
|
|
8
|
+
from pydantic import ValidationError
|
|
9
|
+
from shared_plugins.scratch import replace_scratch_dir_files
|
|
10
|
+
|
|
11
|
+
from ..models.ctx import AttachmentRow
|
|
12
|
+
from ..models.ingress import GmailAttachmentIngress, GmailMessageAttachmentIngress
|
|
13
|
+
from ..models.translators import Base64UrlDecodeError, decode_base64url_bytes
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
|
|
17
|
+
class AttachmentMaterializationError(RuntimeError):
|
|
18
|
+
message_id: str
|
|
19
|
+
reason: str
|
|
20
|
+
part_id: str | None = None
|
|
21
|
+
attachment_id: str | None = None
|
|
22
|
+
|
|
23
|
+
def __str__(self) -> str:
|
|
24
|
+
return self.reason
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Fetched attachment payloads, looked up by (message_id, attachment_id).
FetchedAttachmentMap = Mapping[tuple[str, str], GmailMessageAttachmentIngress]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
|
|
31
|
+
class _PreparedAttachment:
|
|
32
|
+
part_id: str
|
|
33
|
+
attachment_id: str | None
|
|
34
|
+
filename: str | None
|
|
35
|
+
mime_type: str | None
|
|
36
|
+
size: int | None
|
|
37
|
+
content_disposition: str | None
|
|
38
|
+
content_id: str | None
|
|
39
|
+
materialized_file_name: str
|
|
40
|
+
content: bytes
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _hash_path_segment(*parts: str) -> str:
    """Return the SHA-256 hex digest of ``parts`` joined by newlines.

    The joined text is encoded as UTF-8 before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update("\n".join(parts).encode("utf-8"))
    return hasher.hexdigest()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _build_deterministic_file_name(*, part_id: str, filename: str | None) -> str:
    """Derive a reproducible file name from a part id and its original name.

    The stem is the hash of ``part_id`` and ``filename`` (an empty string
    stands in for a missing name). When a filename is present, every suffix
    of its stripped final path component is appended unchanged.
    """
    name_hash = _hash_path_segment(part_id, filename or "")
    if not filename:
        return name_hash

    extension = "".join(PurePosixPath(filename.strip()).suffixes)
    return name_hash + extension
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _append_stable_suffix(file_name: str, occurrence: int) -> str:
    """Disambiguate a repeated file name by inserting ``-<occurrence>``.

    The first occurrence (or any value below 2) returns ``file_name``
    untouched. Later occurrences get the counter placed in front of the
    file's combined suffixes so the extension stays at the end.
    """
    if occurrence > 1:
        extension = "".join(PurePosixPath(file_name).suffixes)
        # Cut the extension off by length; an empty extension keeps the
        # whole name as the base.
        base = file_name[: len(file_name) - len(extension)]
        return f"{base}-{occurrence}{extension}"

    return file_name
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _decode_attachment_content(
    *,
    message_id: str,
    attachment_metadata: GmailAttachmentIngress,
    fetched_map: FetchedAttachmentMap,
) -> tuple[bytes, GmailMessageAttachmentIngress | None]:
    """Decode the bytes of one attachment.

    When the metadata carries an ``attachment_id``, the payload is looked up
    in ``fetched_map`` under ``(message_id, attachment_id)`` and its ``data``
    is decoded. Otherwise the metadata's own ``inline_data_b64url`` is
    decoded.

    Returns:
        ``(content, fetched_attachment)``; the second item is ``None`` for
        inline attachments.

    Raises:
        AttachmentMaterializationError: the fetched entry is missing, the
            base64url decode fails, or the decoder returns ``None``.
    """
    part_id = attachment_metadata.part_id
    attachment_id = attachment_metadata.attachment_id

    def failure(reason: str) -> AttachmentMaterializationError:
        # attachment_id is None on the inline path, so one factory serves
        # both branches.
        return AttachmentMaterializationError(
            message_id=message_id,
            part_id=part_id,
            attachment_id=attachment_id,
            reason=reason,
        )

    if attachment_id is None:
        try:
            inline_bytes = decode_base64url_bytes(attachment_metadata.inline_data_b64url)
        except Base64UrlDecodeError as exc:
            raise failure(f"Inline attachment base64url decode failed: {exc}") from exc

        if inline_bytes is None:
            raise failure("Inline attachment metadata is missing decodable base64url content.")
        return inline_bytes, None

    fetched = fetched_map.get((message_id, attachment_id))
    if fetched is None:
        raise failure("Fetched attachment missing from batch result map.")

    try:
        fetched_bytes = decode_base64url_bytes(fetched.data)
    except Base64UrlDecodeError as exc:
        raise failure(f"Fetched attachment base64url decode failed: {exc}") from exc

    if fetched_bytes is None:
        raise failure("Fetched attachment is missing decodable base64url content.")
    return fetched_bytes, fetched
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _prepare_attachment_for_materialization(
    *,
    message_id: str,
    attachment_metadata: GmailAttachmentIngress,
    fetched_map: FetchedAttachmentMap,
    file_name_counts: dict[str, int],
) -> _PreparedAttachment:
    """Decode one attachment and choose the file name it will be written to.

    ``file_name_counts`` is updated in place: it records how many times each
    deterministic name has been seen, and repeats get a numbered suffix.

    Raises:
        AttachmentMaterializationError: propagated from content decoding.
    """
    source_part_id = attachment_metadata.part_id
    source_attachment_id = attachment_metadata.attachment_id

    payload, fetched = _decode_attachment_content(
        message_id=message_id,
        attachment_metadata=attachment_metadata,
        fetched_map=fetched_map,
    )

    # Prefer the size from the metadata; fall back to the fetched payload.
    resolved_size = attachment_metadata.size
    if resolved_size is None and fetched is not None:
        resolved_size = fetched.size

    base_name = _build_deterministic_file_name(
        part_id=source_part_id,
        filename=attachment_metadata.filename,
    )
    occurrence = file_name_counts.get(base_name, 0) + 1
    file_name_counts[base_name] = occurrence
    target_name = _append_stable_suffix(base_name, occurrence)

    return _PreparedAttachment(
        part_id=source_part_id,
        attachment_id=source_attachment_id,
        filename=attachment_metadata.filename,
        mime_type=attachment_metadata.mime_type,
        size=resolved_size,
        content_disposition=attachment_metadata.content_disposition,
        content_id=attachment_metadata.content_id,
        materialized_file_name=target_name,
        content=payload,
    )
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _build_attachment_row(
    *,
    binding_id: str,
    message_id: str,
    prepared_attachment: _PreparedAttachment,
    file_path: str,
) -> AttachmentRow:
    """Validate and return the ``AttachmentRow`` for one prepared attachment.

    ``ctx_source_updated_at`` is always set to ``None``.

    Raises:
        AttachmentMaterializationError: the candidate row fails validation.
    """
    row_fields = dict(
        ctx_binding_id=binding_id,
        ctx_source_updated_at=None,
        message_id=message_id,
        part_id=prepared_attachment.part_id,
        attachment_id=prepared_attachment.attachment_id,
        filename=prepared_attachment.filename,
        mime_type=prepared_attachment.mime_type,
        size=prepared_attachment.size,
        content_disposition=prepared_attachment.content_disposition,
        content_id=prepared_attachment.content_id,
        file_path=file_path,
    )

    try:
        row = AttachmentRow.model_validate(row_fields)
    except ValidationError as exc:
        raise AttachmentMaterializationError(
            message_id=message_id,
            part_id=prepared_attachment.part_id,
            attachment_id=prepared_attachment.attachment_id,
            reason=f"Attachment row validation failed: {exc}",
        ) from exc
    return row
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def build_attachment_rows_for_message(
    *,
    binding_id: str,
    message_id: str,
    attachments: Sequence[GmailAttachmentIngress],
    fetched_map: FetchedAttachmentMap,
) -> list[AttachmentRow]:
    """Write a message's attachments to scratch storage and build their rows.

    Every attachment is decoded first. The files are then handed to
    ``replace_scratch_dir_files`` in a single call, under a directory named
    after the hash of ``message_id``. One row is produced per attachment, in
    input order, using the paths that call returns.

    Returns:
        The attachment rows, or an empty list when there are no attachments
        (nothing is written in that case).

    Raises:
        AttachmentMaterializationError: decoding, file writing, or row
            validation failed.
    """
    if len(attachments) == 0:
        return []

    message_dir = _hash_path_segment(message_id)
    name_counts: dict[str, int] = {}

    prepared: list[_PreparedAttachment] = []
    for metadata in attachments:
        prepared.append(
            _prepare_attachment_for_materialization(
                message_id=message_id,
                attachment_metadata=metadata,
                fetched_map=fetched_map,
                file_name_counts=name_counts,
            )
        )

    payloads_by_name = dict(
        (item.materialized_file_name, item.content) for item in prepared
    )

    try:
        written_paths = replace_scratch_dir_files(
            binding_id=binding_id,
            relative_dir=f"attachments/{message_dir}",
            files=payloads_by_name,
        )
    except Exception as exc:
        # Any write failure is reported as one error for the whole message.
        raise AttachmentMaterializationError(
            message_id=message_id,
            reason=f"Attachment file write failed: {exc}",
        ) from exc

    return [
        _build_attachment_row(
            binding_id=binding_id,
            message_id=message_id,
            prepared_attachment=item,
            file_path=written_paths[item.materialized_file_name],
        )
        for item in prepared
    ]
|