nornweave 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. nornweave/__init__.py +3 -0
  2. nornweave/adapters/__init__.py +1 -0
  3. nornweave/adapters/base.py +5 -0
  4. nornweave/adapters/mailgun.py +196 -0
  5. nornweave/adapters/resend.py +510 -0
  6. nornweave/adapters/sendgrid.py +492 -0
  7. nornweave/adapters/ses.py +824 -0
  8. nornweave/cli.py +186 -0
  9. nornweave/core/__init__.py +26 -0
  10. nornweave/core/config.py +172 -0
  11. nornweave/core/exceptions.py +25 -0
  12. nornweave/core/interfaces.py +390 -0
  13. nornweave/core/storage.py +192 -0
  14. nornweave/core/utils.py +23 -0
  15. nornweave/huginn/__init__.py +10 -0
  16. nornweave/huginn/client.py +296 -0
  17. nornweave/huginn/config.py +52 -0
  18. nornweave/huginn/resources.py +165 -0
  19. nornweave/huginn/server.py +202 -0
  20. nornweave/models/__init__.py +113 -0
  21. nornweave/models/attachment.py +136 -0
  22. nornweave/models/event.py +275 -0
  23. nornweave/models/inbox.py +33 -0
  24. nornweave/models/message.py +284 -0
  25. nornweave/models/thread.py +172 -0
  26. nornweave/muninn/__init__.py +14 -0
  27. nornweave/muninn/tools.py +207 -0
  28. nornweave/search/__init__.py +1 -0
  29. nornweave/search/embeddings.py +1 -0
  30. nornweave/search/vector_store.py +1 -0
  31. nornweave/skuld/__init__.py +1 -0
  32. nornweave/skuld/rate_limiter.py +1 -0
  33. nornweave/skuld/scheduler.py +1 -0
  34. nornweave/skuld/sender.py +25 -0
  35. nornweave/skuld/webhooks.py +1 -0
  36. nornweave/storage/__init__.py +20 -0
  37. nornweave/storage/database.py +165 -0
  38. nornweave/storage/gcs.py +144 -0
  39. nornweave/storage/local.py +152 -0
  40. nornweave/storage/s3.py +164 -0
  41. nornweave/urdr/__init__.py +14 -0
  42. nornweave/urdr/adapters/__init__.py +16 -0
  43. nornweave/urdr/adapters/base.py +385 -0
  44. nornweave/urdr/adapters/postgres.py +50 -0
  45. nornweave/urdr/adapters/sqlite.py +51 -0
  46. nornweave/urdr/migrations/env.py +94 -0
  47. nornweave/urdr/migrations/script.py.mako +26 -0
  48. nornweave/urdr/migrations/versions/.gitkeep +0 -0
  49. nornweave/urdr/migrations/versions/20260131_0001_initial_schema.py +182 -0
  50. nornweave/urdr/migrations/versions/20260131_0002_extended_schema.py +241 -0
  51. nornweave/urdr/orm.py +641 -0
  52. nornweave/verdandi/__init__.py +45 -0
  53. nornweave/verdandi/attachments.py +471 -0
  54. nornweave/verdandi/content.py +420 -0
  55. nornweave/verdandi/headers.py +404 -0
  56. nornweave/verdandi/parser.py +25 -0
  57. nornweave/verdandi/sanitizer.py +9 -0
  58. nornweave/verdandi/threading.py +359 -0
  59. nornweave/yggdrasil/__init__.py +1 -0
  60. nornweave/yggdrasil/app.py +86 -0
  61. nornweave/yggdrasil/dependencies.py +190 -0
  62. nornweave/yggdrasil/middleware/__init__.py +1 -0
  63. nornweave/yggdrasil/middleware/auth.py +1 -0
  64. nornweave/yggdrasil/middleware/logging.py +1 -0
  65. nornweave/yggdrasil/routes/__init__.py +1 -0
  66. nornweave/yggdrasil/routes/v1/__init__.py +1 -0
  67. nornweave/yggdrasil/routes/v1/inboxes.py +124 -0
  68. nornweave/yggdrasil/routes/v1/messages.py +200 -0
  69. nornweave/yggdrasil/routes/v1/search.py +84 -0
  70. nornweave/yggdrasil/routes/v1/threads.py +142 -0
  71. nornweave/yggdrasil/routes/webhooks/__init__.py +1 -0
  72. nornweave/yggdrasil/routes/webhooks/mailgun.py +136 -0
  73. nornweave/yggdrasil/routes/webhooks/resend.py +344 -0
  74. nornweave/yggdrasil/routes/webhooks/sendgrid.py +15 -0
  75. nornweave/yggdrasil/routes/webhooks/ses.py +15 -0
  76. nornweave-0.1.2.dist-info/METADATA +324 -0
  77. nornweave-0.1.2.dist-info/RECORD +80 -0
  78. nornweave-0.1.2.dist-info/WHEEL +4 -0
  79. nornweave-0.1.2.dist-info/entry_points.txt +5 -0
  80. nornweave-0.1.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,471 @@
1
+ """Attachment handling: MIME parsing, Content-ID mapping, validation.
2
+
3
+ Provides utilities for:
4
+ - Parsing MIME attachments from raw email content
5
+ - Extracting attachments from multipart form data
6
+ - Content-ID handling for inline images
7
+ - File type validation and security checks
8
+ - Text extraction from attachments (PDF, CSV, etc.)
9
+ """
10
+
11
+ import email
12
+ import mimetypes
13
+ import re
14
+ from dataclasses import dataclass, field
15
+ from email import policy
16
+ from typing import TYPE_CHECKING, Any, cast
17
+
18
+ from nornweave.core.interfaces import InboundAttachment
19
+ from nornweave.models.attachment import AttachmentDisposition
20
+
21
+ if TYPE_CHECKING:
22
+ from email.message import EmailMessage
23
+
24
# Blocked file extensions for security.
# Entries are lowercase and include the leading dot; validate_attachments
# lowercases the attachment's extension before testing membership.
BLOCKED_EXTENSIONS = {
    # Windows executables, batch files and legacy launchers
    ".exe", ".bat", ".cmd", ".scr", ".com", ".pif",
    # VBScript / JScript
    ".vbs", ".vbe", ".js", ".jse",
    # Windows Script Host
    ".ws", ".wsf", ".wsc", ".wsh",
    # PowerShell
    ".ps1", ".ps1xml", ".ps2", ".ps2xml", ".psc1", ".psc2",
    # Monad shell (PowerShell predecessor)
    ".msh", ".msh1", ".msh2", ".mshxml", ".msh1xml", ".msh2xml",
    # Shell command files, shortcuts, setup information, registry entries
    ".scf", ".lnk", ".inf", ".reg",
}
57
+
58
# Default limits applied by validate_attachments (sizes are in bytes)
MAX_SINGLE_ATTACHMENT_SIZE = 25 * 1024**2  # 25 MB per attachment
MAX_TOTAL_ATTACHMENT_SIZE = 35 * 1024**2  # 35 MB across all attachments
MAX_ATTACHMENT_COUNT = 20
62
+
63
+
64
@dataclass
class AttachmentValidationResult:
    """Outcome of validating a batch of attachments.

    ``valid`` carries the overall verdict. ``errors`` and ``warnings`` hold
    human-readable messages; each defaults to its own fresh empty list.
    """

    # Overall verdict for the batch
    valid: bool
    # Messages for problems found during validation
    errors: list[str] = field(default_factory=list)
    # Messages for findings reported separately from errors
    warnings: list[str] = field(default_factory=list)
71
+
72
+
73
@dataclass
class ContentIdMapping:
    """Associates one Content-ID with the attachment it refers to."""

    # Content-ID value identifying the attachment
    content_id: str
    # Filename of the referenced attachment
    filename: str
    # Index of the referenced attachment
    attachment_index: int
80
+
81
+
82
def parse_mime_attachments(raw_mime: str | bytes) -> list[InboundAttachment]:
    """
    Parse attachments from raw MIME email content.

    Used for AWS SES which provides the full raw email.

    Bytes input is handed to the bytes parser unchanged. Decoding it to text
    first (UTF-8 with replacement characters) would corrupt parts that use
    8bit/binary transfer encodings or a non-UTF-8 charset.

    Args:
        raw_mime: Raw MIME email content

    Returns:
        List of InboundAttachment objects
    """
    if isinstance(raw_mime, bytes):
        msg = email.message_from_bytes(raw_mime, policy=policy.default)
    else:
        msg = email.message_from_string(raw_mime, policy=policy.default)

    return _extract_attachments_from_message(msg)
99
+
100
+
101
def _extract_attachments_from_message(msg: "EmailMessage") -> list[InboundAttachment]:
    """
    Extract attachments from an EmailMessage object.

    Walks every MIME part and keeps the non-multipart parts whose
    Content-Disposition is ``attachment`` or ``inline``. Parts whose payload
    does not decode to bytes are skipped, and a missing filename falls back
    to ``"unnamed"``.

    Args:
        msg: Parsed email message to walk

    Returns:
        List of InboundAttachment objects, in the order the parts are walked
    """
    attachments: list[InboundAttachment] = []

    for part in msg.walk():
        # Skip multipart containers
        if part.is_multipart():
            continue

        # Process both attachment and inline parts, nothing else
        content_disposition = part.get_content_disposition()
        if content_disposition not in ("attachment", "inline"):
            continue

        # decode=True undoes the Content-Transfer-Encoding (base64, etc.)
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            continue

        disposition = (
            AttachmentDisposition.INLINE
            if content_disposition == "inline"
            else AttachmentDisposition.ATTACHMENT
        )

        attachments.append(
            InboundAttachment(
                filename=part.get_filename() or "unnamed",
                content_type=part.get_content_type(),
                content=payload,
                size_bytes=len(payload),
                disposition=disposition,
                # Use the shared helper so Content-IDs are normalized the same
                # way everywhere (whitespace and angle brackets removed,
                # empty values become None)
                content_id=normalize_content_id(part.get("Content-ID")),
            )
        )

    return attachments
147
+
148
+
149
+ def parse_content_id_map(content_id_map_json: str | dict[str, str] | None) -> dict[str, str]:
150
+ """
151
+ Parse content-id-map from Mailgun/SendGrid format.
152
+
153
+ The content-id-map maps Content-IDs to attachment field names:
154
+ {"ii_abc123": "attachment1", "ii_def456": "attachment2"}
155
+
156
+ Args:
157
+ content_id_map_json: JSON string or dict of content ID mappings
158
+
159
+ Returns:
160
+ Dictionary mapping Content-ID to attachment field name
161
+ """
162
+ if not content_id_map_json:
163
+ return {}
164
+
165
+ if isinstance(content_id_map_json, str):
166
+ import json
167
+
168
+ try:
169
+ return cast("dict[str, str]", json.loads(content_id_map_json))
170
+ except (json.JSONDecodeError, ValueError):
171
+ return {}
172
+
173
+ return cast("dict[str, str]", dict(content_id_map_json))
174
+
175
+
176
+ def normalize_content_id(content_id: str | None) -> str | None:
177
+ """
178
+ Normalize a Content-ID by stripping angle brackets.
179
+
180
+ Args:
181
+ content_id: Raw Content-ID value
182
+
183
+ Returns:
184
+ Normalized Content-ID without angle brackets
185
+ """
186
+ if not content_id:
187
+ return None
188
+
189
+ cid = content_id.strip()
190
+ if cid.startswith("<"):
191
+ cid = cid[1:]
192
+ if cid.endswith(">"):
193
+ cid = cid[:-1]
194
+
195
+ return cid if cid else None
196
+
197
+
198
def build_content_id_to_filename_map(
    attachments: list[InboundAttachment],
) -> dict[str, str]:
    """
    Map each attachment's normalized Content-ID to its filename.

    Attachments without a Content-ID, or whose Content-ID normalizes to
    nothing, are left out. When two attachments share a Content-ID the
    later one wins.

    Args:
        attachments: List of attachments

    Returns:
        Dictionary mapping Content-ID to filename
    """
    return {
        cid: att.filename
        for att in attachments
        if att.content_id and (cid := normalize_content_id(att.content_id))
    }
219
+
220
+
221
def validate_attachments(
    attachments: list[InboundAttachment],
    *,
    max_single_size: int = MAX_SINGLE_ATTACHMENT_SIZE,
    max_total_size: int = MAX_TOTAL_ATTACHMENT_SIZE,
    max_count: int = MAX_ATTACHMENT_COUNT,
    check_extensions: bool = True,
) -> AttachmentValidationResult:
    """
    Check attachments against count, size and file-type limits.

    Every violation is collected rather than stopping at the first one.
    A content type that differs from the one guessed from the filename is
    reported as a warning only, and only when ``check_extensions`` is set.

    Args:
        attachments: List of attachments to validate
        max_single_size: Maximum size for single attachment
        max_total_size: Maximum total size for all attachments
        max_count: Maximum number of attachments
        check_extensions: Whether to check for blocked extensions

    Returns:
        AttachmentValidationResult that is valid only when no errors
        were collected
    """
    problems: list[str] = []
    notices: list[str] = []

    # Count limit
    count = len(attachments)
    if count > max_count:
        problems.append(f"Too many attachments: {count} > {max_count}")

    combined_size = 0

    for position, att in enumerate(attachments, start=1):
        size = att.size_bytes
        combined_size += size

        # Per-attachment size limit
        if size > max_single_size:
            problems.append(
                f"Attachment {position} ({att.filename}) too large: "
                f"{size / 1024 / 1024:.1f}MB > {max_single_size / 1024 / 1024:.1f}MB"
            )

        if not check_extensions:
            continue

        # Blocked extension (compared case-insensitively)
        if _get_extension(att.filename).lower() in BLOCKED_EXTENSIONS:
            problems.append(f"Blocked file type: {att.filename}")

        # Declared content type versus the one implied by the filename
        expected_type = mimetypes.guess_type(att.filename)[0]
        if expected_type and expected_type != att.content_type:
            notices.append(
                f"Content-type mismatch for {att.filename}: "
                f"claimed {att.content_type}, expected {expected_type}"
            )

    # Combined size limit
    if combined_size > max_total_size:
        problems.append(
            "Total attachment size too large: "
            f"{combined_size / 1024 / 1024:.1f}MB > {max_total_size / 1024 / 1024:.1f}MB"
        )

    return AttachmentValidationResult(
        valid=not problems,
        errors=problems,
        warnings=notices,
    )
287
+
288
+
289
+ def _get_extension(filename: str) -> str:
290
+ """Get the file extension from a filename."""
291
+ if not filename:
292
+ return ""
293
+ parts = filename.rsplit(".", 1)
294
+ if len(parts) > 1:
295
+ return "." + parts[1]
296
+ return ""
297
+
298
+
299
def resolve_cid_urls_in_html(
    html: str,
    attachments: list[InboundAttachment],
    *,
    base_url: str = "/v1/attachments",
) -> str:
    """
    Replace cid: URLs in HTML with actual attachment URLs.

    This is useful for rendering HTML emails with inline images.

    A cid: URL carries the Content-ID in percent-encoded form (RFC 2392),
    so a reference is looked up both as written and percent-decoded.
    References that match no attachment are left untouched.

    Args:
        html: HTML content with cid: URLs
        attachments: List of attachments
        base_url: Base URL for attachment downloads

    Returns:
        HTML with cid: URLs replaced with actual URLs
    """
    if not html:
        return html

    from urllib.parse import unquote

    # Build cid to attachment_id mapping
    cid_to_id: dict[str, str] = {}
    for i, att in enumerate(attachments):
        if att.content_id:
            cid = normalize_content_id(att.content_id)
            if cid:
                # Use index as ID placeholder (real ID would come from storage)
                cid_to_id[cid] = f"att_{i}"

    # Nothing can match, so skip the regex pass entirely
    if not cid_to_id:
        return html

    def replace_cid(match: re.Match[str]) -> str:
        raw_cid = match.group(1)
        # Try the literal text first, then its percent-decoded form
        # (e.g. "foo%40bar" in the HTML for Content-ID "foo@bar")
        att_id = cid_to_id.get(raw_cid) or cid_to_id.get(unquote(raw_cid))
        if att_id:
            return f"{base_url}/{att_id}/download"
        return match.group(0)  # Keep original if not found

    # The reference ends at a quote, whitespace or ">"
    pattern = r'cid:([^"\'\s>]+)'
    return re.sub(pattern, replace_cid, html)
341
+
342
+
343
+ def extract_text_from_attachment(
344
+ content_bytes: bytes,
345
+ content_type: str,
346
+ filename: str | None = None,
347
+ ) -> str:
348
+ """
349
+ Extract plain text from attachment content.
350
+
351
+ Supports:
352
+ - Plain text files
353
+ - PDF files (requires pypdf or pdfplumber)
354
+ - CSV files
355
+ - More formats in Phase 2
356
+
357
+ Args:
358
+ content_bytes: Raw attachment content
359
+ content_type: MIME content type
360
+ filename: Optional filename for type detection
361
+
362
+ Returns:
363
+ Extracted plain text or empty string
364
+ """
365
+ # Plain text
366
+ if content_type.startswith("text/"):
367
+ try:
368
+ return content_bytes.decode("utf-8", errors="replace")
369
+ except Exception:
370
+ return ""
371
+
372
+ # CSV (treat as text)
373
+ if content_type == "text/csv" or (filename and filename.endswith(".csv")):
374
+ try:
375
+ return content_bytes.decode("utf-8", errors="replace")
376
+ except Exception:
377
+ return ""
378
+
379
+ # PDF extraction (requires optional dependency)
380
+ if content_type == "application/pdf" or (filename and filename.endswith(".pdf")):
381
+ try:
382
+ return _extract_text_from_pdf(content_bytes)
383
+ except Exception:
384
+ return ""
385
+
386
+ # Unknown type
387
+ return ""
388
+
389
+
390
def _extract_text_from_pdf(content_bytes: bytes) -> str:
    """
    Extract text from a PDF with whichever optional backend can be imported.

    pypdf is tried first, then pdfplumber. Page texts are joined with a
    blank line between pages, and a page with no text contributes an empty
    string. Returns "" when neither library is importable. Any error other
    than ImportError propagates to the caller.
    """
    from io import BytesIO

    # Preferred backend: pypdf
    try:
        from pypdf import PdfReader

        pages = PdfReader(BytesIO(content_bytes)).pages
        return "\n\n".join([page.extract_text() or "" for page in pages])
    except ImportError:
        pass

    # Fallback backend: pdfplumber
    try:
        import pdfplumber

        with pdfplumber.open(BytesIO(content_bytes)) as document:
            return "\n\n".join([page.extract_text() or "" for page in document.pages])
    except ImportError:
        pass

    return ""
421
+
422
+
423
+ def parse_attachment_info_json(
424
+ attachment_info: str | dict[str, Any] | None,
425
+ ) -> dict[str, dict[str, Any]]:
426
+ """
427
+ Parse SendGrid attachment-info JSON format.
428
+
429
+ Format:
430
+ {
431
+ "attachment1": {
432
+ "filename": "image.jpg",
433
+ "name": "image.jpg",
434
+ "type": "image/jpeg",
435
+ "content-id": "ii_abc123"
436
+ }
437
+ }
438
+
439
+ Args:
440
+ attachment_info: JSON string or dict
441
+
442
+ Returns:
443
+ Parsed attachment info dictionary
444
+ """
445
+ if not attachment_info:
446
+ return {}
447
+
448
+ if isinstance(attachment_info, str):
449
+ import json
450
+
451
+ try:
452
+ return cast("dict[str, dict[str, Any]]", json.loads(attachment_info))
453
+ except (json.JSONDecodeError, ValueError):
454
+ return {}
455
+
456
+ return cast("dict[str, dict[str, Any]]", dict(attachment_info))
457
+
458
+
459
def guess_content_type(filename: str, default: str = "application/octet-stream") -> str:
    """
    Look up the MIME type that ``mimetypes`` associates with a filename.

    Args:
        filename: The filename to check
        default: Value returned when no type can be guessed

    Returns:
        MIME content type
    """
    guessed = mimetypes.guess_type(filename)[0]
    if guessed:
        return guessed
    return default