PyPI - cellarbrain - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

cellarbrain 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (133) hide show

{cellarbrain-0.2.2/src/cellarbrain.egg-info → cellarbrain-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cellarbrain
-Version: 0.2.2
+Version: 0.2.4
 Summary: AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports
 Author-email: Urban Busslinger <urbanb@me.com>
 License-Expression: MIT

{cellarbrain-0.2.2 → cellarbrain-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "cellarbrain"
-version = "0.2.2"
+version = "0.2.4"
 description = "AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports"
 requires-python = ">=3.11"
 license = "MIT"

{cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/cli.py RENAMED Viewed

@@ -1349,6 +1349,9 @@ def _cmd_ingest(args: argparse.Namespace, settings: Settings) -> None:
     if args.once:
         count = poll_once(config, settings, dry_run=args.dry_run)
+        if count < 0:
+            print(f"Failed {-count} batch(es) (ETL error — messages left unprocessed).")
+            sys.exit(1)
         print(f"Processed {count} batch(es).")
         sys.exit(0)

{cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/__init__.py RENAMED Viewed

@@ -58,7 +58,8 @@ def poll_once(
 ) -> int:
     """Execute a single poll cycle.
-    Returns the number of batches successfully processed (0 or more).
+    Returns the number of batches successfully processed (0 or more),
+    or a negative number indicating how many batches failed ETL.
     """
     from .credentials import resolve_credentials
     from .etl_runner import run_etl
@@ -95,11 +96,26 @@ def poll_once(
         logger.info("Found %d new messages", len(uids))
         # Fetch and parse
-        fetched = client.fetch_messages(uids, config.expected_files)
+        fetched = client.fetch_messages(uids, config.expected_files, max_attachment_bytes=config.max_attachment_bytes)
         if not fetched:
             logger.info("No messages with valid attachments")
             return 0
+        # Application-level sender whitelist (defence-in-depth)
+        if config.sender_whitelist:
+            whitelist = {s.lower() for s in config.sender_whitelist}
+            original_count = len(fetched)
+            fetched = [(em, data) for em, data in fetched if em.sender in whitelist]
+            rejected = original_count - len(fetched)
+            if rejected:
+                logger.warning(
+                    "Rejected %d message(s) from non-whitelisted senders",
+                    rejected,
+                )
+            if not fetched:
+                logger.info("No messages from whitelisted senders")
+                return 0
         # Build EmailMessage list and attachment map
         messages = [em for em, _ in fetched]
         attachment_map: dict[int, tuple[str, bytes]] = {em.uid: (em.filename, data) for em, data in fetched}
@@ -111,6 +127,7 @@ def poll_once(
             return 0
         processed = 0
+        failed = 0
         for batch in batches:
             logger.info(
                 "Batch detected — %s",
@@ -141,11 +158,18 @@ def poll_once(
                 output_dir,
                 config_path,
                 expected_files=config.expected_files,
+                timeout=config.etl_timeout,
             )
             if exit_code != 0:
-                logger.error("ETL failed (exit %d)", exit_code)
+                logger.error(
+                    "ETL failed (exit %d) — leaving messages unprocessed (UIDs: %s)",
+                    exit_code,
+                    list(batch.uids),
+                )
+                failed += 1
+                continue
-            # Mark as processed (regardless of ETL outcome)
+            # Mark as processed only on successful ETL
             batch_uids = list(batch.uids)
             if config.processed_action == "move":
                 client.move_messages(batch_uids, config.processed_folder)
@@ -161,6 +185,8 @@ def poll_once(
             processed += 1
+    if failed:
+        return -failed
     return processed
@@ -176,7 +202,7 @@ class IngestDaemon:
         self.config = config
         self.settings = settings
         self._base_interval = config.poll_interval
-        self._max_interval = 600  # 10 minutes
+        self._max_interval = config.max_backoff_interval
         self._current_interval = config.poll_interval
     def run(self, *, dry_run: bool = False) -> None:
@@ -195,9 +221,17 @@ class IngestDaemon:
         while True:
             try:
                 count = poll_once(self.config, self.settings, dry_run=dry_run)
-                if count > 0:
+                if count < 0:
+                    logger.error("ETL failed for %d batch(es) — will retry next cycle", -count)
+                    self._current_interval = min(
+                        self._current_interval * 2,
+                        self._max_interval,
+                    )
+                elif count > 0:
                     logger.info("Processed %d batch(es)", count)
-                self._current_interval = self._base_interval
+                    self._current_interval = self._base_interval
+                else:
+                    self._current_interval = self._base_interval
             except ValueError:
                 # Credential / config errors — fatal, stop daemon
                 raise

{cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/etl_runner.py RENAMED Viewed

@@ -3,14 +3,13 @@
 from __future__ import annotations
 import logging
+import os
 import subprocess
 import sys
 from pathlib import Path
 logger = logging.getLogger(__name__)
-_ETL_TIMEOUT = 300  # seconds
 def run_etl(
     raw_dir: Path,
@@ -22,6 +21,7 @@ def run_etl(
         "export-bottles-stored.csv",
         "export-bottles-gone.csv",
     ),
+    timeout: int = 300,
 ) -> tuple[int, str]:
     """Run ``cellarbrain etl`` as a subprocess.
@@ -36,6 +36,8 @@ def run_etl(
     expected_files:
         Filenames to pass to the ETL command (in positional order:
         wines, bottles-stored, bottles-gone).
+    timeout:
+        Seconds before the ETL subprocess is killed.
     Returns
     -------
@@ -58,11 +60,13 @@ def run_etl(
     logger.info("Running ETL: %s", " ".join(cmd))
     try:
+        env = {**os.environ, "PYTHONUTF8": "1", "PYTHONIOENCODING": "utf-8"}
         result = subprocess.run(
             cmd,
             capture_output=True,
             text=True,
-            timeout=_ETL_TIMEOUT,
+            timeout=timeout,
+            env=env,
         )
         output = result.stdout + result.stderr
         if result.returncode == 0:
@@ -71,5 +75,5 @@ def run_etl(
             logger.error("ETL failed (exit %d): %s", result.returncode, output)
         return result.returncode, output
     except subprocess.TimeoutExpired:
-        logger.error("ETL timed out after %d seconds", _ETL_TIMEOUT)
-        return -1, f"ETL timed out after {_ETL_TIMEOUT} seconds"
+        logger.error("ETL timed out after %d seconds", timeout)
+        return -1, f"ETL timed out after {timeout} seconds"

{cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/grouping.py RENAMED Viewed

@@ -21,6 +21,7 @@ class EmailMessage:
     date: datetime
     filename: str
     size: int
+    sender: str = ""
 @dataclass(frozen=True)

{cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/imap.py RENAMED Viewed

@@ -9,6 +9,7 @@ from __future__ import annotations
 import email
 import email.policy
+import email.utils
 import logging
 from datetime import UTC, datetime
 from types import TracebackType
@@ -91,26 +92,35 @@ class ImapClient:
         self,
         uids: list[int],
         expected_files: tuple[str, ...] | list[str],
+        *,
+        max_attachment_bytes: int = 0,
     ) -> list[tuple[EmailMessage, bytes]]:
         """Fetch messages and extract single-attachment metadata + data.
         Only messages with exactly one attachment whose filename is in
         *expected_files* are returned.  Others are silently skipped.
+        Parameters
+        ----------
+        max_attachment_bytes:
+            If > 0, skip attachments exceeding this size (bytes).
         Returns list of ``(EmailMessage, attachment_bytes)`` tuples.
         """
         if not uids:
             return []
         results: list[tuple[EmailMessage, bytes]] = []
-        raw_responses = self._client.fetch(uids, ["RFC822", "INTERNALDATE"])
+        raw_responses = self._client.fetch(uids, ["BODY.PEEK[]", "INTERNALDATE"])
         for uid, data in raw_responses.items():
             internal_date = data.get(b"INTERNALDATE")
             if internal_date is None:
                 internal_date = datetime.now(UTC)
-            rfc822 = data.get(b"RFC822", b"")
+            rfc822 = data.get(b"BODY[]") or data.get(b"RFC822", b"")
+            if not rfc822:
+                continue
             msg = email.message_from_bytes(rfc822, policy=email.policy.default)
             attachments = _extract_attachments(msg)
@@ -121,11 +131,25 @@ class ImapClient:
             if filename not in expected_files:
                 continue
+            if max_attachment_bytes and len(payload) > max_attachment_bytes:
+                logger.warning(
+                    "Attachment %s (%d bytes) exceeds limit — skipping UID %d",
+                    filename,
+                    len(payload),
+                    uid,
+                )
+                continue
+            # Extract sender from From: header
+            from_header = msg.get("From", "")
+            _, sender_addr = email.utils.parseaddr(from_header)
             em = EmailMessage(
                 uid=int(uid),
                 date=internal_date,
                 filename=filename,
                 size=len(payload),
+                sender=sender_addr.lower(),
             )
             results.append((em, payload))

{cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/settings.py RENAMED Viewed

@@ -242,6 +242,7 @@ class IngestConfig:
     mailbox: str = "INBOX"
     subject_filter: str = "[VinoCell] CSV file"
     sender_filter: str = ""
+    sender_whitelist: tuple[str, ...] = ()
     poll_interval: int = 60
     batch_window: int = 300
     expected_files: tuple[str, ...] = (
@@ -251,6 +252,9 @@ class IngestConfig:
     )
     processed_action: str = "flag"
     processed_folder: str = "VinoCell/Processed"
+    etl_timeout: int = 300
+    max_backoff_interval: int = 600
+    max_attachment_bytes: int = 10_485_760
 # ---------------------------------------------------------------------------
@@ -919,12 +923,14 @@ def load_settings(
         _validate_keys("dashboard", dashboard_raw, DashboardConfig)
     dashboard = DashboardConfig(**dashboard_raw) if dashboard_raw else DashboardConfig()
-    # Ingest — scalar config with tuple conversion for expected_files
+    # Ingest — scalar config with tuple conversion for expected_files/sender_whitelist
     ingest_raw = raw.get("ingest", {})
     if ingest_raw:
         ingest_kw: dict = dict(ingest_raw)
         if "expected_files" in ingest_kw:
             ingest_kw["expected_files"] = tuple(ingest_kw["expected_files"])
+        if "sender_whitelist" in ingest_kw:
+            ingest_kw["sender_whitelist"] = tuple(ingest_kw["sender_whitelist"])
         _validate_keys("ingest", ingest_kw, IngestConfig)
         ingest = IngestConfig(**ingest_kw)
     else:

{cellarbrain-0.2.2 → cellarbrain-0.2.4/src/cellarbrain.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cellarbrain
-Version: 0.2.2
+Version: 0.2.4
 Summary: AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports
 Author-email: Urban Busslinger <urbanb@me.com>
 License-Expression: MIT

cellarbrain 0.2.2__tar.gz → 0.2.4__tar.gz

cellarbrain 0.2.2__tar.gz → 0.2.4__tar.gz