cellarbrain 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cellarbrain-0.2.2/src/cellarbrain.egg-info → cellarbrain-0.2.4}/PKG-INFO +1 -1
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/pyproject.toml +1 -1
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/cli.py +3 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/__init__.py +41 -7
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/etl_runner.py +9 -5
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/grouping.py +1 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/imap.py +26 -2
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/settings.py +7 -1
- {cellarbrain-0.2.2 → cellarbrain-0.2.4/src/cellarbrain.egg-info}/PKG-INFO +1 -1
- cellarbrain-0.2.4/tests/test_email_poll.py +922 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_price.py +9 -9
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_settings.py +35 -0
- cellarbrain-0.2.2/tests/test_email_poll.py +0 -343
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/LICENSE +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/README.md +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/setup.cfg +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/__init__.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/__main__.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/_query_base.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/backup.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/companion_markdown.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/computed.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/__init__.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/app.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/cellar_queries.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/dossier_render.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/queries.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/static/dashboard.js +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/static/workbench.js +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/base.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/bottles.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/cellar.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/drinking.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/error.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/errors.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/index.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/latency.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/live.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/pairing.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/bottle_rows.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/dossier_section.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/error_detail.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/error_rows.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/event_stream.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/pairing_results.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/session_detail.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/sql_results.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/stats_content.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/tool_rows.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/turn_events.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/wine_rows.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/workbench_response.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/sessions.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/sql.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/stats.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/tools.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/tracked.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/tracked_detail.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/wine_detail.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/workbench_batch.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/workbench_list.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/workbench_tool.html +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/workbench.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/doctor.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dossier_ops.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/credentials.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/placement.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/flat.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/incremental.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/log.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/markdown.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/mcp_server.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/observability.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/pairing.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/parsers.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/price.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/query.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/search.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/slugify.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/__init__.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/catalogue.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/engine.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/index.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/model.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/schemas.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/text_builder.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/training.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/transform.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/validate.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/vinocell_parsers.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/vinocell_reader.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/writer.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/SOURCES.txt +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/dependency_links.txt +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/entry_points.txt +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/requires.txt +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/top_level.txt +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_backup.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_catalogue.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_cli.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_companion_markdown.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_computed.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_app.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_cellar.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_dossier.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_pairing.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_queries.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_workbench.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dataset_factory.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_doctor.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dossier_ops.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_flat.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_incremental.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_integration.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_log.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_markdown.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_mcp_server.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_observability.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_pairing.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_parsers.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_query.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_reader.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_search.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier_data.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier_mcp.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier_quality.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier_training.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_transform.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_validate.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_vinocell_parsers.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_vinocell_reader.py +0 -0
- {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_writer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarbrain
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports
|
|
5
5
|
Author-email: Urban Busslinger <urbanb@me.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "cellarbrain"
|
|
7
|
-
version = "0.2.2"
|
|
7
|
+
version = "0.2.4"
|
|
8
8
|
description = "AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports"
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
license = "MIT"
|
|
@@ -1349,6 +1349,9 @@ def _cmd_ingest(args: argparse.Namespace, settings: Settings) -> None:
|
|
|
1349
1349
|
|
|
1350
1350
|
if args.once:
|
|
1351
1351
|
count = poll_once(config, settings, dry_run=args.dry_run)
|
|
1352
|
+
if count < 0:
|
|
1353
|
+
print(f"Failed {-count} batch(es) (ETL error — messages left unprocessed).")
|
|
1354
|
+
sys.exit(1)
|
|
1352
1355
|
print(f"Processed {count} batch(es).")
|
|
1353
1356
|
sys.exit(0)
|
|
1354
1357
|
|
|
@@ -58,7 +58,8 @@ def poll_once(
|
|
|
58
58
|
) -> int:
|
|
59
59
|
"""Execute a single poll cycle.
|
|
60
60
|
|
|
61
|
-
Returns the number of batches successfully processed (0 or more)
|
|
61
|
+
Returns the number of batches successfully processed (0 or more),
|
|
62
|
+
or a negative number indicating how many batches failed ETL.
|
|
62
63
|
"""
|
|
63
64
|
from .credentials import resolve_credentials
|
|
64
65
|
from .etl_runner import run_etl
|
|
@@ -95,11 +96,26 @@ def poll_once(
|
|
|
95
96
|
logger.info("Found %d new messages", len(uids))
|
|
96
97
|
|
|
97
98
|
# Fetch and parse
|
|
98
|
-
fetched = client.fetch_messages(uids, config.expected_files)
|
|
99
|
+
fetched = client.fetch_messages(uids, config.expected_files, max_attachment_bytes=config.max_attachment_bytes)
|
|
99
100
|
if not fetched:
|
|
100
101
|
logger.info("No messages with valid attachments")
|
|
101
102
|
return 0
|
|
102
103
|
|
|
104
|
+
# Application-level sender whitelist (defence-in-depth)
|
|
105
|
+
if config.sender_whitelist:
|
|
106
|
+
whitelist = {s.lower() for s in config.sender_whitelist}
|
|
107
|
+
original_count = len(fetched)
|
|
108
|
+
fetched = [(em, data) for em, data in fetched if em.sender in whitelist]
|
|
109
|
+
rejected = original_count - len(fetched)
|
|
110
|
+
if rejected:
|
|
111
|
+
logger.warning(
|
|
112
|
+
"Rejected %d message(s) from non-whitelisted senders",
|
|
113
|
+
rejected,
|
|
114
|
+
)
|
|
115
|
+
if not fetched:
|
|
116
|
+
logger.info("No messages from whitelisted senders")
|
|
117
|
+
return 0
|
|
118
|
+
|
|
103
119
|
# Build EmailMessage list and attachment map
|
|
104
120
|
messages = [em for em, _ in fetched]
|
|
105
121
|
attachment_map: dict[int, tuple[str, bytes]] = {em.uid: (em.filename, data) for em, data in fetched}
|
|
@@ -111,6 +127,7 @@ def poll_once(
|
|
|
111
127
|
return 0
|
|
112
128
|
|
|
113
129
|
processed = 0
|
|
130
|
+
failed = 0
|
|
114
131
|
for batch in batches:
|
|
115
132
|
logger.info(
|
|
116
133
|
"Batch detected — %s",
|
|
@@ -141,11 +158,18 @@ def poll_once(
|
|
|
141
158
|
output_dir,
|
|
142
159
|
config_path,
|
|
143
160
|
expected_files=config.expected_files,
|
|
161
|
+
timeout=config.etl_timeout,
|
|
144
162
|
)
|
|
145
163
|
if exit_code != 0:
|
|
146
|
-
logger.error(
|
|
164
|
+
logger.error(
|
|
165
|
+
"ETL failed (exit %d) — leaving messages unprocessed (UIDs: %s)",
|
|
166
|
+
exit_code,
|
|
167
|
+
list(batch.uids),
|
|
168
|
+
)
|
|
169
|
+
failed += 1
|
|
170
|
+
continue
|
|
147
171
|
|
|
148
|
-
# Mark as processed
|
|
172
|
+
# Mark as processed only on successful ETL
|
|
149
173
|
batch_uids = list(batch.uids)
|
|
150
174
|
if config.processed_action == "move":
|
|
151
175
|
client.move_messages(batch_uids, config.processed_folder)
|
|
@@ -161,6 +185,8 @@ def poll_once(
|
|
|
161
185
|
|
|
162
186
|
processed += 1
|
|
163
187
|
|
|
188
|
+
if failed:
|
|
189
|
+
return -failed
|
|
164
190
|
return processed
|
|
165
191
|
|
|
166
192
|
|
|
@@ -176,7 +202,7 @@ class IngestDaemon:
|
|
|
176
202
|
self.config = config
|
|
177
203
|
self.settings = settings
|
|
178
204
|
self._base_interval = config.poll_interval
|
|
179
|
-
self._max_interval =
|
|
205
|
+
self._max_interval = config.max_backoff_interval
|
|
180
206
|
self._current_interval = config.poll_interval
|
|
181
207
|
|
|
182
208
|
def run(self, *, dry_run: bool = False) -> None:
|
|
@@ -195,9 +221,17 @@ class IngestDaemon:
|
|
|
195
221
|
while True:
|
|
196
222
|
try:
|
|
197
223
|
count = poll_once(self.config, self.settings, dry_run=dry_run)
|
|
198
|
-
if count
|
|
224
|
+
if count < 0:
|
|
225
|
+
logger.error("ETL failed for %d batch(es) — will retry next cycle", -count)
|
|
226
|
+
self._current_interval = min(
|
|
227
|
+
self._current_interval * 2,
|
|
228
|
+
self._max_interval,
|
|
229
|
+
)
|
|
230
|
+
elif count > 0:
|
|
199
231
|
logger.info("Processed %d batch(es)", count)
|
|
200
|
-
|
|
232
|
+
self._current_interval = self._base_interval
|
|
233
|
+
else:
|
|
234
|
+
self._current_interval = self._base_interval
|
|
201
235
|
except ValueError:
|
|
202
236
|
# Credential / config errors — fatal, stop daemon
|
|
203
237
|
raise
|
|
@@ -3,14 +3,13 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
+
import os
|
|
6
7
|
import subprocess
|
|
7
8
|
import sys
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger(__name__)
|
|
11
12
|
|
|
12
|
-
_ETL_TIMEOUT = 300 # seconds
|
|
13
|
-
|
|
14
13
|
|
|
15
14
|
def run_etl(
|
|
16
15
|
raw_dir: Path,
|
|
@@ -22,6 +21,7 @@ def run_etl(
|
|
|
22
21
|
"export-bottles-stored.csv",
|
|
23
22
|
"export-bottles-gone.csv",
|
|
24
23
|
),
|
|
24
|
+
timeout: int = 300,
|
|
25
25
|
) -> tuple[int, str]:
|
|
26
26
|
"""Run ``cellarbrain etl`` as a subprocess.
|
|
27
27
|
|
|
@@ -36,6 +36,8 @@ def run_etl(
|
|
|
36
36
|
expected_files:
|
|
37
37
|
Filenames to pass to the ETL command (in positional order:
|
|
38
38
|
wines, bottles-stored, bottles-gone).
|
|
39
|
+
timeout:
|
|
40
|
+
Seconds before the ETL subprocess is killed.
|
|
39
41
|
|
|
40
42
|
Returns
|
|
41
43
|
-------
|
|
@@ -58,11 +60,13 @@ def run_etl(
|
|
|
58
60
|
|
|
59
61
|
logger.info("Running ETL: %s", " ".join(cmd))
|
|
60
62
|
try:
|
|
63
|
+
env = {**os.environ, "PYTHONUTF8": "1", "PYTHONIOENCODING": "utf-8"}
|
|
61
64
|
result = subprocess.run(
|
|
62
65
|
cmd,
|
|
63
66
|
capture_output=True,
|
|
64
67
|
text=True,
|
|
65
|
-
timeout=_ETL_TIMEOUT,
|
|
68
|
+
timeout=timeout,
|
|
69
|
+
env=env,
|
|
66
70
|
)
|
|
67
71
|
output = result.stdout + result.stderr
|
|
68
72
|
if result.returncode == 0:
|
|
@@ -71,5 +75,5 @@ def run_etl(
|
|
|
71
75
|
logger.error("ETL failed (exit %d): %s", result.returncode, output)
|
|
72
76
|
return result.returncode, output
|
|
73
77
|
except subprocess.TimeoutExpired:
|
|
74
|
-
logger.error("ETL timed out after %d seconds", _ETL_TIMEOUT)
|
|
75
|
-
return -1, f"ETL timed out after {_ETL_TIMEOUT} seconds"
|
|
78
|
+
logger.error("ETL timed out after %d seconds", timeout)
|
|
79
|
+
return -1, f"ETL timed out after {timeout} seconds"
|
|
@@ -9,6 +9,7 @@ from __future__ import annotations
|
|
|
9
9
|
|
|
10
10
|
import email
|
|
11
11
|
import email.policy
|
|
12
|
+
import email.utils
|
|
12
13
|
import logging
|
|
13
14
|
from datetime import UTC, datetime
|
|
14
15
|
from types import TracebackType
|
|
@@ -91,26 +92,35 @@ class ImapClient:
|
|
|
91
92
|
self,
|
|
92
93
|
uids: list[int],
|
|
93
94
|
expected_files: tuple[str, ...] | list[str],
|
|
95
|
+
*,
|
|
96
|
+
max_attachment_bytes: int = 0,
|
|
94
97
|
) -> list[tuple[EmailMessage, bytes]]:
|
|
95
98
|
"""Fetch messages and extract single-attachment metadata + data.
|
|
96
99
|
|
|
97
100
|
Only messages with exactly one attachment whose filename is in
|
|
98
101
|
*expected_files* are returned. Others are silently skipped.
|
|
99
102
|
|
|
103
|
+
Parameters
|
|
104
|
+
----------
|
|
105
|
+
max_attachment_bytes:
|
|
106
|
+
If > 0, skip attachments exceeding this size (bytes).
|
|
107
|
+
|
|
100
108
|
Returns list of ``(EmailMessage, attachment_bytes)`` tuples.
|
|
101
109
|
"""
|
|
102
110
|
if not uids:
|
|
103
111
|
return []
|
|
104
112
|
|
|
105
113
|
results: list[tuple[EmailMessage, bytes]] = []
|
|
106
|
-
raw_responses = self._client.fetch(uids, ["RFC822", "INTERNALDATE"])
|
|
114
|
+
raw_responses = self._client.fetch(uids, ["BODY.PEEK[]", "INTERNALDATE"])
|
|
107
115
|
|
|
108
116
|
for uid, data in raw_responses.items():
|
|
109
117
|
internal_date = data.get(b"INTERNALDATE")
|
|
110
118
|
if internal_date is None:
|
|
111
119
|
internal_date = datetime.now(UTC)
|
|
112
120
|
|
|
113
|
-
rfc822 = data.get(b"RFC822", b"")
|
|
121
|
+
rfc822 = data.get(b"BODY[]") or data.get(b"RFC822", b"")
|
|
122
|
+
if not rfc822:
|
|
123
|
+
continue
|
|
114
124
|
msg = email.message_from_bytes(rfc822, policy=email.policy.default)
|
|
115
125
|
|
|
116
126
|
attachments = _extract_attachments(msg)
|
|
@@ -121,11 +131,25 @@ class ImapClient:
|
|
|
121
131
|
if filename not in expected_files:
|
|
122
132
|
continue
|
|
123
133
|
|
|
134
|
+
if max_attachment_bytes and len(payload) > max_attachment_bytes:
|
|
135
|
+
logger.warning(
|
|
136
|
+
"Attachment %s (%d bytes) exceeds limit — skipping UID %d",
|
|
137
|
+
filename,
|
|
138
|
+
len(payload),
|
|
139
|
+
uid,
|
|
140
|
+
)
|
|
141
|
+
continue
|
|
142
|
+
|
|
143
|
+
# Extract sender from From: header
|
|
144
|
+
from_header = msg.get("From", "")
|
|
145
|
+
_, sender_addr = email.utils.parseaddr(from_header)
|
|
146
|
+
|
|
124
147
|
em = EmailMessage(
|
|
125
148
|
uid=int(uid),
|
|
126
149
|
date=internal_date,
|
|
127
150
|
filename=filename,
|
|
128
151
|
size=len(payload),
|
|
152
|
+
sender=sender_addr.lower(),
|
|
129
153
|
)
|
|
130
154
|
results.append((em, payload))
|
|
131
155
|
|
|
@@ -242,6 +242,7 @@ class IngestConfig:
|
|
|
242
242
|
mailbox: str = "INBOX"
|
|
243
243
|
subject_filter: str = "[VinoCell] CSV file"
|
|
244
244
|
sender_filter: str = ""
|
|
245
|
+
sender_whitelist: tuple[str, ...] = ()
|
|
245
246
|
poll_interval: int = 60
|
|
246
247
|
batch_window: int = 300
|
|
247
248
|
expected_files: tuple[str, ...] = (
|
|
@@ -251,6 +252,9 @@ class IngestConfig:
|
|
|
251
252
|
)
|
|
252
253
|
processed_action: str = "flag"
|
|
253
254
|
processed_folder: str = "VinoCell/Processed"
|
|
255
|
+
etl_timeout: int = 300
|
|
256
|
+
max_backoff_interval: int = 600
|
|
257
|
+
max_attachment_bytes: int = 10_485_760
|
|
254
258
|
|
|
255
259
|
|
|
256
260
|
# ---------------------------------------------------------------------------
|
|
@@ -919,12 +923,14 @@ def load_settings(
|
|
|
919
923
|
_validate_keys("dashboard", dashboard_raw, DashboardConfig)
|
|
920
924
|
dashboard = DashboardConfig(**dashboard_raw) if dashboard_raw else DashboardConfig()
|
|
921
925
|
|
|
922
|
-
# Ingest — scalar config with tuple conversion for expected_files
|
|
926
|
+
# Ingest — scalar config with tuple conversion for expected_files/sender_whitelist
|
|
923
927
|
ingest_raw = raw.get("ingest", {})
|
|
924
928
|
if ingest_raw:
|
|
925
929
|
ingest_kw: dict = dict(ingest_raw)
|
|
926
930
|
if "expected_files" in ingest_kw:
|
|
927
931
|
ingest_kw["expected_files"] = tuple(ingest_kw["expected_files"])
|
|
932
|
+
if "sender_whitelist" in ingest_kw:
|
|
933
|
+
ingest_kw["sender_whitelist"] = tuple(ingest_kw["sender_whitelist"])
|
|
928
934
|
_validate_keys("ingest", ingest_kw, IngestConfig)
|
|
929
935
|
ingest = IngestConfig(**ingest_kw)
|
|
930
936
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarbrain
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports
|
|
5
5
|
Author-email: Urban Busslinger <urbanb@me.com>
|
|
6
6
|
License-Expression: MIT
|