cellarbrain 0.2.2__tar.gz → 0.2.4__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (133)
  1. {cellarbrain-0.2.2/src/cellarbrain.egg-info → cellarbrain-0.2.4}/PKG-INFO +1 -1
  2. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/pyproject.toml +1 -1
  3. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/cli.py +3 -0
  4. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/__init__.py +41 -7
  5. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/etl_runner.py +9 -5
  6. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/grouping.py +1 -0
  7. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/imap.py +26 -2
  8. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/settings.py +7 -1
  9. {cellarbrain-0.2.2 → cellarbrain-0.2.4/src/cellarbrain.egg-info}/PKG-INFO +1 -1
  10. cellarbrain-0.2.4/tests/test_email_poll.py +922 -0
  11. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_price.py +9 -9
  12. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_settings.py +35 -0
  13. cellarbrain-0.2.2/tests/test_email_poll.py +0 -343
  14. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/LICENSE +0 -0
  15. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/README.md +0 -0
  16. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/setup.cfg +0 -0
  17. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/__init__.py +0 -0
  18. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/__main__.py +0 -0
  19. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/_query_base.py +0 -0
  20. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/backup.py +0 -0
  21. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/companion_markdown.py +0 -0
  22. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/computed.py +0 -0
  23. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/__init__.py +0 -0
  24. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/app.py +0 -0
  25. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/cellar_queries.py +0 -0
  26. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/dossier_render.py +0 -0
  27. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/queries.py +0 -0
  28. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/static/dashboard.js +0 -0
  29. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/static/workbench.js +0 -0
  30. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/base.html +0 -0
  31. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/bottles.html +0 -0
  32. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/cellar.html +0 -0
  33. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/drinking.html +0 -0
  34. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/error.html +0 -0
  35. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/errors.html +0 -0
  36. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/index.html +0 -0
  37. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/latency.html +0 -0
  38. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/live.html +0 -0
  39. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/pairing.html +0 -0
  40. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/bottle_rows.html +0 -0
  41. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/dossier_section.html +0 -0
  42. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/error_detail.html +0 -0
  43. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/error_rows.html +0 -0
  44. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/event_stream.html +0 -0
  45. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/pairing_results.html +0 -0
  46. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/session_detail.html +0 -0
  47. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/sql_results.html +0 -0
  48. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/stats_content.html +0 -0
  49. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/tool_rows.html +0 -0
  50. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/turn_events.html +0 -0
  51. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/wine_rows.html +0 -0
  52. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/partials/workbench_response.html +0 -0
  53. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/sessions.html +0 -0
  54. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/sql.html +0 -0
  55. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/stats.html +0 -0
  56. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/tools.html +0 -0
  57. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/tracked.html +0 -0
  58. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/tracked_detail.html +0 -0
  59. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/wine_detail.html +0 -0
  60. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/workbench_batch.html +0 -0
  61. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/workbench_list.html +0 -0
  62. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/templates/workbench_tool.html +0 -0
  63. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dashboard/workbench.py +0 -0
  64. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/doctor.py +0 -0
  65. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/dossier_ops.py +0 -0
  66. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/credentials.py +0 -0
  67. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/email_poll/placement.py +0 -0
  68. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/flat.py +0 -0
  69. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/incremental.py +0 -0
  70. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/log.py +0 -0
  71. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/markdown.py +0 -0
  72. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/mcp_server.py +0 -0
  73. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/observability.py +0 -0
  74. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/pairing.py +0 -0
  75. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/parsers.py +0 -0
  76. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/price.py +0 -0
  77. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/query.py +0 -0
  78. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/search.py +0 -0
  79. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/slugify.py +0 -0
  80. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/__init__.py +0 -0
  81. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/catalogue.py +0 -0
  82. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/engine.py +0 -0
  83. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/index.py +0 -0
  84. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/model.py +0 -0
  85. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/schemas.py +0 -0
  86. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/text_builder.py +0 -0
  87. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/sommelier/training.py +0 -0
  88. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/transform.py +0 -0
  89. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/validate.py +0 -0
  90. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/vinocell_parsers.py +0 -0
  91. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/vinocell_reader.py +0 -0
  92. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain/writer.py +0 -0
  93. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/SOURCES.txt +0 -0
  94. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/dependency_links.txt +0 -0
  95. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/entry_points.txt +0 -0
  96. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/requires.txt +0 -0
  97. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/src/cellarbrain.egg-info/top_level.txt +0 -0
  98. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_backup.py +0 -0
  99. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_catalogue.py +0 -0
  100. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_cli.py +0 -0
  101. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_companion_markdown.py +0 -0
  102. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_computed.py +0 -0
  103. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_app.py +0 -0
  104. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_cellar.py +0 -0
  105. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_dossier.py +0 -0
  106. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_pairing.py +0 -0
  107. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_queries.py +0 -0
  108. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dashboard_workbench.py +0 -0
  109. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dataset_factory.py +0 -0
  110. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_doctor.py +0 -0
  111. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_dossier_ops.py +0 -0
  112. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_flat.py +0 -0
  113. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_incremental.py +0 -0
  114. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_integration.py +0 -0
  115. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_log.py +0 -0
  116. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_markdown.py +0 -0
  117. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_mcp_server.py +0 -0
  118. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_observability.py +0 -0
  119. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_pairing.py +0 -0
  120. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_parsers.py +0 -0
  121. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_query.py +0 -0
  122. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_reader.py +0 -0
  123. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_search.py +0 -0
  124. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier.py +0 -0
  125. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier_data.py +0 -0
  126. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier_mcp.py +0 -0
  127. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier_quality.py +0 -0
  128. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_sommelier_training.py +0 -0
  129. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_transform.py +0 -0
  130. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_validate.py +0 -0
  131. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_vinocell_parsers.py +0 -0
  132. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_vinocell_reader.py +0 -0
  133. {cellarbrain-0.2.2 → cellarbrain-0.2.4}/tests/test_writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarbrain
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports
5
5
  Author-email: Urban Busslinger <urbanb@me.com>
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cellarbrain"
7
- version = "0.2.2"
7
+ version = "0.2.4"
8
8
  description = "AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports"
9
9
  requires-python = ">=3.11"
10
10
  license = "MIT"
@@ -1349,6 +1349,9 @@ def _cmd_ingest(args: argparse.Namespace, settings: Settings) -> None:
1349
1349
 
1350
1350
  if args.once:
1351
1351
  count = poll_once(config, settings, dry_run=args.dry_run)
1352
+ if count < 0:
1353
+ print(f"Failed {-count} batch(es) (ETL error — messages left unprocessed).")
1354
+ sys.exit(1)
1352
1355
  print(f"Processed {count} batch(es).")
1353
1356
  sys.exit(0)
1354
1357
 
@@ -58,7 +58,8 @@ def poll_once(
58
58
  ) -> int:
59
59
  """Execute a single poll cycle.
60
60
 
61
- Returns the number of batches successfully processed (0 or more).
61
+ Returns the number of batches successfully processed (0 or more),
62
+ or a negative number indicating how many batches failed ETL.
62
63
  """
63
64
  from .credentials import resolve_credentials
64
65
  from .etl_runner import run_etl
@@ -95,11 +96,26 @@ def poll_once(
95
96
  logger.info("Found %d new messages", len(uids))
96
97
 
97
98
  # Fetch and parse
98
- fetched = client.fetch_messages(uids, config.expected_files)
99
+ fetched = client.fetch_messages(uids, config.expected_files, max_attachment_bytes=config.max_attachment_bytes)
99
100
  if not fetched:
100
101
  logger.info("No messages with valid attachments")
101
102
  return 0
102
103
 
104
+ # Application-level sender whitelist (defence-in-depth)
105
+ if config.sender_whitelist:
106
+ whitelist = {s.lower() for s in config.sender_whitelist}
107
+ original_count = len(fetched)
108
+ fetched = [(em, data) for em, data in fetched if em.sender in whitelist]
109
+ rejected = original_count - len(fetched)
110
+ if rejected:
111
+ logger.warning(
112
+ "Rejected %d message(s) from non-whitelisted senders",
113
+ rejected,
114
+ )
115
+ if not fetched:
116
+ logger.info("No messages from whitelisted senders")
117
+ return 0
118
+
103
119
  # Build EmailMessage list and attachment map
104
120
  messages = [em for em, _ in fetched]
105
121
  attachment_map: dict[int, tuple[str, bytes]] = {em.uid: (em.filename, data) for em, data in fetched}
@@ -111,6 +127,7 @@ def poll_once(
111
127
  return 0
112
128
 
113
129
  processed = 0
130
+ failed = 0
114
131
  for batch in batches:
115
132
  logger.info(
116
133
  "Batch detected — %s",
@@ -141,11 +158,18 @@ def poll_once(
141
158
  output_dir,
142
159
  config_path,
143
160
  expected_files=config.expected_files,
161
+ timeout=config.etl_timeout,
144
162
  )
145
163
  if exit_code != 0:
146
- logger.error("ETL failed (exit %d)", exit_code)
164
+ logger.error(
165
+ "ETL failed (exit %d) — leaving messages unprocessed (UIDs: %s)",
166
+ exit_code,
167
+ list(batch.uids),
168
+ )
169
+ failed += 1
170
+ continue
147
171
 
148
- # Mark as processed (regardless of ETL outcome)
172
+ # Mark as processed only on successful ETL
149
173
  batch_uids = list(batch.uids)
150
174
  if config.processed_action == "move":
151
175
  client.move_messages(batch_uids, config.processed_folder)
@@ -161,6 +185,8 @@ def poll_once(
161
185
 
162
186
  processed += 1
163
187
 
188
+ if failed:
189
+ return -failed
164
190
  return processed
165
191
 
166
192
 
@@ -176,7 +202,7 @@ class IngestDaemon:
176
202
  self.config = config
177
203
  self.settings = settings
178
204
  self._base_interval = config.poll_interval
179
- self._max_interval = 600 # 10 minutes
205
+ self._max_interval = config.max_backoff_interval
180
206
  self._current_interval = config.poll_interval
181
207
 
182
208
  def run(self, *, dry_run: bool = False) -> None:
@@ -195,9 +221,17 @@ class IngestDaemon:
195
221
  while True:
196
222
  try:
197
223
  count = poll_once(self.config, self.settings, dry_run=dry_run)
198
- if count > 0:
224
+ if count < 0:
225
+ logger.error("ETL failed for %d batch(es) — will retry next cycle", -count)
226
+ self._current_interval = min(
227
+ self._current_interval * 2,
228
+ self._max_interval,
229
+ )
230
+ elif count > 0:
199
231
  logger.info("Processed %d batch(es)", count)
200
- self._current_interval = self._base_interval
232
+ self._current_interval = self._base_interval
233
+ else:
234
+ self._current_interval = self._base_interval
201
235
  except ValueError:
202
236
  # Credential / config errors — fatal, stop daemon
203
237
  raise
@@ -3,14 +3,13 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
+ import os
6
7
  import subprocess
7
8
  import sys
8
9
  from pathlib import Path
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
12
- _ETL_TIMEOUT = 300 # seconds
13
-
14
13
 
15
14
  def run_etl(
16
15
  raw_dir: Path,
@@ -22,6 +21,7 @@ def run_etl(
22
21
  "export-bottles-stored.csv",
23
22
  "export-bottles-gone.csv",
24
23
  ),
24
+ timeout: int = 300,
25
25
  ) -> tuple[int, str]:
26
26
  """Run ``cellarbrain etl`` as a subprocess.
27
27
 
@@ -36,6 +36,8 @@ def run_etl(
36
36
  expected_files:
37
37
  Filenames to pass to the ETL command (in positional order:
38
38
  wines, bottles-stored, bottles-gone).
39
+ timeout:
40
+ Seconds before the ETL subprocess is killed.
39
41
 
40
42
  Returns
41
43
  -------
@@ -58,11 +60,13 @@ def run_etl(
58
60
 
59
61
  logger.info("Running ETL: %s", " ".join(cmd))
60
62
  try:
63
+ env = {**os.environ, "PYTHONUTF8": "1", "PYTHONIOENCODING": "utf-8"}
61
64
  result = subprocess.run(
62
65
  cmd,
63
66
  capture_output=True,
64
67
  text=True,
65
- timeout=_ETL_TIMEOUT,
68
+ timeout=timeout,
69
+ env=env,
66
70
  )
67
71
  output = result.stdout + result.stderr
68
72
  if result.returncode == 0:
@@ -71,5 +75,5 @@ def run_etl(
71
75
  logger.error("ETL failed (exit %d): %s", result.returncode, output)
72
76
  return result.returncode, output
73
77
  except subprocess.TimeoutExpired:
74
- logger.error("ETL timed out after %d seconds", _ETL_TIMEOUT)
75
- return -1, f"ETL timed out after {_ETL_TIMEOUT} seconds"
78
+ logger.error("ETL timed out after %d seconds", timeout)
79
+ return -1, f"ETL timed out after {timeout} seconds"
@@ -21,6 +21,7 @@ class EmailMessage:
21
21
  date: datetime
22
22
  filename: str
23
23
  size: int
24
+ sender: str = ""
24
25
 
25
26
 
26
27
  @dataclass(frozen=True)
@@ -9,6 +9,7 @@ from __future__ import annotations
9
9
 
10
10
  import email
11
11
  import email.policy
12
+ import email.utils
12
13
  import logging
13
14
  from datetime import UTC, datetime
14
15
  from types import TracebackType
@@ -91,26 +92,35 @@ class ImapClient:
91
92
  self,
92
93
  uids: list[int],
93
94
  expected_files: tuple[str, ...] | list[str],
95
+ *,
96
+ max_attachment_bytes: int = 0,
94
97
  ) -> list[tuple[EmailMessage, bytes]]:
95
98
  """Fetch messages and extract single-attachment metadata + data.
96
99
 
97
100
  Only messages with exactly one attachment whose filename is in
98
101
  *expected_files* are returned. Others are silently skipped.
99
102
 
103
+ Parameters
104
+ ----------
105
+ max_attachment_bytes:
106
+ If > 0, skip attachments exceeding this size (bytes).
107
+
100
108
  Returns list of ``(EmailMessage, attachment_bytes)`` tuples.
101
109
  """
102
110
  if not uids:
103
111
  return []
104
112
 
105
113
  results: list[tuple[EmailMessage, bytes]] = []
106
- raw_responses = self._client.fetch(uids, ["RFC822", "INTERNALDATE"])
114
+ raw_responses = self._client.fetch(uids, ["BODY.PEEK[]", "INTERNALDATE"])
107
115
 
108
116
  for uid, data in raw_responses.items():
109
117
  internal_date = data.get(b"INTERNALDATE")
110
118
  if internal_date is None:
111
119
  internal_date = datetime.now(UTC)
112
120
 
113
- rfc822 = data.get(b"RFC822", b"")
121
+ rfc822 = data.get(b"BODY[]") or data.get(b"RFC822", b"")
122
+ if not rfc822:
123
+ continue
114
124
  msg = email.message_from_bytes(rfc822, policy=email.policy.default)
115
125
 
116
126
  attachments = _extract_attachments(msg)
@@ -121,11 +131,25 @@ class ImapClient:
121
131
  if filename not in expected_files:
122
132
  continue
123
133
 
134
+ if max_attachment_bytes and len(payload) > max_attachment_bytes:
135
+ logger.warning(
136
+ "Attachment %s (%d bytes) exceeds limit — skipping UID %d",
137
+ filename,
138
+ len(payload),
139
+ uid,
140
+ )
141
+ continue
142
+
143
+ # Extract sender from From: header
144
+ from_header = msg.get("From", "")
145
+ _, sender_addr = email.utils.parseaddr(from_header)
146
+
124
147
  em = EmailMessage(
125
148
  uid=int(uid),
126
149
  date=internal_date,
127
150
  filename=filename,
128
151
  size=len(payload),
152
+ sender=sender_addr.lower(),
129
153
  )
130
154
  results.append((em, payload))
131
155
 
@@ -242,6 +242,7 @@ class IngestConfig:
242
242
  mailbox: str = "INBOX"
243
243
  subject_filter: str = "[VinoCell] CSV file"
244
244
  sender_filter: str = ""
245
+ sender_whitelist: tuple[str, ...] = ()
245
246
  poll_interval: int = 60
246
247
  batch_window: int = 300
247
248
  expected_files: tuple[str, ...] = (
@@ -251,6 +252,9 @@ class IngestConfig:
251
252
  )
252
253
  processed_action: str = "flag"
253
254
  processed_folder: str = "VinoCell/Processed"
255
+ etl_timeout: int = 300
256
+ max_backoff_interval: int = 600
257
+ max_attachment_bytes: int = 10_485_760
254
258
 
255
259
 
256
260
  # ---------------------------------------------------------------------------
@@ -919,12 +923,14 @@ def load_settings(
919
923
  _validate_keys("dashboard", dashboard_raw, DashboardConfig)
920
924
  dashboard = DashboardConfig(**dashboard_raw) if dashboard_raw else DashboardConfig()
921
925
 
922
- # Ingest — scalar config with tuple conversion for expected_files
926
+ # Ingest — scalar config with tuple conversion for expected_files/sender_whitelist
923
927
  ingest_raw = raw.get("ingest", {})
924
928
  if ingest_raw:
925
929
  ingest_kw: dict = dict(ingest_raw)
926
930
  if "expected_files" in ingest_kw:
927
931
  ingest_kw["expected_files"] = tuple(ingest_kw["expected_files"])
932
+ if "sender_whitelist" in ingest_kw:
933
+ ingest_kw["sender_whitelist"] = tuple(ingest_kw["sender_whitelist"])
928
934
  _validate_keys("ingest", ingest_kw, IngestConfig)
929
935
  ingest = IngestConfig(**ingest_kw)
930
936
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarbrain
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports
5
5
  Author-email: Urban Busslinger <urbanb@me.com>
6
6
  License-Expression: MIT