cellarbrain 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. cellarbrain-0.2.0/LICENSE +21 -0
  2. cellarbrain-0.2.0/PKG-INFO +343 -0
  3. cellarbrain-0.2.0/README.md +300 -0
  4. cellarbrain-0.2.0/pyproject.toml +115 -0
  5. cellarbrain-0.2.0/setup.cfg +4 -0
  6. cellarbrain-0.2.0/src/cellarbrain/__init__.py +1 -0
  7. cellarbrain-0.2.0/src/cellarbrain/__main__.py +5 -0
  8. cellarbrain-0.2.0/src/cellarbrain/_query_base.py +16 -0
  9. cellarbrain-0.2.0/src/cellarbrain/backup.py +143 -0
  10. cellarbrain-0.2.0/src/cellarbrain/cli.py +1559 -0
  11. cellarbrain-0.2.0/src/cellarbrain/companion_markdown.py +295 -0
  12. cellarbrain-0.2.0/src/cellarbrain/computed.py +582 -0
  13. cellarbrain-0.2.0/src/cellarbrain/dashboard/__init__.py +38 -0
  14. cellarbrain-0.2.0/src/cellarbrain/dashboard/app.py +1004 -0
  15. cellarbrain-0.2.0/src/cellarbrain/dashboard/cellar_queries.py +509 -0
  16. cellarbrain-0.2.0/src/cellarbrain/dashboard/dossier_render.py +64 -0
  17. cellarbrain-0.2.0/src/cellarbrain/dashboard/queries.py +443 -0
  18. cellarbrain-0.2.0/src/cellarbrain/dashboard/workbench.py +337 -0
  19. cellarbrain-0.2.0/src/cellarbrain/doctor.py +449 -0
  20. cellarbrain-0.2.0/src/cellarbrain/dossier_ops.py +996 -0
  21. cellarbrain-0.2.0/src/cellarbrain/email_poll/__init__.py +215 -0
  22. cellarbrain-0.2.0/src/cellarbrain/email_poll/credentials.py +86 -0
  23. cellarbrain-0.2.0/src/cellarbrain/email_poll/etl_runner.py +75 -0
  24. cellarbrain-0.2.0/src/cellarbrain/email_poll/grouping.py +109 -0
  25. cellarbrain-0.2.0/src/cellarbrain/email_poll/imap.py +165 -0
  26. cellarbrain-0.2.0/src/cellarbrain/email_poll/placement.py +74 -0
  27. cellarbrain-0.2.0/src/cellarbrain/flat.py +316 -0
  28. cellarbrain-0.2.0/src/cellarbrain/incremental.py +1076 -0
  29. cellarbrain-0.2.0/src/cellarbrain/log.py +94 -0
  30. cellarbrain-0.2.0/src/cellarbrain/markdown.py +1048 -0
  31. cellarbrain-0.2.0/src/cellarbrain/mcp_server.py +2097 -0
  32. cellarbrain-0.2.0/src/cellarbrain/observability.py +242 -0
  33. cellarbrain-0.2.0/src/cellarbrain/parsers.py +211 -0
  34. cellarbrain-0.2.0/src/cellarbrain/price.py +460 -0
  35. cellarbrain-0.2.0/src/cellarbrain/query.py +1089 -0
  36. cellarbrain-0.2.0/src/cellarbrain/search.py +577 -0
  37. cellarbrain-0.2.0/src/cellarbrain/settings.py +989 -0
  38. cellarbrain-0.2.0/src/cellarbrain/slugify.py +66 -0
  39. cellarbrain-0.2.0/src/cellarbrain/sommelier/__init__.py +14 -0
  40. cellarbrain-0.2.0/src/cellarbrain/sommelier/catalogue.py +774 -0
  41. cellarbrain-0.2.0/src/cellarbrain/sommelier/engine.py +168 -0
  42. cellarbrain-0.2.0/src/cellarbrain/sommelier/index.py +111 -0
  43. cellarbrain-0.2.0/src/cellarbrain/sommelier/model.py +28 -0
  44. cellarbrain-0.2.0/src/cellarbrain/sommelier/schemas.py +72 -0
  45. cellarbrain-0.2.0/src/cellarbrain/sommelier/text_builder.py +154 -0
  46. cellarbrain-0.2.0/src/cellarbrain/sommelier/training.py +141 -0
  47. cellarbrain-0.2.0/src/cellarbrain/transform.py +846 -0
  48. cellarbrain-0.2.0/src/cellarbrain/validate.py +372 -0
  49. cellarbrain-0.2.0/src/cellarbrain/vinocell_parsers.py +212 -0
  50. cellarbrain-0.2.0/src/cellarbrain/vinocell_reader.py +249 -0
  51. cellarbrain-0.2.0/src/cellarbrain/writer.py +429 -0
  52. cellarbrain-0.2.0/src/cellarbrain.egg-info/PKG-INFO +343 -0
  53. cellarbrain-0.2.0/src/cellarbrain.egg-info/SOURCES.txt +92 -0
  54. cellarbrain-0.2.0/src/cellarbrain.egg-info/dependency_links.txt +1 -0
  55. cellarbrain-0.2.0/src/cellarbrain.egg-info/entry_points.txt +2 -0
  56. cellarbrain-0.2.0/src/cellarbrain.egg-info/requires.txt +30 -0
  57. cellarbrain-0.2.0/src/cellarbrain.egg-info/top_level.txt +1 -0
  58. cellarbrain-0.2.0/tests/test_backup.py +383 -0
  59. cellarbrain-0.2.0/tests/test_catalogue.py +762 -0
  60. cellarbrain-0.2.0/tests/test_cli.py +691 -0
  61. cellarbrain-0.2.0/tests/test_companion_markdown.py +598 -0
  62. cellarbrain-0.2.0/tests/test_computed.py +1229 -0
  63. cellarbrain-0.2.0/tests/test_dashboard_app.py +773 -0
  64. cellarbrain-0.2.0/tests/test_dashboard_cellar.py +467 -0
  65. cellarbrain-0.2.0/tests/test_dashboard_dossier.py +95 -0
  66. cellarbrain-0.2.0/tests/test_dashboard_queries.py +421 -0
  67. cellarbrain-0.2.0/tests/test_dashboard_workbench.py +177 -0
  68. cellarbrain-0.2.0/tests/test_dataset_factory.py +127 -0
  69. cellarbrain-0.2.0/tests/test_doctor.py +721 -0
  70. cellarbrain-0.2.0/tests/test_dossier_ops.py +1110 -0
  71. cellarbrain-0.2.0/tests/test_email_poll.py +343 -0
  72. cellarbrain-0.2.0/tests/test_flat.py +372 -0
  73. cellarbrain-0.2.0/tests/test_incremental.py +2350 -0
  74. cellarbrain-0.2.0/tests/test_integration.py +331 -0
  75. cellarbrain-0.2.0/tests/test_log.py +168 -0
  76. cellarbrain-0.2.0/tests/test_markdown.py +1182 -0
  77. cellarbrain-0.2.0/tests/test_mcp_server.py +1210 -0
  78. cellarbrain-0.2.0/tests/test_observability.py +213 -0
  79. cellarbrain-0.2.0/tests/test_parsers.py +189 -0
  80. cellarbrain-0.2.0/tests/test_price.py +548 -0
  81. cellarbrain-0.2.0/tests/test_query.py +1427 -0
  82. cellarbrain-0.2.0/tests/test_reader.py +217 -0
  83. cellarbrain-0.2.0/tests/test_search.py +2800 -0
  84. cellarbrain-0.2.0/tests/test_settings.py +1152 -0
  85. cellarbrain-0.2.0/tests/test_sommelier.py +333 -0
  86. cellarbrain-0.2.0/tests/test_sommelier_data.py +307 -0
  87. cellarbrain-0.2.0/tests/test_sommelier_mcp.py +516 -0
  88. cellarbrain-0.2.0/tests/test_sommelier_quality.py +121 -0
  89. cellarbrain-0.2.0/tests/test_sommelier_training.py +862 -0
  90. cellarbrain-0.2.0/tests/test_transform.py +1017 -0
  91. cellarbrain-0.2.0/tests/test_validate.py +357 -0
  92. cellarbrain-0.2.0/tests/test_vinocell_parsers.py +199 -0
  93. cellarbrain-0.2.0/tests/test_vinocell_reader.py +217 -0
  94. cellarbrain-0.2.0/tests/test_writer.py +334 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Urban Busslinger
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,343 @@
1
+ Metadata-Version: 2.4
2
+ Name: cellarbrain
3
+ Version: 0.2.0
4
+ Summary: AI sommelier for your wine cellar — ETL pipeline, DuckDB query layer, Markdown dossiers, and MCP server for wine cellar CSV exports
5
+ Author-email: Urban Busslinger <urbanb@me.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/urban-buss/cellarbrain
8
+ Project-URL: Issues, https://github.com/urban-buss/cellarbrain/issues
9
+ Project-URL: Changelog, https://github.com/urban-buss/cellarbrain/blob/main/CHANGELOG.md
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Topic :: Database
13
+ Classifier: Environment :: Console
14
+ Requires-Python: >=3.11
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: pyarrow>=15.0
18
+ Requires-Dist: duckdb>=0.10
19
+ Requires-Dist: pandas>=2.0
20
+ Requires-Dist: tabulate>=0.9
21
+ Requires-Dist: mcp[cli]>=1.2.0
22
+ Provides-Extra: research
23
+ Requires-Dist: httpx>=0.27; extra == "research"
24
+ Provides-Extra: sommelier
25
+ Requires-Dist: sentence-transformers>=3.0; extra == "sommelier"
26
+ Requires-Dist: faiss-cpu>=1.7; extra == "sommelier"
27
+ Requires-Dist: datasets>=2.0; extra == "sommelier"
28
+ Requires-Dist: accelerate>=1.1.0; extra == "sommelier"
29
+ Provides-Extra: dashboard
30
+ Requires-Dist: starlette>=0.36; extra == "dashboard"
31
+ Requires-Dist: uvicorn[standard]>=0.27; extra == "dashboard"
32
+ Requires-Dist: jinja2>=3.1; extra == "dashboard"
33
+ Requires-Dist: markdown>=3.5; extra == "dashboard"
34
+ Requires-Dist: pyyaml>=6.0; extra == "dashboard"
35
+ Requires-Dist: pytz>=2024.1; extra == "dashboard"
36
+ Provides-Extra: ingest
37
+ Requires-Dist: imapclient>=3.0; extra == "ingest"
38
+ Requires-Dist: keyring>=25.0; extra == "ingest"
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest; extra == "dev"
41
+ Requires-Dist: ruff>=0.8; extra == "dev"
42
+ Dynamic: license-file
43
+
44
+ # Cellarbrain
45
+
46
+ AI sommelier for your wine cellar. Transforms
47
+ [Vinocell](https://www.vinocell.com/) CSV exports into normalised Parquet
48
+ tables, per-wine Markdown dossiers, and an in-process DuckDB query layer
49
+ that AI agents can use via the [Model Context Protocol](https://modelcontextprotocol.io/).
50
+
51
+ ## Quick start
52
+
53
+ ```bash
54
+ # Clone and install (Python 3.11+)
55
+ git clone https://github.com/urban-buss/cellarbrain.git
56
+ cd cellarbrain
57
+ python -m venv .venv
58
+
59
+ # Windows
60
+ .venv\Scripts\activate
61
+ # macOS / Linux
62
+ source .venv/bin/activate
63
+
64
+ pip install -e .
65
+ ```
66
+
67
+ ## Usage
68
+
69
+ ### 1. Run the ETL pipeline
70
+
71
+ Export your cellar from Vinocell (File → Export → CSV) and place the files
72
+ in `raw/`:
73
+
74
+ ```
75
+ raw/
76
+ ├── export-wines.csv
77
+ ├── export-bottles-stored.csv
78
+ └── export-bottles-gone.csv # optional
79
+ ```
80
+
81
+ Then run:
82
+
83
+ ```bash
84
+ # Full load (first time)
85
+ cellarbrain etl raw/export-wines.csv raw/export-bottles-stored.csv raw/export-bottles-gone.csv -o output
86
+
87
+ # Incremental sync (subsequent runs — detects changes, preserves IDs)
88
+ cellarbrain etl raw/export-wines.csv raw/export-bottles-stored.csv raw/export-bottles-gone.csv -o output --sync
89
+ ```
90
+
91
+ Output goes to `output/` — 12 Parquet entity files and per-wine Markdown
92
+ dossiers under `output/wines/`.
93
+
94
+ ### 2. Query your cellar
95
+
96
+ ```bash
97
+ # SQL query (DuckDB syntax)
98
+ cellarbrain query "SELECT w.name, wy.name AS winery, w.vintage FROM wine w JOIN winery wy ON w.winery_id = wy.winery_id LIMIT 10"
99
+
100
+ # Output as CSV or JSON
101
+ cellarbrain query "SELECT * FROM wine LIMIT 5" --format csv
102
+ cellarbrain query "SELECT * FROM bottle WHERE status = 'stored'" --format json
103
+
104
+ # SQL from a file
105
+ cellarbrain query -f my_query.sql
106
+ ```
107
+
108
+ ### 3. Cellar statistics
109
+
110
+ ```bash
111
+ cellarbrain stats # Overall summary
112
+ cellarbrain stats --by country # Grouped by country
113
+ cellarbrain stats --by grape # Grouped by grape variety
114
+ # Also: region, category, vintage, winery, cellar, provider, status
115
+ ```
116
+
117
+ ### 4. Wine dossiers
118
+
119
+ ```bash
120
+ cellarbrain dossier 42 # Read dossier for wine #42
121
+ cellarbrain dossier --search Barolo # Search wines by name, grape, region…
122
+ cellarbrain dossier --pending # Wines with pending agent research
123
+ ```
124
+
125
+ ### 5. Validate output
126
+
127
+ ```bash
128
+ cellarbrain validate # Check Parquet integrity
129
+ ```
130
+
131
+ ### 6. Start the MCP server
132
+
133
+ ```bash
134
+ cellarbrain mcp # stdio transport (default)
135
+ cellarbrain mcp --transport sse # SSE transport for HTTP clients
136
+ ```
137
+
138
+ All subcommands accept `-d <path>` to point at a different data directory
139
+ (default: `output`).
140
+
141
+ > **Legacy mode:** The old `cellarbrain <wines.csv> <bottles.csv>` syntax still
142
+ > works but emits a deprecation warning.
143
+
144
+ ---
145
+
146
+ ## Web Explorer
147
+
148
+ A local web dashboard for browsing your cellar, observability data, and running queries interactively.
149
+
150
+ ```bash
151
+ cellarbrain dashboard # opens at http://localhost:8017
152
+ ```
153
+
154
+ Pages: overview, tool usage, errors, sessions, latency charts, live tail (SSE), cellar browser, bottles, drinking window, tracked wines, SQL playground, statistics, and workbench. Requires a prior ETL run and MCP log store.
155
+
156
+ ---
157
+
158
+ ## MCP server
159
+
160
+ The MCP server exposes 7 read/write tools for AI agents (Claude, OpenClaw,
161
+ Copilot, etc.). Tools are **thin data primitives** — all reasoning stays in
162
+ the agent.
163
+
164
+ ### Tools
165
+
166
+ | Tool | Description |
167
+ |---|---|
168
+ | `query_cellar` | Run read-only SQL against the cellar (DuckDB over Parquet) |
169
+ | `cellar_stats` | Summary statistics, optionally grouped by 9 dimensions |
170
+ | `find_wine` | Text search across name, winery, region, grape, vintage |
171
+ | `read_dossier` | Read a wine's full Markdown dossier |
172
+ | `update_dossier` | Write to agent-owned dossier sections (ETL sections protected) |
173
+ | `reload_data` | Re-run the ETL pipeline in-process |
174
+ | `pending_research` | List wines with empty agent sections, sorted by priority |
175
+
176
+ ### Resources
177
+
178
+ | URI | Description |
179
+ |---|---|
180
+ | `wine://list` | All wines with basic metadata |
181
+ | `wine://cellar` | Wines currently in the cellar |
182
+ | `wine://favorites` | Favorite wines |
183
+ | `wine://{wine_id}` | Full dossier for a specific wine |
184
+ | `cellar://stats` | Current cellar statistics |
185
+ | `cellar://drinking-now` | Wines in their optimal drinking window |
186
+ | `etl://last-run` | Last ETL run metadata |
187
+ | `etl://changes` | Change log from the last ETL run |
188
+
189
+ ### Prompts
190
+
191
+ | Prompt | Description |
192
+ |---|---|
193
+ | `cellar_qa` | System prompt for cellar Q&A (embeds live stats) |
194
+ | `food_pairing` | Food pairing workflow for a given dish |
195
+ | `wine_research` | Deep research workflow for a single wine |
196
+ | `batch_research` | Batch research across pending wines |
197
+
198
+ ### Configure with Claude Desktop
199
+
200
+ Add to `claude_desktop_config.json`:
201
+
202
+ ```json
203
+ {
204
+ "mcpServers": {
205
+ "cellarbrain": {
206
+ "command": "cellarbrain",
207
+ "args": ["mcp"],
208
+ "env": {}
209
+ }
210
+ }
211
+ }
212
+ ```
213
+
214
+ If cellarbrain is installed in a virtualenv, use the full path:
215
+
216
+ ```json
217
+ {
218
+ "mcpServers": {
219
+ "cellarbrain": {
220
+ "command": "/path/to/cellarbrain/.venv/bin/cellarbrain",
221
+ "args": ["mcp"],
222
+ "env": {}
223
+ }
224
+ }
225
+ }
226
+ ```
227
+
228
+ To point at a different data directory:
229
+
230
+ ```json
231
+ {
232
+ "mcpServers": {
233
+ "cellarbrain": {
234
+ "command": "cellarbrain",
235
+ "args": ["-d", "/path/to/output", "mcp"],
236
+ "env": {}
237
+ }
238
+ }
239
+ }
240
+ ```
241
+
242
+ ### Configure with VS Code (Copilot)
243
+
244
+ Add to `.vscode/mcp.json` in your workspace:
245
+
246
+ ```json
247
+ {
248
+ "servers": {
249
+ "cellarbrain": {
250
+ "command": "cellarbrain",
251
+ "args": ["mcp"],
252
+ "env": {}
253
+ }
254
+ }
255
+ }
256
+ ```
257
+
258
+ Or with a virtualenv on Windows:
259
+
260
+ ```json
261
+ {
262
+ "servers": {
263
+ "cellarbrain": {
264
+ "command": ".venv\\Scripts\\cellarbrain.exe",
265
+ "args": ["-d", "output", "mcp"],
266
+ "env": {}
267
+ }
268
+ }
269
+ }
270
+ ```
271
+
272
+ ### Configure with OpenClaw
273
+
274
+ See [`.docs/design/openclaw-skill.md`](.docs/design/openclaw-skill.md) for
275
+ the full skill integration design.
276
+
277
+ ---
278
+
279
+ ## Data model
280
+
281
+ The ETL produces 12 normalised Parquet tables:
282
+
283
+ | Table | Description |
284
+ |---|---|
285
+ | `wine` | Central wine catalog with all attributes |
286
+ | `bottle` | Individual bottles (stored + consumed) |
287
+ | `winery` | Producer lookup |
288
+ | `appellation` | Country / region / subregion / classification |
289
+ | `grape` | Grape variety lookup |
290
+ | `wine_grape` | Wine–grape junction with blend percentages |
291
+ | `tasting` | Personal tasting notes and scores |
292
+ | `pro_rating` | Professional critic scores |
293
+ | `cellar` | Physical storage locations |
294
+ | `provider` | Retailers / sources |
295
+ | `etl_run` | Pipeline run history |
296
+ | `change_log` | Row-level insert / update / delete audit trail |
297
+
298
+ Plus per-wine Markdown dossiers in `output/wines/` with:
299
+ - ETL-owned sections (identity, origin, inventory, tastings, etc.)
300
+ - Agent-owned sections (producer profile, vintage report, food pairings, etc.)
301
+ - YAML frontmatter tracking which agent sections are populated vs pending
302
+
303
+ See [`.docs/data-model/source/`](.docs/data-model/source/) and
304
+ [`.docs/data-model/target/`](.docs/data-model/target/) for detailed field documentation.
305
+
306
+ ---
307
+
308
+ ## Project structure
309
+
310
+ ```
311
+ src/cellarbrain/
312
+ ├── cli.py # CLI entry point with subcommands
313
+ ├── vinocell_reader.py # CSV readers
314
+ ├── parsers.py # Field-level parsers
315
+ ├── transform.py # Normalisation and entity building
316
+ ├── writer.py # Parquet writer with Arrow schemas
317
+ ├── validate.py # Post-ETL validation
318
+ ├── incremental.py # Change detection and sync
319
+ ├── markdown.py # Dossier generation with agent section preservation
320
+ ├── query.py # DuckDB query layer (stats, search, SQL)
321
+ ├── dossier_ops.py # Dossier read/write/pending operations
322
+ └── mcp_server.py # FastMCP server (7 tools, 8 resources, 4 prompts)
323
+ ```
324
+
325
+ ## Development
326
+
327
+ ```bash
328
+ # Install with test + research dependencies
329
+ pip install -e ".[dev,research]"
330
+
331
+ # Run tests
332
+ pytest tests/ -v
333
+
334
+ # Run only unit tests (fast, no CSV files needed)
335
+ pytest tests/ -v --ignore=tests/test_integration.py
336
+
337
+ # Run integration tests (requires raw/*.csv files)
338
+ pytest tests/test_integration.py -v
339
+ ```
340
+
341
+ ## License
342
+
343
+ MIT — see the [LICENSE](LICENSE) file for the full text.
@@ -0,0 +1,300 @@
1
+ # Cellarbrain
2
+
3
+ AI sommelier for your wine cellar. Transforms
4
+ [Vinocell](https://www.vinocell.com/) CSV exports into normalised Parquet
5
+ tables, per-wine Markdown dossiers, and an in-process DuckDB query layer
6
+ that AI agents can use via the [Model Context Protocol](https://modelcontextprotocol.io/).
7
+
8
+ ## Quick start
9
+
10
+ ```bash
11
+ # Clone and install (Python 3.11+)
12
+ git clone https://github.com/urban-buss/cellarbrain.git
13
+ cd cellarbrain
14
+ python -m venv .venv
15
+
16
+ # Windows
17
+ .venv\Scripts\activate
18
+ # macOS / Linux
19
+ source .venv/bin/activate
20
+
21
+ pip install -e .
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ### 1. Run the ETL pipeline
27
+
28
+ Export your cellar from Vinocell (File → Export → CSV) and place the files
29
+ in `raw/`:
30
+
31
+ ```
32
+ raw/
33
+ ├── export-wines.csv
34
+ ├── export-bottles-stored.csv
35
+ └── export-bottles-gone.csv # optional
36
+ ```
37
+
38
+ Then run:
39
+
40
+ ```bash
41
+ # Full load (first time)
42
+ cellarbrain etl raw/export-wines.csv raw/export-bottles-stored.csv raw/export-bottles-gone.csv -o output
43
+
44
+ # Incremental sync (subsequent runs — detects changes, preserves IDs)
45
+ cellarbrain etl raw/export-wines.csv raw/export-bottles-stored.csv raw/export-bottles-gone.csv -o output --sync
46
+ ```
47
+
48
+ Output goes to `output/` — 12 Parquet entity files and per-wine Markdown
49
+ dossiers under `output/wines/`.
50
+
51
+ ### 2. Query your cellar
52
+
53
+ ```bash
54
+ # SQL query (DuckDB syntax)
55
+ cellarbrain query "SELECT w.name, wy.name AS winery, w.vintage FROM wine w JOIN winery wy ON w.winery_id = wy.winery_id LIMIT 10"
56
+
57
+ # Output as CSV or JSON
58
+ cellarbrain query "SELECT * FROM wine LIMIT 5" --format csv
59
+ cellarbrain query "SELECT * FROM bottle WHERE status = 'stored'" --format json
60
+
61
+ # SQL from a file
62
+ cellarbrain query -f my_query.sql
63
+ ```
64
+
65
+ ### 3. Cellar statistics
66
+
67
+ ```bash
68
+ cellarbrain stats # Overall summary
69
+ cellarbrain stats --by country # Grouped by country
70
+ cellarbrain stats --by grape # Grouped by grape variety
71
+ # Also: region, category, vintage, winery, cellar, provider, status
72
+ ```
73
+
74
+ ### 4. Wine dossiers
75
+
76
+ ```bash
77
+ cellarbrain dossier 42 # Read dossier for wine #42
78
+ cellarbrain dossier --search Barolo # Search wines by name, grape, region…
79
+ cellarbrain dossier --pending # Wines with pending agent research
80
+ ```
81
+
82
+ ### 5. Validate output
83
+
84
+ ```bash
85
+ cellarbrain validate # Check Parquet integrity
86
+ ```
87
+
88
+ ### 6. Start the MCP server
89
+
90
+ ```bash
91
+ cellarbrain mcp # stdio transport (default)
92
+ cellarbrain mcp --transport sse # SSE transport for HTTP clients
93
+ ```
94
+
95
+ All subcommands accept `-d <path>` to point at a different data directory
96
+ (default: `output`).
97
+
98
+ > **Legacy mode:** The old `cellarbrain <wines.csv> <bottles.csv>` syntax still
99
+ > works but emits a deprecation warning.
100
+
101
+ ---
102
+
103
+ ## Web Explorer
104
+
105
+ A local web dashboard for browsing your cellar, observability data, and running queries interactively.
106
+
107
+ ```bash
108
+ cellarbrain dashboard # opens at http://localhost:8017
109
+ ```
110
+
111
+ Pages: overview, tool usage, errors, sessions, latency charts, live tail (SSE), cellar browser, bottles, drinking window, tracked wines, SQL playground, statistics, and workbench. Requires a prior ETL run and MCP log store.
112
+
113
+ ---
114
+
115
+ ## MCP server
116
+
117
+ The MCP server exposes 7 read/write tools for AI agents (Claude, OpenClaw,
118
+ Copilot, etc.). Tools are **thin data primitives** — all reasoning stays in
119
+ the agent.
120
+
121
+ ### Tools
122
+
123
+ | Tool | Description |
124
+ |---|---|
125
+ | `query_cellar` | Run read-only SQL against the cellar (DuckDB over Parquet) |
126
+ | `cellar_stats` | Summary statistics, optionally grouped by 9 dimensions |
127
+ | `find_wine` | Text search across name, winery, region, grape, vintage |
128
+ | `read_dossier` | Read a wine's full Markdown dossier |
129
+ | `update_dossier` | Write to agent-owned dossier sections (ETL sections protected) |
130
+ | `reload_data` | Re-run the ETL pipeline in-process |
131
+ | `pending_research` | List wines with empty agent sections, sorted by priority |
132
+
133
+ ### Resources
134
+
135
+ | URI | Description |
136
+ |---|---|
137
+ | `wine://list` | All wines with basic metadata |
138
+ | `wine://cellar` | Wines currently in the cellar |
139
+ | `wine://favorites` | Favorite wines |
140
+ | `wine://{wine_id}` | Full dossier for a specific wine |
141
+ | `cellar://stats` | Current cellar statistics |
142
+ | `cellar://drinking-now` | Wines in their optimal drinking window |
143
+ | `etl://last-run` | Last ETL run metadata |
144
+ | `etl://changes` | Change log from the last ETL run |
145
+
146
+ ### Prompts
147
+
148
+ | Prompt | Description |
149
+ |---|---|
150
+ | `cellar_qa` | System prompt for cellar Q&A (embeds live stats) |
151
+ | `food_pairing` | Food pairing workflow for a given dish |
152
+ | `wine_research` | Deep research workflow for a single wine |
153
+ | `batch_research` | Batch research across pending wines |
154
+
155
+ ### Configure with Claude Desktop
156
+
157
+ Add to `claude_desktop_config.json`:
158
+
159
+ ```json
160
+ {
161
+ "mcpServers": {
162
+ "cellarbrain": {
163
+ "command": "cellarbrain",
164
+ "args": ["mcp"],
165
+ "env": {}
166
+ }
167
+ }
168
+ }
169
+ ```
170
+
171
+ If cellarbrain is installed in a virtualenv, use the full path:
172
+
173
+ ```json
174
+ {
175
+ "mcpServers": {
176
+ "cellarbrain": {
177
+ "command": "/path/to/cellarbrain/.venv/bin/cellarbrain",
178
+ "args": ["mcp"],
179
+ "env": {}
180
+ }
181
+ }
182
+ }
183
+ ```
184
+
185
+ To point at a different data directory:
186
+
187
+ ```json
188
+ {
189
+ "mcpServers": {
190
+ "cellarbrain": {
191
+ "command": "cellarbrain",
192
+ "args": ["-d", "/path/to/output", "mcp"],
193
+ "env": {}
194
+ }
195
+ }
196
+ }
197
+ ```
198
+
199
+ ### Configure with VS Code (Copilot)
200
+
201
+ Add to `.vscode/mcp.json` in your workspace:
202
+
203
+ ```json
204
+ {
205
+ "servers": {
206
+ "cellarbrain": {
207
+ "command": "cellarbrain",
208
+ "args": ["mcp"],
209
+ "env": {}
210
+ }
211
+ }
212
+ }
213
+ ```
214
+
215
+ Or with a virtualenv on Windows:
216
+
217
+ ```json
218
+ {
219
+ "servers": {
220
+ "cellarbrain": {
221
+ "command": ".venv\\Scripts\\cellarbrain.exe",
222
+ "args": ["-d", "output", "mcp"],
223
+ "env": {}
224
+ }
225
+ }
226
+ }
227
+ ```
228
+
229
+ ### Configure with OpenClaw
230
+
231
+ See [`.docs/design/openclaw-skill.md`](.docs/design/openclaw-skill.md) for
232
+ the full skill integration design.
233
+
234
+ ---
235
+
236
+ ## Data model
237
+
238
+ The ETL produces 12 normalised Parquet tables:
239
+
240
+ | Table | Description |
241
+ |---|---|
242
+ | `wine` | Central wine catalog with all attributes |
243
+ | `bottle` | Individual bottles (stored + consumed) |
244
+ | `winery` | Producer lookup |
245
+ | `appellation` | Country / region / subregion / classification |
246
+ | `grape` | Grape variety lookup |
247
+ | `wine_grape` | Wine–grape junction with blend percentages |
248
+ | `tasting` | Personal tasting notes and scores |
249
+ | `pro_rating` | Professional critic scores |
250
+ | `cellar` | Physical storage locations |
251
+ | `provider` | Retailers / sources |
252
+ | `etl_run` | Pipeline run history |
253
+ | `change_log` | Row-level insert / update / delete audit trail |
254
+
255
+ Plus per-wine Markdown dossiers in `output/wines/` with:
256
+ - ETL-owned sections (identity, origin, inventory, tastings, etc.)
257
+ - Agent-owned sections (producer profile, vintage report, food pairings, etc.)
258
+ - YAML frontmatter tracking which agent sections are populated vs pending
259
+
260
+ See [`.docs/data-model/source/`](.docs/data-model/source/) and
261
+ [`.docs/data-model/target/`](.docs/data-model/target/) for detailed field documentation.
262
+
263
+ ---
264
+
265
+ ## Project structure
266
+
267
+ ```
268
+ src/cellarbrain/
269
+ ├── cli.py # CLI entry point with subcommands
270
+ ├── vinocell_reader.py # CSV readers
271
+ ├── parsers.py # Field-level parsers
272
+ ├── transform.py # Normalisation and entity building
273
+ ├── writer.py # Parquet writer with Arrow schemas
274
+ ├── validate.py # Post-ETL validation
275
+ ├── incremental.py # Change detection and sync
276
+ ├── markdown.py # Dossier generation with agent section preservation
277
+ ├── query.py # DuckDB query layer (stats, search, SQL)
278
+ ├── dossier_ops.py # Dossier read/write/pending operations
279
+ └── mcp_server.py # FastMCP server (7 tools, 8 resources, 4 prompts)
280
+ ```
281
+
282
+ ## Development
283
+
284
+ ```bash
285
+ # Install with test + research dependencies
286
+ pip install -e ".[dev,research]"
287
+
288
+ # Run tests
289
+ pytest tests/ -v
290
+
291
+ # Run only unit tests (fast, no CSV files needed)
292
+ pytest tests/ -v --ignore=tests/test_integration.py
293
+
294
+ # Run integration tests (requires raw/*.csv files)
295
+ pytest tests/test_integration.py -v
296
+ ```
297
+
298
+ ## License
299
+
300
+ MIT — see the [LICENSE](LICENSE) file for the full text.