abbacus-cortex 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. abbacus_cortex-0.2.1.dist-info/METADATA +215 -0
  2. abbacus_cortex-0.2.1.dist-info/RECORD +65 -0
  3. abbacus_cortex-0.2.1.dist-info/WHEEL +4 -0
  4. abbacus_cortex-0.2.1.dist-info/entry_points.txt +3 -0
  5. cortex/__init__.py +3 -0
  6. cortex/__main__.py +5 -0
  7. cortex/cli/__init__.py +0 -0
  8. cortex/cli/backup.py +317 -0
  9. cortex/cli/install.py +392 -0
  10. cortex/cli/main.py +2150 -0
  11. cortex/core/__init__.py +13 -0
  12. cortex/core/config.py +182 -0
  13. cortex/core/constants.py +63 -0
  14. cortex/core/errors.py +237 -0
  15. cortex/core/logging.py +184 -0
  16. cortex/dashboard/__init__.py +0 -0
  17. cortex/dashboard/server.py +344 -0
  18. cortex/dashboard/static/css/style.css +293 -0
  19. cortex/dashboard/static/js/graph.js +155 -0
  20. cortex/dashboard/templates/base.html +34 -0
  21. cortex/dashboard/templates/create.html +39 -0
  22. cortex/dashboard/templates/detail.html +63 -0
  23. cortex/dashboard/templates/documents.html +49 -0
  24. cortex/dashboard/templates/entities.html +40 -0
  25. cortex/dashboard/templates/error.html +14 -0
  26. cortex/dashboard/templates/graph.html +46 -0
  27. cortex/dashboard/templates/home.html +54 -0
  28. cortex/dashboard/templates/login.html +26 -0
  29. cortex/dashboard/templates/settings.html +35 -0
  30. cortex/dashboard/templates/trail.html +27 -0
  31. cortex/db/__init__.py +5 -0
  32. cortex/db/content_store.py +452 -0
  33. cortex/db/graph_store.py +1005 -0
  34. cortex/db/store.py +242 -0
  35. cortex/ontology/__init__.py +5 -0
  36. cortex/ontology/cortex.ttl +428 -0
  37. cortex/ontology/namespaces.py +84 -0
  38. cortex/ontology/resolver.py +35 -0
  39. cortex/pipeline/__init__.py +0 -0
  40. cortex/pipeline/advanced_reason.py +315 -0
  41. cortex/pipeline/enrich.py +118 -0
  42. cortex/pipeline/importer.py +552 -0
  43. cortex/pipeline/link.py +149 -0
  44. cortex/pipeline/normalize.py +134 -0
  45. cortex/pipeline/orchestrator.py +184 -0
  46. cortex/pipeline/reason.py +197 -0
  47. cortex/pipeline/templates.py +104 -0
  48. cortex/pipeline/temporal.py +145 -0
  49. cortex/py.typed +0 -0
  50. cortex/retrieval/__init__.py +0 -0
  51. cortex/retrieval/engine.py +248 -0
  52. cortex/retrieval/graph.py +269 -0
  53. cortex/retrieval/learner.py +142 -0
  54. cortex/retrieval/presenters.py +403 -0
  55. cortex/services/__init__.py +0 -0
  56. cortex/services/embeddings.py +206 -0
  57. cortex/services/llm.py +267 -0
  58. cortex/tools/__init__.py +0 -0
  59. cortex/transport/__init__.py +0 -0
  60. cortex/transport/api/__init__.py +0 -0
  61. cortex/transport/api/server.py +429 -0
  62. cortex/transport/mcp/__init__.py +0 -0
  63. cortex/transport/mcp/__main__.py +6 -0
  64. cortex/transport/mcp/client.py +484 -0
  65. cortex/transport/mcp/server.py +776 -0
@@ -0,0 +1,215 @@
1
+ Metadata-Version: 2.3
2
+ Name: abbacus-cortex
3
+ Version: 0.2.1
4
+ Summary: Cognitive knowledge system with formal ontology, reasoning, and intelligence serving
5
+ Author: Fabrizzio Silveira
6
+ Author-email: Fabrizzio Silveira <74255714+grayisnotacolor@users.noreply.github.com>
7
+ Requires-Dist: pyoxigraph>=0.4
8
+ Requires-Dist: aiosqlite>=0.20
9
+ Requires-Dist: fastapi>=0.115
10
+ Requires-Dist: uvicorn[standard]>=0.34
11
+ Requires-Dist: typer>=0.15
12
+ Requires-Dist: jinja2>=3.1
13
+ Requires-Dist: python-dotenv>=1.1
14
+ Requires-Dist: bcrypt>=4.2
15
+ Requires-Dist: mcp>=1.6
16
+ Requires-Dist: httpx>=0.28
17
+ Requires-Dist: sentence-transformers>=3.4 ; extra == 'embeddings'
18
+ Requires-Dist: litellm>=1.60 ; extra == 'llm'
19
+ Requires-Python: >=3.12
20
+ Provides-Extra: embeddings
21
+ Provides-Extra: llm
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Cortex
25
+
26
+ Cognitive knowledge system with formal ontology, reasoning, and intelligence serving.
27
+
28
+ Cortex captures knowledge objects (decisions, lessons, fixes, sessions, research, ideas), classifies them with an OWL-RL ontology, discovers relationships, reasons over the graph, and serves intelligence through hybrid retrieval.
29
+
30
+ ## Install
31
+
32
+ ```bash
33
+ # Full install (semantic + keyword search)
34
+ pip install abbacus-cortex[embeddings]
35
+
36
+ # Lightweight (keyword search only, no PyTorch)
37
+ pip install abbacus-cortex
38
+
39
+ # From source
40
+ git clone https://github.com/abbacusgroup/Cortex.git
41
+ cd Cortex
42
+ uv sync --extra embeddings
43
+ ```
44
+
45
+ ## Quick Start
46
+
47
+ ```bash
48
+ # 1. Initialize — creates ~/.cortex/, loads ontology, warms up embedding model
49
+ cortex init
50
+
51
+ # 2. Install background services (auto-start on login)
52
+ cortex install
53
+
54
+ # 3. Register with Claude Code
55
+ cortex register
56
+
57
+ # 4. Use
58
+ cortex capture "Fix: Neo4j pool exhaustion" --type fix --content "Root cause was..."
59
+ cortex search "Neo4j"
60
+ cortex list
61
+ cortex context "Neo4j"
62
+ cortex dashboard # web UI at http://localhost:1315
63
+ ```
64
+
65
+ ## Configuration
66
+
67
+ Set via environment variables (prefix `CORTEX_`) or `.env` file:
68
+
69
+ ```env
70
+ CORTEX_DATA_DIR=~/.cortex
71
+ CORTEX_LLM_MODEL=claude-sonnet-4-20250514
72
+ CORTEX_LLM_API_KEY=sk-...
73
+ CORTEX_DASHBOARD_PASSWORD= # set via `cortex setup`
74
+ CORTEX_EMBEDDING_MODEL=all-mpnet-base-v2
75
+ ```
76
+
77
+ See `.env.example` for all options.
78
+
79
+ ## CLI Commands
80
+
81
+ | Command | Description |
82
+ |---------|-------------|
83
+ | `cortex init` | Initialize data directory and stores |
84
+ | `cortex setup` | Interactive setup wizard |
85
+ | `cortex install` | Install background services (macOS/Linux) |
86
+ | `cortex uninstall` | Remove background services |
87
+ | `cortex register` | Register MCP server with Claude Code |
88
+ | `cortex capture` | Capture a knowledge object |
89
+ | `cortex search` | Hybrid keyword + semantic search |
90
+ | `cortex read` | Read object in full |
91
+ | `cortex list` | List objects with filters |
92
+ | `cortex status` | Health and counts |
93
+ | `cortex context` | Briefing mode (summaries) |
94
+ | `cortex dossier` | Entity-centric intelligence brief |
95
+ | `cortex graph` | Show object relationships |
96
+ | `cortex synthesize` | Cross-document synthesis |
97
+ | `cortex entities` | List resolved entities |
98
+ | `cortex serve` | Start MCP or HTTP server |
99
+ | `cortex dashboard` | Start web dashboard |
100
+ | `cortex import-v1` | Import from Cortex v1 database |
101
+ | `cortex import-vault` | Import from Obsidian vault |
102
+
103
+ ## MCP Tools
104
+
105
+ 22 tools for AI agent integration. Localhost-bound HTTP exposes all; non-localhost binds expose only the public set.
106
+
107
+ **Public**: `cortex_search`, `cortex_context`, `cortex_dossier`, `cortex_read`, `cortex_capture`, `cortex_link`, `cortex_feedback`, `cortex_graph`, `cortex_list`, `cortex_classify`, `cortex_pipeline`
108
+
109
+ **Admin** (localhost only): `cortex_status`, `cortex_synthesize`, `cortex_delete`, `cortex_reason`, `cortex_query_trail`, `cortex_graph_data`, `cortex_list_entities`, `cortex_export`, `cortex_safety_check`, `cortex_debug_sessions`, `cortex_debug_memory`
110
+
111
+ ## Architecture
112
+
113
+ Cortex runs as a single **MCP HTTP server** that owns the graph store. Claude Code, the dashboard, the CLI, and the REST API are all HTTP clients of that one server.
114
+
115
+ ```
116
+ ┌───────────────┐ ┌────────────┐ ┌─────────────┐
117
+ │ Claude Code │ │ Dashboard │ │ CLI │
118
+ │ (MCP client) │ │ (browser) │ │ (terminal) │
119
+ └───────┬───────┘ └─────┬──────┘ └──────┬──────┘
120
+ │ │ │
121
+ │ HTTP JSON-RPC │ HTTP MCP │ HTTP MCP (default)
122
+ │ │ │ direct (--direct)
123
+ ▼ ▼ ▼
124
+ ┌──────────────────────────────────────┐
125
+ │ cortex serve --transport mcp-http │
126
+ │ (canonical MCP HTTP server) │
127
+ │ PID-locked owner of graph.db │
128
+ └──────────────────────────────────────┘
129
+
130
+
131
+ ┌─────────────────────────────┐
132
+ │ ~/.cortex/ │
133
+ │ graph.db (Oxigraph) │
134
+ │ cortex.db (SQLite WAL) │
135
+ └─────────────────────────────┘
136
+ ```
137
+
138
+ - **Ontology**: OWL-RL formal ontology with 8 knowledge types and 8 relationship types
139
+ - **Storage**: Oxigraph (RDF/SPARQL) + SQLite (FTS5/BM25) dual-write
140
+ - **Pipeline**: Classify → Extract entities → Link → Enrich → Reason
141
+ - **Retrieval**: Hybrid keyword + semantic + graph-boosted ranking
142
+ - **Serving**: 5 presentation modes (briefing, dossier, document, synthesis, alert)
143
+ - **Transports**: MCP (stdio + HTTP), REST API, Web Dashboard
144
+
145
+ ## Service Management
146
+
147
+ ```bash
148
+ # Install both MCP server and dashboard as background services
149
+ cortex install
150
+
151
+ # Install only the MCP server
152
+ cortex install --service mcp
153
+
154
+ # Remove all services
155
+ cortex uninstall
156
+ ```
157
+
158
+ On macOS, this creates LaunchAgent plists (auto-start on login, auto-restart on crash).
159
+ On Linux, this creates systemd user units.
160
+
161
+ Raw templates are available in `deploy/` for manual setup.
162
+
163
+ ### `--direct` escape hatch
164
+
165
+ By default, CLI commands route through the running MCP server. If the server is down, pass `--direct` to open the stores directly:
166
+
167
+ ```bash
168
+ cortex --direct list # bypass MCP, open store directly
169
+ cortex --direct pipeline --batch # required for bulk SQL operations
170
+ ```
171
+
172
+ Bootstrap commands (`init`, `setup`, `import-v1`, `import-vault`) always run directly.
173
+
174
+ ## Docker
175
+
176
+ ```bash
177
+ docker compose up -d
178
+ # Server at http://localhost:1314
179
+ ```
180
+
181
+ ## Troubleshooting
182
+
183
+ ### Crashed MCP server
184
+
185
+ If the MCP server is killed abruptly, its stale lock files are recovered automatically on the next start. For manual cleanup:
186
+
187
+ ```bash
188
+ cortex doctor unlock # normal cleanup
189
+ cortex doctor unlock --dry-run # report only
190
+ cortex doctor unlock --force # bypass live-holder check
191
+ ```
192
+
193
+ ### Log management
194
+
195
+ ```bash
196
+ cortex doctor logs # show log file sizes and status
197
+ cortex doctor logs --tail 20 # last 20 lines
198
+ cortex doctor logs --rotate # rotate log files (safe while running)
199
+ ```
200
+
201
+ ### Claude Code session staleness
202
+
203
+ After restarting the MCP server, restart Claude Code to clear its stale session ID. `claude --resume` restores your conversation. The dashboard and CLI do not have this issue.
204
+
205
+ ## Knowledge Types
206
+
207
+ decision, lesson, fix, session, research, source, synthesis, idea
208
+
209
+ ## Relationship Types
210
+
211
+ causedBy, contradicts (symmetric), supports, supersedes (transitive), dependsOn, ledTo (inverse of causedBy), implements, mentions
212
+
213
+ ## License
214
+
215
+ Copyright Abbacus Group.
@@ -0,0 +1,65 @@
1
+ cortex/__init__.py,sha256=fdX8yf2PkG6RQGMsP2Gwx05VVMKtujgAlPLwH03rNAw,68
2
+ cortex/__main__.py,sha256=omWWYPYr4nDk8T7ZVLk6rXJ0TzRtZtwPIAEpRKd-Ey4,92
3
+ cortex/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ cortex/cli/backup.py,sha256=xMTs-gi1r1p2pl4QYaIfvZxHqWmyNRFBMMSxTfHKfAM,10551
5
+ cortex/cli/install.py,sha256=jBSyBrJ1CmokynUstyJo2cxaOMcgjFvWeJAEq3YLjBs,11570
6
+ cortex/cli/main.py,sha256=b4alF6o6kwW5gI-OINOdh6Nmq55GekgWTZmdfMSljik,73473
7
+ cortex/core/__init__.py,sha256=VZjPX_XAfgfdxKUuWVCLjOSP679uDExusMe9eHjjwzA,334
8
+ cortex/core/config.py,sha256=7M1dpI_RHb673G6jTaH_3xYONt7o2Lx5dpKDxA3bThI,5591
9
+ cortex/core/constants.py,sha256=YsVUmFYqmnrCMxLjuzX0JS6kB6gDmF7OEMKG3xrCtsM,1198
10
+ cortex/core/errors.py,sha256=nXvHAlzf-hTZS2Ywi2ICO8ehSHQxnlJPhmwrPMlqqlc,8188
11
+ cortex/core/logging.py,sha256=Em3CKkeCWxw4wtLoJytRv1kV0bdTi5PhArfJUO4KA08,7153
12
+ cortex/dashboard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ cortex/dashboard/server.py,sha256=uyaWZDhS7XopvPzKBgoojPjnlR29aEtxBQyaI2uBzZc,12255
14
+ cortex/dashboard/static/css/style.css,sha256=qTqAhgOlaNLWGRccmDjb6u8nwiF6Fi-gnfMv3oB-AUQ,6578
15
+ cortex/dashboard/static/js/graph.js,sha256=JmVZSy6WaicYWslFXMqoc5ywxrex8wCWETOtTibvWs0,5677
16
+ cortex/dashboard/templates/base.html,sha256=bcI3NVyyTw4hsNq-M-Q1L4OY1KW2B_UpVi10XqZmp4c,1385
17
+ cortex/dashboard/templates/create.html,sha256=OQ4ZMi_erDLac0_IqVsT24r3_jy3TpUOmxxFqH9ZCWQ,1600
18
+ cortex/dashboard/templates/detail.html,sha256=AalREYMa8XRQGp-3Y-88_Ok47aedaPTbX6JeuLGmnQo,1845
19
+ cortex/dashboard/templates/documents.html,sha256=KK6RgmchRgT-qKNSVhbR3mgOBRiUbaxgj3Vl2MNtSfM,1929
20
+ cortex/dashboard/templates/entities.html,sha256=9nadwZYdP9vCk5kYpmPiBN75Lat-VmqbTEVPLdsogp8,1482
21
+ cortex/dashboard/templates/error.html,sha256=Wj3OBN9o0LJmbIcP1obiOrCS96Q4T_Dangxqcub0KYc,665
22
+ cortex/dashboard/templates/graph.html,sha256=OQad4DAuVhDnPz1jb6TzfxAcDtiw-1RtkC95Bo-FrT0,1608
23
+ cortex/dashboard/templates/home.html,sha256=K1ewFV5ke0_k9cceVD4cOdBv-nPEBiGkPO1fgFxde9U,1749
24
+ cortex/dashboard/templates/login.html,sha256=2rJGZVXpKhlBtA0YJBYTErouCQFUEEkilvW7Le1PrEY,846
25
+ cortex/dashboard/templates/settings.html,sha256=sROZ3UJWcZgth5zb0axCZHt_8GiMfyBQAulkOZ59IsQ,1465
26
+ cortex/dashboard/templates/trail.html,sha256=QyNsMA4x6pQ21gIqY9l6_OKRfcBd2OIrovftCZVwFrQ,1004
27
+ cortex/db/__init__.py,sha256=CPsig_A8yUNJnilkptnT5H_KiQ5R1aDm83VB6q7499k,136
28
+ cortex/db/content_store.py,sha256=3QA4fJ73DiWW6N3eRF1TWa1gmnpIIF5CnJukJYAZItg,15102
29
+ cortex/db/graph_store.py,sha256=UrYHBdsfqt64Nnc7e9PJ6Exut4Wj3og05oPBFizRbMQ,36340
30
+ cortex/db/store.py,sha256=xLJCKPdwbLJFpz6aqlB1vm1G9z05pBNADUQvRD1g5Rg,8531
31
+ cortex/ontology/__init__.py,sha256=iQyQB_GTmQR_uICI7PAB2Qn5tHhTCqBe6yNfy8KlwSQ,158
32
+ cortex/ontology/cortex.ttl,sha256=xP2-m9vQUvz7j8jntVuERlAn80vTeDD7dFpxA53WqvE,13662
33
+ cortex/ontology/namespaces.py,sha256=A7bHHMvJ69nIJQhd7078kWvmHSjoUmaeBW_ylzuIC8Y,2205
34
+ cortex/ontology/resolver.py,sha256=ujYOGhTO-nwjpKjdQ_i8ysE9IXdYteW9G5otVbyDDUg,989
35
+ cortex/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
+ cortex/pipeline/advanced_reason.py,sha256=b2_tvn5HoxZtoGbFOQKv5ZlBHwAxpkuKq6yXwTMQM-s,12059
37
+ cortex/pipeline/enrich.py,sha256=OTKUxubU7QZLNzQeO5fsKvm3IJ5yu2kChCeGALx7vag,3790
38
+ cortex/pipeline/importer.py,sha256=yCl3SP1R1efMyMO2gIo4bCr2gWXnB1mhsgkhZqZ-Es8,20983
39
+ cortex/pipeline/link.py,sha256=JT5upsowG3jCo-qfjroI6K2UzksSZm5A8Dfg_q7EbEw,4764
40
+ cortex/pipeline/normalize.py,sha256=CqDjj1kHAD-U-nQi7gNfCIswxVNViW_pDY8-3HXD3j0,4958
41
+ cortex/pipeline/orchestrator.py,sha256=VGTn3hnptR7FCeIEffrp5LDevcgN_oMrNbWMijlTfgs,6779
42
+ cortex/pipeline/reason.py,sha256=dIw3B_5YnBX9Ic0UN-v2sCfZp-nVVv_c7FT74kwv47I,6462
43
+ cortex/pipeline/templates.py,sha256=_nMChnXMuARX-uyfSZrPlaJeyLlRv0AO9_eaUf7u3HA,2884
44
+ cortex/pipeline/temporal.py,sha256=zJiY3GW2joyJI_rsVmdHkIhvsBGeUwqLMAU9M_35K50,4642
45
+ cortex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ cortex/retrieval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
+ cortex/retrieval/engine.py,sha256=TZbrHR3p1gpMVT-7RCoFr655QjocPAOqeDVaJ_dZMjQ,8464
48
+ cortex/retrieval/graph.py,sha256=7NIDpWnFH5ocsKG75RLmLH5N1yl1j9dmuhmggjPl_Fg,9442
49
+ cortex/retrieval/learner.py,sha256=l-B00mZppIxVcM_mZQxwNk8WMqw4oQ8e2_-4E2wlUFk,4857
50
+ cortex/retrieval/presenters.py,sha256=izqsXxd3KlBKtMfMhm-ZDn75H6U7eACfhnQlhrEss3Q,13903
51
+ cortex/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
+ cortex/services/embeddings.py,sha256=e4s6Fc1XGzU85fuOdLUuLBILON6hS539SkskMEyA5IE,6388
53
+ cortex/services/llm.py,sha256=5ituqPM7eKRMwWnuqkfjTW-yoSR_AW8w8p0phqw7Zpk,8738
54
+ cortex/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
+ cortex/transport/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
+ cortex/transport/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
+ cortex/transport/api/server.py,sha256=fteAYnF9lHYB6lsKa3l96O_hUqCRDQM8pkRSnbZMcAs,15647
58
+ cortex/transport/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
+ cortex/transport/mcp/__main__.py,sha256=SDv0uFPqsHIntBumxIZYkXOLOtCV-xP1YClO6EBHVMw,151
60
+ cortex/transport/mcp/client.py,sha256=bav28U68umFBOANh878WsgPxH0DgsA9YhRuxwPFqB9E,17381
61
+ cortex/transport/mcp/server.py,sha256=tInX88m0Q8Kd_2R0tflcNCU87vMJqlUYQ4aKczGhpE8,26972
62
+ abbacus_cortex-0.2.1.dist-info/WHEEL,sha256=s_zqWxHFEH8b58BCtf46hFCqPaISurdB9R1XJ8za6XI,80
63
+ abbacus_cortex-0.2.1.dist-info/entry_points.txt,sha256=Cf_p26EHtXKEoKpjBCHUfjXku9cC2-S5Qpb_issRlGM,48
64
+ abbacus_cortex-0.2.1.dist-info/METADATA,sha256=Yg5PdYHNSb9IKj0ICyFw4EqN-R-K7-sy1EGrDYdAaVI,7844
65
+ abbacus_cortex-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.11.6
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ cortex = cortex.cli.main:app
3
+
cortex/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Cortex — Cognitive knowledge system."""
2
+
3
+ __version__ = "0.2.1"
cortex/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running Cortex as ``python -m cortex``."""
2
+
3
+ from cortex.cli.main import app
4
+
5
+ app()
cortex/cli/__init__.py ADDED
File without changes
cortex/cli/backup.py ADDED
@@ -0,0 +1,317 @@
1
+ """Backup and restore for Cortex data stores.
2
+
3
+ Creates timestamped tar.gz archives of cortex.db and graph.db/, and
4
+ restores them with safety rollback via a .pre-restore/ staging area.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import shutil
10
+ import sqlite3
11
+ import tarfile
12
+ from datetime import UTC, datetime
13
+ from pathlib import Path
14
+
15
+ import typer
16
+
17
+ from cortex.core.config import CortexConfig
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Exclusion rules
21
+ # ---------------------------------------------------------------------------
22
+
23
+ # Relative paths (from data_dir) to exclude from backup archives.
24
_EXCLUDE_EXACT = frozenset({
    "graph.db.lock",
    "graph.db/LOCK",
    ".env",
    "cortex.db-wal",
    "cortex.db-shm",
})

_EXCLUDE_SUFFIXES = (".log", ".err", ".log.old", ".err.old")

# Top-level directories that are never archived. ``.pre-restore/`` is the
# rollback staging area created by ``do_restore``; archiving it would bloat
# the backup with stale stores and, on a later restore, overwrite the fresh
# rollback copy while the archive is being extracted.
_EXCLUDE_DIRS = frozenset({".pre-restore"})


def _should_exclude(rel: str) -> bool:
    """Return True if *rel* (relative to data_dir) should be skipped.

    Args:
        rel: Path of a file relative to the data directory. Native
            separators are accepted; the path is normalised to ``/`` form
            before matching so the rules also apply on Windows.

    Returns:
        True for lock files, ``.env``, SQLite WAL/SHM side files, top-level
        log files, RocksDB ``LOG.old*`` archives directly inside
        ``graph.db/``, and anything under an excluded directory.
    """
    # The rule tables are written with "/" separators, but callers pass
    # ``str(path.relative_to(data_dir))`` which uses "\\" on Windows.
    rel = Path(rel).as_posix()

    if rel in _EXCLUDE_EXACT:
        return True

    parts = rel.split("/")

    # Rollback staging area (and anything nested below it).
    if parts[0] in _EXCLUDE_DIRS:
        return True

    # Top-level server logs
    if len(parts) == 1 and rel.endswith(_EXCLUDE_SUFFIXES):
        return True

    # RocksDB diagnostic log archives inside graph.db/
    if len(parts) == 2 and parts[0] == "graph.db" and parts[1].startswith("LOG.old"):
        return True

    return False
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Server-running check
53
+ # ---------------------------------------------------------------------------
54
+
55
+
56
def _check_server_running(config: CortexConfig) -> tuple[bool, int | None, str | None]:
    """Report whether a live process owns the graph.db lock marker.

    Reads the marker file associated with ``config.graph_db_path`` and checks
    whether the PID recorded in it is still alive.

    Returns:
        ``(is_running, pid_or_none, cmdline_or_none)``. When the marker is
        missing, unreadable, or carries no integer PID the result is
        ``(False, None, None)``. When the PID is recorded but dead, the PID
        and command line are still returned with ``is_running`` False.
    """
    from cortex.db.graph_store import _marker_path_for, _pid_alive, _read_marker

    not_running = (False, None, None)

    lock_marker = _marker_path_for(config.graph_db_path)
    if not lock_marker.exists():
        return not_running

    info = _read_marker(lock_marker)
    if info is None or info.get("_unreadable"):
        return not_running

    owner_pid = info.get("pid")
    if not isinstance(owner_pid, int):
        return not_running

    owner_cmdline = info.get("cmdline")
    alive = True if _pid_alive(owner_pid) else False
    return (alive, owner_pid, owner_cmdline)
79
+
80
+
81
+ # ---------------------------------------------------------------------------
82
+ # SQLite WAL checkpoint
83
+ # ---------------------------------------------------------------------------
84
+
85
+
86
def _checkpoint_sqlite(db_path: Path) -> None:
    """Flush the SQLite WAL into the main database file.

    Uses a short-lived direct connection (not ContentStore) to avoid
    acquiring schema locks or interfering with a running server.

    Args:
        db_path: Path to the SQLite database file to checkpoint.

    Warns:
        RuntimeWarning: If SQLite reports that the checkpoint was blocked
            from completing. Backups exclude the ``-wal`` side file, so in
            that case the main database file may lack the latest writes.
    """
    import warnings

    conn = sqlite3.connect(str(db_path))
    try:
        # The pragma returns one row: (busy, wal_frames, checkpointed_frames).
        row = conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").fetchone()
    finally:
        conn.close()

    # busy == 1 means another connection prevented the checkpoint from
    # completing; surface that instead of silently producing a stale copy.
    if row is not None and row[0]:
        warnings.warn(
            f"SQLite WAL checkpoint of {db_path} was blocked by another "
            "connection; recent writes may still be in the -wal file.",
            RuntimeWarning,
            stacklevel=2,
        )
97
+
98
+
99
+ # ---------------------------------------------------------------------------
100
+ # Human-readable file size
101
+ # ---------------------------------------------------------------------------
102
+
103
+
104
def _human_size(nbytes: int) -> str:
    """Format a byte count as a short human-readable string.

    Values below 1024 are shown as whole bytes (``"512 B"``); larger values
    are scaled by powers of 1024 and shown with one decimal place in KB, MB,
    GB or TB.

    Args:
        nbytes: Size in bytes.

    Returns:
        The formatted size, e.g. ``"1.5 KB"``.
    """
    if nbytes < 1024:
        return f"{nbytes} B"

    size = float(nbytes)
    for unit in ("KB", "MB", "GB"):
        size /= 1024
        # Compare the *rounded* value so sizes just under a unit boundary
        # are promoted ("1.0 MB") instead of printing "1024.0 KB".
        if round(size, 1) < 1024:
            return f"{size:.1f} {unit}"
    return f"{size / 1024:.1f} TB"
110
+
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # Public API
114
+ # ---------------------------------------------------------------------------
115
+
116
+
117
def do_backup(config: CortexConfig, output: Path | None = None) -> Path:
    """Create a tar.gz backup of cortex.db and graph.db/.

    Every regular file under ``config.data_dir`` is archived, relative to
    that directory, unless ``_should_exclude`` rejects it. The SQLite WAL is
    checkpointed first so the main database file is up to date.

    Args:
        config: Cortex configuration providing ``data_dir``,
            ``sqlite_db_path`` and ``graph_db_path``.
        output: Directory to write the archive into (created if missing).
            Defaults to the current working directory.

    Returns:
        The path to the created archive.

    Raises:
        typer.Exit: With code 1 if the SQLite database or the graph store
            directory does not exist.
    """
    # 1. Verify stores exist
    if not config.sqlite_db_path.exists():
        typer.secho(
            f"Cortex not initialized — {config.sqlite_db_path} not found.\n"
            "Run `cortex init` first.",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(1)

    if not config.graph_db_path.exists() or not config.graph_db_path.is_dir():
        typer.secho(
            f"Graph store not found at {config.graph_db_path}.",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(1)

    # 2. Warn if server is running (safe to proceed)
    running, pid, _cmdline = _check_server_running(config)
    if running:
        typer.secho(
            f" Cortex server is running (PID {pid}). "
            "Backup will proceed — stores support concurrent reads.",
            fg=typer.colors.YELLOW,
        )

    # 3. Checkpoint SQLite WAL
    typer.echo(" Checkpointing SQLite WAL...")
    _checkpoint_sqlite(config.sqlite_db_path)

    # 4. Build archive
    stamp = datetime.now(UTC).strftime("%Y-%m-%dT%H%M%S")
    filename = f"cortex-backup-{stamp}.tar.gz"
    out_dir = output if output else Path.cwd()
    out_dir.mkdir(parents=True, exist_ok=True)
    archive_path = out_dir / filename

    typer.echo(f" Archiving to {archive_path}...")
    data_dir = config.data_dir
    # The archive file already exists once tarfile.open() returns, so when
    # the output directory lies inside data_dir the directory walk would
    # find the half-written archive and try to add it to itself.
    archive_resolved = archive_path.resolve()
    with tarfile.open(archive_path, "w:gz") as tar:
        for path in sorted(data_dir.rglob("*")):
            if not path.is_file():
                continue
            if path.resolve() == archive_resolved:
                continue
            rel = str(path.relative_to(data_dir))
            if _should_exclude(rel):
                continue
            tar.add(str(path), arcname=rel)

    # 5. Report
    size = archive_path.stat().st_size
    doc_count = _quick_doc_count(config.sqlite_db_path)
    typer.secho(f"\nBackup complete: {archive_path}", fg=typer.colors.GREEN)
    typer.echo(f" Size: {_human_size(size)}")
    if doc_count is not None:
        typer.echo(f" Documents: {doc_count}")

    return archive_path
180
+
181
+
182
def do_restore(config: CortexConfig, archive: Path) -> None:
    """Restore Cortex data from a tar.gz backup archive.

    The current ``cortex.db`` (plus WAL/SHM side files) and ``graph.db/`` are
    first moved into ``<data_dir>/.pre-restore/``. The archive is then
    extracted into the data directory. If anything fails after the move, the
    staged stores are moved back and the original exception is re-raised.

    Args:
        config: Cortex configuration providing ``data_dir``,
            ``sqlite_db_path`` and ``graph_db_path``.
        archive: Path to a ``.tar.gz`` archive produced by ``do_backup``.

    Raises:
        typer.Exit: With code 1 if the archive is missing, unreadable, lacks
            ``cortex.db`` or ``graph.db/``, contains an unsafe path, or if a
            Cortex server is currently running.
        Exception: Any error raised while staging or extracting is re-raised
            after the rollback attempt.
    """
    # 1. Verify archive exists
    if not archive.exists():
        typer.secho(f"Archive not found: {archive}", fg=typer.colors.RED, err=True)
        raise typer.Exit(1)

    # 2. Validate archive
    try:
        with tarfile.open(archive, "r:gz") as tar:
            names = tar.getnames()
    except tarfile.TarError as e:
        typer.secho(
            f"Invalid archive: {e}",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(1) from None

    # 3. Validate contents
    has_db = "cortex.db" in names
    has_graph = any(n.startswith("graph.db/") for n in names)
    if not has_db or not has_graph:
        missing = []
        if not has_db:
            missing.append("cortex.db")
        if not has_graph:
            missing.append("graph.db/")
        typer.secho(
            f"Archive is missing required files: {', '.join(missing)}",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(1)

    # 4. Path traversal check
    for name in names:
        if name.startswith("/") or ".." in name.split("/"):
            typer.secho(
                f"Archive contains unsafe path: {name!r}. Refusing to extract.",
                fg=typer.colors.RED,
                err=True,
            )
            raise typer.Exit(1)

    # 5. Refuse if server is running
    running, pid, _cmdline = _check_server_running(config)
    if running:
        typer.secho(
            f"Cortex server is running (PID {pid}). "
            f"Stop it first:\n cortex uninstall\n # or: kill {pid}",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(1)

    data_dir = config.data_dir
    data_dir.mkdir(parents=True, exist_ok=True)

    # 6. Safety: move current stores to .pre-restore/
    pre_restore = data_dir / ".pre-restore"
    if pre_restore.exists():
        shutil.rmtree(pre_restore)
    pre_restore.mkdir()

    moved_db = False
    moved_graph = False
    try:
        if config.sqlite_db_path.exists():
            shutil.move(str(config.sqlite_db_path), str(pre_restore / "cortex.db"))
            moved_db = True
            # Also move WAL/SHM if present
            for suffix in ("-wal", "-shm"):
                wal = config.data_dir / f"cortex.db{suffix}"
                if wal.exists():
                    shutil.move(str(wal), str(pre_restore / f"cortex.db{suffix}"))

        if config.graph_db_path.exists():
            shutil.move(str(config.graph_db_path), str(pre_restore / "graph.db"))
            moved_graph = True

        # 7. Extract
        typer.echo(f" Extracting {archive.name} to {data_dir}...")
        with tarfile.open(archive, "r:gz") as tar:
            # An archive may itself carry a ``.pre-restore/`` tree (backups
            # taken after an earlier restore). Extracting it would overwrite
            # the rollback copy staged in step 6, so those members are skipped.
            members = [
                m for m in tar.getmembers()
                if m.name.split("/")[0] != ".pre-restore"
            ]
            tar.extractall(path=str(data_dir), members=members, filter="data")

        # 8. Clean lock files from extraction
        lock_marker = data_dir / "graph.db.lock"
        if lock_marker.exists():
            lock_marker.unlink()
        rocksdb_lock = config.graph_db_path / "LOCK"
        if rocksdb_lock.exists():
            rocksdb_lock.unlink()

    except Exception:
        # Attempt rollback
        typer.secho(
            "\nExtraction failed — rolling back from .pre-restore/",
            fg=typer.colors.YELLOW,
            err=True,
        )
        if moved_db and (pre_restore / "cortex.db").exists():
            shutil.move(str(pre_restore / "cortex.db"), str(config.sqlite_db_path))
            for suffix in ("-wal", "-shm"):
                backed = pre_restore / f"cortex.db{suffix}"
                if backed.exists():
                    shutil.move(str(backed), str(config.data_dir / f"cortex.db{suffix}"))
        if moved_graph and (pre_restore / "graph.db").exists():
            # A partial extraction may already have recreated graph.db/.
            # shutil.move() onto an existing directory nests the source
            # inside it (graph.db/graph.db), so clear the partial copy first.
            partial = config.graph_db_path
            if partial.is_dir() and not partial.is_symlink():
                shutil.rmtree(partial, ignore_errors=True)
            elif partial.exists() or partial.is_symlink():
                partial.unlink()
            shutil.move(str(pre_restore / "graph.db"), str(config.graph_db_path))
        raise

    # 9. Report
    doc_count = _quick_doc_count(config.sqlite_db_path)
    typer.secho(f"\nRestore complete: {data_dir}", fg=typer.colors.GREEN)
    if doc_count is not None:
        typer.echo(f" Documents: {doc_count}")
    typer.echo(" Old data saved to .pre-restore/ (safe to delete after verifying).")
    typer.echo(" Run `cortex status` to verify.")
300
+
301
+
302
+ # ---------------------------------------------------------------------------
303
+ # Helpers
304
+ # ---------------------------------------------------------------------------
305
+
306
+
307
def _quick_doc_count(db_path: Path) -> int | None:
    """Quick readonly document count from SQLite. Returns None on any error.

    Args:
        db_path: Path to the SQLite database containing a ``documents`` table.

    Returns:
        The number of rows in ``documents``, or None if the database cannot
        be opened read-only or the query fails.
    """
    try:
        # Build the URI with Path.as_uri() so characters that are special in
        # a URI ("?", "#", "%") are percent-encoded; interpolating the raw
        # path would let them truncate or corrupt the filename. as_uri()
        # requires an absolute path, hence resolve().
        uri = f"{db_path.resolve().as_uri()}?mode=ro"
        conn = sqlite3.connect(uri, uri=True)
        try:
            row = conn.execute("SELECT COUNT(*) FROM documents").fetchone()
            return row[0] if row else None
        finally:
            conn.close()
    except Exception:
        # Deliberately best-effort: the count is only used for reporting.
        return None