cartograph-v1 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartograph_v1-0.2.0/LICENSE +21 -0
- cartograph_v1-0.2.0/MANIFEST.in +8 -0
- cartograph_v1-0.2.0/PKG-INFO +263 -0
- cartograph_v1-0.2.0/README.md +226 -0
- cartograph_v1-0.2.0/cartograph/__init__.py +4 -0
- cartograph_v1-0.2.0/cartograph/cli.py +340 -0
- cartograph_v1-0.2.0/cartograph/config.py +62 -0
- cartograph_v1-0.2.0/cartograph/demo.py +114 -0
- cartograph_v1-0.2.0/cartograph/elite/__init__.py +14 -0
- cartograph_v1-0.2.0/cartograph/elite/catalog.py +117 -0
- cartograph_v1-0.2.0/cartograph/elite/dod.py +130 -0
- cartograph_v1-0.2.0/cartograph/elite/elevate.py +36 -0
- cartograph_v1-0.2.0/cartograph/elite/frontier.py +43 -0
- cartograph_v1-0.2.0/cartograph/elite/playbooks.py +83 -0
- cartograph_v1-0.2.0/cartograph/embed.py +101 -0
- cartograph_v1-0.2.0/cartograph/ingest.py +213 -0
- cartograph_v1-0.2.0/cartograph/mcp_server.py +140 -0
- cartograph_v1-0.2.0/cartograph/persona/__init__.py +17 -0
- cartograph_v1-0.2.0/cartograph/persona/profile.py +163 -0
- cartograph_v1-0.2.0/cartograph/persona/signals.py +83 -0
- cartograph_v1-0.2.0/cartograph/persona/steer.py +115 -0
- cartograph_v1-0.2.0/cartograph/retrieve.py +93 -0
- cartograph_v1-0.2.0/cartograph/storage.py +148 -0
- cartograph_v1-0.2.0/cartograph/viz/__init__.py +1 -0
- cartograph_v1-0.2.0/cartograph/viz/app.py +85 -0
- cartograph_v1-0.2.0/cartograph/viz/index.html +95 -0
- cartograph_v1-0.2.0/cartograph__v1.egg-info/PKG-INFO +263 -0
- cartograph_v1-0.2.0/cartograph__v1.egg-info/SOURCES.txt +46 -0
- cartograph_v1-0.2.0/cartograph__v1.egg-info/dependency_links.txt +1 -0
- cartograph_v1-0.2.0/cartograph__v1.egg-info/entry_points.txt +2 -0
- cartograph_v1-0.2.0/cartograph__v1.egg-info/requires.txt +23 -0
- cartograph_v1-0.2.0/cartograph__v1.egg-info/top_level.txt +1 -0
- cartograph_v1-0.2.0/docs/ARCHITECTURE.md +42 -0
- cartograph_v1-0.2.0/docs/BROWSER.md +50 -0
- cartograph_v1-0.2.0/docs/PERSONA.md +97 -0
- cartograph_v1-0.2.0/docs/PUBLISHING.md +65 -0
- cartograph_v1-0.2.0/docs/REFERENCE_PACKS.md +37 -0
- cartograph_v1-0.2.0/docs/SCALING.md +33 -0
- cartograph_v1-0.2.0/examples/sample_data/ml_project/train.py +17 -0
- cartograph_v1-0.2.0/examples/sample_data/web_app/README.md +7 -0
- cartograph_v1-0.2.0/pyproject.toml +51 -0
- cartograph_v1-0.2.0/scripts/build_reference_pack.py +62 -0
- cartograph_v1-0.2.0/scripts/launch_viz.bat +4 -0
- cartograph_v1-0.2.0/scripts/launch_viz.sh +3 -0
- cartograph_v1-0.2.0/setup.cfg +4 -0
- cartograph_v1-0.2.0/tests/test_demo_and_loop.py +35 -0
- cartograph_v1-0.2.0/tests/test_persona.py +117 -0
- cartograph_v1-0.2.0/tests/test_smoke.py +56 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Cartograph contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cartograph__v1
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Build a personal cognitive graph of everything you work with, and plug it into your AI agents.
|
|
5
|
+
Author: Cartograph contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/pbathuri/cartograph
|
|
8
|
+
Project-URL: Documentation, https://github.com/pbathuri/cartograph#readme
|
|
9
|
+
Keywords: rag,knowledge-graph,retrieval,mcp,ai-agents,embeddings,local-first
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: typer>=0.9
|
|
18
|
+
Requires-Dist: rich>=13
|
|
19
|
+
Requires-Dist: pyyaml>=6
|
|
20
|
+
Provides-Extra: semantic
|
|
21
|
+
Requires-Dist: sentence-transformers>=2.6; extra == "semantic"
|
|
22
|
+
Requires-Dist: numpy>=1.24; extra == "semantic"
|
|
23
|
+
Requires-Dist: einops>=0.7; extra == "semantic"
|
|
24
|
+
Provides-Extra: ml
|
|
25
|
+
Requires-Dist: torch>=2.0; extra == "ml"
|
|
26
|
+
Requires-Dist: scikit-learn>=1.3; extra == "ml"
|
|
27
|
+
Requires-Dist: numpy>=1.24; extra == "ml"
|
|
28
|
+
Provides-Extra: full
|
|
29
|
+
Requires-Dist: sentence-transformers>=2.6; extra == "full"
|
|
30
|
+
Requires-Dist: torch>=2.0; extra == "full"
|
|
31
|
+
Requires-Dist: scikit-learn>=1.3; extra == "full"
|
|
32
|
+
Requires-Dist: numpy>=1.24; extra == "full"
|
|
33
|
+
Requires-Dist: einops>=0.7; extra == "full"
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
<div align="center">
|
|
39
|
+
|
|
40
|
+
# 🗺️ Cartograph
|
|
41
|
+
|
|
42
|
+
**Build a personal cognitive graph of everything you work with — and plug it into your AI agents.**
|
|
43
|
+
|
|
44
|
+
*Point it at your folders. It maps your repos, notes, and docs into one searchable graph, learns your
|
|
45
|
+
field, and serves the right context to Claude Code, Cursor, or any MCP agent — so every future
|
|
46
|
+
task, from coding to daily generative-AI use, is grounded in **your** knowledge.*
|
|
47
|
+
|
|
48
|
+
Local-first · your data never leaves your machine · works on any field · no ML expertise required
|
|
49
|
+
|
|
50
|
+
[Quickstart](#-quickstart) · [How it works](#-how-it-works) · [Connect your agents](#-connect-your-agents)
|
|
51
|
+
· [Requirements](#-requirements) · [Install tiers](#-install-tiers--graph-sizes) · [Visual app](#-the-visual-app)
|
|
52
|
+
|
|
53
|
+
</div>
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## ✨ Why
|
|
58
|
+
|
|
59
|
+
LLM agents are brilliant but amnesiac — they don't know your repos, your conventions, your past
|
|
60
|
+
decisions, or what "good" looks like in your field. Cartograph fixes that locally:
|
|
61
|
+
|
|
62
|
+
1. **Plug** — `pip install cartograph__v1`
|
|
63
|
+
2. **Ingest** — `carto ingest ~/code` builds a graph of your work (incrementally, any folder)
|
|
64
|
+
3. **(Optional) Train** — add semantic search / your own models on *your* graph
|
|
65
|
+
4. **Auto-applied** — your agents query the graph over MCP for grounded context, forever
|
|
66
|
+
|
|
67
|
+
No cloud. No account. Your graph is a single SQLite file you own.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## 🚀 Quickstart
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install cartograph__v1 # PyPI package name; the command is `carto`, the import is `cartograph`
|
|
75
|
+
carto demo # ⚡ see it ALL work in ~10s on a synthetic corpus (zero setup)
|
|
76
|
+
carto init # pick folder(s) + your field(s) — declaring your field makes labels accurate
|
|
77
|
+
carto ingest # build your graph (re-run anytime; only changed files reprocess)
|
|
78
|
+
carto viz # 👀 see your graph in the browser
|
|
79
|
+
carto retrieve "how did I handle auth" --chunks # hybrid search over everything you've done
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Want semantic (meaning-based) search too?
|
|
83
|
+
```bash
|
|
84
|
+
pip install "cartograph__v1[semantic]" # ~2GB; uses your GPU if present, else CPU
|
|
85
|
+
carto index # embed your chunks once
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## 🧠 How it works
|
|
91
|
+
|
|
92
|
+
```mermaid
|
|
93
|
+
flowchart TB
|
|
94
|
+
subgraph YOU["Your stuff (stays on your machine)"]
|
|
95
|
+
R["repos · notes · docs · books"]
|
|
96
|
+
end
|
|
97
|
+
subgraph CARTO["Cartograph"]
|
|
98
|
+
ING["Ingest: walk → chunk → infer field (incremental)"]
|
|
99
|
+
GRAPH["The graph — SQLite + FTS5\nprojects · files · chunks · skills · edges"]
|
|
100
|
+
SEM["Semantic index (optional)\nlocal embeddings, GPU-aware"]
|
|
101
|
+
HY["Hybrid retrieval (RRF of semantic + keyword)"]
|
|
102
|
+
EL["Elite layer\nfrontier catalog · playbooks · Definition-of-Done · coverage"]
|
|
103
|
+
end
|
|
104
|
+
subgraph OUT["Where it shows up"]
|
|
105
|
+
MCP["MCP server → Claude Code / Cursor / any agent"]
|
|
106
|
+
VIZ["Desktop visual graph"]
|
|
107
|
+
CLI["carto CLI"]
|
|
108
|
+
end
|
|
109
|
+
R --> ING --> GRAPH
|
|
110
|
+
GRAPH --> SEM --> HY
|
|
111
|
+
GRAPH --> HY --> EL
|
|
112
|
+
HY --> MCP & VIZ & CLI
|
|
113
|
+
EL --> MCP & CLI
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**The graph schema** — everything you work with becomes connected nodes:
|
|
117
|
+
|
|
118
|
+
```mermaid
|
|
119
|
+
flowchart LR
|
|
120
|
+
Project -->|CONTAINS| File -->|CHUNKED_INTO| Chunk
|
|
121
|
+
Project -->|IN_FIELD| Field[("field: ml / quant / web / …")]
|
|
122
|
+
Project -->|RELATED_TO| Project
|
|
123
|
+
Project -.->|REPO_TEACHES| Skill
|
|
124
|
+
Chunk -->|FTS5 + embeddings| Search[("hybrid retrieval")]
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
**Hybrid retrieval is the headline.** It fuses meaning-based (semantic) and exact (keyword) search via
|
|
128
|
+
Reciprocal Rank Fusion. In the engine Cartograph distills from, hybrid scored **success@10 0.986 vs
|
|
129
|
+
0.958** for either method alone — it's *never worse*, because it catches both paraphrases (semantic)
|
|
130
|
+
and exact tokens like a function name (keyword). It works in pure-keyword mode with zero ML installed,
|
|
131
|
+
and turns on semantic automatically once you run `carto index`.
|
|
132
|
+
|
|
133
|
+
**The elite layer** pulls any build toward the top of its field (works for ML, quant, web, HPC, data,
|
|
134
|
+
devops, mobile, game-dev, research, libraries — and takes just three small files to extend):
|
|
135
|
+
- `carto elevate "<task>"` — the elite bar, reference repos, the frontier *playbook* (process), and the
|
|
136
|
+
repos you already have to build on
|
|
137
|
+
- `carto frontier` — how much of your field's top-tier reference set you've ingested + what to add
|
|
138
|
+
- `carto review <project> --field <f>` — grade a build against the field's Definition-of-Done
|
|
139
|
+
|
|
140
|
+
### 🧭 The persona layer — steer your agents *to you*
|
|
141
|
+
|
|
142
|
+
The graph knows your work; the **persona layer** learns *what you respond to* and shapes every answer.
|
|
143
|
+
It models you as field weights + an optional preference vector in embedding space, re-ranks retrieval by
|
|
144
|
+
alignment to you, and emits a model-agnostic **steering brief** any agent prepends — so Claude / Cursor /
|
|
145
|
+
ChatGPT / Gemini outputs adapt to your field, conventions, and preferences, and **keep adapting** as you
|
|
146
|
+
give feedback. Confidence-scaled: well-supported preferences steer hard, sparse ones barely nudge.
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
carto persona # your learned focus + confidence
|
|
150
|
+
carto personalize "how do I cache this?" # the steering brief an agent prepends
|
|
151
|
+
carto feedback --liked my-repo # teach it what was useful (adapts over time)
|
|
152
|
+
```
|
|
153
|
+
The same brief is available to agents via the MCP `personalize` tool and to web GenAI via
|
|
154
|
+
`carto serve` + a tiny userscript ([docs/BROWSER.md](docs/BROWSER.md)). Foundations, the
|
|
155
|
+
Hilbert-space mapping, and honest limits: **[docs/PERSONA.md](docs/PERSONA.md)**.
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## 🔌 Connect your agents
|
|
160
|
+
|
|
161
|
+
Cartograph speaks **MCP** (Model Context Protocol). Add it once and your agent can query your graph.
|
|
162
|
+
|
|
163
|
+
**Claude Code / Cursor** — add to your MCP config (`~/.cursor/mcp.json` or Claude Code's MCP settings):
|
|
164
|
+
```json
|
|
165
|
+
{
|
|
166
|
+
"mcpServers": {
|
|
167
|
+
"cartograph": { "command": "carto", "args": ["mcp-server"], "type": "stdio" }
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Your agent now has these tools:
|
|
173
|
+
| Tool | What it gives the agent |
|
|
174
|
+
|---|---|
|
|
175
|
+
| **`personalize`** | **call first** — a steering brief (your persona, field, output guidance + your relevant snippets) so the answer fits *you* and adapts over time |
|
|
176
|
+
| `retrieve_context` | the relevant code/doc **snippets** to inject (hybrid) |
|
|
177
|
+
| `relevant_projects` | which of your repos relate to the task |
|
|
178
|
+
| `elevate_task` | top-of-field briefing: bar + reference repos + playbook |
|
|
179
|
+
| `frontier_status` | your coverage of each field's best references |
|
|
180
|
+
| `record_use` | after answering, report what helped → the persona adapts automatically |
|
|
181
|
+
| `graph_stats` | size of your graph |
|
|
182
|
+
|
|
183
|
+
> Tip: tell your agent in its system prompt *"At the start of a task, call `personalize` first, then `elevate_task` and
|
|
184
|
+
> `retrieve_context` against Cartograph."* — then every future task is grounded in your knowledge.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## 💻 Requirements
|
|
189
|
+
|
|
190
|
+
| | Minimum | Recommended |
|
|
191
|
+
|---|---|---|
|
|
192
|
+
| Python | 3.10+ | 3.12 |
|
|
193
|
+
| OS | Windows / macOS / Linux | any |
|
|
194
|
+
| RAM | 4 GB | 16 GB+ |
|
|
195
|
+
| Disk | ~50 MB + your data | put the graph on a fast/large drive (`CARTOGRAPH_HOME`) |
|
|
196
|
+
| GPU | none (CPU works) | any CUDA GPU → ~10× faster embedding |
|
|
197
|
+
| Heavy ML | **not required** | `cartograph__v1[semantic]` for meaning-based search |
|
|
198
|
+
|
|
199
|
+
**Efficiency tips**
|
|
200
|
+
- Put your workspace on a fast, roomy drive: `export CARTOGRAPH_HOME=/mnt/fast/cartograph`.
|
|
201
|
+
- A CUDA GPU makes `carto index` dramatically faster; CPU still works (just slower).
|
|
202
|
+
- `carto ingest` is incremental — re-run it anytime; only changed files reprocess.
|
|
203
|
+
- Brute-force semantic search is fine up to a few million chunks (~150 ms/query). Past that, see
|
|
204
|
+
[docs/SCALING.md](docs/SCALING.md) to swap in FAISS.
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## 📦 Install tiers & graph sizes
|
|
209
|
+
|
|
210
|
+
Cartograph is **modular** — install only what you need, and grow the graph to any size:
|
|
211
|
+
|
|
212
|
+
| Install | Command | Adds |
|
|
213
|
+
|---|---|---|
|
|
214
|
+
| **Core** | `pip install cartograph__v1` | full graph + keyword search + viz + MCP. Tiny, instant. |
|
|
215
|
+
| **Semantic** | `pip install "cartograph__v1[semantic]"` | meaning-based + hybrid search (~2 GB model) |
|
|
216
|
+
| **ML** | `pip install "cartograph__v1[ml]"` | train your own graph models on your data |
|
|
217
|
+
| **Everything** | `pip install "cartograph__v1[full]"` | all of the above |
|
|
218
|
+
|
|
219
|
+
**Graph-size tiers** — *don't* download one giant graph. Choose what fits:
|
|
220
|
+
- **Your own** (recommended): `carto ingest` your folders — the graph is exactly your scale.
|
|
221
|
+
- **Starter reference packs** (optional, public OSS only — never anyone's personal data): pre-built
|
|
222
|
+
graphs of curated top-tier repos per field, offered as quantized download tiers (S / M / L) via
|
|
223
|
+
GitHub Releases. Pick a small pack to seed a new field, or build your own with
|
|
224
|
+
`scripts/build_reference_pack.py`. See [docs/REFERENCE_PACKS.md](docs/REFERENCE_PACKS.md).
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## 🖥️ The visual app
|
|
229
|
+
|
|
230
|
+
For non-technical users, one command opens an interactive map of your knowledge:
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
carto viz # or double-click scripts/launch_viz.bat (Windows) / launch_viz.sh (mac/linux)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
- Pan/zoom a force-directed graph of your projects, colored by field.
|
|
237
|
+
- Type a query → relevant projects **light up** and matching snippets appear.
|
|
238
|
+
- Zero setup, runs locally in your browser, no data leaves your machine.
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## 🔒 Privacy
|
|
243
|
+
|
|
244
|
+
- **Local-first.** Everything lives in `~/.cartograph` (or `CARTOGRAPH_HOME`). Nothing is uploaded.
|
|
245
|
+
- **Your data is git-ignored** by default; the graph, index, and config never get committed.
|
|
246
|
+
- Reference packs contain **only public OSS** — never personal data.
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
## 🧩 Extending
|
|
251
|
+
|
|
252
|
+
Add your field in three small files and Cartograph elevates it like any other:
|
|
253
|
+
`cartograph/elite/catalog.py` (reference repos) · `playbooks.py` (the process) · `dod.py` (the bar).
|
|
254
|
+
Field inference lives in `cartograph/ingest.py`.
|
|
255
|
+
|
|
256
|
+
## 🗂️ Commands
|
|
257
|
+
|
|
258
|
+
`carto demo · init · ingest · index · retrieve · elevate · frontier · review · persona · personalize ·
|
|
259
|
+
feedback · prefs · serve · stats · viz · mcp-server · doctor` (run `carto --help` or `carto <cmd> --help`).
|
|
260
|
+
|
|
261
|
+
## License
|
|
262
|
+
|
|
263
|
+
MIT — see [LICENSE](LICENSE). Use it, fork it, build your own.
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# 🗺️ Cartograph
|
|
4
|
+
|
|
5
|
+
**Build a personal cognitive graph of everything you work with — and plug it into your AI agents.**
|
|
6
|
+
|
|
7
|
+
*Point it at your folders. It maps your repos, notes, and docs into one searchable graph, learns your
|
|
8
|
+
field, and serves the right context to Claude Code, Cursor, or any MCP agent — so every future
|
|
9
|
+
task, from coding to daily generative-AI use, is grounded in **your** knowledge.*
|
|
10
|
+
|
|
11
|
+
Local-first · your data never leaves your machine · works on any field · no ML expertise required
|
|
12
|
+
|
|
13
|
+
[Quickstart](#-quickstart) · [How it works](#-how-it-works) · [Connect your agents](#-connect-your-agents)
|
|
14
|
+
· [Requirements](#-requirements) · [Install tiers](#-install-tiers--graph-sizes) · [Visual app](#-the-visual-app)
|
|
15
|
+
|
|
16
|
+
</div>
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## ✨ Why
|
|
21
|
+
|
|
22
|
+
LLM agents are brilliant but amnesiac — they don't know your repos, your conventions, your past
|
|
23
|
+
decisions, or what "good" looks like in your field. Cartograph fixes that locally:
|
|
24
|
+
|
|
25
|
+
1. **Plug** — `pip install cartograph__v1`
|
|
26
|
+
2. **Ingest** — `carto ingest ~/code` builds a graph of your work (incrementally, any folder)
|
|
27
|
+
3. **(Optional) Train** — add semantic search / your own models on *your* graph
|
|
28
|
+
4. **Auto-applied** — your agents query the graph over MCP for grounded context, forever
|
|
29
|
+
|
|
30
|
+
No cloud. No account. Your graph is a single SQLite file you own.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## 🚀 Quickstart
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install cartograph__v1 # PyPI package name; the command is `carto`, the import is `cartograph`
|
|
38
|
+
carto demo # ⚡ see it ALL work in ~10s on a synthetic corpus (zero setup)
|
|
39
|
+
carto init # pick folder(s) + your field(s) — declaring your field makes labels accurate
|
|
40
|
+
carto ingest # build your graph (re-run anytime; only changed files reprocess)
|
|
41
|
+
carto viz # 👀 see your graph in the browser
|
|
42
|
+
carto retrieve "how did I handle auth" --chunks # hybrid search over everything you've done
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Want semantic (meaning-based) search too?
|
|
46
|
+
```bash
|
|
47
|
+
pip install "cartograph__v1[semantic]" # ~2GB; uses your GPU if present, else CPU
|
|
48
|
+
carto index # embed your chunks once
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## 🧠 How it works
|
|
54
|
+
|
|
55
|
+
```mermaid
|
|
56
|
+
flowchart TB
|
|
57
|
+
subgraph YOU["Your stuff (stays on your machine)"]
|
|
58
|
+
R["repos · notes · docs · books"]
|
|
59
|
+
end
|
|
60
|
+
subgraph CARTO["Cartograph"]
|
|
61
|
+
ING["Ingest: walk → chunk → infer field (incremental)"]
|
|
62
|
+
GRAPH["The graph — SQLite + FTS5\nprojects · files · chunks · skills · edges"]
|
|
63
|
+
SEM["Semantic index (optional)\nlocal embeddings, GPU-aware"]
|
|
64
|
+
HY["Hybrid retrieval (RRF of semantic + keyword)"]
|
|
65
|
+
EL["Elite layer\nfrontier catalog · playbooks · Definition-of-Done · coverage"]
|
|
66
|
+
end
|
|
67
|
+
subgraph OUT["Where it shows up"]
|
|
68
|
+
MCP["MCP server → Claude Code / Cursor / any agent"]
|
|
69
|
+
VIZ["Desktop visual graph"]
|
|
70
|
+
CLI["carto CLI"]
|
|
71
|
+
end
|
|
72
|
+
R --> ING --> GRAPH
|
|
73
|
+
GRAPH --> SEM --> HY
|
|
74
|
+
GRAPH --> HY --> EL
|
|
75
|
+
HY --> MCP & VIZ & CLI
|
|
76
|
+
EL --> MCP & CLI
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**The graph schema** — everything you work with becomes connected nodes:
|
|
80
|
+
|
|
81
|
+
```mermaid
|
|
82
|
+
flowchart LR
|
|
83
|
+
Project -->|CONTAINS| File -->|CHUNKED_INTO| Chunk
|
|
84
|
+
Project -->|IN_FIELD| Field[("field: ml / quant / web / …")]
|
|
85
|
+
Project -->|RELATED_TO| Project
|
|
86
|
+
Project -.->|REPO_TEACHES| Skill
|
|
87
|
+
Chunk -->|FTS5 + embeddings| Search[("hybrid retrieval")]
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Hybrid retrieval is the headline.** It fuses meaning-based (semantic) and exact (keyword) search via
|
|
91
|
+
Reciprocal Rank Fusion. In the engine Cartograph distills from, hybrid scored **success@10 0.986 vs
|
|
92
|
+
0.958** for either method alone — it's *never worse*, because it catches both paraphrases (semantic)
|
|
93
|
+
and exact tokens like a function name (keyword). It works in pure-keyword mode with zero ML installed,
|
|
94
|
+
and turns on semantic automatically once you run `carto index`.
|
|
95
|
+
|
|
96
|
+
**The elite layer** pulls any build toward the top of its field (works for ML, quant, web, HPC, data,
|
|
97
|
+
devops, mobile, game-dev, research, libraries — and takes just three small files to extend):
|
|
98
|
+
- `carto elevate "<task>"` — the elite bar, reference repos, the frontier *playbook* (process), and the
|
|
99
|
+
repos you already have to build on
|
|
100
|
+
- `carto frontier` — how much of your field's top-tier reference set you've ingested + what to add
|
|
101
|
+
- `carto review <project> --field <f>` — grade a build against the field's Definition-of-Done
|
|
102
|
+
|
|
103
|
+
### 🧭 The persona layer — steer your agents *to you*
|
|
104
|
+
|
|
105
|
+
The graph knows your work; the **persona layer** learns *what you respond to* and shapes every answer.
|
|
106
|
+
It models you as field weights + an optional preference vector in embedding space, re-ranks retrieval by
|
|
107
|
+
alignment to you, and emits a model-agnostic **steering brief** any agent prepends — so Claude / Cursor /
|
|
108
|
+
ChatGPT / Gemini outputs adapt to your field, conventions, and preferences, and **keep adapting** as you
|
|
109
|
+
give feedback. Confidence-scaled: well-supported preferences steer hard, sparse ones barely nudge.
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
carto persona # your learned focus + confidence
|
|
113
|
+
carto personalize "how do I cache this?" # the steering brief an agent prepends
|
|
114
|
+
carto feedback --liked my-repo # teach it what was useful (adapts over time)
|
|
115
|
+
```
|
|
116
|
+
The same brief is available to agents via the MCP `personalize` tool and to web GenAI via
|
|
117
|
+
`carto serve` + a tiny userscript ([docs/BROWSER.md](docs/BROWSER.md)). Foundations, the
|
|
118
|
+
Hilbert-space mapping, and honest limits: **[docs/PERSONA.md](docs/PERSONA.md)**.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## 🔌 Connect your agents
|
|
123
|
+
|
|
124
|
+
Cartograph speaks **MCP** (Model Context Protocol). Add it once and your agent can query your graph.
|
|
125
|
+
|
|
126
|
+
**Claude Code / Cursor** — add to your MCP config (`~/.cursor/mcp.json` or Claude Code's MCP settings):
|
|
127
|
+
```json
|
|
128
|
+
{
|
|
129
|
+
"mcpServers": {
|
|
130
|
+
"cartograph": { "command": "carto", "args": ["mcp-server"], "type": "stdio" }
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Your agent now has these tools:
|
|
136
|
+
| Tool | What it gives the agent |
|
|
137
|
+
|---|---|
|
|
138
|
+
| **`personalize`** | **call first** — a steering brief (your persona, field, output guidance + your relevant snippets) so the answer fits *you* and adapts over time |
|
|
139
|
+
| `retrieve_context` | the relevant code/doc **snippets** to inject (hybrid) |
|
|
140
|
+
| `relevant_projects` | which of your repos relate to the task |
|
|
141
|
+
| `elevate_task` | top-of-field briefing: bar + reference repos + playbook |
|
|
142
|
+
| `frontier_status` | your coverage of each field's best references |
|
|
143
|
+
| `record_use` | after answering, report what helped → the persona adapts automatically |
|
|
144
|
+
| `graph_stats` | size of your graph |
|
|
145
|
+
|
|
146
|
+
> Tip: tell your agent in its system prompt *"At the start of a task, call `personalize` first, then `elevate_task` and
|
|
147
|
+
> `retrieve_context` against Cartograph."* — then every future task is grounded in your knowledge.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## 💻 Requirements
|
|
152
|
+
|
|
153
|
+
| | Minimum | Recommended |
|
|
154
|
+
|---|---|---|
|
|
155
|
+
| Python | 3.10+ | 3.12 |
|
|
156
|
+
| OS | Windows / macOS / Linux | any |
|
|
157
|
+
| RAM | 4 GB | 16 GB+ |
|
|
158
|
+
| Disk | ~50 MB + your data | put the graph on a fast/large drive (`CARTOGRAPH_HOME`) |
|
|
159
|
+
| GPU | none (CPU works) | any CUDA GPU → ~10× faster embedding |
|
|
160
|
+
| Heavy ML | **not required** | `cartograph__v1[semantic]` for meaning-based search |
|
|
161
|
+
|
|
162
|
+
**Efficiency tips**
|
|
163
|
+
- Put your workspace on a fast, roomy drive: `export CARTOGRAPH_HOME=/mnt/fast/cartograph`.
|
|
164
|
+
- A CUDA GPU makes `carto index` dramatically faster; CPU still works (just slower).
|
|
165
|
+
- `carto ingest` is incremental — re-run it anytime; only changed files reprocess.
|
|
166
|
+
- Brute-force semantic search is fine up to a few million chunks (~150 ms/query). Past that, see
|
|
167
|
+
[docs/SCALING.md](docs/SCALING.md) to swap in FAISS.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## 📦 Install tiers & graph sizes
|
|
172
|
+
|
|
173
|
+
Cartograph is **modular** — install only what you need, and grow the graph to any size:
|
|
174
|
+
|
|
175
|
+
| Install | Command | Adds |
|
|
176
|
+
|---|---|---|
|
|
177
|
+
| **Core** | `pip install cartograph__v1` | full graph + keyword search + viz + MCP. Tiny, instant. |
|
|
178
|
+
| **Semantic** | `pip install "cartograph__v1[semantic]"` | meaning-based + hybrid search (~2 GB model) |
|
|
179
|
+
| **ML** | `pip install "cartograph__v1[ml]"` | train your own graph models on your data |
|
|
180
|
+
| **Everything** | `pip install "cartograph__v1[full]"` | all of the above |
|
|
181
|
+
|
|
182
|
+
**Graph-size tiers** — *don't* download one giant graph. Choose what fits:
|
|
183
|
+
- **Your own** (recommended): `carto ingest` your folders — the graph is exactly your scale.
|
|
184
|
+
- **Starter reference packs** (optional, public OSS only — never anyone's personal data): pre-built
|
|
185
|
+
graphs of curated top-tier repos per field, offered as quantized download tiers (S / M / L) via
|
|
186
|
+
GitHub Releases. Pick a small pack to seed a new field, or build your own with
|
|
187
|
+
`scripts/build_reference_pack.py`. See [docs/REFERENCE_PACKS.md](docs/REFERENCE_PACKS.md).
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## 🖥️ The visual app
|
|
192
|
+
|
|
193
|
+
For non-technical users, one command opens an interactive map of your knowledge:
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
carto viz # or double-click scripts/launch_viz.bat (Windows) / launch_viz.sh (mac/linux)
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
- Pan/zoom a force-directed graph of your projects, colored by field.
|
|
200
|
+
- Type a query → relevant projects **light up** and matching snippets appear.
|
|
201
|
+
- Zero setup, runs locally in your browser, no data leaves your machine.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## 🔒 Privacy
|
|
206
|
+
|
|
207
|
+
- **Local-first.** Everything lives in `~/.cartograph` (or `CARTOGRAPH_HOME`). Nothing is uploaded.
|
|
208
|
+
- **Your data is git-ignored** by default; the graph, index, and config never get committed.
|
|
209
|
+
- Reference packs contain **only public OSS** — never personal data.
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## 🧩 Extending
|
|
214
|
+
|
|
215
|
+
Add your field in three small files and Cartograph elevates it like any other:
|
|
216
|
+
`cartograph/elite/catalog.py` (reference repos) · `playbooks.py` (the process) · `dod.py` (the bar).
|
|
217
|
+
Field inference lives in `cartograph/ingest.py`.
|
|
218
|
+
|
|
219
|
+
## 🗂️ Commands
|
|
220
|
+
|
|
221
|
+
`carto demo · init · ingest · index · retrieve · elevate · frontier · review · persona · personalize ·
|
|
222
|
+
feedback · prefs · serve · stats · viz · mcp-server · doctor` (run `carto --help` or `carto <cmd> --help`).
|
|
223
|
+
|
|
224
|
+
## License
|
|
225
|
+
|
|
226
|
+
MIT — see [LICENSE](LICENSE). Use it, fork it, build your own.
|