contextlake 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextlake-2.1.0/LICENSE +21 -0
- contextlake-2.1.0/PKG-INFO +240 -0
- contextlake-2.1.0/README.md +183 -0
- contextlake-2.1.0/pyproject.toml +70 -0
- contextlake-2.1.0/setup.cfg +4 -0
- contextlake-2.1.0/src/contextlake/__init__.py +3 -0
- contextlake-2.1.0/src/contextlake/__main__.py +6 -0
- contextlake-2.1.0/src/contextlake/cli.py +333 -0
- contextlake-2.1.0/src/contextlake/config.py +120 -0
- contextlake-2.1.0/src/contextlake/core.py +773 -0
- contextlake-2.1.0/src/contextlake/kb/__init__.py +15 -0
- contextlake-2.1.0/src/contextlake/kb/commands.py +698 -0
- contextlake-2.1.0/src/contextlake/kb/config.py +138 -0
- contextlake-2.1.0/src/contextlake/kb/connectors/__init__.py +7 -0
- contextlake-2.1.0/src/contextlake/kb/connectors/atlassian.py +175 -0
- contextlake-2.1.0/src/contextlake/kb/connectors/common.py +42 -0
- contextlake-2.1.0/src/contextlake/kb/connectors/figma.py +124 -0
- contextlake-2.1.0/src/contextlake/kb/connectors/gitlab.py +86 -0
- contextlake-2.1.0/src/contextlake/kb/connectors/orchestrate.py +143 -0
- contextlake-2.1.0/src/contextlake/kb/embeddings/__init__.py +9 -0
- contextlake-2.1.0/src/contextlake/kb/embeddings/base.py +48 -0
- contextlake-2.1.0/src/contextlake/kb/embeddings/hybrid.py +79 -0
- contextlake-2.1.0/src/contextlake/kb/embeddings/index.py +46 -0
- contextlake-2.1.0/src/contextlake/kb/embeddings/ollama.py +42 -0
- contextlake-2.1.0/src/contextlake/kb/embeddings/openai.py +54 -0
- contextlake-2.1.0/src/contextlake/kb/embeddings/store.py +200 -0
- contextlake-2.1.0/src/contextlake/kb/ids.py +40 -0
- contextlake-2.1.0/src/contextlake/kb/llm/__init__.py +9 -0
- contextlake-2.1.0/src/contextlake/kb/llm/base.py +44 -0
- contextlake-2.1.0/src/contextlake/kb/llm/ollama.py +39 -0
- contextlake-2.1.0/src/contextlake/kb/llm/openai.py +53 -0
- contextlake-2.1.0/src/contextlake/kb/manifest.py +101 -0
- contextlake-2.1.0/src/contextlake/kb/mcp_client.py +63 -0
- contextlake-2.1.0/src/contextlake/kb/model.py +71 -0
- contextlake-2.1.0/src/contextlake/kb/parse.py +329 -0
- contextlake-2.1.0/src/contextlake/kb/references.py +66 -0
- contextlake-2.1.0/src/contextlake/kb/security.py +34 -0
- contextlake-2.1.0/src/contextlake/kb/server.py +231 -0
- contextlake-2.1.0/src/contextlake/kb/state.py +40 -0
- contextlake-2.1.0/src/contextlake/kb/steer/__init__.py +7 -0
- contextlake-2.1.0/src/contextlake/kb/steer/generate.py +161 -0
- contextlake-2.1.0/src/contextlake/kb/steer/skills.py +113 -0
- contextlake-2.1.0/src/contextlake/kb/store/__init__.py +5 -0
- contextlake-2.1.0/src/contextlake/kb/store/base.py +81 -0
- contextlake-2.1.0/src/contextlake/kb/store/shards.py +97 -0
- contextlake-2.1.0/src/contextlake/kb/store/sqlite_store.py +265 -0
- contextlake-2.1.0/src/contextlake/kb/wiki/__init__.py +4 -0
- contextlake-2.1.0/src/contextlake/kb/wiki/council.py +60 -0
- contextlake-2.1.0/src/contextlake/kb/wiki/generate.py +86 -0
- contextlake-2.1.0/src/contextlake/logging_setup.py +119 -0
- contextlake-2.1.0/src/contextlake/safety.py +82 -0
- contextlake-2.1.0/src/contextlake/style.py +147 -0
- contextlake-2.1.0/src/contextlake.egg-info/PKG-INFO +240 -0
- contextlake-2.1.0/src/contextlake.egg-info/SOURCES.txt +67 -0
- contextlake-2.1.0/src/contextlake.egg-info/dependency_links.txt +1 -0
- contextlake-2.1.0/src/contextlake.egg-info/entry_points.txt +3 -0
- contextlake-2.1.0/src/contextlake.egg-info/requires.txt +20 -0
- contextlake-2.1.0/src/contextlake.egg-info/top_level.txt +1 -0
- contextlake-2.1.0/tests/test_branches.py +69 -0
- contextlake-2.1.0/tests/test_cli_overrides.py +74 -0
- contextlake-2.1.0/tests/test_clone.py +86 -0
- contextlake-2.1.0/tests/test_config.py +77 -0
- contextlake-2.1.0/tests/test_logging.py +69 -0
- contextlake-2.1.0/tests/test_orchestration.py +90 -0
- contextlake-2.1.0/tests/test_resilience.py +80 -0
- contextlake-2.1.0/tests/test_safety.py +80 -0
- contextlake-2.1.0/tests/test_style.py +90 -0
- contextlake-2.1.0/tests/test_update.py +113 -0
- contextlake-2.1.0/tests/test_verify_fetch.py +97 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sayak Sarkar <sayak.bugsmith@gmail.com>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: contextlake
|
|
3
|
+
Version: 2.1.0
|
|
4
|
+
Summary: A local context layer for AI tools: mirror your repositories, index them into a knowledge graph, and serve it over MCP.
|
|
5
|
+
Author-email: Sayak Sarkar <sayak.bugsmith@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Sayak Sarkar <sayak.bugsmith@gmail.com>
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/sayak-sarkar/contextlake
|
|
29
|
+
Project-URL: Repository, https://github.com/sayak-sarkar/contextlake
|
|
30
|
+
Project-URL: Changelog, https://github.com/sayak-sarkar/contextlake/blob/main/CHANGELOG.md
|
|
31
|
+
Keywords: context,knowledge-graph,mcp,ai,code-search,gitlab,git,mirror,cli,glab,workspace
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Environment :: Console
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Topic :: Software Development :: Version Control :: Git
|
|
38
|
+
Requires-Python: >=3.9
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
40
|
+
License-File: LICENSE
|
|
41
|
+
Provides-Extra: dev
|
|
42
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
44
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
45
|
+
Provides-Extra: kb
|
|
46
|
+
Requires-Dist: mcp>=1.28; extra == "kb"
|
|
47
|
+
Requires-Dist: pydantic>=2; extra == "kb"
|
|
48
|
+
Requires-Dist: tomli; python_version < "3.11" and extra == "kb"
|
|
49
|
+
Requires-Dist: tree-sitter>=0.25; extra == "kb"
|
|
50
|
+
Requires-Dist: tree-sitter-python>=0.23; extra == "kb"
|
|
51
|
+
Requires-Dist: tree-sitter-javascript>=0.23; extra == "kb"
|
|
52
|
+
Requires-Dist: tree-sitter-typescript>=0.23; extra == "kb"
|
|
53
|
+
Requires-Dist: tree-sitter-c-sharp>=0.23; extra == "kb"
|
|
54
|
+
Provides-Extra: kb-vec
|
|
55
|
+
Requires-Dist: sqlite-vec>=0.1; extra == "kb-vec"
|
|
56
|
+
Dynamic: license-file
|
|
57
|
+
|
|
58
|
+
# contextlake
|
|
59
|
+
|
|
60
|
+
> **A local context layer for your AI tools — your repositories mirrored, indexed into a knowledge graph, and served over MCP, so agents work from real source instead of guessing.**
|
|
61
|
+
|
|
62
|
+
[![CI](https://github.com/sayak-sarkar/contextlake/actions/workflows/ci.yml/badge.svg)](https://github.com/sayak-sarkar/contextlake/actions/workflows/ci.yml)
|
|
63
|
+

|
|
64
|
+

|
|
65
|
+
|
|
66
|
+
You have access to dozens — maybe hundreds — of repositories scattered across a
|
|
67
|
+
GitLab group and its subgroups. You want them all on your laptop, in the same
|
|
68
|
+
shape they have on GitLab, each sitting on the branch where the real work is
|
|
69
|
+
happening, and you want a single command to keep it that way.
|
|
70
|
+
|
|
71
|
+
That's the foundation. `contextlake` enumerates everything you can reach, clones
|
|
72
|
+
what's missing into a faithful mirror of the namespace tree, pulls what's stale,
|
|
73
|
+
and parks each repo on its most active branch — concurrently, with retries, and
|
|
74
|
+
**without ever stomping on the feature branch you're in the middle of.**
|
|
75
|
+
|
|
76
|
+
On top of that mirror, an optional [knowledge layer](#knowledge-layer-optional)
|
|
77
|
+
indexes everything into a graph and serves it to your AI tools over MCP — so they
|
|
78
|
+
answer from real source. (Today the source is GitLab; the design is source-agnostic.)
|
|
79
|
+
|
|
80
|
+
It carries no credentials of its own: authentication rides entirely on your
|
|
81
|
+
existing [`glab`](https://gitlab.com/gitlab-org/cli) login and `git` setup.
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install .
|
|
85
|
+
contextlake status # see where you stand
|
|
86
|
+
contextlake sync # fetch → clone → update → branches → verify
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
> **New here?** [**QUICKSTART.md**](QUICKSTART.md) takes you from install to a
|
|
90
|
+
> fully-wired AI workspace (mirror → knowledge graph → Claude Code / Windsurf) in a
|
|
91
|
+
> few minutes.
|
|
92
|
+
|
|
93
|
+
## What's in the box
|
|
94
|
+
|
|
95
|
+
**The core loop**
|
|
96
|
+
|
|
97
|
+
- **Discovers everything** in a GitLab group and its subgroups via the API.
|
|
98
|
+
- **Clones what's missing**, preserving GitLab's exact directory structure.
|
|
99
|
+
- **Updates what's stale** with a fast-forward pull, honestly reporting whether
|
|
100
|
+
anything actually changed.
|
|
101
|
+
- **Rides the active branch** — picks each repo's liveliest branch by commit
|
|
102
|
+
count, recency, or a hybrid of both (your call).
|
|
103
|
+
- **Verifies the mirror** against GitLab and flags drift, orphans, and
|
|
104
|
+
repos-nested-inside-repos.
|
|
105
|
+
|
|
106
|
+
**Because it runs across hundreds of repos**
|
|
107
|
+
|
|
108
|
+
- **Concurrent** by default, with an **adaptive worker pool** that backs off when
|
|
109
|
+
the network starts misbehaving and ramps back up when it recovers.
|
|
110
|
+
- **Resilient** — exponential backoff with jitter on transient failures, fail-fast
|
|
111
|
+
on the ones that won't recover (DNS, TLS).
|
|
112
|
+
|
|
113
|
+
**Because it's your working machine**
|
|
114
|
+
|
|
115
|
+
- **Branch safety**: never yanks you off a working branch or clobbers uncommitted
|
|
116
|
+
changes — skip, or `--auto-stash`, your choice.
|
|
117
|
+
- **`--dry-run`** everything first if you're the cautious type.
|
|
118
|
+
- **Configurable** via INI files (local + global) with sensible precedence, plus
|
|
119
|
+
per-run CLI overrides.
|
|
120
|
+
|
|
121
|
+
## Installation
|
|
122
|
+
|
|
123
|
+
**Prerequisites:** Python 3.9+ (3.10+ for the knowledge layer), `git`, and an
|
|
124
|
+
authenticated [`glab`](https://gitlab.com/gitlab-org/cli) (`glab auth login`).
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
pipx install "git+https://github.com/sayak-sarkar/contextlake" # isolated CLI
|
|
128
|
+
# or: pip install .   (use `pip install ".[kb]"` to add the knowledge layer)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Once installed, `contextlake`, `python -m contextlake`, and `python3 contextlake.py`
|
|
132
|
+
are equivalent.
|
|
133
|
+
|
|
134
|
+
**Configure** — copy the example and set your group + workspace:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
cp .contextlake.ini.example ~/.contextlake.ini
|
|
138
|
+
```
|
|
139
|
+
```ini
|
|
140
|
+
[contextlake]
|
|
141
|
+
work_dir = ~/work
|
|
142
|
+
gitlab_group = your-gitlab-group
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
The tool carries no credentials of its own — auth rides on `glab` — so
|
|
146
|
+
`.contextlake.ini` holds only non-secret settings and is gitignored by default. The
|
|
147
|
+
full option reference is in [docs/usage.md](docs/usage.md).
|
|
148
|
+
|
|
149
|
+
## Usage
|
|
150
|
+
|
|
151
|
+
Run commands as `contextlake <command>` — full per-command docs are in
|
|
152
|
+
**[docs/usage.md](docs/usage.md)**.
|
|
153
|
+
|
|
154
|
+
### Commands at a glance
|
|
155
|
+
|
|
156
|
+
| Command | What it does |
|
|
157
|
+
| --- | --- |
|
|
158
|
+
| `status` | Show the workspace sync state vs GitLab (read-only) |
|
|
159
|
+
| `fetch` | Cache the GitLab project list |
|
|
160
|
+
| `clone` | Clone repos that exist on GitLab but not locally |
|
|
161
|
+
| `update` | Pull updates for local repos (skips only repos with a dirty working tree) |
|
|
162
|
+
| `branches` | Switch each repo to its most active branch |
|
|
163
|
+
| `verify` | Check the local mirror matches GitLab (drift, orphans, nesting) |
|
|
164
|
+
| `sync` | The full pipeline: fetch → clone → update → branches → verify |
|
|
165
|
+
| `bootstrap` | **Turnkey**: sync + index + connect + embed + wiki + steer |
|
|
166
|
+
| `index` | Build the code/dependency graph (`--workspace`, incremental, `--watch`) |
|
|
167
|
+
| `connect` | Link repos to Atlassian / Figma / GitLab sources |
|
|
168
|
+
| `embed` | Build semantic-search vectors (needs an embeddings model) |
|
|
169
|
+
| `lint` | Graph health — stale repos (HEAD moved) and dangling edges; exits non-zero if any |
|
|
170
|
+
| `wiki` | LLM-synthesized, council-verified wiki pages (needs a model) |
|
|
171
|
+
| `steer` | Write editor steering — `AGENTS.md`, `.mcp.json`, `.windsurfrules`, skills |
|
|
172
|
+
| `serve` | Expose the graph over MCP (`--transport stdio`/`http`) |
|
|
173
|
+
| `query` | Search the index (`--kind`, `--repo`, `--limit`, `--as-of <commit>`) |
|
|
174
|
+
| `doctor` | Check the knowledge-layer environment (SQLite FTS5, git/glab, store, embeddings) |
|
|
175
|
+
|
|
176
|
+
The first seven are the core sync (detailed below); the rest are the optional
|
|
177
|
+
**[knowledge layer](#knowledge-layer-optional)**. Run any command with `--config` (sync INI)
|
|
178
|
+
and, for the knowledge layer, `--config`/`--kb-config` pointing at your `kb.toml`.
|
|
179
|
+
|
|
180
|
+
### Global options
|
|
181
|
+
|
|
182
|
+
These apply to any command:
|
|
183
|
+
|
|
184
|
+
- `--dry-run` — preview clone/update/branch actions without changing anything.
|
|
185
|
+
- `-v` / `--verbose`, `-q` / `--quiet` — control console verbosity.
|
|
186
|
+
- `--log-file PATH` — append a full timestamped audit log (rotating).
|
|
187
|
+
- `--config PATH` — use a specific config file (highest precedence).
|
|
188
|
+
- `--version` — print the version and exit.
|
|
189
|
+
|
|
190
|
+
Output is colorized on a terminal (status glyphs, a progress bar); set `NO_COLOR`
|
|
191
|
+
to disable or `FORCE_COLOR` to keep colours when piping. Colours are dropped
|
|
192
|
+
automatically for non-TTY output (pipes, cron, log files).
|
|
193
|
+
|
|
194
|
+
A read-only `status` followed by a `--dry-run sync` is the safest way to preview
|
|
195
|
+
what a sync would do:
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
contextlake status
|
|
199
|
+
contextlake --dry-run sync
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Knowledge layer (optional)
|
|
203
|
+
|
|
204
|
+
Beyond mirroring, an optional layer (`contextlake.kb`) turns your repos into a
|
|
205
|
+
**knowledge graph** and serves it to AI tools over **MCP** — so Claude Code, Windsurf,
|
|
206
|
+
or Kiro can answer *"where is `X` defined?"* or *"who calls `Y`?"* instead of grepping.
|
|
207
|
+
It can also link repos to their Atlassian / Figma / GitLab items, add semantic search,
|
|
208
|
+
write a curated wiki, and generate per-tool steering files + a skills library. Most of
|
|
209
|
+
it needs no model; the rest works with a local Ollama or any OpenAI-compatible endpoint.
|
|
210
|
+
|
|
211
|
+
One command sets it all up:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
contextlake bootstrap --kb-config ~/.contextlake/kb.toml
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
→ Full guide: **[docs/knowledge-layer.md](docs/knowledge-layer.md)**.
|
|
218
|
+
|
|
219
|
+
## Documentation
|
|
220
|
+
|
|
221
|
+
- **[QUICKSTART.md](QUICKSTART.md)** — install → bootstrap → wire your editor, in minutes
|
|
222
|
+
- **[docs/usage.md](docs/usage.md)** — every command, configuration, branch safety, scheduling
|
|
223
|
+
- **[docs/knowledge-layer.md](docs/knowledge-layer.md)** — the graph, connectors, search, wiki, steering
|
|
224
|
+
- **[docs/internals.md](docs/internals.md)** — architecture & internals
|
|
225
|
+
- **[BRANDING.md](BRANDING.md)** — brand guide (name, palette, logo, mascot)
|
|
226
|
+
- **[CHANGELOG.md](CHANGELOG.md)** · **[ROADMAP.md](ROADMAP.md)** · **[CONTRIBUTING.md](CONTRIBUTING.md)**
|
|
227
|
+
|
|
228
|
+
## License
|
|
229
|
+
|
|
230
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
231
|
+
|
|
232
|
+
## Support
|
|
233
|
+
|
|
234
|
+
For issues or questions:
|
|
235
|
+
|
|
236
|
+
1. Check this documentation first
|
|
237
|
+
2. Review log files for error messages
|
|
238
|
+
3. Test individual commands to isolate issues
|
|
239
|
+
4. Verify `glab` authentication: `glab auth status`
|
|
240
|
+
5. Check GitLab access permissions in the web interface
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# contextlake
|
|
2
|
+
|
|
3
|
+
> **A local context layer for your AI tools — your repositories mirrored, indexed into a knowledge graph, and served over MCP, so agents work from real source instead of guessing.**
|
|
4
|
+
|
|
5
|
+
[![CI](https://github.com/sayak-sarkar/contextlake/actions/workflows/ci.yml/badge.svg)](https://github.com/sayak-sarkar/contextlake/actions/workflows/ci.yml)
|
|
6
|
+

|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
You have access to dozens — maybe hundreds — of repositories scattered across a
|
|
10
|
+
GitLab group and its subgroups. You want them all on your laptop, in the same
|
|
11
|
+
shape they have on GitLab, each sitting on the branch where the real work is
|
|
12
|
+
happening, and you want a single command to keep it that way.
|
|
13
|
+
|
|
14
|
+
That's the foundation. `contextlake` enumerates everything you can reach, clones
|
|
15
|
+
what's missing into a faithful mirror of the namespace tree, pulls what's stale,
|
|
16
|
+
and parks each repo on its most active branch — concurrently, with retries, and
|
|
17
|
+
**without ever stomping on the feature branch you're in the middle of.**
|
|
18
|
+
|
|
19
|
+
On top of that mirror, an optional [knowledge layer](#knowledge-layer-optional)
|
|
20
|
+
indexes everything into a graph and serves it to your AI tools over MCP — so they
|
|
21
|
+
answer from real source. (Today the source is GitLab; the design is source-agnostic.)
|
|
22
|
+
|
|
23
|
+
It carries no credentials of its own: authentication rides entirely on your
|
|
24
|
+
existing [`glab`](https://gitlab.com/gitlab-org/cli) login and `git` setup.
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install .
|
|
28
|
+
contextlake status # see where you stand
|
|
29
|
+
contextlake sync # fetch → clone → update → branches → verify
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
> **New here?** [**QUICKSTART.md**](QUICKSTART.md) takes you from install to a
|
|
33
|
+
> fully-wired AI workspace (mirror → knowledge graph → Claude Code / Windsurf) in a
|
|
34
|
+
> few minutes.
|
|
35
|
+
|
|
36
|
+
## What's in the box
|
|
37
|
+
|
|
38
|
+
**The core loop**
|
|
39
|
+
|
|
40
|
+
- **Discovers everything** in a GitLab group and its subgroups via the API.
|
|
41
|
+
- **Clones what's missing**, preserving GitLab's exact directory structure.
|
|
42
|
+
- **Updates what's stale** with a fast-forward pull, honestly reporting whether
|
|
43
|
+
anything actually changed.
|
|
44
|
+
- **Rides the active branch** — picks each repo's liveliest branch by commit
|
|
45
|
+
count, recency, or a hybrid of both (your call).
|
|
46
|
+
- **Verifies the mirror** against GitLab and flags drift, orphans, and
|
|
47
|
+
repos-nested-inside-repos.
|
|
48
|
+
|
|
49
|
+
**Because it runs across hundreds of repos**
|
|
50
|
+
|
|
51
|
+
- **Concurrent** by default, with an **adaptive worker pool** that backs off when
|
|
52
|
+
the network starts misbehaving and ramps back up when it recovers.
|
|
53
|
+
- **Resilient** — exponential backoff with jitter on transient failures, fail-fast
|
|
54
|
+
on the ones that won't recover (DNS, TLS).
|
|
55
|
+
|
|
56
|
+
**Because it's your working machine**
|
|
57
|
+
|
|
58
|
+
- **Branch safety**: never yanks you off a working branch or clobbers uncommitted
|
|
59
|
+
changes — skip, or `--auto-stash`, your choice.
|
|
60
|
+
- **`--dry-run`** everything first if you're the cautious type.
|
|
61
|
+
- **Configurable** via INI files (local + global) with sensible precedence, plus
|
|
62
|
+
per-run CLI overrides.
|
|
63
|
+
|
|
64
|
+
## Installation
|
|
65
|
+
|
|
66
|
+
**Prerequisites:** Python 3.9+ (3.10+ for the knowledge layer), `git`, and an
|
|
67
|
+
authenticated [`glab`](https://gitlab.com/gitlab-org/cli) (`glab auth login`).
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pipx install "git+https://github.com/sayak-sarkar/contextlake" # isolated CLI
|
|
71
|
+
# or: pip install .   (use `pip install ".[kb]"` to add the knowledge layer)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Once installed, `contextlake`, `python -m contextlake`, and `python3 contextlake.py`
|
|
75
|
+
are equivalent.
|
|
76
|
+
|
|
77
|
+
**Configure** — copy the example and set your group + workspace:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
cp .contextlake.ini.example ~/.contextlake.ini
|
|
81
|
+
```
|
|
82
|
+
```ini
|
|
83
|
+
[contextlake]
|
|
84
|
+
work_dir = ~/work
|
|
85
|
+
gitlab_group = your-gitlab-group
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The tool carries no credentials of its own — auth rides on `glab` — so
|
|
89
|
+
`.contextlake.ini` holds only non-secret settings and is gitignored by default. The
|
|
90
|
+
full option reference is in [docs/usage.md](docs/usage.md).
|
|
91
|
+
|
|
92
|
+
## Usage
|
|
93
|
+
|
|
94
|
+
Run commands as `contextlake <command>` — full per-command docs are in
|
|
95
|
+
**[docs/usage.md](docs/usage.md)**.
|
|
96
|
+
|
|
97
|
+
### Commands at a glance
|
|
98
|
+
|
|
99
|
+
| Command | What it does |
|
|
100
|
+
| --- | --- |
|
|
101
|
+
| `status` | Show the workspace sync state vs GitLab (read-only) |
|
|
102
|
+
| `fetch` | Cache the GitLab project list |
|
|
103
|
+
| `clone` | Clone repos that exist on GitLab but not locally |
|
|
104
|
+
| `update` | Pull updates for local repos (skips only repos with a dirty working tree) |
|
|
105
|
+
| `branches` | Switch each repo to its most active branch |
|
|
106
|
+
| `verify` | Check the local mirror matches GitLab (drift, orphans, nesting) |
|
|
107
|
+
| `sync` | The full pipeline: fetch → clone → update → branches → verify |
|
|
108
|
+
| `bootstrap` | **Turnkey**: sync + index + connect + embed + wiki + steer |
|
|
109
|
+
| `index` | Build the code/dependency graph (`--workspace`, incremental, `--watch`) |
|
|
110
|
+
| `connect` | Link repos to Atlassian / Figma / GitLab sources |
|
|
111
|
+
| `embed` | Build semantic-search vectors (needs an embeddings model) |
|
|
112
|
+
| `lint` | Graph health — stale repos (HEAD moved) and dangling edges; exits non-zero if any |
|
|
113
|
+
| `wiki` | LLM-synthesized, council-verified wiki pages (needs a model) |
|
|
114
|
+
| `steer` | Write editor steering — `AGENTS.md`, `.mcp.json`, `.windsurfrules`, skills |
|
|
115
|
+
| `serve` | Expose the graph over MCP (`--transport stdio`/`http`) |
|
|
116
|
+
| `query` | Search the index (`--kind`, `--repo`, `--limit`, `--as-of <commit>`) |
|
|
117
|
+
| `doctor` | Check the knowledge-layer environment (SQLite FTS5, git/glab, store, embeddings) |
|
|
118
|
+
|
|
119
|
+
The first seven are the core sync (detailed below); the rest are the optional
|
|
120
|
+
**[knowledge layer](#knowledge-layer-optional)**. Run any command with `--config` (sync INI)
|
|
121
|
+
and, for the knowledge layer, `--config`/`--kb-config` pointing at your `kb.toml`.
|
|
122
|
+
|
|
123
|
+
### Global options
|
|
124
|
+
|
|
125
|
+
These apply to any command:
|
|
126
|
+
|
|
127
|
+
- `--dry-run` — preview clone/update/branch actions without changing anything.
|
|
128
|
+
- `-v` / `--verbose`, `-q` / `--quiet` — control console verbosity.
|
|
129
|
+
- `--log-file PATH` — append a full timestamped audit log (rotating).
|
|
130
|
+
- `--config PATH` — use a specific config file (highest precedence).
|
|
131
|
+
- `--version` — print the version and exit.
|
|
132
|
+
|
|
133
|
+
Output is colorized on a terminal (status glyphs, a progress bar); set `NO_COLOR`
|
|
134
|
+
to disable or `FORCE_COLOR` to keep colours when piping. Colours are dropped
|
|
135
|
+
automatically for non-TTY output (pipes, cron, log files).
|
|
136
|
+
|
|
137
|
+
A read-only `status` followed by a `--dry-run sync` is the safest way to preview
|
|
138
|
+
what a sync would do:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
contextlake status
|
|
142
|
+
contextlake --dry-run sync
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Knowledge layer (optional)
|
|
146
|
+
|
|
147
|
+
Beyond mirroring, an optional layer (`contextlake.kb`) turns your repos into a
|
|
148
|
+
**knowledge graph** and serves it to AI tools over **MCP** — so Claude Code, Windsurf,
|
|
149
|
+
or Kiro can answer *"where is `X` defined?"* or *"who calls `Y`?"* instead of grepping.
|
|
150
|
+
It can also link repos to their Atlassian / Figma / GitLab items, add semantic search,
|
|
151
|
+
write a curated wiki, and generate per-tool steering files + a skills library. Most of
|
|
152
|
+
it needs no model; the rest works with a local Ollama or any OpenAI-compatible endpoint.
|
|
153
|
+
|
|
154
|
+
One command sets it all up:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
contextlake bootstrap --kb-config ~/.contextlake/kb.toml
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
→ Full guide: **[docs/knowledge-layer.md](docs/knowledge-layer.md)**.
|
|
161
|
+
|
|
162
|
+
## Documentation
|
|
163
|
+
|
|
164
|
+
- **[QUICKSTART.md](QUICKSTART.md)** — install → bootstrap → wire your editor, in minutes
|
|
165
|
+
- **[docs/usage.md](docs/usage.md)** — every command, configuration, branch safety, scheduling
|
|
166
|
+
- **[docs/knowledge-layer.md](docs/knowledge-layer.md)** — the graph, connectors, search, wiki, steering
|
|
167
|
+
- **[docs/internals.md](docs/internals.md)** — architecture & internals
|
|
168
|
+
- **[BRANDING.md](BRANDING.md)** — brand guide (name, palette, logo, mascot)
|
|
169
|
+
- **[CHANGELOG.md](CHANGELOG.md)** · **[ROADMAP.md](ROADMAP.md)** · **[CONTRIBUTING.md](CONTRIBUTING.md)**
|
|
170
|
+
|
|
171
|
+
## License
|
|
172
|
+
|
|
173
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
174
|
+
|
|
175
|
+
## Support
|
|
176
|
+
|
|
177
|
+
For issues or questions:
|
|
178
|
+
|
|
179
|
+
1. Check this documentation first
|
|
180
|
+
2. Review log files for error messages
|
|
181
|
+
3. Test individual commands to isolate issues
|
|
182
|
+
4. Verify `glab` authentication: `glab auth status`
|
|
183
|
+
5. Check GitLab access permissions in the web interface
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "contextlake"
|
|
7
|
+
version = "2.1.0"
|
|
8
|
+
description = "A local context layer for AI tools: mirror your repositories, index them into a knowledge graph, and serve it over MCP."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{ name = "Sayak Sarkar", email = "sayak.bugsmith@gmail.com" }]
|
|
13
|
+
keywords = ["context", "knowledge-graph", "mcp", "ai", "code-search",
|
|
14
|
+
"gitlab", "git", "mirror", "cli", "glab", "workspace"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Environment :: Console",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Topic :: Software Development :: Version Control :: Git",
|
|
22
|
+
]
|
|
23
|
+
# Runtime depends only on the Python standard library plus the external
|
|
24
|
+
# `git` and `glab` command-line tools (not pip-installable).
|
|
25
|
+
dependencies = []
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/sayak-sarkar/contextlake"
|
|
29
|
+
Repository = "https://github.com/sayak-sarkar/contextlake"
|
|
30
|
+
Changelog = "https://github.com/sayak-sarkar/contextlake/blob/main/CHANGELOG.md"
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4"]
|
|
34
|
+
# Optional knowledge-layer subsystem (contextlake.kb). Note: `mcp` requires
|
|
35
|
+
# Python >= 3.10, so this extra's floor is 3.10 while the core tool stays 3.9+.
|
|
36
|
+
kb = [
|
|
37
|
+
"mcp>=1.28", "pydantic>=2", "tomli; python_version<'3.11'",
|
|
38
|
+
# Code parsing: the tree-sitter runtime plus per-language grammar packages
|
|
39
|
+
# (added incrementally as languages are supported).
|
|
40
|
+
"tree-sitter>=0.25", "tree-sitter-python>=0.23",
|
|
41
|
+
"tree-sitter-javascript>=0.23", "tree-sitter-typescript>=0.23",
|
|
42
|
+
"tree-sitter-c-sharp>=0.23",
|
|
43
|
+
]
|
|
44
|
+
# Optional ANN backend for the semantic tier. Without it the vector store falls
|
|
45
|
+
# back to an exact pure-Python cosine scan (no native dependency required).
|
|
46
|
+
kb-vec = ["sqlite-vec>=0.1"]
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
contextlake = "contextlake.cli:main"
|
|
50
|
+
# Deprecated alias for the former package name (gitlab-sync); kept so existing
|
|
51
|
+
# installs and scripts that call `gitlab-sync` keep working. To be removed in a future major release.
|
|
52
|
+
gitlab-sync = "contextlake.cli:main"
|
|
53
|
+
|
|
54
|
+
[tool.setuptools]
|
|
55
|
+
package-dir = { "" = "src" }
|
|
56
|
+
|
|
57
|
+
[tool.setuptools.packages.find]
|
|
58
|
+
where = ["src"]
|
|
59
|
+
|
|
60
|
+
[tool.pytest.ini_options]
|
|
61
|
+
testpaths = ["tests"]
|
|
62
|
+
addopts = "-q"
|
|
63
|
+
|
|
64
|
+
[tool.ruff]
|
|
65
|
+
line-length = 100
|
|
66
|
+
target-version = "py39"
|
|
67
|
+
src = ["src", "tests"]
|
|
68
|
+
|
|
69
|
+
[tool.ruff.lint]
|
|
70
|
+
select = ["E", "F", "W", "I", "UP", "B"]
|