nodestradamus 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nodestradamus-0.1.0/LICENSE +21 -0
- nodestradamus-0.1.0/PKG-INFO +266 -0
- nodestradamus-0.1.0/README.md +212 -0
- nodestradamus-0.1.0/pyproject.toml +121 -0
- nodestradamus-0.1.0/rust/Cargo.lock +226 -0
- nodestradamus-0.1.0/rust/Cargo.toml +15 -0
- nodestradamus-0.1.0/rust/src/graph.rs +804 -0
- nodestradamus-0.1.0/rust/src/lib.rs +206 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Christos Grigoras
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nodestradamus
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Classifier: Development Status :: 4 - Beta
|
|
5
|
+
Classifier: Intended Audience :: Developers
|
|
6
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
9
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
10
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
11
|
+
Requires-Dist: mcp>=1.0.0
|
|
12
|
+
Requires-Dist: click>=8.0.0
|
|
13
|
+
Requires-Dist: pydantic>=2.0.0
|
|
14
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
15
|
+
Requires-Dist: tqdm>=4.66.0
|
|
16
|
+
Requires-Dist: tree-sitter>=0.21.0
|
|
17
|
+
Requires-Dist: tree-sitter-python>=0.21.0
|
|
18
|
+
Requires-Dist: tree-sitter-typescript>=0.21.0
|
|
19
|
+
Requires-Dist: tree-sitter-javascript>=0.21.0
|
|
20
|
+
Requires-Dist: tree-sitter-rust>=0.21.0
|
|
21
|
+
Requires-Dist: tree-sitter-sql>=0.3.0
|
|
22
|
+
Requires-Dist: tree-sitter-bash>=0.23.0
|
|
23
|
+
Requires-Dist: tree-sitter-json>=0.21.0
|
|
24
|
+
Requires-Dist: tree-sitter-markdown>=0.3.2
|
|
25
|
+
Requires-Dist: tree-sitter-cpp>=0.23.0
|
|
26
|
+
Requires-Dist: networkx>=3.0
|
|
27
|
+
Requires-Dist: scipy>=1.10.0
|
|
28
|
+
Requires-Dist: msgpack>=1.0.0
|
|
29
|
+
Requires-Dist: nodestradamus[dev,faiss,mistral] ; extra == 'all'
|
|
30
|
+
Requires-Dist: pytest>=8.0.0 ; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest-asyncio>=0.23.0 ; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov>=4.1.0 ; extra == 'dev'
|
|
33
|
+
Requires-Dist: mypy>=1.8.0 ; extra == 'dev'
|
|
34
|
+
Requires-Dist: black>=24.0.0 ; extra == 'dev'
|
|
35
|
+
Requires-Dist: ruff>=0.2.0 ; extra == 'dev'
|
|
36
|
+
Requires-Dist: maturin>=1.4,<2.0 ; extra == 'dev'
|
|
37
|
+
Requires-Dist: faiss-cpu>=1.7.4 ; extra == 'faiss'
|
|
38
|
+
Requires-Dist: httpx>=0.27.0 ; extra == 'mistral'
|
|
39
|
+
Provides-Extra: all
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Provides-Extra: faiss
|
|
42
|
+
Provides-Extra: mistral
|
|
43
|
+
License-File: LICENSE
|
|
44
|
+
Summary: See what breaks before you break it. Codebase intelligence for AI assistants.
|
|
45
|
+
Keywords: mcp,dependency-graph,code-analysis,cursor,claude,impact-analysis,codebase-intelligence
|
|
46
|
+
Author-email: Christos Grigoras <chris.grigoras@gmail.com>
|
|
47
|
+
License: MIT
|
|
48
|
+
Requires-Python: >=3.12
|
|
49
|
+
Description-Content-Type: text/markdown
|
|
50
|
+
Project-URL: Documentation, https://github.com/ChristosGrigoras/nodestradamus/tree/main/docs
|
|
51
|
+
Project-URL: Homepage, https://github.com/ChristosGrigoras/nodestradamus
|
|
52
|
+
Project-URL: Repository, https://github.com/ChristosGrigoras/nodestradamus
|
|
53
|
+
|
|
54
|
+
# <img src="https://raw.githubusercontent.com/ChristosGrigoras/nodestradamus/main/assets/nodestradamus-logo.png" alt="Nodestradamus" width="42" style="vertical-align: middle;" /> Nodestradamus
|
|
55
|
+
|
|
56
|
+
**Nodestradamus: See what breaks before you break it.** Codebase intelligence for AI and human coders—an MCP server and a Python library that give Cursor, Claude, and other AI tools (or your own scripts) deep understanding of your code through dependency graphs, semantic search, and impact analysis.
|
|
57
|
+
|
|
58
|
+
Nodestradamus predicts **what breaks** if you change something—impact before you refactor. It builds a map of your code (who calls what) so you or your AI can see that impact and find important or risky areas.
|
|
59
|
+
|
|
60
|
+
> I had determined to go as far as declaring in abstruse and puzzling utterances the future causes… Yet lest whatever human changes may be to come should scandalise delicate ears, the whole thing is written in nebulous form, rather than as a clear prophecy of any kind.
|
|
61
|
+
> — *Nostradamus, 1555*
|
|
62
|
+
|
|
63
|
+
### What Nodestradamus does
|
|
64
|
+
|
|
65
|
+
- **Maps who-calls-what** — builds a dependency graph of your codebase
|
|
66
|
+
- **Answers "what breaks if I change this?"** — impact analysis before refactors
|
|
67
|
+
- **Finds code by meaning** — semantic search and duplicate detection
|
|
68
|
+
- **Checks docs and rules** — finds stale references and coverage gaps
|
|
69
|
+
|
|
70
|
+
New to dependency graphs or these terms? See [Understanding dependency graphs](docs/dependency-graphs.md) and [Glossary](docs/glossary.md).
|
|
71
|
+
|
|
72
|
+
### Why use Nodestradamus with cheaper models?
|
|
73
|
+
|
|
74
|
+
Nodestradamus pre-computes codebase structure (who calls what, impact, semantic index) and exposes it via MCP tools. That shifts the work from the model to the tools:
|
|
75
|
+
|
|
76
|
+
| | **Cheap model + Nodestradamus** | **Expensive model, no Nodestradamus** |
|
|
77
|
+
|---|--------------------------------|--------------------------------------|
|
|
78
|
+
| **Context** | Structured answers from tools (graph, impact, search) — small, precise inputs | Raw file dumps and long context — more tokens, more noise |
|
|
79
|
+
| **Cost** | Fewer tokens per task; small/cheap models can drive the same workflows | Large context and repeated reads; often need bigger, pricier models |
|
|
80
|
+
| **Accuracy** | Impact and dependencies come from the graph, not guesswork | Model infers structure from text; easy to miss callers or side effects |
|
|
81
|
+
| **Speed** | One tool call → targeted result (e.g. “what breaks if I change this?”) | Many file reads and long chains of reasoning |
|
|
82
|
+
|
|
83
|
+
Use Nodestradamus so your assistant gets **precise, graph-backed answers** instead of guessing from raw code. That makes cheaper models effective for refactors, impact analysis, and codebase navigation.
|
|
84
|
+
|
|
85
|
+
### Evaluation
|
|
86
|
+
|
|
87
|
+
Evaluations on three codebases (LangChain Python monorepo, Rich, Django) compared Nodestradamus-on vs Nodestradamus-off across 18 codebase-understanding questions (overview, impact, cycles, dead code, duplicates, health, etc.):
|
|
88
|
+
|
|
89
|
+
| Codebase | Finding |
|
|
90
|
+
|----------|--------|
|
|
91
|
+
| **LangChain** | Cheaper model (Composer) + Nodestradamus was ~40% faster and more accurate than Opus without tools (e.g. correct "0 cycles" vs inferred "many potential cycles"). Same model with Nodestradamus: 27% more concise, quantified metrics vs prose. |
|
|
92
|
+
| **Rich** | Haiku + Nodestradamus **42% faster** with comparable verbosity; Opus + Nodestradamus ~14% slower but more metric-rich (betweenness, cohesion, line-level analysis). Both produced more actionable answers with tools. |
|
|
93
|
+
| **Django** | Both models with Nodestradamus gave quantified insights (graph metrics, cycle detection, duplicate file:line refs) vs estimates; trade-off was longer time for substantiated, data-driven answers. |
|
|
94
|
+
|
|
95
|
+
Across reports: **cheaper LLM + Nodestradamus** can match or beat **expensive LLM without tools** on accuracy and actionability for structural analysis; tools provide ground truth (cycles, dead code, centrality) that models often get wrong when inferring.
|
|
96
|
+
|
|
97
|
+
**Note:** The evaluation setup and benchmarks need further testing and validation (more codebases, question sets, and baselines) before stronger conclusions can be drawn.
|
|
98
|
+
|
|
99
|
+
## Install
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pip install nodestradamus
|
|
103
|
+
|
|
104
|
+
# Or from source
|
|
105
|
+
git clone https://github.com/ChristosGrigoras/nodestradamus.git
|
|
106
|
+
cd nodestradamus && pip install -e .
|
|
107
|
+
|
|
108
|
+
# With FAISS for faster similarity search on large codebases (optional)
|
|
109
|
+
pip install "nodestradamus[faiss]"
|
|
110
|
+
|
|
111
|
+
# With Rust acceleration (optional, requires Rust toolchain)
|
|
112
|
+
pip install maturin
|
|
113
|
+
maturin develop --release
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Quick Start
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Start the MCP server
|
|
120
|
+
nodestradamus serve
|
|
121
|
+
|
|
122
|
+
# Analyze a repo
|
|
123
|
+
nodestradamus analyze /path/to/repo
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Optimal tool sequence** for best results:
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
1. project_scout → Get overview + suggested_ignores
|
|
130
|
+
2. analyze_deps → Build graph (pass suggested_ignores)
|
|
131
|
+
3. codebase_health → Health check
|
|
132
|
+
4. semantic_analysis → mode="embeddings" (pre-compute)
|
|
133
|
+
5. semantic_analysis → mode="search" (now fast)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
See [docs/getting-started-workflow.md](docs/getting-started-workflow.md) for the complete guide.
|
|
137
|
+
|
|
138
|
+
Add to Cursor (`.cursor/mcp.json`):
|
|
139
|
+
|
|
140
|
+
```json
|
|
141
|
+
{
|
|
142
|
+
"mcpServers": {
|
|
143
|
+
"nodestradamus": {
|
|
144
|
+
"command": "nodestradamus",
|
|
145
|
+
"args": ["serve"]
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Supported Languages
|
|
152
|
+
|
|
153
|
+
| Language | Dependency Analysis | Semantic Search | String Analysis |
|
|
154
|
+
|----------|---------------------|-----------------|-----------------|
|
|
155
|
+
| **Python** | ✅ Full | ✅ Full | ✅ Full |
|
|
156
|
+
| **TypeScript/JavaScript** | ✅ Full | ✅ Full | ✅ Full |
|
|
157
|
+
| **Rust** | ✅ Full | ✅ Full | ✅ Full |
|
|
158
|
+
| **SQL (PostgreSQL)** | ✅ Full | ✅ Full | ✅ Full |
|
|
159
|
+
| **Bash** | ✅ Full | ✅ Full | ✅ Full |
|
|
160
|
+
| **JSON** | ✅ Configs | — | — |
|
|
161
|
+
|
|
162
|
+
## Tools
|
|
163
|
+
|
|
164
|
+
| Tool | What it does |
|
|
165
|
+
|------|--------------|
|
|
166
|
+
| `quick_start` | Runs optimal setup sequence automatically |
|
|
167
|
+
| `project_scout` | Reconnaissance: languages, frameworks, key dirs; lazy options for monorepos |
|
|
168
|
+
| `analyze_deps` | Build dependency graph (Python, TS, Rust, SQL, Bash, JSON) |
|
|
169
|
+
| `analyze_cooccurrence` | Files that change together in git history |
|
|
170
|
+
| `get_impact` | What breaks if I change this file/function? |
|
|
171
|
+
| `analyze_graph` | Graph algorithms on dependencies |
|
|
172
|
+
| `analyze_strings` | Find and trace string literals |
|
|
173
|
+
| `semantic_analysis` | Embedding-based search and duplicate detection |
|
|
174
|
+
| `find_similar` | Structurally similar code (fingerprint match) |
|
|
175
|
+
| `get_changes_since_last` | Diff vs last run (snapshots) |
|
|
176
|
+
| `codebase_health` | Health check: dead code, duplicates, cycles, docs |
|
|
177
|
+
| `manage_cache` | Inspect or clear `.nodestradamus/` cache |
|
|
178
|
+
| `analyze_docs` | Docs: stale refs and coverage |
|
|
179
|
+
| `compare_rules_to_codebase` | Audit rules vs hotspots; gaps and stale refs |
|
|
180
|
+
| `validate_rules` | Validate rule file structure and frontmatter |
|
|
181
|
+
| `detect_rule_conflicts` | Detect conflicts between AI rules |
|
|
182
|
+
|
|
183
|
+
### Examples
|
|
184
|
+
|
|
185
|
+
**Check impact before refactoring:**
|
|
186
|
+
```
|
|
187
|
+
get_impact(repo_path="/my/repo", file_path="src/auth.py", symbol="validate_token")
|
|
188
|
+
→ Shows files that call this function
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
**Semantic search:**
|
|
192
|
+
```
|
|
193
|
+
semantic_analysis(repo_path="/my/repo", mode="search", query="authentication")
|
|
194
|
+
→ Natural language code search over embedded chunks
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
More examples in [docs/getting-started-workflow.md](docs/getting-started-workflow.md) and [docs/creative-use-cases.md](docs/creative-use-cases.md).
|
|
198
|
+
|
|
199
|
+
### Graph Algorithms
|
|
200
|
+
|
|
201
|
+
`analyze_graph` supports pagerank, betweenness, communities, cycles, path, hierarchy, layers. Optional Rust backend for speed. See [docs/dependency-graphs.md](docs/dependency-graphs.md) and [docs/graph-theory-reference.md](docs/graph-theory-reference.md). Rust extension: [docs/installation.md](docs/installation.md).
|
|
202
|
+
|
|
203
|
+
### Rust Support
|
|
204
|
+
|
|
205
|
+
Rust analysis extracts functions, structs, enums, traits, impls, use statements. See [docs/mcp-server-spec.md](docs/mcp-server-spec.md) for details.
|
|
206
|
+
|
|
207
|
+
### Semantic Analysis
|
|
208
|
+
|
|
209
|
+
Modes: search, similar, duplicates, embeddings. See [docs/getting-started-workflow.md](docs/getting-started-workflow.md).
|
|
210
|
+
|
|
211
|
+
### Cache
|
|
212
|
+
|
|
213
|
+
Results are cached under **`.nodestradamus/`** (repo when standalone, workspace when via MCP). Use `manage_cache` (mode="info" / "clear"). Optional `.nodestradamusignore` (gitignore-style) excludes paths; `project_scout` reports if it exists. File list and incremental embedding behavior: [docs/getting-started-workflow.md](docs/getting-started-workflow.md).
|
|
214
|
+
|
|
215
|
+
### Environment
|
|
216
|
+
|
|
217
|
+
Copy `.env.example` to `.env` for embedding provider and API keys. See [docs/installation.md](docs/installation.md).
|
|
218
|
+
|
|
219
|
+
## Cursor Rules
|
|
220
|
+
|
|
221
|
+
Nodestradamus ships `.cursor/rules/` for code quality, security, and meta-generator. See [docs/cursor-rules.md](docs/cursor-rules.md).
|
|
222
|
+
|
|
223
|
+
## Documentation
|
|
224
|
+
|
|
225
|
+
| Topic | Link |
|
|
226
|
+
|-------|------|
|
|
227
|
+
| **Getting Started** | [docs/getting-started-workflow.md](docs/getting-started-workflow.md) |
|
|
228
|
+
| Installation | [docs/installation.md](docs/installation.md) |
|
|
229
|
+
| Understanding Dependency Graphs | [docs/dependency-graphs.md](docs/dependency-graphs.md) |
|
|
230
|
+
| Glossary | [docs/glossary.md](docs/glossary.md) |
|
|
231
|
+
| MCP Server Spec | [docs/mcp-server-spec.md](docs/mcp-server-spec.md) |
|
|
232
|
+
| Creative Use Cases | [docs/creative-use-cases.md](docs/creative-use-cases.md) |
|
|
233
|
+
| Cursor Rules | [docs/cursor-rules.md](docs/cursor-rules.md) |
|
|
234
|
+
| GitHub Setup | [docs/github-setup.md](docs/github-setup.md) |
|
|
235
|
+
| Troubleshooting | [docs/troubleshooting.md](docs/troubleshooting.md) |
|
|
236
|
+
| Publishing to PyPI | [docs/publishing-pypi.md](docs/publishing-pypi.md) |
|
|
237
|
+
|
|
238
|
+
## Publishing to PyPI
|
|
239
|
+
|
|
240
|
+
Maintainers: see [docs/publishing-pypi.md](docs/publishing-pypi.md) for prerequisites, one-time setup (`~/.pypirc`), build with **maturin** (this project uses a Rust extension), upload with twine, TestPyPI, and a pre-publish checklist.
|
|
241
|
+
|
|
242
|
+
Quick build and upload:
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
pip install build twine maturin
|
|
246
|
+
rm -rf dist/ build/ *.egg-info/
|
|
247
|
+
maturin build --release --out dist --sdist
|
|
248
|
+
twine upload dist/*
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
Verify: `pip install nodestradamus` then `nodestradamus --version`.
|
|
252
|
+
|
|
253
|
+
## License
|
|
254
|
+
|
|
255
|
+
MIT
|
|
256
|
+
|
|
257
|
+
## Credits
|
|
258
|
+
|
|
259
|
+
- [MCP](https://modelcontextprotocol.io) — Model Context Protocol
|
|
260
|
+
- [NetworkX](https://networkx.org) — Graph algorithms (Python)
|
|
261
|
+
- [petgraph](https://github.com/petgraph/petgraph) — Graph algorithms (Rust)
|
|
262
|
+
- [PyO3](https://pyo3.rs) — Rust-Python bindings
|
|
263
|
+
- [FAISS](https://github.com/facebookresearch/faiss) — Approximate nearest neighbor search (optional)
|
|
264
|
+
- [sentence-transformers](https://sbert.net) — Embeddings
|
|
265
|
+
- [tree-sitter](https://tree-sitter.github.io/tree-sitter/) — Code parsing (Python, TypeScript/JavaScript, Rust, SQL, Bash, JSON)
|
|
266
|
+
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# <img src="https://raw.githubusercontent.com/ChristosGrigoras/nodestradamus/main/assets/nodestradamus-logo.png" alt="Nodestradamus" width="42" style="vertical-align: middle;" /> Nodestradamus
|
|
2
|
+
|
|
3
|
+
**Nodestradamus: See what breaks before you break it.** Codebase intelligence for AI and human coders—an MCP server and a Python library that give Cursor, Claude, and other AI tools (or your own scripts) deep understanding of your code through dependency graphs, semantic search, and impact analysis.
|
|
4
|
+
|
|
5
|
+
Nodestradamus predicts **what breaks** if you change something—impact before you refactor. It builds a map of your code (who calls what) so you or your AI can see that impact and find important or risky areas.
|
|
6
|
+
|
|
7
|
+
> I had determined to go as far as declaring in abstruse and puzzling utterances the future causes… Yet lest whatever human changes may be to come should scandalise delicate ears, the whole thing is written in nebulous form, rather than as a clear prophecy of any kind.
|
|
8
|
+
> — *Nostradamus, 1555*
|
|
9
|
+
|
|
10
|
+
### What Nodestradamus does
|
|
11
|
+
|
|
12
|
+
- **Maps who-calls-what** — builds a dependency graph of your codebase
|
|
13
|
+
- **Answers "what breaks if I change this?"** — impact analysis before refactors
|
|
14
|
+
- **Finds code by meaning** — semantic search and duplicate detection
|
|
15
|
+
- **Checks docs and rules** — finds stale references and coverage gaps
|
|
16
|
+
|
|
17
|
+
New to dependency graphs or these terms? See [Understanding dependency graphs](docs/dependency-graphs.md) and [Glossary](docs/glossary.md).
|
|
18
|
+
|
|
19
|
+
### Why use Nodestradamus with cheaper models?
|
|
20
|
+
|
|
21
|
+
Nodestradamus pre-computes codebase structure (who calls what, impact, semantic index) and exposes it via MCP tools. That shifts the work from the model to the tools:
|
|
22
|
+
|
|
23
|
+
| | **Cheap model + Nodestradamus** | **Expensive model, no Nodestradamus** |
|
|
24
|
+
|---|--------------------------------|--------------------------------------|
|
|
25
|
+
| **Context** | Structured answers from tools (graph, impact, search) — small, precise inputs | Raw file dumps and long context — more tokens, more noise |
|
|
26
|
+
| **Cost** | Fewer tokens per task; small/cheap models can drive the same workflows | Large context and repeated reads; often need bigger, pricier models |
|
|
27
|
+
| **Accuracy** | Impact and dependencies come from the graph, not guesswork | Model infers structure from text; easy to miss callers or side effects |
|
|
28
|
+
| **Speed** | One tool call → targeted result (e.g. “what breaks if I change this?”) | Many file reads and long chains of reasoning |
|
|
29
|
+
|
|
30
|
+
Use Nodestradamus so your assistant gets **precise, graph-backed answers** instead of guessing from raw code. That makes cheaper models effective for refactors, impact analysis, and codebase navigation.
|
|
31
|
+
|
|
32
|
+
### Evaluation
|
|
33
|
+
|
|
34
|
+
Evaluations on three codebases (LangChain Python monorepo, Rich, Django) compared Nodestradamus-on vs Nodestradamus-off across 18 codebase-understanding questions (overview, impact, cycles, dead code, duplicates, health, etc.):
|
|
35
|
+
|
|
36
|
+
| Codebase | Finding |
|
|
37
|
+
|----------|--------|
|
|
38
|
+
| **LangChain** | Cheaper model (Composer) + Nodestradamus was ~40% faster and more accurate than Opus without tools (e.g. correct "0 cycles" vs inferred "many potential cycles"). Same model with Nodestradamus: 27% more concise, quantified metrics vs prose. |
|
|
39
|
+
| **Rich** | Haiku + Nodestradamus **42% faster** with comparable verbosity; Opus + Nodestradamus ~14% slower but more metric-rich (betweenness, cohesion, line-level analysis). Both produced more actionable answers with tools. |
|
|
40
|
+
| **Django** | Both models with Nodestradamus gave quantified insights (graph metrics, cycle detection, duplicate file:line refs) vs estimates; trade-off was longer time for substantiated, data-driven answers. |
|
|
41
|
+
|
|
42
|
+
Across reports: **cheaper LLM + Nodestradamus** can match or beat **expensive LLM without tools** on accuracy and actionability for structural analysis; tools provide ground truth (cycles, dead code, centrality) that models often get wrong when inferring.
|
|
43
|
+
|
|
44
|
+
**Note:** The evaluation setup and benchmarks need further testing and validation (more codebases, question sets, and baselines) before stronger conclusions can be drawn.
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install nodestradamus
|
|
50
|
+
|
|
51
|
+
# Or from source
|
|
52
|
+
git clone https://github.com/ChristosGrigoras/nodestradamus.git
|
|
53
|
+
cd nodestradamus && pip install -e .
|
|
54
|
+
|
|
55
|
+
# With FAISS for faster similarity search on large codebases (optional)
|
|
56
|
+
pip install "nodestradamus[faiss]"
|
|
57
|
+
|
|
58
|
+
# With Rust acceleration (optional, requires Rust toolchain)
|
|
59
|
+
pip install maturin
|
|
60
|
+
maturin develop --release
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Quick Start
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Start the MCP server
|
|
67
|
+
nodestradamus serve
|
|
68
|
+
|
|
69
|
+
# Analyze a repo
|
|
70
|
+
nodestradamus analyze /path/to/repo
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**Optimal tool sequence** for best results:
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
1. project_scout → Get overview + suggested_ignores
|
|
77
|
+
2. analyze_deps → Build graph (pass suggested_ignores)
|
|
78
|
+
3. codebase_health → Health check
|
|
79
|
+
4. semantic_analysis → mode="embeddings" (pre-compute)
|
|
80
|
+
5. semantic_analysis → mode="search" (now fast)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
See [docs/getting-started-workflow.md](docs/getting-started-workflow.md) for the complete guide.
|
|
84
|
+
|
|
85
|
+
Add to Cursor (`.cursor/mcp.json`):
|
|
86
|
+
|
|
87
|
+
```json
|
|
88
|
+
{
|
|
89
|
+
"mcpServers": {
|
|
90
|
+
"nodestradamus": {
|
|
91
|
+
"command": "nodestradamus",
|
|
92
|
+
"args": ["serve"]
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Supported Languages
|
|
99
|
+
|
|
100
|
+
| Language | Dependency Analysis | Semantic Search | String Analysis |
|
|
101
|
+
|----------|---------------------|-----------------|-----------------|
|
|
102
|
+
| **Python** | ✅ Full | ✅ Full | ✅ Full |
|
|
103
|
+
| **TypeScript/JavaScript** | ✅ Full | ✅ Full | ✅ Full |
|
|
104
|
+
| **Rust** | ✅ Full | ✅ Full | ✅ Full |
|
|
105
|
+
| **SQL (PostgreSQL)** | ✅ Full | ✅ Full | ✅ Full |
|
|
106
|
+
| **Bash** | ✅ Full | ✅ Full | ✅ Full |
|
|
107
|
+
| **JSON** | ✅ Configs | — | — |
|
|
108
|
+
|
|
109
|
+
## Tools
|
|
110
|
+
|
|
111
|
+
| Tool | What it does |
|
|
112
|
+
|------|--------------|
|
|
113
|
+
| `quick_start` | Runs optimal setup sequence automatically |
|
|
114
|
+
| `project_scout` | Reconnaissance: languages, frameworks, key dirs; lazy options for monorepos |
|
|
115
|
+
| `analyze_deps` | Build dependency graph (Python, TS, Rust, SQL, Bash, JSON) |
|
|
116
|
+
| `analyze_cooccurrence` | Files that change together in git history |
|
|
117
|
+
| `get_impact` | What breaks if I change this file/function? |
|
|
118
|
+
| `analyze_graph` | Graph algorithms on dependencies |
|
|
119
|
+
| `analyze_strings` | Find and trace string literals |
|
|
120
|
+
| `semantic_analysis` | Embedding-based search and duplicate detection |
|
|
121
|
+
| `find_similar` | Structurally similar code (fingerprint match) |
|
|
122
|
+
| `get_changes_since_last` | Diff vs last run (snapshots) |
|
|
123
|
+
| `codebase_health` | Health check: dead code, duplicates, cycles, docs |
|
|
124
|
+
| `manage_cache` | Inspect or clear `.nodestradamus/` cache |
|
|
125
|
+
| `analyze_docs` | Docs: stale refs and coverage |
|
|
126
|
+
| `compare_rules_to_codebase` | Audit rules vs hotspots; gaps and stale refs |
|
|
127
|
+
| `validate_rules` | Validate rule file structure and frontmatter |
|
|
128
|
+
| `detect_rule_conflicts` | Detect conflicts between AI rules |
|
|
129
|
+
|
|
130
|
+
### Examples
|
|
131
|
+
|
|
132
|
+
**Check impact before refactoring:**
|
|
133
|
+
```
|
|
134
|
+
get_impact(repo_path="/my/repo", file_path="src/auth.py", symbol="validate_token")
|
|
135
|
+
→ Shows files that call this function
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
**Semantic search:**
|
|
139
|
+
```
|
|
140
|
+
semantic_analysis(repo_path="/my/repo", mode="search", query="authentication")
|
|
141
|
+
→ Natural language code search over embedded chunks
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
More examples in [docs/getting-started-workflow.md](docs/getting-started-workflow.md) and [docs/creative-use-cases.md](docs/creative-use-cases.md).
|
|
145
|
+
|
|
146
|
+
### Graph Algorithms
|
|
147
|
+
|
|
148
|
+
`analyze_graph` supports pagerank, betweenness, communities, cycles, path, hierarchy, layers. Optional Rust backend for speed. See [docs/dependency-graphs.md](docs/dependency-graphs.md) and [docs/graph-theory-reference.md](docs/graph-theory-reference.md). Rust extension: [docs/installation.md](docs/installation.md).
|
|
149
|
+
|
|
150
|
+
### Rust Support
|
|
151
|
+
|
|
152
|
+
Rust analysis extracts functions, structs, enums, traits, impls, use statements. See [docs/mcp-server-spec.md](docs/mcp-server-spec.md) for details.
|
|
153
|
+
|
|
154
|
+
### Semantic Analysis
|
|
155
|
+
|
|
156
|
+
Modes: search, similar, duplicates, embeddings. See [docs/getting-started-workflow.md](docs/getting-started-workflow.md).
|
|
157
|
+
|
|
158
|
+
### Cache
|
|
159
|
+
|
|
160
|
+
Results are cached under **`.nodestradamus/`** (repo when standalone, workspace when via MCP). Use `manage_cache` (mode="info" / "clear"). Optional `.nodestradamusignore` (gitignore-style) excludes paths; `project_scout` reports if it exists. File list and incremental embedding behavior: [docs/getting-started-workflow.md](docs/getting-started-workflow.md).
|
|
161
|
+
|
|
162
|
+
### Environment
|
|
163
|
+
|
|
164
|
+
Copy `.env.example` to `.env` for embedding provider and API keys. See [docs/installation.md](docs/installation.md).
|
|
165
|
+
|
|
166
|
+
## Cursor Rules
|
|
167
|
+
|
|
168
|
+
Nodestradamus ships `.cursor/rules/` for code quality, security, and meta-generator. See [docs/cursor-rules.md](docs/cursor-rules.md).
|
|
169
|
+
|
|
170
|
+
## Documentation
|
|
171
|
+
|
|
172
|
+
| Topic | Link |
|
|
173
|
+
|-------|------|
|
|
174
|
+
| **Getting Started** | [docs/getting-started-workflow.md](docs/getting-started-workflow.md) |
|
|
175
|
+
| Installation | [docs/installation.md](docs/installation.md) |
|
|
176
|
+
| Understanding Dependency Graphs | [docs/dependency-graphs.md](docs/dependency-graphs.md) |
|
|
177
|
+
| Glossary | [docs/glossary.md](docs/glossary.md) |
|
|
178
|
+
| MCP Server Spec | [docs/mcp-server-spec.md](docs/mcp-server-spec.md) |
|
|
179
|
+
| Creative Use Cases | [docs/creative-use-cases.md](docs/creative-use-cases.md) |
|
|
180
|
+
| Cursor Rules | [docs/cursor-rules.md](docs/cursor-rules.md) |
|
|
181
|
+
| GitHub Setup | [docs/github-setup.md](docs/github-setup.md) |
|
|
182
|
+
| Troubleshooting | [docs/troubleshooting.md](docs/troubleshooting.md) |
|
|
183
|
+
| Publishing to PyPI | [docs/publishing-pypi.md](docs/publishing-pypi.md) |
|
|
184
|
+
|
|
185
|
+
## Publishing to PyPI
|
|
186
|
+
|
|
187
|
+
Maintainers: see [docs/publishing-pypi.md](docs/publishing-pypi.md) for prerequisites, one-time setup (`~/.pypirc`), build with **maturin** (this project uses a Rust extension), upload with twine, TestPyPI, and a pre-publish checklist.
|
|
188
|
+
|
|
189
|
+
Quick build and upload:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
pip install build twine maturin
|
|
193
|
+
rm -rf dist/ build/ *.egg-info/
|
|
194
|
+
maturin build --release --out dist --sdist
|
|
195
|
+
twine upload dist/*
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Verify: `pip install nodestradamus` then `nodestradamus --version`.
|
|
199
|
+
|
|
200
|
+
## License
|
|
201
|
+
|
|
202
|
+
MIT
|
|
203
|
+
|
|
204
|
+
## Credits
|
|
205
|
+
|
|
206
|
+
- [MCP](https://modelcontextprotocol.io) — Model Context Protocol
|
|
207
|
+
- [NetworkX](https://networkx.org) — Graph algorithms (Python)
|
|
208
|
+
- [petgraph](https://github.com/petgraph/petgraph) — Graph algorithms (Rust)
|
|
209
|
+
- [PyO3](https://pyo3.rs) — Rust-Python bindings
|
|
210
|
+
- [FAISS](https://github.com/facebookresearch/faiss) — Approximate nearest neighbor search (optional)
|
|
211
|
+
- [sentence-transformers](https://sbert.net) — Embeddings
|
|
212
|
+
- [tree-sitter](https://tree-sitter.github.io/tree-sitter/) — Code parsing (Python, TypeScript/JavaScript, Rust, SQL, Bash, JSON)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "nodestradamus"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "See what breaks before you break it. Codebase intelligence for AI assistants."
|
|
5
|
+
readme = { file = "README.md", content-type = "text/markdown" }
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
requires-python = ">=3.12"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Christos Grigoras", email = "chris.grigoras@gmail.com" }
|
|
10
|
+
]
|
|
11
|
+
keywords = ["mcp", "dependency-graph", "code-analysis", "cursor", "claude", "impact-analysis", "codebase-intelligence"]
|
|
12
|
+
urls = { Homepage = "https://github.com/ChristosGrigoras/nodestradamus", Repository = "https://github.com/ChristosGrigoras/nodestradamus", Documentation = "https://github.com/ChristosGrigoras/nodestradamus/tree/main/docs" }
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
20
|
+
"Topic :: Software Development :: Libraries",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"mcp>=1.0.0",
|
|
24
|
+
"click>=8.0.0",
|
|
25
|
+
"pydantic>=2.0.0",
|
|
26
|
+
"python-dotenv>=1.0.0",
|
|
27
|
+
"tqdm>=4.66.0",
|
|
28
|
+
"tree-sitter>=0.21.0",
|
|
29
|
+
"tree-sitter-python>=0.21.0",
|
|
30
|
+
"tree-sitter-typescript>=0.21.0",
|
|
31
|
+
"tree-sitter-javascript>=0.21.0",
|
|
32
|
+
"tree-sitter-rust>=0.21.0",
|
|
33
|
+
"tree-sitter-sql>=0.3.0",
|
|
34
|
+
"tree-sitter-bash>=0.23.0",
|
|
35
|
+
"tree-sitter-json>=0.21.0",
|
|
36
|
+
"tree-sitter-markdown>=0.3.2",
|
|
37
|
+
"tree-sitter-cpp>=0.23.0",
|
|
38
|
+
"networkx>=3.0",
|
|
39
|
+
"scipy>=1.10.0",
|
|
40
|
+
"msgpack>=1.0.0",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[project.optional-dependencies]
|
|
44
|
+
dev = [
|
|
45
|
+
"pytest>=8.0.0",
|
|
46
|
+
"pytest-asyncio>=0.23.0",
|
|
47
|
+
"pytest-cov>=4.1.0",
|
|
48
|
+
"mypy>=1.8.0",
|
|
49
|
+
"black>=24.0.0",
|
|
50
|
+
"ruff>=0.2.0",
|
|
51
|
+
"maturin>=1.4,<2.0",
|
|
52
|
+
]
|
|
53
|
+
faiss = [
|
|
54
|
+
"faiss-cpu>=1.7.4",
|
|
55
|
+
]
|
|
56
|
+
mistral = [
|
|
57
|
+
"httpx>=0.27.0",
|
|
58
|
+
]
|
|
59
|
+
all = [
|
|
60
|
+
"nodestradamus[dev,faiss,mistral]",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
[project.scripts]
|
|
64
|
+
nodestradamus = "nodestradamus.cli:main"
|
|
65
|
+
analyze-python-deps = "scripts.analyze_python_deps:main"
|
|
66
|
+
analyze-git-cooccurrence = "scripts.analyze_git_cooccurrence:main"
|
|
67
|
+
analyze-ts-deps = "scripts.analyze_ts_deps:main"
|
|
68
|
+
validate-rules = "scripts.validate_rules:main"
|
|
69
|
+
merge-graphs = "scripts.merge_graphs:main"
|
|
70
|
+
detect-rule-conflicts = "scripts.detect_rule_conflicts:main"
|
|
71
|
+
benchmark-graph = "scripts.benchmark_graph:main"
|
|
72
|
+
|
|
73
|
+
[build-system]
|
|
74
|
+
requires = ["maturin>=1.4,<2.0"]
|
|
75
|
+
build-backend = "maturin"
|
|
76
|
+
|
|
77
|
+
[tool.maturin]
|
|
78
|
+
manifest-path = "rust/Cargo.toml"
|
|
79
|
+
python-source = "."
|
|
80
|
+
features = ["pyo3/extension-module"]
|
|
81
|
+
module-name = "nodestradamus_graph"
|
|
82
|
+
include = ["LICENSE", "README.md"]
|
|
83
|
+
|
|
84
|
+
[tool.black]
|
|
85
|
+
line-length = 100
|
|
86
|
+
target-version = ["py312"]
|
|
87
|
+
|
|
88
|
+
[tool.ruff]
|
|
89
|
+
line-length = 100
|
|
90
|
+
target-version = "py312"
|
|
91
|
+
|
|
92
|
+
[tool.ruff.lint]
|
|
93
|
+
select = [
|
|
94
|
+
"E", # pycodestyle errors
|
|
95
|
+
"W", # pycodestyle warnings
|
|
96
|
+
"F", # Pyflakes
|
|
97
|
+
"I", # isort
|
|
98
|
+
"B", # flake8-bugbear
|
|
99
|
+
"C4", # flake8-comprehensions
|
|
100
|
+
"UP", # pyupgrade
|
|
101
|
+
]
|
|
102
|
+
ignore = [
|
|
103
|
+
"E501", # line too long (handled by black)
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
[tool.mypy]
|
|
107
|
+
python_version = "3.12"
|
|
108
|
+
warn_return_any = true
|
|
109
|
+
warn_unused_ignores = true
|
|
110
|
+
disallow_untyped_defs = true
|
|
111
|
+
strict = true
|
|
112
|
+
|
|
113
|
+
[tool.pytest.ini_options]
|
|
114
|
+
testpaths = ["tests"]
|
|
115
|
+
python_files = ["test_*.py"]
|
|
116
|
+
python_functions = ["test_*"]
|
|
117
|
+
addopts = "-v --tb=short -m 'not slow'"
|
|
118
|
+
asyncio_mode = "auto"
|
|
119
|
+
markers = [
|
|
120
|
+
"slow: marks tests as slow (deselected by default, use -m slow to run)",
|
|
121
|
+
]
|