codebase-intel 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_intel-0.1.0/.github/workflows/ci.yml +48 -0
- codebase_intel-0.1.0/.gitignore +41 -0
- codebase_intel-0.1.0/CLAUDE.md +83 -0
- codebase_intel-0.1.0/CONTRIBUTING.md +74 -0
- codebase_intel-0.1.0/LICENSE +21 -0
- codebase_intel-0.1.0/PKG-INFO +361 -0
- codebase_intel-0.1.0/README.md +315 -0
- codebase_intel-0.1.0/community-contracts/fastapi.yaml +143 -0
- codebase_intel-0.1.0/community-contracts/nodejs-express.yaml +161 -0
- codebase_intel-0.1.0/community-contracts/react-typescript.yaml +160 -0
- codebase_intel-0.1.0/docs/COMPARISON.md +110 -0
- codebase_intel-0.1.0/pyproject.toml +119 -0
- codebase_intel-0.1.0/src/codebase_intel/__init__.py +3 -0
- codebase_intel-0.1.0/src/codebase_intel/analytics/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/analytics/benchmark.py +406 -0
- codebase_intel-0.1.0/src/codebase_intel/analytics/feedback.py +496 -0
- codebase_intel-0.1.0/src/codebase_intel/analytics/tracker.py +439 -0
- codebase_intel-0.1.0/src/codebase_intel/cli/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/cli/main.py +740 -0
- codebase_intel-0.1.0/src/codebase_intel/contracts/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/contracts/auto_generator.py +438 -0
- codebase_intel-0.1.0/src/codebase_intel/contracts/evaluator.py +531 -0
- codebase_intel-0.1.0/src/codebase_intel/contracts/models.py +433 -0
- codebase_intel-0.1.0/src/codebase_intel/contracts/registry.py +225 -0
- codebase_intel-0.1.0/src/codebase_intel/core/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/core/config.py +248 -0
- codebase_intel-0.1.0/src/codebase_intel/core/exceptions.py +454 -0
- codebase_intel-0.1.0/src/codebase_intel/core/types.py +375 -0
- codebase_intel-0.1.0/src/codebase_intel/decisions/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/decisions/miner.py +297 -0
- codebase_intel-0.1.0/src/codebase_intel/decisions/models.py +302 -0
- codebase_intel-0.1.0/src/codebase_intel/decisions/store.py +411 -0
- codebase_intel-0.1.0/src/codebase_intel/drift/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/drift/detector.py +443 -0
- codebase_intel-0.1.0/src/codebase_intel/graph/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/graph/builder.py +391 -0
- codebase_intel-0.1.0/src/codebase_intel/graph/parser.py +1232 -0
- codebase_intel-0.1.0/src/codebase_intel/graph/query.py +377 -0
- codebase_intel-0.1.0/src/codebase_intel/graph/storage.py +736 -0
- codebase_intel-0.1.0/src/codebase_intel/mcp/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/mcp/server.py +710 -0
- codebase_intel-0.1.0/src/codebase_intel/orchestrator/__init__.py +1 -0
- codebase_intel-0.1.0/src/codebase_intel/orchestrator/assembler.py +649 -0
- codebase_intel-0.1.0/tests/__init__.py +0 -0
- codebase_intel-0.1.0/tests/conftest.py +134 -0
- codebase_intel-0.1.0/tests/integration/__init__.py +0 -0
- codebase_intel-0.1.0/tests/integration/test_graph_pipeline.py +455 -0
- codebase_intel-0.1.0/tests/unit/__init__.py +0 -0
- codebase_intel-0.1.0/tests/unit/contracts/__init__.py +0 -0
- codebase_intel-0.1.0/tests/unit/contracts/test_evaluator.py +707 -0
- codebase_intel-0.1.0/tests/unit/contracts/test_models.py +415 -0
- codebase_intel-0.1.0/tests/unit/core/__init__.py +0 -0
- codebase_intel-0.1.0/tests/unit/core/test_exceptions.py +1152 -0
- codebase_intel-0.1.0/tests/unit/core/test_types.py +745 -0
- codebase_intel-0.1.0/tests/unit/decisions/__init__.py +0 -0
- codebase_intel-0.1.0/tests/unit/decisions/test_models.py +569 -0
- codebase_intel-0.1.0/tests/unit/decisions/test_store.py +716 -0
- codebase_intel-0.1.0/tests/unit/drift/__init__.py +0 -0
- codebase_intel-0.1.0/tests/unit/drift/test_detector.py +388 -0
- codebase_intel-0.1.0/tests/unit/graph/__init__.py +0 -0
- codebase_intel-0.1.0/tests/unit/graph/test_parser.py +525 -0
- codebase_intel-0.1.0/tests/unit/graph/test_storage.py +701 -0
- codebase_intel-0.1.0/tests/unit/orchestrator/__init__.py +0 -0
- codebase_intel-0.1.0/tests/unit/orchestrator/test_assembler.py +458 -0
- codebase_intel-0.1.0/uv.lock +2371 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
lint:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- uses: astral-sh/setup-uv@v5
|
|
18
|
+
with:
|
|
19
|
+
version: "latest"
|
|
20
|
+
- run: uv sync --dev
|
|
21
|
+
- run: uv run ruff check src/ tests/
|
|
22
|
+
- run: uv run ruff format --check src/ tests/
|
|
23
|
+
|
|
24
|
+
typecheck:
|
|
25
|
+
runs-on: ubuntu-latest
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
- uses: astral-sh/setup-uv@v5
|
|
29
|
+
with:
|
|
30
|
+
version: "latest"
|
|
31
|
+
- run: uv sync --dev
|
|
32
|
+
- run: uv run mypy src/ --ignore-missing-imports
|
|
33
|
+
|
|
34
|
+
test:
|
|
35
|
+
runs-on: ubuntu-latest
|
|
36
|
+
strategy:
|
|
37
|
+
matrix:
|
|
38
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
- uses: astral-sh/setup-uv@v5
|
|
42
|
+
with:
|
|
43
|
+
version: "latest"
|
|
44
|
+
- run: uv python install ${{ matrix.python-version }}
|
|
45
|
+
- run: uv sync --dev --python ${{ matrix.python-version }}
|
|
46
|
+
- run: uv run pytest tests/unit/ -x -q --no-header
|
|
47
|
+
env:
|
|
48
|
+
PYTHONDONTWRITEBYTECODE: 1
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
*.egg-info/
|
|
7
|
+
*.egg
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
.eggs/
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
env/
|
|
16
|
+
|
|
17
|
+
# IDE
|
|
18
|
+
.idea/
|
|
19
|
+
.vscode/
|
|
20
|
+
*.swp
|
|
21
|
+
*.swo
|
|
22
|
+
*~
|
|
23
|
+
|
|
24
|
+
# Testing
|
|
25
|
+
.coverage
|
|
26
|
+
htmlcov/
|
|
27
|
+
.pytest_cache/
|
|
28
|
+
.mypy_cache/
|
|
29
|
+
|
|
30
|
+
# OS
|
|
31
|
+
.DS_Store
|
|
32
|
+
Thumbs.db
|
|
33
|
+
|
|
34
|
+
# Project runtime data (generated by codebase-intel itself)
|
|
35
|
+
.codebase-intel/graph.db
|
|
36
|
+
.codebase-intel/graph.db-wal
|
|
37
|
+
.codebase-intel/graph.db-shm
|
|
38
|
+
|
|
39
|
+
# Environment
|
|
40
|
+
.env
|
|
41
|
+
.env.*
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Codebase Intel — Project CLAUDE.md
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
Codebase Intelligence Platform: an open-source, agent-agnostic system that provides AI coding agents with structured context, decision provenance, and quality contracts. Solves the three biggest gaps in AI-assisted development: context/memory, judgment/business context, and quality assurance.
|
|
5
|
+
|
|
6
|
+
## Tech Stack
|
|
7
|
+
- **Language:** Python 3.11+
|
|
8
|
+
- **Type System:** Pydantic v2 for all models, mypy strict mode
|
|
9
|
+
- **Storage:** SQLite via aiosqlite (zero-dependency, portable)
|
|
10
|
+
- **Parsing:** tree-sitter for language-agnostic AST analysis
|
|
11
|
+
- **Agent Interface:** MCP (Model Context Protocol) server
|
|
12
|
+
- **CLI:** Typer + Rich
|
|
13
|
+
- **Git Integration:** GitPython
|
|
14
|
+
- **Token Counting:** tiktoken
|
|
15
|
+
- **Hashing:** xxhash for content fingerprinting
|
|
16
|
+
|
|
17
|
+
## Architecture
|
|
18
|
+
Layered system with five core modules + two interface layers:
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
AI Agent (any) → MCP Server / CLI
|
|
22
|
+
↓
|
|
23
|
+
Context Orchestrator
|
|
24
|
+
↙ ↓ ↘
|
|
25
|
+
Code Graph Decisions Contracts
|
|
26
|
+
↘ ↓ ↙
|
|
27
|
+
Drift Detector
|
|
28
|
+
↓
|
|
29
|
+
Codebase (git)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Module Responsibilities:
|
|
33
|
+
- **core/** — Shared types, config, exceptions. No business logic.
|
|
34
|
+
- **graph/** — Semantic code graph: AST parsing, dependency mapping, impact analysis, SQLite storage
|
|
35
|
+
- **decisions/** — Decision journal: structured records, git mining, code linking, temporal validation
|
|
36
|
+
- **contracts/** — Quality contracts: architectural rules, pattern libraries, evaluation engine
|
|
37
|
+
- **orchestrator/** — Context assembly: budget management, freshness scoring, conflict detection
|
|
38
|
+
- **drift/** — Drift detection: staleness, pattern violations, knowledge decay
|
|
39
|
+
- **mcp/** — MCP server: exposes all modules as queryable tools for AI agents
|
|
40
|
+
- **cli/** — CLI interface: init, analyze, query, serve commands
|
|
41
|
+
|
|
42
|
+
## Project Flow
|
|
43
|
+
1. `codebase-intel init` → scans repo, builds initial code graph, generates starter configs
|
|
44
|
+
2. Git hooks keep graph updated incrementally on each commit
|
|
45
|
+
3. AI agent connects via MCP → sends task description
|
|
46
|
+
4. Orchestrator assembles relevant context (files, decisions, contracts) within token budget
|
|
47
|
+
5. Agent receives structured context, writes code
|
|
48
|
+
6. Drift detector flags violations post-commit
|
|
49
|
+
|
|
50
|
+
## Directory Structure
|
|
51
|
+
```
|
|
52
|
+
src/codebase_intel/
|
|
53
|
+
├── core/ # types.py, config.py, exceptions.py
|
|
54
|
+
├── graph/ # storage.py, parser.py, query.py, builder.py
|
|
55
|
+
├── decisions/ # models.py, store.py, miner.py
|
|
56
|
+
├── contracts/ # models.py, evaluator.py, registry.py, auto_generator.py
|
|
57
|
+
├── orchestrator/ # assembler.py
|
|
58
|
+
├── drift/ # detector.py
|
|
59
|
+
├── mcp/ # server.py
|
|
60
|
+
└── cli/ # main.py
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Key Commands
|
|
64
|
+
- `ruff check src/ tests/` — Lint
|
|
65
|
+
- `ruff format src/ tests/` — Format
|
|
66
|
+
- `mypy src/` — Type check
|
|
67
|
+
- `pytest` — Run tests with coverage
|
|
68
|
+
- `pytest tests/unit/` — Unit tests only
|
|
69
|
+
- `pytest tests/integration/` — Integration tests only
|
|
70
|
+
|
|
71
|
+
## Conventions
|
|
72
|
+
- All models inherit from Pydantic BaseModel — never use raw dicts for structured data
|
|
73
|
+
- Async-first: all I/O operations are async
|
|
74
|
+
- Repository pattern for storage: models never touch SQLite directly
|
|
75
|
+
- Type hints on every function (params + return)
|
|
76
|
+
- Custom exceptions over generic ones — always include context
|
|
77
|
+
- Content hashing via xxhash for change detection
|
|
78
|
+
- ISO 8601 for all timestamps, UTC timezone
|
|
79
|
+
|
|
80
|
+
## Current Focus
|
|
81
|
+
- Initial architecture build-out with comprehensive edge case handling
|
|
82
|
+
- Core module implementation
|
|
83
|
+
- MCP server interface design
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Contributing to codebase-intel
|
|
2
|
+
|
|
3
|
+
Thanks for wanting to help. Here's how to get started.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/MutharasuArchunan13/codebase-intel.git
|
|
9
|
+
cd codebase-intel
|
|
10
|
+
uv sync --dev # or: pip install -e ".[dev]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Development workflow
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Run tests
|
|
17
|
+
uv run pytest tests/unit/ -x -q
|
|
18
|
+
|
|
19
|
+
# Lint
|
|
20
|
+
uv run ruff check src/ tests/
|
|
21
|
+
uv run ruff format src/ tests/
|
|
22
|
+
|
|
23
|
+
# Type check
|
|
24
|
+
uv run mypy src/ --ignore-missing-imports
|
|
25
|
+
|
|
26
|
+
# Run against a real project
|
|
27
|
+
uv run codebase-intel init /path/to/your/project
|
|
28
|
+
uv run codebase-intel benchmark /path/to/your/project
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## What to work on
|
|
32
|
+
|
|
33
|
+
### High-impact areas
|
|
34
|
+
|
|
35
|
+
1. **Contract packs** — Write quality rules for your framework (Django, Spring Boot, Next.js, etc.). Drop a YAML file in `community-contracts/`.
|
|
36
|
+
|
|
37
|
+
2. **Language extraction** — Improve parsing for specific languages in `src/codebase_intel/graph/parser.py`. The generic extractor works but language-specific extractors (like Python and JS already have) produce better results.
|
|
38
|
+
|
|
39
|
+
3. **Decision mining** — Improve git history analysis in `src/codebase_intel/decisions/miner.py`. Better keyword detection, PR description parsing, code review comment extraction.
|
|
40
|
+
|
|
41
|
+
4. **Benchmarks** — Run `codebase-intel benchmark` on your repos and share results. Real numbers from diverse projects strengthen the case.
|
|
42
|
+
|
|
43
|
+
5. **Auto-pattern detection** — Add new pattern detectors in `src/codebase_intel/contracts/auto_generator.py`. The more conventions we detect automatically, the lower the adoption barrier.
|
|
44
|
+
|
|
45
|
+
### Architecture rules
|
|
46
|
+
|
|
47
|
+
- **Pydantic v2 models** for all structured data — never raw dicts
|
|
48
|
+
- **Async-first** for all I/O operations
|
|
49
|
+
- **Repository pattern** for storage — models never touch SQLite directly
|
|
50
|
+
- **Type hints** on every function (params + return)
|
|
51
|
+
- **Custom exceptions** with structured context — never bare try/except
|
|
52
|
+
|
|
53
|
+
### Code quality
|
|
54
|
+
|
|
55
|
+
- Run `ruff check` and `ruff format` before committing
|
|
56
|
+
- Tests should cover edge cases, not just happy paths
|
|
57
|
+
- Comments explain *why*, not *what*
|
|
58
|
+
|
|
59
|
+
## Pull requests
|
|
60
|
+
|
|
61
|
+
- Keep PRs focused — one feature or fix per PR
|
|
62
|
+
- Include test coverage for new code
|
|
63
|
+
- Update `CLAUDE.md` if you change architecture
|
|
64
|
+
|
|
65
|
+
## Community contract packs
|
|
66
|
+
|
|
67
|
+
To contribute a contract pack:
|
|
68
|
+
|
|
69
|
+
1. Create a YAML file in `community-contracts/`
|
|
70
|
+
2. Include 5+ rules with clear descriptions
|
|
71
|
+
3. Add `fix_suggestion` for every rule
|
|
72
|
+
4. Test it: copy to a project's `.codebase-intel/contracts/` and run `codebase-intel benchmark`
|
|
73
|
+
|
|
74
|
+
See existing packs (`fastapi.yaml`, `react-typescript.yaml`, `nodejs-express.yaml`) for the format.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mutharasu
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codebase-intel
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Your AI agent writes code — but does it know WHY your code exists? Decision provenance, quality contracts, and AI anti-pattern detection for coding agents.
|
|
5
|
+
Project-URL: Homepage, https://github.com/MutharasuArchunan13/codebase-intel
|
|
6
|
+
Project-URL: Repository, https://github.com/MutharasuArchunan13/codebase-intel
|
|
7
|
+
Project-URL: Issues, https://github.com/MutharasuArchunan13/codebase-intel/issues
|
|
8
|
+
Author: Mutharasu
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: adr,ai,ai-agent,ai-coding,architecture-decision-records,claude-code,code-graph,code-intelligence,code-quality,code-review,coding-agents,context,copilot,cursor,developer-tools,devtools,knowledge-graph,mcp,mcp-server,tree-sitter
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
20
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
21
|
+
Classifier: Topic :: Software Development :: Testing
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Requires-Dist: aiosqlite<1,>=0.20
|
|
25
|
+
Requires-Dist: gitpython<4,>=3.1
|
|
26
|
+
Requires-Dist: mcp<2,>=1.0
|
|
27
|
+
Requires-Dist: pydantic-settings<3,>=2.2
|
|
28
|
+
Requires-Dist: pydantic<3,>=2.6
|
|
29
|
+
Requires-Dist: pyyaml<7,>=6.0
|
|
30
|
+
Requires-Dist: rich<14,>=13.7
|
|
31
|
+
Requires-Dist: tiktoken<1,>=0.7
|
|
32
|
+
Requires-Dist: tree-sitter-language-pack<1,>=0.3
|
|
33
|
+
Requires-Dist: tree-sitter<1,>=0.23
|
|
34
|
+
Requires-Dist: typer<1,>=0.12
|
|
35
|
+
Requires-Dist: xxhash<4,>=3.4
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: mypy>=1.9; extra == 'dev'
|
|
38
|
+
Requires-Dist: pre-commit>=3.7; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
40
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
42
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
43
|
+
Provides-Extra: eval
|
|
44
|
+
Requires-Dist: matplotlib>=3.7; extra == 'eval'
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
<h1 align="center">codebase-intel</h1>
|
|
48
|
+
|
|
49
|
+
<p align="center">
|
|
50
|
+
<strong>Your AI agent writes code. But does it know <em>why</em> your code exists?</strong>
|
|
51
|
+
</p>
|
|
52
|
+
|
|
53
|
+
<p align="center">
|
|
54
|
+
<a href="https://github.com/MutharasuArchunan13/codebase-intel/stargazers"><img src="https://img.shields.io/github/stars/MutharasuArchunan13/codebase-intel?style=flat-square" alt="Stars"></a>
|
|
55
|
+
<a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-green.svg?style=flat-square" alt="MIT"></a>
|
|
56
|
+
<a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.11+-blue.svg?style=flat-square" alt="Python 3.11+"></a>
|
|
57
|
+
<a href="https://modelcontextprotocol.io/"><img src="https://img.shields.io/badge/MCP-compatible-purple.svg?style=flat-square" alt="MCP"></a>
|
|
58
|
+
<a href="#19-languages"><img src="https://img.shields.io/badge/languages-19-orange.svg?style=flat-square" alt="19 Languages"></a>
|
|
59
|
+
</p>
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
AI coding agents can autocomplete, refactor, and generate code. But they still fail at the things that matter most in production:
|
|
64
|
+
|
|
65
|
+
- They don't know your team **decided** to use token bucket over sliding window — and why
|
|
66
|
+
- They don't know the compliance team **requires** rate limit headers on every response
|
|
67
|
+
- They don't know that changing `config.py` will **break** billing and analytics
|
|
68
|
+
- They generate code that **looks right** but violates your project's architectural patterns
|
|
69
|
+
|
|
70
|
+
**codebase-intel** fixes this. It's the context layer that sits between your codebase and any AI agent — providing not just *what* code exists, but *why* it exists, *what rules* it must follow, and *what breaks* if you change it.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Before vs After
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
WITHOUT codebase-intel WITH codebase-intel
|
|
78
|
+
───────────────────── ───────────────────
|
|
79
|
+
|
|
80
|
+
Agent reads: every file in the dir Agent reads: only what matters
|
|
81
|
+
Tokens used: 16,063 Tokens used: 5,955
|
|
82
|
+
Knows why code exists: No Knows why: Yes (13 decisions)
|
|
83
|
+
Quality guardrails: None Guardrails: 4 contracts enforced
|
|
84
|
+
Drift awareness: None Drift: stale context detected
|
|
85
|
+
Impact analysis: None Impact: knows what else breaks
|
|
86
|
+
|
|
87
|
+
Result: faster but fragile Result: faster AND correct
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Real benchmarks on production codebases
|
|
91
|
+
|
|
92
|
+
| Project | Files | Naive Tokens | codebase-intel | Reduction | Decisions | Contracts |
|
|
93
|
+
|---|---:|---:|---:|---:|---:|---:|
|
|
94
|
+
| **IDP Backend** (FastAPI monolith) | 359 | 16,063 | 5,955 | **63%** | 13 | 4 |
|
|
95
|
+
| **Job Marketing AI** (microservice) | 358 | 14,611 | 5,955 | **59%** | 0 | 7 |
|
|
96
|
+
| **Resume Builder** (microservice) | 87 | 2,461 | 1,275 | **48%** | 0 | 6 |
|
|
97
|
+
| **User Module** (microservice) | 153 | 5,904 | 1,476 | **75%** | 0 | 4 |
|
|
98
|
+
|
|
99
|
+
> Numbers from `codebase-intel benchmark` on real production repos. The token reduction comes from targeted graph traversal. The decisions and contracts are what no other tool provides.
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## What makes this different
|
|
104
|
+
|
|
105
|
+
There are great tools for code graphs (code-review-graph is excellent — 6K+ stars). **We don't compete with them.** We solve what they don't:
|
|
106
|
+
|
|
107
|
+
| Capability | Graph-only tools | codebase-intel |
|
|
108
|
+
|---|---|---|
|
|
109
|
+
| Code graph + dependencies | Yes | Yes (19 languages) |
|
|
110
|
+
| **Decision Journal** — *why* code is the way it is | No | **Yes** |
|
|
111
|
+
| **Quality Contracts** — rules AI must follow | No | **Yes** |
|
|
112
|
+
| **AI Anti-pattern Detection** — catches hallucinated imports, over-abstraction | No | **Yes** |
|
|
113
|
+
| **Drift Detection** — stale context, context rot alerts | No | **Yes** |
|
|
114
|
+
| **Token Budgeting** — fits context to any agent's window | No | **Yes** |
|
|
115
|
+
| **Live Analytics** — prove efficiency over time | No | **Yes** |
|
|
116
|
+
|
|
117
|
+
### The missing layer
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
What exists today: What codebase-intel adds:
|
|
121
|
+
|
|
122
|
+
Code → Graph → Agent Code → Graph ──────────────────→ Agent
|
|
123
|
+
↓ ↑
|
|
124
|
+
Decision Journal ──→ WHY ────────┤
|
|
125
|
+
Quality Contracts → RULES ───────┤
|
|
126
|
+
Drift Detector ──→ WARNINGS ─────┘
|
|
127
|
+
|
|
128
|
+
"Here are the 3 files that matter,
|
|
129
|
+
the decision your team made 6 months ago,
|
|
130
|
+
and the 2 rules you must not violate."
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Quick Start
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
pip install codebase-intel
|
|
139
|
+
|
|
140
|
+
# Initialize on your project
|
|
141
|
+
cd your-project
|
|
142
|
+
codebase-intel init
|
|
143
|
+
|
|
144
|
+
# See what it found
|
|
145
|
+
codebase-intel status
|
|
146
|
+
|
|
147
|
+
# Mine decisions from git history
|
|
148
|
+
codebase-intel mine --save
|
|
149
|
+
|
|
150
|
+
# Run benchmarks (see the before/after)
|
|
151
|
+
codebase-intel benchmark
|
|
152
|
+
|
|
153
|
+
# View live dashboard
|
|
154
|
+
codebase-intel dashboard
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Connect to Claude Code
|
|
158
|
+
|
|
159
|
+
```json
|
|
160
|
+
{
|
|
161
|
+
"mcpServers": {
|
|
162
|
+
"codebase-intel": {
|
|
163
|
+
"command": "codebase-intel",
|
|
164
|
+
"args": ["serve", "/path/to/your/project"]
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Now your agent automatically gets relevant context, decisions, and contracts before writing code.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## The Three Pillars
|
|
175
|
+
|
|
176
|
+
### 1. Decision Journal — "Why does this code exist?"
|
|
177
|
+
|
|
178
|
+
Every team makes hundreds of decisions that never get documented. *Why* did you choose Postgres over Mongo? *Why* is auth middleware structured that way? *Why* was the sliding window approach rejected?
|
|
179
|
+
|
|
180
|
+
codebase-intel captures these from git history automatically and links them to code:
|
|
181
|
+
|
|
182
|
+
```yaml
|
|
183
|
+
# .codebase-intel/decisions/DEC-042.yaml
|
|
184
|
+
id: DEC-042
|
|
185
|
+
title: "Use token bucket for rate limiting"
|
|
186
|
+
status: active
|
|
187
|
+
context: "Payment endpoint was getting hammered during flash sales"
|
|
188
|
+
decision: "Token bucket algorithm with per-user buckets, 100 req/min"
|
|
189
|
+
alternatives:
|
|
190
|
+
- name: sliding_window
|
|
191
|
+
rejection_reason: "Memory overhead too high at scale"
|
|
192
|
+
constraints:
|
|
193
|
+
- description: "Must not add >2ms p99 latency"
|
|
194
|
+
source: sla
|
|
195
|
+
is_hard: true
|
|
196
|
+
code_anchors:
|
|
197
|
+
- "src/middleware/rate_limiter.py:15-82"
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
**Without this**: your agent proposes sliding window (the exact approach you rejected 6 months ago).
|
|
201
|
+
**With this**: your agent sees the decision, follows it, and respects the SLA constraint.
|
|
202
|
+
|
|
203
|
+
### 2. Quality Contracts — "What rules must AI follow?"
|
|
204
|
+
|
|
205
|
+
Linters check syntax. Contracts enforce **your project's patterns**:
|
|
206
|
+
|
|
207
|
+
```yaml
|
|
208
|
+
# .codebase-intel/contracts/api-rules.yaml
|
|
209
|
+
rules:
|
|
210
|
+
- id: no-raw-sql
|
|
211
|
+
name: No raw SQL in API layer
|
|
212
|
+
severity: error
|
|
213
|
+
pattern: "execute\\(.*(SELECT|INSERT|UPDATE)"
|
|
214
|
+
fix_suggestion: "Use the repository pattern"
|
|
215
|
+
|
|
216
|
+
- id: async-everywhere
|
|
217
|
+
name: All I/O must be async
|
|
218
|
+
severity: error
|
|
219
|
+
pattern: "requests\\.(get|post)"
|
|
220
|
+
fix_suggestion: "Use httpx.AsyncClient"
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**Built-in AI guardrails** catch the patterns AI agents mess up most:
|
|
224
|
+
- Hallucinated imports (modules that don't exist)
|
|
225
|
+
- Over-abstraction (base classes with one subclass)
|
|
226
|
+
- Unnecessary error handling for impossible conditions
|
|
227
|
+
- Comments that restate code instead of explaining why
|
|
228
|
+
- Features that weren't requested (YAGNI violations)
|
|
229
|
+
|
|
230
|
+
### 3. Drift Detection — "Is our context still valid?"
|
|
231
|
+
|
|
232
|
+
Context rots. Decisions get outdated. Code anchors point to deleted files. codebase-intel detects this:
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
$ codebase-intel drift
|
|
236
|
+
|
|
237
|
+
╭──────────────── Drift Report ────────────────╮
|
|
238
|
+
│ Overall: MEDIUM │
|
|
239
|
+
│ 3 items need attention │
|
|
240
|
+
╰───────────────────────────────────────────────╯
|
|
241
|
+
|
|
242
|
+
- [MEDIUM] Decision DEC-012 anchored to deleted file
|
|
243
|
+
- [MEDIUM] Decision DEC-008 is past its review date
|
|
244
|
+
- [LOW] 2 files changed since last graph index
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## 19 Languages
|
|
250
|
+
|
|
251
|
+
Full tree-sitter parsing via [tree-sitter-language-pack](https://github.com/Goldziher/tree-sitter-language-pack):
|
|
252
|
+
|
|
253
|
+
| Category | Languages |
|
|
254
|
+
|---|---|
|
|
255
|
+
| **Web** | JavaScript, TypeScript, TSX |
|
|
256
|
+
| **Backend** | Python, Java, Go, Ruby, PHP, Elixir |
|
|
257
|
+
| **Systems** | Rust, C, C++ |
|
|
258
|
+
| **Mobile** | Swift, Kotlin, Dart |
|
|
259
|
+
| **Other** | C#, Scala, Lua, Haskell |
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## CLI Commands
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
codebase-intel init [path] # Initialize — build graph, create configs
|
|
267
|
+
codebase-intel analyze [--incremental] # Rebuild or update the code graph
|
|
268
|
+
codebase-intel mine [--save] # Mine git history for decision candidates
|
|
269
|
+
codebase-intel drift # Run drift detection
|
|
270
|
+
codebase-intel benchmark # Measure token efficiency (before/after)
|
|
271
|
+
codebase-intel dashboard # Live efficiency tracking over time
|
|
272
|
+
codebase-intel serve # Start MCP server
|
|
273
|
+
codebase-intel status # Component health check
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
## MCP Tools (7 tools)
|
|
277
|
+
|
|
278
|
+
| Tool | What it does |
|
|
279
|
+
|---|---|
|
|
280
|
+
| `get_context` | **The main tool.** Assembles relevant files + decisions + contracts within a token budget. |
|
|
281
|
+
| `query_graph` | Query dependencies, dependents, or run impact analysis. |
|
|
282
|
+
| `get_decisions` | Get architectural decisions relevant to specific files. |
|
|
283
|
+
| `get_contracts` | Get quality contracts for files you're editing. |
|
|
284
|
+
| `check_drift` | Verify context freshness before trusting old decisions. |
|
|
285
|
+
| `impact_analysis` | "What breaks if I change this file?" |
|
|
286
|
+
| `get_status` | Health check — graph stats, decision count, contract count. |
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Community Contract Packs
|
|
291
|
+
|
|
292
|
+
Pre-built quality rules for popular frameworks:
|
|
293
|
+
|
|
294
|
+
| Pack | Rules | Covers |
|
|
295
|
+
|---|---|---|
|
|
296
|
+
| **fastapi.yaml** | 10 | Layered architecture, Pydantic schemas, async, Depends(), secrets |
|
|
297
|
+
| **react-typescript.yaml** | 11 | Functional components, no `any`, custom hooks, lazy loading |
|
|
298
|
+
| **nodejs-express.yaml** | 12 | Error handling, helmet, rate limiting, structured logging |
|
|
299
|
+
|
|
300
|
+
```bash
|
|
301
|
+
cp community-contracts/fastapi.yaml .codebase-intel/contracts/
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
---
|
|
305
|
+
|
|
306
|
+
## Architecture
|
|
307
|
+
|
|
308
|
+
```
|
|
309
|
+
AI Agent (any) ──→ MCP Server (7 tools)
|
|
310
|
+
│
|
|
311
|
+
Context Orchestrator
|
|
312
|
+
(token budgeting, priority, conflicts)
|
|
313
|
+
╱ │ ╲
|
|
314
|
+
Code Graph Decision Quality
|
|
315
|
+
(19 langs) Journal Contracts
|
|
316
|
+
SQLite+WAL YAML files YAML+builtins
|
|
317
|
+
╲ │ ╱
|
|
318
|
+
Drift Detector
|
|
319
|
+
(staleness, rot, orphans)
|
|
320
|
+
│
|
|
321
|
+
Analytics Tracker
|
|
322
|
+
(live efficiency metrics)
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
## The Philosophy
|
|
328
|
+
|
|
329
|
+
**We don't make AI agents smarter. We make them informed.**
|
|
330
|
+
|
|
331
|
+
An agent with 1M tokens of context is like a developer with access to every file in the company — overwhelming and unfocused. An agent with codebase-intel is like a developer who just had a 5-minute conversation with the senior engineer: *"Here's what you need to know, here's why we did it this way, and here are the three things you absolutely cannot break."*
|
|
332
|
+
|
|
333
|
+
**We don't compete with graph tools. We complete them.**
|
|
334
|
+
|
|
335
|
+
Code graphs answer "what depends on what." That's necessary but not sufficient. codebase-intel answers the harder questions: *Why was this decision made? What constraints apply? What will the compliance team flag? What did we already try and reject?*
|
|
336
|
+
|
|
337
|
+
**We don't hide the truth. We prove it.**
|
|
338
|
+
|
|
339
|
+
Run `codebase-intel benchmark` on your project. See the numbers. Run `codebase-intel dashboard` over time. Watch the improvement. Every claim is backed by reproducible, project-specific data.
|
|
340
|
+
|
|
341
|
+
---
|
|
342
|
+
|
|
343
|
+
## Contributing
|
|
344
|
+
|
|
345
|
+
Areas with the most impact:
|
|
346
|
+
|
|
347
|
+
1. **Contract packs** — Share quality rules for your framework
|
|
348
|
+
2. **Language extraction** — Improve parsing for specific languages
|
|
349
|
+
3. **Decision mining** — Better git history analysis
|
|
350
|
+
4. **Benchmarks** — Run against your repos, share results
|
|
351
|
+
|
|
352
|
+
```bash
|
|
353
|
+
git clone https://github.com/MutharasuArchunan13/codebase-intel.git
|
|
354
|
+
cd codebase-intel
|
|
355
|
+
pip install -e ".[dev]"
|
|
356
|
+
pytest
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
## License
|
|
360
|
+
|
|
361
|
+
MIT
|