mcp-context-memory 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_context_memory-0.2.0/.github/workflows/pypi-publish.yml +34 -0
- mcp_context_memory-0.2.0/.github/workflows/tests.yml +32 -0
- mcp_context_memory-0.2.0/.gitignore +2 -0
- mcp_context_memory-0.2.0/CHANGELOG.md +31 -0
- mcp_context_memory-0.2.0/PKG-INFO +114 -0
- mcp_context_memory-0.2.0/README.md +100 -0
- mcp_context_memory-0.2.0/pyproject.toml +28 -0
- mcp_context_memory-0.2.0/src/mcp_context_memory/__init__.py +263 -0
- mcp_context_memory-0.2.0/tests/test_server.py +54 -0
- mcp_context_memory-0.2.0/uv.lock +3024 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: Release and Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build-and-publish:
|
|
10
|
+
name: Build and publish Python distributions to PyPI
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
permissions:
|
|
13
|
+
id-token: write # Mandatory for trusted publishing
|
|
14
|
+
contents: read
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- name: Checkout source
|
|
18
|
+
uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Install uv
|
|
21
|
+
uses: astral-sh/setup-uv@v5
|
|
22
|
+
with:
|
|
23
|
+
enable-cache: true
|
|
24
|
+
|
|
25
|
+
- name: Set up Python
|
|
26
|
+
uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version-file: "pyproject.toml"
|
|
29
|
+
|
|
30
|
+
- name: Build distributions
|
|
31
|
+
run: uv build
|
|
32
|
+
|
|
33
|
+
- name: Publish package distributions to PyPI
|
|
34
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
name: Run Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
name: Run Python Tests
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout source
|
|
16
|
+
uses: actions/checkout@v5
|
|
17
|
+
|
|
18
|
+
- name: Install uv
|
|
19
|
+
uses: astral-sh/setup-uv@v5
|
|
20
|
+
with:
|
|
21
|
+
enable-cache: true
|
|
22
|
+
|
|
23
|
+
- name: Set up Python
|
|
24
|
+
uses: actions/setup-python@v5
|
|
25
|
+
with:
|
|
26
|
+
python-version-file: "pyproject.toml"
|
|
27
|
+
|
|
28
|
+
- name: Install dependencies
|
|
29
|
+
run: uv sync --dev
|
|
30
|
+
|
|
31
|
+
- name: Run tests
|
|
32
|
+
run: uv run pytest tests/
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.2.0] - 2026-03-28
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
- Converted naive Context Memory into a "Semantic Project Brain".
|
|
12
|
+
- `index_project` tool introduced for AST-based code parsing using `tree-sitter`.
|
|
13
|
+
- Unified `search_context` tool that searches across both semantic code chunks and manual decisions.
|
|
14
|
+
- Replaced `store_context` with `remember_decision` for storing architectural reasoning.
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
- Support for extracting AST nodes (classes, methods, functions) for Python, Java, PHP, TypeScript, and JavaScript using `tree-sitter-languages`.
|
|
18
|
+
- Structural HTML extraction using `BeautifulSoup`.
|
|
19
|
+
- Added snippet for `AGENTS.md` to instruct LLMs on brain usage.
|
|
20
|
+
- GitHub Action for automated releases to PyPI on tag creation.
|
|
21
|
+
- Project restructuring into a proper Python package (`src/mcp_context_memory`).
|
|
22
|
+
- Automated test suite using `pytest` and `pytest-mock`.
|
|
23
|
+
- GitHub Action to run tests on push and pull request.
|
|
24
|
+
- Explicitly set all project and dev dependencies to their latest stable versions as of March 2026.
|
|
25
|
+
- Restricted Python version to 3.10-3.12 for `tree-sitter-languages` compatibility.
|
|
26
|
+
|
|
27
|
+
## [0.1.0] - 2026-03-28
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
- Initial implementation of the Local Context Memory MCP server.
|
|
31
|
+
- Basic tools: `store_context`, `query_context`, `list_all_topics`.
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcp-context-memory
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Semantic Project Brain MCP Server with AST Parsing
|
|
5
|
+
Requires-Python: <3.13,>=3.10
|
|
6
|
+
Requires-Dist: beautifulsoup4==4.14.3
|
|
7
|
+
Requires-Dist: chromadb==1.5.5
|
|
8
|
+
Requires-Dist: fastmcp==3.1.1
|
|
9
|
+
Requires-Dist: pathspec==1.0.4
|
|
10
|
+
Requires-Dist: sentence-transformers==5.3.0
|
|
11
|
+
Requires-Dist: tree-sitter-languages==1.10.2
|
|
12
|
+
Requires-Dist: tree-sitter==0.25.2
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# Semantic Project Brain MCP Server
|
|
16
|
+
|
|
17
|
+
A high-end Python-based MCP (Model Context Protocol) server that provides a local, semantic memory and code-indexer. It uses `tree-sitter` for AST-based parsing of source code to understand class definitions, method signatures, and structures, rather than naive text chunking. It also acts as a "Long-Term Memory" to help AI assistants bypass context window limits by persisting architectural decisions across sessions.
|
|
18
|
+
|
|
19
|
+
The server uses [ChromaDB](https://docs.trychroma.com/) for fast, local embedding storage, and stores its data in a `.context_db` folder within your current project directory.
|
|
20
|
+
|
|
21
|
+
## Prerequisites
|
|
22
|
+
|
|
23
|
+
You need [uv](https://github.com/astral-sh/uv) installed to run the server without managing virtual environments manually.
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Install uv if you haven't already
|
|
27
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Available Tools
|
|
31
|
+
|
|
32
|
+
The server provides three MCP tools:
|
|
33
|
+
1. `index_project(path: str)`: Scans a directory, parses code semantically using AST (Python, Java, PHP, TS, JS, HTML), and indexes it.
|
|
34
|
+
2. `search_context(query: str)`: A unified search over AST nodes and past project decisions.
|
|
35
|
+
3. `remember_decision(topic: str, context: str)`: Saves manual architectural notes or reasoning (e.g., "Why we chose framework X").
|
|
36
|
+
|
|
37
|
+
## Usage with AI Assistants
|
|
38
|
+
|
|
39
|
+
You can use `uvx` (part of `uv`) to run this server directly.
|
|
40
|
+
|
|
41
|
+
### Claude Desktop Integration
|
|
42
|
+
|
|
43
|
+
To install and use this MCP server with Claude Desktop, add the following to your `claude_desktop_config.json`:
|
|
44
|
+
|
|
45
|
+
```json
|
|
46
|
+
{
|
|
47
|
+
"mcpServers": {
|
|
48
|
+
"semantic-brain": {
|
|
49
|
+
"command": "uvx",
|
|
50
|
+
"args": [
|
|
51
|
+
"--from", "fastmcp",
|
|
52
|
+
"--from", "chromadb",
|
|
53
|
+
"--from", "sentence-transformers",
|
|
54
|
+
"--from", "tree-sitter",
|
|
55
|
+
"--from", "tree-sitter-languages",
|
|
56
|
+
"--from", "beautifulsoup4",
|
|
57
|
+
"--from", "pathspec",
|
|
58
|
+
"python",
|
|
59
|
+
"/path/to/this/project/src/mcp_context_memory/__init__.py"
|
|
60
|
+
]
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
```
|
|
65
|
+
*Note: Replace `/path/to/this/project/src/mcp_context_memory/__init__.py` with the absolute path to the file.*
|
|
66
|
+
|
|
67
|
+
### Cursor Integration
|
|
68
|
+
|
|
69
|
+
In Cursor, go to **Settings > Features > MCP** and add a new MCP Server:
|
|
70
|
+
- **Name:** Semantic Brain
|
|
71
|
+
- **Type:** `command`
|
|
72
|
+
- **Command:** `uvx --from fastmcp --from chromadb --from sentence-transformers --from tree-sitter --from tree-sitter-languages --from beautifulsoup4 --from pathspec python /path/to/this/project/src/mcp_context_memory/__init__.py`
|
|
73
|
+
|
|
74
|
+
## Bootstrapping an Existing Project
|
|
75
|
+
|
|
76
|
+
To get the most out of the Semantic Project Brain in an existing codebase, follow these steps to seed it with relevant context:
|
|
77
|
+
|
|
78
|
+
1. **Initial Semantic Indexing**:
|
|
79
|
+
Run the indexing tool to build the initial AST-based map of your code:
|
|
80
|
+
`index_project(path=".")`
|
|
81
|
+
This allows the brain to immediately understand your classes, methods, and structural HTML.
|
|
82
|
+
|
|
83
|
+
2. **Capturing Core Architecture**:
|
|
84
|
+
Use `remember_decision` to document the foundational "Why" of the project. Good candidates for initial entries include:
|
|
85
|
+
- **Tech Stack Choice**: `remember_decision(topic="Tech Stack", context="We use Symfony 7 with PHP 8.3 because...")`
|
|
86
|
+
- **Database Schema**: `remember_decision(topic="Data Model", context="The 'Orders' table is partitioned by year to handle high volume...")`
|
|
87
|
+
- **Authentication Flow**: `remember_decision(topic="Auth", context="JWT tokens are handled via LexikJWTAuthenticationBundle with a 1-hour TTL...")`
|
|
88
|
+
|
|
89
|
+
3. **Indexing Documentation**:
|
|
90
|
+
If you have existing `DOCS.md` or `ARCHITECTURE.md` files, you can copy-paste their key insights into `remember_decision` to make them semantically searchable alongside the code.
|
|
91
|
+
|
|
92
|
+
4. **Verification**:
|
|
93
|
+
Test the brain's "memory" by asking it a question through `search_context(query="How is authentication handled?")`. If it returns your stored decisions, it's ready to assist.
|
|
94
|
+
|
|
95
|
+
## Instructions for AI Agents (AGENTS.md)
|
|
96
|
+
|
|
97
|
+
Copy the following block and paste it into your project's `.cursorrules`, `AGENTS.md`, or `GEMINI.md` to instruct the LLM on how to use this server:
|
|
98
|
+
|
|
99
|
+
```markdown
|
|
100
|
+
# Semantic Project Brain Usage Guidelines
|
|
101
|
+
|
|
102
|
+
You have access to the `semantic-brain` MCP server. Follow these rules rigorously:
|
|
103
|
+
|
|
104
|
+
1. **Re-indexing:**
|
|
105
|
+
- If you make significant structural changes (e.g., creating a new module, renaming classes, or refactoring), you MUST trigger `index_project(path=".")` when you finish to keep the AST index up to date.
|
|
106
|
+
- If you cannot find expected code in `search_context`, trigger an index update first.
|
|
107
|
+
|
|
108
|
+
2. **Understanding the Codebase:**
|
|
109
|
+
- Use `search_context(query="ClassName")` to understand class hierarchies, locate method definitions, and retrieve precise semantic chunks of code instead of grepping the entire workspace.
|
|
110
|
+
|
|
111
|
+
3. **Remembering Decisions:**
|
|
112
|
+
- Before completing a task that involved a notable architectural decision, tradeoff, or complex logic, you are OBLIGATED to call `remember_decision(topic="...", context="...")`.
|
|
113
|
+
- Store "Why" something was built a certain way, so you and other agents can retrieve it in future sessions using `search_context`.
|
|
114
|
+
```
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Semantic Project Brain MCP Server
|
|
2
|
+
|
|
3
|
+
A high-end Python-based MCP (Model Context Protocol) server that provides a local, semantic memory and code-indexer. It uses `tree-sitter` for AST-based parsing of source code to understand class definitions, method signatures, and structures, rather than naive text chunking. It also acts as a "Long-Term Memory" to help AI assistants bypass context window limits by persisting architectural decisions across sessions.
|
|
4
|
+
|
|
5
|
+
The server uses [ChromaDB](https://docs.trychroma.com/) for fast, local embedding storage, and stores its data in a `.context_db` folder within your current project directory.
|
|
6
|
+
|
|
7
|
+
## Prerequisites
|
|
8
|
+
|
|
9
|
+
You need [uv](https://github.com/astral-sh/uv) installed to run the server without managing virtual environments manually.
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install uv if you haven't already
|
|
13
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Available Tools
|
|
17
|
+
|
|
18
|
+
The server provides three MCP tools:
|
|
19
|
+
1. `index_project(path: str)`: Scans a directory, parses code semantically using AST (Python, Java, PHP, TS, JS, HTML), and indexes it.
|
|
20
|
+
2. `search_context(query: str)`: A unified search over AST nodes and past project decisions.
|
|
21
|
+
3. `remember_decision(topic: str, context: str)`: Saves manual architectural notes or reasoning (e.g., "Why we chose framework X").
|
|
22
|
+
|
|
23
|
+
## Usage with AI Assistants
|
|
24
|
+
|
|
25
|
+
You can use `uvx` (part of `uv`) to run this server directly.
|
|
26
|
+
|
|
27
|
+
### Claude Desktop Integration
|
|
28
|
+
|
|
29
|
+
To install and use this MCP server with Claude Desktop, add the following to your `claude_desktop_config.json`:
|
|
30
|
+
|
|
31
|
+
```json
|
|
32
|
+
{
|
|
33
|
+
"mcpServers": {
|
|
34
|
+
"semantic-brain": {
|
|
35
|
+
"command": "uvx",
|
|
36
|
+
"args": [
|
|
37
|
+
"--from", "fastmcp",
|
|
38
|
+
"--from", "chromadb",
|
|
39
|
+
"--from", "sentence-transformers",
|
|
40
|
+
"--from", "tree-sitter",
|
|
41
|
+
"--from", "tree-sitter-languages",
|
|
42
|
+
"--from", "beautifulsoup4",
|
|
43
|
+
"--from", "pathspec",
|
|
44
|
+
"python",
|
|
45
|
+
"/path/to/this/project/src/mcp_context_memory/__init__.py"
|
|
46
|
+
]
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
*Note: Replace `/path/to/this/project/src/mcp_context_memory/__init__.py` with the absolute path to the file.*
|
|
52
|
+
|
|
53
|
+
### Cursor Integration
|
|
54
|
+
|
|
55
|
+
In Cursor, go to **Settings > Features > MCP** and add a new MCP Server:
|
|
56
|
+
- **Name:** Semantic Brain
|
|
57
|
+
- **Type:** `command`
|
|
58
|
+
- **Command:** `uvx --from fastmcp --from chromadb --from sentence-transformers --from tree-sitter --from tree-sitter-languages --from beautifulsoup4 --from pathspec python /path/to/this/project/src/mcp_context_memory/__init__.py`
|
|
59
|
+
|
|
60
|
+
## Bootstrapping an Existing Project
|
|
61
|
+
|
|
62
|
+
To get the most out of the Semantic Project Brain in an existing codebase, follow these steps to seed it with relevant context:
|
|
63
|
+
|
|
64
|
+
1. **Initial Semantic Indexing**:
|
|
65
|
+
Run the indexing tool to build the initial AST-based map of your code:
|
|
66
|
+
`index_project(path=".")`
|
|
67
|
+
This allows the brain to immediately understand your classes, methods, and structural HTML.
|
|
68
|
+
|
|
69
|
+
2. **Capturing Core Architecture**:
|
|
70
|
+
Use `remember_decision` to document the foundational "Why" of the project. Good candidates for initial entries include:
|
|
71
|
+
- **Tech Stack Choice**: `remember_decision(topic="Tech Stack", context="We use Symfony 7 with PHP 8.3 because...")`
|
|
72
|
+
- **Database Schema**: `remember_decision(topic="Data Model", context="The 'Orders' table is partitioned by year to handle high volume...")`
|
|
73
|
+
- **Authentication Flow**: `remember_decision(topic="Auth", context="JWT tokens are handled via LexikJWTAuthenticationBundle with a 1-hour TTL...")`
|
|
74
|
+
|
|
75
|
+
3. **Indexing Documentation**:
|
|
76
|
+
If you have existing `DOCS.md` or `ARCHITECTURE.md` files, you can copy-paste their key insights into `remember_decision` to make them semantically searchable alongside the code.
|
|
77
|
+
|
|
78
|
+
4. **Verification**:
|
|
79
|
+
Test the brain's "memory" by asking it a question through `search_context(query="How is authentication handled?")`. If it returns your stored decisions, it's ready to assist.
|
|
80
|
+
|
|
81
|
+
## Instructions for AI Agents (AGENTS.md)
|
|
82
|
+
|
|
83
|
+
Copy the following block and paste it into your project's `.cursorrules`, `AGENTS.md`, or `GEMINI.md` to instruct the LLM on how to use this server:
|
|
84
|
+
|
|
85
|
+
```markdown
|
|
86
|
+
# Semantic Project Brain Usage Guidelines
|
|
87
|
+
|
|
88
|
+
You have access to the `semantic-brain` MCP server. Follow these rules rigorously:
|
|
89
|
+
|
|
90
|
+
1. **Re-indexing:**
|
|
91
|
+
- If you make significant structural changes (e.g., creating a new module, renaming classes, or refactoring), you MUST trigger `index_project(path=".")` when you finish to keep the AST index up to date.
|
|
92
|
+
- If you cannot find expected code in `search_context`, trigger an index update first.
|
|
93
|
+
|
|
94
|
+
2. **Understanding the Codebase:**
|
|
95
|
+
- Use `search_context(query="ClassName")` to understand class hierarchies, locate method definitions, and retrieve precise semantic chunks of code instead of grepping the entire workspace.
|
|
96
|
+
|
|
97
|
+
3. **Remembering Decisions:**
|
|
98
|
+
- Before completing a task that involved a notable architectural decision, tradeoff, or complex logic, you are OBLIGATED to call `remember_decision(topic="...", context="...")`.
|
|
99
|
+
- Store "Why" something was built a certain way, so you and other agents can retrieve it in future sessions using `search_context`.
|
|
100
|
+
```
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mcp-context-memory"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "Semantic Project Brain MCP Server with AST Parsing"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10, <3.13"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"chromadb==1.5.5",
|
|
9
|
+
"fastmcp==3.1.1",
|
|
10
|
+
"sentence-transformers==5.3.0",
|
|
11
|
+
"tree-sitter==0.25.2",
|
|
12
|
+
"tree-sitter-languages==1.10.2",
|
|
13
|
+
"beautifulsoup4==4.14.3",
|
|
14
|
+
"pathspec==1.0.4",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.scripts]
|
|
18
|
+
mcp-context-memory = "mcp_context_memory:mcp.run"
|
|
19
|
+
|
|
20
|
+
[dependency-groups]
|
|
21
|
+
dev = [
|
|
22
|
+
"pytest==9.0.2",
|
|
23
|
+
"pytest-mock==3.15.1",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[build-system]
|
|
27
|
+
requires = ["hatchling"]
|
|
28
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import List, Dict, Any
|
|
4
|
+
from fastmcp import FastMCP
|
|
5
|
+
import chromadb
|
|
6
|
+
import pathspec
|
|
7
|
+
from bs4 import BeautifulSoup
|
|
8
|
+
from tree_sitter_languages import get_parser
|
|
9
|
+
|
|
10
|
+
# Initialize the MCP server
mcp = FastMCP("Semantic Project Brain")

# Ensure the database directory exists relative to the current working directory
DB_PATH = os.path.join(os.getcwd(), ".context_db")
os.makedirs(DB_PATH, exist_ok=True)

# Initialize ChromaDB client
client = chromadb.PersistentClient(path=DB_PATH)

# Collections
# code_semantics holds AST/HTML chunks written by index_project;
# project_decisions holds manual notes written by remember_decision.
code_collection = client.get_or_create_collection(name="code_semantics")
decisions_collection = client.get_or_create_collection(name="project_decisions")

# Supported extensions mapping to tree-sitter languages
# tree-sitter-languages bundles these grammars (Java, Python, PHP, JS, TS, HTML, CSS)
EXT_TO_LANG = {
    '.py': 'python',
    '.java': 'java',
    '.php': 'php',
    '.js': 'javascript',
    '.jsx': 'javascript',
    '.ts': 'typescript',
    '.tsx': 'tsx',  # Uses specialized TSX grammar
    '.css': 'css',
    '.html': 'html',
    '.htm': 'html'
}

# Default directories to exclude from indexing
DEFAULT_EXCLUDES = {'node_modules', 'venv', '.venv', '.git', '__pycache__', '.context_db', 'build', 'dist', '.idea', '.vscode'}

# Target AST Node Types for semantic extraction
# Keys must match the grammar names used in EXT_TO_LANG values.
TARGET_TYPES = {
    'python': {'class_definition', 'function_definition'},
    'java': {'class_declaration', 'method_declaration'},
    'javascript': {'class_declaration', 'method_definition', 'function_declaration'},
    'typescript': {'class_declaration', 'method_definition', 'function_declaration'},
    'tsx': {'class_declaration', 'method_definition', 'function_declaration'},
    'php': {'class_declaration', 'method_declaration', 'function_declaration'},
    'css': {'rule_set', 'media_statement'},
    'html': {'element'}  # For tree-sitter HTML, but we use BeautifulSoup as a 'specialized parser' as requested
}
|
|
53
|
+
|
|
54
|
+
def _get_gitignore_spec(path: str) -> pathspec.PathSpec:
    """Build a PathSpec from the directory's .gitignore; empty spec if absent."""
    patterns: List[str] = []
    gitignore_file = os.path.join(path, '.gitignore')
    if os.path.exists(gitignore_file):
        with open(gitignore_file, 'r', encoding='utf-8') as handle:
            patterns = handle.readlines()
    return pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, patterns)
|
|
62
|
+
|
|
63
|
+
def extract_ast_nodes(source_code: bytes, lang: str) -> List[Dict[str, Any]]:
    """Extract semantic AST nodes (classes, methods, functions) using tree-sitter.

    Args:
        source_code: Raw file bytes (tree-sitter parses bytes, not str).
        lang: A tree-sitter-languages grammar name (e.g. 'python', 'java').

    Returns:
        A list of dicts with keys 'node_type', 'text', 'start_line' and
        'end_line' (1-based). Returns [] if parsing fails for any reason.
    """
    try:
        parser = get_parser(lang)
        tree = parser.parse(source_code)

        extracted = []
        target_types = TARGET_TYPES.get(lang, set())

        # Iterative preorder DFS instead of recursion: deeply nested sources
        # (e.g. generated or minified JS) can exceed Python's recursion limit
        # and raise RecursionError, which the old recursive traversal did.
        stack = [tree.root_node]
        while stack:
            node = stack.pop()
            if node.type in target_types:
                extracted.append({
                    'node_type': node.type,
                    'text': source_code[node.start_byte:node.end_byte].decode('utf-8', errors='ignore'),
                    'start_line': node.start_point[0] + 1,
                    'end_line': node.end_point[0] + 1
                })
            # Push children reversed so they are popped in document order,
            # preserving the output order of the original recursive version.
            stack.extend(reversed(node.children))

        return extracted
    except Exception as e:
        print(f"Failed to parse {lang}: {e}")
        return []
|
|
88
|
+
|
|
89
|
+
def extract_html_semantics(content: str) -> List[Dict[str, Any]]:
    """
    Pull indexable structural chunks out of an HTML document with BeautifulSoup.
    Fulfills the 'specialized parser' requirement for HTML structure.
    """
    structural_tags = ['h1', 'h2', 'h3', 'section', 'article', 'div', 'form', 'nav', 'header', 'footer']
    soup = BeautifulSoup(content, 'html.parser')
    chunks: List[Dict[str, Any]] = []
    for element in soup.find_all(structural_tags):
        flattened = element.get_text(separator=' ', strip=True)
        # Skip wrappers with little or no text content.
        if len(flattened) <= 30:
            continue
        chunks.append({
            'node_type': f"html_{element.name}",
            'text': flattened[:1500],  # Cap chunk size for vectorization efficiency
            'start_line': 0,  # BS4 doesn't easily provide line numbers
            'end_line': 0
        })
    return chunks
|
|
107
|
+
|
|
108
|
+
@mcp.tool()
def index_project(path: str) -> str:
    """
    Scans the directory, respects .gitignore, and performs AST-based decomposition
    to index class definitions, methods, and structural HTML/CSS into 'code_semantics'.

    Args:
        path: The absolute or relative path of the directory to index.

    Returns:
        A human-readable summary of how many files and semantic units were
        indexed, or an error string if `path` is not a valid directory.
    """
    if not os.path.exists(path) or not os.path.isdir(path):
        return f"Error: Path '{path}' is not a valid directory."

    spec = _get_gitignore_spec(path)
    files_indexed = 0
    chunks_indexed = 0

    for root, dirs, files in os.walk(path):
        # Prevent traversal into excluded directories
        dirs[:] = [d for d in dirs if d not in DEFAULT_EXCLUDES and not d.startswith('.')]

        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if ext not in EXT_TO_LANG:
                continue

            file_path = os.path.join(root, file)
            rel_path = os.path.relpath(file_path, path)

            # Skip files ignored by git
            if spec.match_file(rel_path) or any(p in rel_path.split(os.sep) for p in DEFAULT_EXCLUDES):
                continue

            try:
                with open(file_path, 'rb') as f:
                    raw_content = f.read()
            except Exception:
                # Unreadable file (permissions, transient IO error): skip it.
                continue

            chunks = []
            if ext in {'.html', '.htm'}:
                # Use BeautifulSoup for HTML as requested
                content = raw_content.decode('utf-8', errors='ignore')
                chunks = extract_html_semantics(content)
            else:
                # Use Tree-sitter for all other languages (Python, Java, PHP, JS, TS, CSS)
                lang = EXT_TO_LANG[ext]
                chunks = extract_ast_nodes(raw_content, lang)

            if not chunks:
                continue

            docs = []
            metadatas = []
            ids = []

            for i, chunk in enumerate(chunks):
                # Deterministic IDs: re-indexing the same file overwrites its
                # previous chunks via upsert() instead of piling up duplicates.
                # (The old scheme appended a random uuid4 suffix, so every
                # re-index grew the collection by the full chunk count.)
                # NOTE(review): if a file shrinks, chunks beyond its new count
                # remain until a manual reset; acceptable for a local cache.
                doc_id = f"{rel_path}::{i}"
                docs.append(chunk['text'])
                metadatas.append({
                    "file_path": rel_path,
                    "node_type": chunk['node_type'],
                    "start_line": chunk['start_line'],
                    "end_line": chunk['end_line']
                })
                ids.append(doc_id)

            if docs:
                code_collection.upsert(documents=docs, metadatas=metadatas, ids=ids)
                files_indexed += 1
                chunks_indexed += len(chunks)

    return f"Successfully semantically indexed {files_indexed} files into {chunks_indexed} semantic units from '{path}'."
|
|
180
|
+
|
|
181
|
+
@mcp.tool()
def remember_decision(topic: str, context: str) -> str:
    """
    Persist a manual architectural note — the 'Why' behind a design choice —
    so it can be retrieved later via search_context.

    Args:
        topic: A short topic or category name for this decision.
        context: The detailed reasoning, architectural decision, or context.
    """
    try:
        doc_id = str(uuid.uuid4())
        decisions_collection.add(
            documents=[context],
            metadatas=[{"topic": topic}],
            ids=[doc_id],
        )
    except Exception as e:
        return f"Error storing decision: {str(e)}"
    return f"Decision successfully stored under topic '{topic}' with ID: {doc_id}"
|
|
200
|
+
|
|
201
|
+
@mcp.tool()
def search_context(query: str) -> str:
    """
    Unified semantic search over both indexed code structures (AST nodes)
    and stored project decisions, merged and ranked by relevance.

    Args:
        query: The search query to find relevant context.
    """
    try:
        hits = []

        def _collect(collection, label, limit):
            # Best-effort: an empty or unavailable collection must not
            # abort the overall search.
            try:
                res = collection.query(
                    query_texts=[query], n_results=limit, include=["documents", "metadatas", "distances"]
                )
                if res and res["documents"] and res["documents"][0]:
                    rows = zip(res["documents"][0], res["metadatas"][0], res["distances"][0])
                    for doc, meta, dist in rows:
                        hits.append({"source": label, "content": doc, "meta": meta, "distance": dist})
            except Exception:
                pass

        _collect(code_collection, "Code Semantics", 4)
        _collect(decisions_collection, "Project Decision", 2)

        if not hits:
            return "No relevant context found in Code Semantics or Project Decisions."

        # Lower distance == more relevant under ChromaDB's default L2 metric.
        hits.sort(key=lambda h: h["distance"])

        rendered = []
        for idx, hit in enumerate(hits):
            source = hit["source"]
            meta = hit["meta"] or {}

            if source == "Project Decision":
                topic = meta.get("topic", "Uncategorized")
                header = f"--- Result {idx + 1} ({source} | Topic: {topic}) ---"
            else:
                file_path = meta.get("file_path", "Unknown")
                node_type = meta.get("node_type", "Unknown")
                start_line = meta.get("start_line", 0)
                end_line = meta.get("end_line", 0)
                # HTML chunks carry start_line == 0 and are labelled 'Structural'.
                lines = f"Lines {start_line}-{end_line}" if start_line > 0 else "Structural"
                header = f"--- Result {idx + 1} ({source} | File: {file_path} | Type: {node_type} | {lines}) ---"

            rendered.append(f"{header}\n{hit['content']}\n")

        return "\n".join(rendered)
    except Exception as e:
        return f"Error querying context: {str(e)}"
|
|
261
|
+
|
|
262
|
+
if __name__ == "__main__":
    # Start the MCP server when this module is executed directly.
    mcp.run()
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
3
|
+
import mcp_context_memory
|
|
4
|
+
|
|
5
|
+
@pytest.fixture
def mock_collections():
    """Patch both Chroma collections; yields (code, decisions) mocks."""
    with patch('mcp_context_memory.code_collection') as mock_code, \
         patch('mcp_context_memory.decisions_collection') as mock_decisions:
        yield mock_code, mock_decisions
|
|
10
|
+
|
|
11
|
+
def test_remember_decision(mock_collections):
    """A stored decision reports success and forwards topic/context to Chroma."""
    _, mock_decisions = mock_collections
    topic, context = "Tech Stack", "We use Symfony 7"

    result = mcp_context_memory.remember_decision(topic, context)

    assert "Decision successfully stored" in result
    assert topic in result
    mock_decisions.add.assert_called_once()
    _, kwargs = mock_decisions.add.call_args
    assert kwargs['documents'] == [context]
    assert kwargs['metadatas'] == [{"topic": topic}]
|
|
24
|
+
|
|
25
|
+
def test_search_context_no_results(mock_collections):
    """With both collections empty, the search reports no context found."""
    mock_code, mock_decisions = mock_collections
    empty = {"documents": [[]], "metadatas": [[]], "distances": [[]]}
    mock_code.query.return_value = dict(empty)
    mock_decisions.query.return_value = dict(empty)

    result = mcp_context_memory.search_context("nothing")

    assert "No relevant context found" in result
|
|
33
|
+
|
|
34
|
+
def test_search_context_with_results(mock_collections):
    """Hits from both collections appear in the merged search output."""
    mock_code, mock_decisions = mock_collections

    mock_code.query.return_value = {
        "documents": [["class MyClass: pass"]],
        "metadatas": [[{"file_path": "test.py", "node_type": "class_definition", "start_line": 1, "end_line": 2}]],
        "distances": [[0.1]],
    }
    mock_decisions.query.return_value = {
        "documents": [["Use Symfony because it is robust"]],
        "metadatas": [[{"topic": "Tech Stack"}]],
        "distances": [[0.2]],
    }

    output = mcp_context_memory.search_context("MyClass")

    for expected in ("Code Semantics", "Project Decision", "MyClass", "Symfony"):
        assert expected in output
|