code-explore-by-sql 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_explore_by_sql-0.1.0/.gitignore +11 -0
- code_explore_by_sql-0.1.0/AGENTS.md +82 -0
- code_explore_by_sql-0.1.0/CLAUDE.md +15 -0
- code_explore_by_sql-0.1.0/LICENSE +21 -0
- code_explore_by_sql-0.1.0/MANIFEST.in +2 -0
- code_explore_by_sql-0.1.0/PKG-INFO +205 -0
- code_explore_by_sql-0.1.0/README.md +183 -0
- code_explore_by_sql-0.1.0/pyproject.toml +81 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/__init__.py +9 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/__main__.py +5 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/bracket_scanner.py +385 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/build_db.py +284 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/code_block_summary.py +522 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/configs.py +402 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/db.py +625 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/edge_extractor.py +183 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/__init__.py +31 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/c.py +118 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/cpp.py +106 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/csharp.py +103 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/glsl.py +162 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/go.py +91 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/hlsl.py +155 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/java.py +98 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/javascript.py +215 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/kotlin.py +108 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/python.py +105 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/rust.py +91 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/languages/swift.py +116 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/server.py +264 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/symbol_analyzer.py +487 -0
- code_explore_by_sql-0.1.0/src/code_source_sql/unreal_rules.py +163 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
This repository provides a local MCP server for source code retrieval using **SQLite FTS5** (trigram tokenizer) with a **bracket skeleton structural index**.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
One file = one row in `source_files`. FTS5 `snippet()` extracts relevant code fragments via a **two-step deferred query** (rank first, snippet only for top-N, truncated to 300 chars), producing compact ~2,600 token responses for 20 results (95% token reduction vs full snippets).
|
|
8
|
+
|
|
9
|
+
**Bracket skeleton index**: A 6-state FSM scans C/C++ source tracking brace pairs while ignoring braces in comments, strings, and raw string literals. Each brace pair records depth, open/close line, block type, block name, and `parent_id` (hierarchical parent-child relationships). This provides structural context without AST parsing — robust against macros and incomplete syntax.
|
|
10
|
+
|
|
11
|
+
**Symbol references**: Pre-computed identifier references tracked in `symbol_references` table. Supports fast lookup of which files reference a given symbol, with enclosing block context.
|
|
12
|
+
|
|
13
|
+
**Include dependency graph**: O(1) basename hash lookup resolves 96.5% of include paths. Supports upstream/downstream traversal with configurable recursion depth.
|
|
14
|
+
|
|
15
|
+
**History-as-ranking-signal**: Past search feedback adjusts result ranking but never filters out results. This prevents confirmation bias while still accelerating relevant results.
|
|
16
|
+
|
|
17
|
+
### Token cost quick reference
|
|
18
|
+
|
|
19
|
+
| Operation | ~Tokens | Note |
|
|
20
|
+
|-----------|---------|------|
|
|
21
|
+
| `search_code_source` (20 results) | ~2,600 | Compact 300-char snippets |
|
|
22
|
+
| `get_file_content(anchor=...)` | ~125 | **Always prefer** over full read |
|
|
23
|
+
| `get_file_content` (full file) | ~45,000 | Avoid — use anchor or line range |
|
|
24
|
+
| `find_references` (pre-computed) | 50–500 | **Try first** for symbol lookup |
|
|
25
|
+
| `find_callers` (specific symbol) | 127–3,000 | Use `scope` for common symbols |
|
|
26
|
+
| `find_include_graph` | 50–2,100 | Cheap — use freely |
|
|
27
|
+
|
|
28
|
+
## Tools (7)
|
|
29
|
+
|
|
30
|
+
1. **`search_code_source`** — FTS5 search with history ranking, scope filtering, compact snippets.
|
|
31
|
+
- Simple: `query="GetGBuffer"`
|
|
32
|
+
- Advanced: `raw_query='"GetGBuffer" AND "Emissive"'`
|
|
33
|
+
- `scope_filter` is a **dict**: `{"block_type": "function"}` (no JSON string needed)
|
|
34
|
+
- `module="Renderer"` — filter by module name
|
|
35
|
+
|
|
36
|
+
2. **`get_file_content`** — Read file content. Prefer **anchor mode** for efficiency.
|
|
37
|
+
- Anchor: `anchor="Render", context_chars=500` (~125 tokens)
|
|
38
|
+
- Line range: `start_line=100, end_line=200`
|
|
39
|
+
- Auto-records feedback from search results
|
|
40
|
+
|
|
41
|
+
3. **`log_code_query`** — Record explicit feedback (optional, only to correct automatic feedback)
|
|
42
|
+
|
|
43
|
+
4. **`find_include_graph`** — Include dependency graph (upstream/downstream, recursive, depth control)
|
|
44
|
+
|
|
45
|
+
5. **`find_callers`** — Dynamic caller lookup via FTS5 + full file scan + bracket matching.
|
|
46
|
+
- Returns exact `caller_line` per call site
|
|
47
|
+
- **Always use `scope`** for common symbols like "Render" (500+ callers otherwise)
|
|
48
|
+
|
|
49
|
+
6. **`find_references`** — Pre-computed symbol references from `symbol_references` table.
|
|
50
|
+
- Fast lookup: `find_references("BeginPlay", limit=100)`
|
|
51
|
+
- Returns file paths and enclosing block info
|
|
52
|
+
- Falls back to an empty list if the table is not yet populated
|
|
53
|
+
|
|
54
|
+
7. **`get_directory_structure`** — Discover valid module names and directory layout.
|
|
55
|
+
- Returns `modules` (top 30 by file count), `top_dirs`, `total_files`
|
|
56
|
+
- Use `module_name` as `module` param in `search_code_source` or `scope` in `find_callers`
|
|
57
|
+
|
|
58
|
+
## Recommended flow
|
|
59
|
+
|
|
60
|
+
1. `search_code_source` → compact snippets (~2,600 tok)
|
|
61
|
+
2. `get_file_content(anchor=...)` → deep context (~125 tok each)
|
|
62
|
+
3. `find_callers` / `find_include_graph` → structural exploration
|
|
63
|
+
4. `log_code_query` → only to correct feedback
|
|
64
|
+
|
|
65
|
+
## FTS5 Query Syntax (for raw_query)
|
|
66
|
+
|
|
67
|
+
| Operator | Example |
|
|
68
|
+
|----------|---------|
|
|
69
|
+
| AND | `'"A" AND "B"'` |
|
|
70
|
+
| OR | `'"A" OR "B"'` |
|
|
71
|
+
| NOT | `'"A" NOT "B"'` |
|
|
72
|
+
| Grouping | `'("A" OR "B") AND "C"'` |
|
|
73
|
+
| Column filter | `'file_path : "BasePass"'` |
|
|
74
|
+
|
|
75
|
+
All terms must be 3+ characters. NEAR and prefix (`*`) do NOT work with the trigram tokenizer.
|
|
76
|
+
|
|
77
|
+
## Guidance
|
|
78
|
+
|
|
79
|
+
- Use the `code-source-lookup` skill for detailed tool documentation and search strategy
|
|
80
|
+
- Avoid full file reads — anchor mode is 358x cheaper in tokens
|
|
81
|
+
- History feedback is automatic — no need to manually log unless correcting
|
|
82
|
+
- If the database has not been built yet, guide the user toward indexing first
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Working Rules
|
|
6
|
+
|
|
7
|
+
### Plan Writing
|
|
8
|
+
|
|
9
|
+
- Plans must be **complete and directly executable**. Every step must include concrete code, file paths, and parameter values. No TODO, TBD, or vague placeholders allowed.
|
|
10
|
+
- Break tasks into **minimum independently executable steps**. Each step does exactly one thing, with clear structure and explicit ordering.
|
|
11
|
+
|
|
12
|
+
### Code Refactoring
|
|
13
|
+
|
|
14
|
+
- Refactored code must preserve **identical observable behavior** to the original. No changes to external API, output, or side effects.
|
|
15
|
+
- Before and after refactoring, run the same tests or verification steps to confirm behavioral equivalence.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 yanwei
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-explore-by-sql
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SQLite FTS5 (trigram) MCP server for code source search.
|
|
5
|
+
Project-URL: Repository, https://github.com/didi514354875/code-explore-by-sql
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
17
|
+
Classifier: Topic :: Text Processing :: Indexing
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Requires-Dist: mcp[cli]>=1.2.0
|
|
20
|
+
Requires-Dist: pydantic>=2.0
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# code-explore-by-sql
|
|
24
|
+
|
|
25
|
+
Local stdio MCP server for fast source code navigation using **SQLite FTS5** (trigram tokenizer) + **bracket skeleton indexing**.
|
|
26
|
+
|
|
27
|
+
## Features
|
|
28
|
+
|
|
29
|
+
- **Full-text search**: FTS5 with trigram tokenizer for code-symbol-precise search (`GetGBuffer`, `FMaterial`, `UE_LOG`)
|
|
30
|
+
- **Symbol lookup**: read code by qualified name with fuzzy matching (`Jump` → `ACharacter::Jump`)
|
|
31
|
+
- **Bracket skeleton index**: lightweight structural indexing via FSM brace matching (no AST parser needed)
|
|
32
|
+
- **Support for 12 languages**: C, C++, C#, Go, HLSL, GLSL, Java, JavaScript, Kotlin, Python, Rust, Swift
|
|
33
|
+
- **Multi-database**: query multiple codebases simultaneously via `CODE_SOURCE_DBS`
|
|
34
|
+
- **Token-efficient responses**: compact snippets (~2,600 tokens/20 results, 95% reduction vs full file reads)
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
### From PyPI (recommended)
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Run the MCP server directly (no clone needed)
|
|
42
|
+
uvx --from code-explore-by-sql code-source-sql
|
|
43
|
+
|
|
44
|
+
# Or install persistently
|
|
45
|
+
pip install code-explore-by-sql
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Build a database
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# Build index for your codebase
|
|
52
|
+
uvx --from code-explore-by-sql code-source-sql-build-db /path/to/source /path/to/output.db
|
|
53
|
+
|
|
54
|
+
# Smoke test with limited files
|
|
55
|
+
uvx --from code-explore-by-sql code-source-sql-build-db /path/to/source /path/to/output.db --limit 1000
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Performance: ~84,700 files indexed in ~3.3 minutes on a 2-core machine.
|
|
59
|
+
|
|
60
|
+
### Configure in MCP clients
|
|
61
|
+
|
|
62
|
+
**Claude Code** (`.claude/mcp.json`):
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mcpServers": {
|
|
66
|
+
"code-source-sql": {
|
|
67
|
+
"command": "uvx",
|
|
68
|
+
"args": ["--from", "code-explore-by-sql", "code-source-sql"],
|
|
69
|
+
"env": {
|
|
70
|
+
"CODE_SOURCE_DB": "/path/to/your/code.db",
|
|
71
|
+
"CODE_SOURCE_DBS": "/path/to/your/code.db:/path/to/another.db"
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**VS Code** (`.vscode/mcp.json`):
|
|
79
|
+
```json
|
|
80
|
+
{
|
|
81
|
+
"servers": {
|
|
82
|
+
"code-source-sql": {
|
|
83
|
+
"type": "stdio",
|
|
84
|
+
"command": "uvx",
|
|
85
|
+
"args": ["--from", "code-explore-by-sql", "code-source-sql"],
|
|
86
|
+
"env": {
|
|
87
|
+
"CODE_SOURCE_DB": "/path/to/your/code.db"
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**OpenAI Codex** (`~/.codex/config.toml`):
|
|
95
|
+
```toml
|
|
96
|
+
[mcp_servers.code-source-sql]
|
|
97
|
+
command = "uvx"
|
|
98
|
+
args = ["--from", "code-explore-by-sql", "code-source-sql"]
|
|
99
|
+
|
|
100
|
+
[mcp_servers.code-source-sql.env]
|
|
101
|
+
CODE_SOURCE_DB = "/path/to/your/code.db"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**Hermes Agent** (`~/.hermes/config.yaml`):
|
|
105
|
+
```yaml
|
|
106
|
+
mcp_servers:
|
|
107
|
+
code-source-sql:
|
|
108
|
+
command: uvx
|
|
109
|
+
args:
|
|
110
|
+
- "--from"
- code-explore-by-sql
- code-source-sql
|
|
111
|
+
env:
|
|
112
|
+
CODE_SOURCE_DB: /path/to/your/code.db
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Tools (5)
|
|
116
|
+
|
|
117
|
+
| Tool | Purpose |
|
|
118
|
+
|------|---------|
|
|
119
|
+
| `list_databases` | Discover available databases with stats |
|
|
120
|
+
| `search_fts_tool` | FTS5 search — locate code blocks by keyword or raw FTS5 query |
|
|
121
|
+
| `read_symbol` | Read symbol code by qualified name (exact or fuzzy) |
|
|
122
|
+
| `read_file_range` | Read source code by file path and line range |
|
|
123
|
+
| `get_directory_structure` | Module/file counts overview |
|
|
124
|
+
|
|
125
|
+
### Multi-database
|
|
126
|
+
|
|
127
|
+
Each tool accepts an optional `db` parameter to select a database by alias. Aliases are derived from database filenames (`unreal.db` → `"unreal"`). Use `list_databases` to discover available aliases. Default (`db=""`) uses the primary database (`CODE_SOURCE_DB`).
|
|
128
|
+
|
|
129
|
+
### Search query modes
|
|
130
|
+
|
|
131
|
+
**Simple mode** (`keyword`):
|
|
132
|
+
```
|
|
133
|
+
keyword="GetGBuffer"
|
|
134
|
+
keyword="FMaterial Render"
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**Advanced mode** (`raw_query`) — full FTS5 boolean:
|
|
138
|
+
```
|
|
139
|
+
raw_query='"GetGBuffer" AND "Emissive"'
|
|
140
|
+
raw_query='"Material" NOT "hlsl"'
|
|
141
|
+
raw_query='(file_path : "BasePass") AND "roughness"'
|
|
142
|
+
raw_query='(module_name : "Renderer") AND "VirtualTexture"'
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Three-level funnel
|
|
146
|
+
|
|
147
|
+
1. **`search_fts_tool(keyword)`** → file candidates + block QNs
|
|
148
|
+
2. **`search_fts_tool(raw_query, file_path filter)`** → precise block in target file
|
|
149
|
+
3. **`read_symbol(block QN)`** or **`read_file_range(file, line)`** → full code
|
|
150
|
+
|
|
151
|
+
## Architecture
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
155
|
+
│ MCP Server (FastMCP) │
|
|
156
|
+
├──────────┬──────────┬──────────┬──────────┬──────────────────┤
|
|
157
|
+
│ search │ read │ read │ get_dir │ list │
|
|
158
|
+
│ fts_tool │ _symbol │ _file │ _struct │ _databases │
|
|
159
|
+
│ │ │ _range │ │ │
|
|
160
|
+
├──────────┴──────────┴──────────┴──────────┴──────────────────┤
|
|
161
|
+
│ Query Pipeline │
|
|
162
|
+
│ FTS5 trigram → Symbol match → Edge extraction │
|
|
163
|
+
├──────────────────────────────────────────────────────────────┤
|
|
164
|
+
│ SQLite Database │
|
|
165
|
+
│ file_content + FTS5 │ symbol_index │ strict_edges │
|
|
166
|
+
└──────────────────────────────────────────────────────────────┘
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Bracket skeleton index
|
|
170
|
+
|
|
171
|
+
A 6-state finite state machine (CODE, LINE_COMMENT, BLOCK_COMMENT, STRING, CHAR_LITERAL, RAW_STRING) scans source code tracking brace pairs while correctly ignoring braces in comments and string literals. Each matched pair records `open_line`, `close_line`, `depth`, and `is_complete`.
|
|
172
|
+
|
|
173
|
+
Top-level blocks are classified by a **symbol analyzer** producing `block_type` (namespace/class/enum/function/macro) and `block_name` (qualified name).
|
|
174
|
+
|
|
175
|
+
### Multi-database registry
|
|
176
|
+
|
|
177
|
+
Databases are registered via environment variables at server startup:
|
|
178
|
+
- `CODE_SOURCE_DB` — primary database (default when `db` is omitted)
|
|
179
|
+
- `CODE_SOURCE_DBS` — colon-separated list of additional databases
|
|
180
|
+
|
|
181
|
+
Aliases are auto-derived from filename stems. Connections are cached with health checks.
|
|
182
|
+
|
|
183
|
+
## Environment Variables
|
|
184
|
+
|
|
185
|
+
| Variable | Required | Description |
|
|
186
|
+
|----------|----------|-------------|
|
|
187
|
+
| `CODE_SOURCE_DB` | Yes | Path to primary SQLite database |
|
|
188
|
+
| `CODE_SOURCE_DBS` | No | Colon-separated paths to additional databases |
|
|
189
|
+
|
|
190
|
+
## Development
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
# Clone and setup
|
|
194
|
+
git clone https://github.com/didi514354875/code-explore-by-sql.git
|
|
195
|
+
cd code-explore-by-sql
|
|
196
|
+
uv sync --dev
|
|
197
|
+
|
|
198
|
+
# Run tests
|
|
199
|
+
uv run pytest
|
|
200
|
+
uv run ruff check .
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## License
|
|
204
|
+
|
|
205
|
+
MIT
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# code-explore-by-sql
|
|
2
|
+
|
|
3
|
+
Local stdio MCP server for fast source code navigation using **SQLite FTS5** (trigram tokenizer) + **bracket skeleton indexing**.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Full-text search**: FTS5 with trigram tokenizer for code-symbol-precise search (`GetGBuffer`, `FMaterial`, `UE_LOG`)
|
|
8
|
+
- **Symbol lookup**: read code by qualified name with fuzzy matching (`Jump` → `ACharacter::Jump`)
|
|
9
|
+
- **Bracket skeleton index**: lightweight structural indexing via FSM brace matching (no AST parser needed)
|
|
10
|
+
- **Support for 12 languages**: C, C++, C#, Go, HLSL, GLSL, Java, JavaScript, Kotlin, Python, Rust, Swift
|
|
11
|
+
- **Multi-database**: query multiple codebases simultaneously via `CODE_SOURCE_DBS`
|
|
12
|
+
- **Token-efficient responses**: compact snippets (~2,600 tokens/20 results, 95% reduction vs full file reads)
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
### From PyPI (recommended)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Run the MCP server directly (no clone needed)
|
|
20
|
+
uvx --from code-explore-by-sql code-source-sql
|
|
21
|
+
|
|
22
|
+
# Or install persistently
|
|
23
|
+
pip install code-explore-by-sql
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Build a database
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Build index for your codebase
|
|
30
|
+
uvx --from code-explore-by-sql code-source-sql-build-db /path/to/source /path/to/output.db
|
|
31
|
+
|
|
32
|
+
# Smoke test with limited files
|
|
33
|
+
uvx --from code-explore-by-sql code-source-sql-build-db /path/to/source /path/to/output.db --limit 1000
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Performance: ~84,700 files indexed in ~3.3 minutes on a 2-core machine.
|
|
37
|
+
|
|
38
|
+
### Configure in MCP clients
|
|
39
|
+
|
|
40
|
+
**Claude Code** (`.claude/mcp.json`):
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"mcpServers": {
|
|
44
|
+
"code-source-sql": {
|
|
45
|
+
"command": "uvx",
|
|
46
|
+
"args": ["--from", "code-explore-by-sql", "code-source-sql"],
|
|
47
|
+
"env": {
|
|
48
|
+
"CODE_SOURCE_DB": "/path/to/your/code.db",
|
|
49
|
+
"CODE_SOURCE_DBS": "/path/to/your/code.db:/path/to/another.db"
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**VS Code** (`.vscode/mcp.json`):
|
|
57
|
+
```json
|
|
58
|
+
{
|
|
59
|
+
"servers": {
|
|
60
|
+
"code-source-sql": {
|
|
61
|
+
"type": "stdio",
|
|
62
|
+
"command": "uvx",
|
|
63
|
+
"args": ["--from", "code-explore-by-sql", "code-source-sql"],
|
|
64
|
+
"env": {
|
|
65
|
+
"CODE_SOURCE_DB": "/path/to/your/code.db"
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
**OpenAI Codex** (`~/.codex/config.toml`):
|
|
73
|
+
```toml
|
|
74
|
+
[mcp_servers.code-source-sql]
|
|
75
|
+
command = "uvx"
|
|
76
|
+
args = ["--from", "code-explore-by-sql", "code-source-sql"]
|
|
77
|
+
|
|
78
|
+
[mcp_servers.code-source-sql.env]
|
|
79
|
+
CODE_SOURCE_DB = "/path/to/your/code.db"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
**Hermes Agent** (`~/.hermes/config.yaml`):
|
|
83
|
+
```yaml
|
|
84
|
+
mcp_servers:
|
|
85
|
+
code-source-sql:
|
|
86
|
+
command: uvx
|
|
87
|
+
args:
|
|
88
|
+
- "--from"
- code-explore-by-sql
- code-source-sql
|
|
89
|
+
env:
|
|
90
|
+
CODE_SOURCE_DB: /path/to/your/code.db
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Tools (5)
|
|
94
|
+
|
|
95
|
+
| Tool | Purpose |
|
|
96
|
+
|------|---------|
|
|
97
|
+
| `list_databases` | Discover available databases with stats |
|
|
98
|
+
| `search_fts_tool` | FTS5 search — locate code blocks by keyword or raw FTS5 query |
|
|
99
|
+
| `read_symbol` | Read symbol code by qualified name (exact or fuzzy) |
|
|
100
|
+
| `read_file_range` | Read source code by file path and line range |
|
|
101
|
+
| `get_directory_structure` | Module/file counts overview |
|
|
102
|
+
|
|
103
|
+
### Multi-database
|
|
104
|
+
|
|
105
|
+
Each tool accepts an optional `db` parameter to select a database by alias. Aliases are derived from database filenames (`unreal.db` → `"unreal"`). Use `list_databases` to discover available aliases. Default (`db=""`) uses the primary database (`CODE_SOURCE_DB`).
|
|
106
|
+
|
|
107
|
+
### Search query modes
|
|
108
|
+
|
|
109
|
+
**Simple mode** (`keyword`):
|
|
110
|
+
```
|
|
111
|
+
keyword="GetGBuffer"
|
|
112
|
+
keyword="FMaterial Render"
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**Advanced mode** (`raw_query`) — full FTS5 boolean:
|
|
116
|
+
```
|
|
117
|
+
raw_query='"GetGBuffer" AND "Emissive"'
|
|
118
|
+
raw_query='"Material" NOT "hlsl"'
|
|
119
|
+
raw_query='(file_path : "BasePass") AND "roughness"'
|
|
120
|
+
raw_query='(module_name : "Renderer") AND "VirtualTexture"'
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Three-level funnel
|
|
124
|
+
|
|
125
|
+
1. **`search_fts_tool(keyword)`** → file candidates + block QNs
|
|
126
|
+
2. **`search_fts_tool(raw_query, file_path filter)`** → precise block in target file
|
|
127
|
+
3. **`read_symbol(block QN)`** or **`read_file_range(file, line)`** → full code
|
|
128
|
+
|
|
129
|
+
## Architecture
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
133
|
+
│ MCP Server (FastMCP) │
|
|
134
|
+
├──────────┬──────────┬──────────┬──────────┬──────────────────┤
|
|
135
|
+
│ search │ read │ read │ get_dir │ list │
|
|
136
|
+
│ fts_tool │ _symbol │ _file │ _struct │ _databases │
|
|
137
|
+
│ │ │ _range │ │ │
|
|
138
|
+
├──────────┴──────────┴──────────┴──────────┴──────────────────┤
|
|
139
|
+
│ Query Pipeline │
|
|
140
|
+
│ FTS5 trigram → Symbol match → Edge extraction │
|
|
141
|
+
├──────────────────────────────────────────────────────────────┤
|
|
142
|
+
│ SQLite Database │
|
|
143
|
+
│ file_content + FTS5 │ symbol_index │ strict_edges │
|
|
144
|
+
└──────────────────────────────────────────────────────────────┘
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Bracket skeleton index
|
|
148
|
+
|
|
149
|
+
A 6-state finite state machine (CODE, LINE_COMMENT, BLOCK_COMMENT, STRING, CHAR_LITERAL, RAW_STRING) scans source code tracking brace pairs while correctly ignoring braces in comments and string literals. Each matched pair records `open_line`, `close_line`, `depth`, and `is_complete`.
|
|
150
|
+
|
|
151
|
+
Top-level blocks are classified by a **symbol analyzer** producing `block_type` (namespace/class/enum/function/macro) and `block_name` (qualified name).
|
|
152
|
+
|
|
153
|
+
### Multi-database registry
|
|
154
|
+
|
|
155
|
+
Databases are registered via environment variables at server startup:
|
|
156
|
+
- `CODE_SOURCE_DB` — primary database (default when `db` is omitted)
|
|
157
|
+
- `CODE_SOURCE_DBS` — colon-separated list of additional databases
|
|
158
|
+
|
|
159
|
+
Aliases are auto-derived from filename stems. Connections are cached with health checks.
|
|
160
|
+
|
|
161
|
+
## Environment Variables
|
|
162
|
+
|
|
163
|
+
| Variable | Required | Description |
|
|
164
|
+
|----------|----------|-------------|
|
|
165
|
+
| `CODE_SOURCE_DB` | Yes | Path to primary SQLite database |
|
|
166
|
+
| `CODE_SOURCE_DBS` | No | Colon-separated paths to additional databases |
|
|
167
|
+
|
|
168
|
+
## Development
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
# Clone and setup
|
|
172
|
+
git clone https://github.com/didi514354875/code-explore-by-sql.git
|
|
173
|
+
cd code-explore-by-sql
|
|
174
|
+
uv sync --dev
|
|
175
|
+
|
|
176
|
+
# Run tests
|
|
177
|
+
uv run pytest
|
|
178
|
+
uv run ruff check .
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## License
|
|
182
|
+
|
|
183
|
+
MIT
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "code-explore-by-sql"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "SQLite FTS5 (trigram) MCP server for code source search."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"mcp[cli]>=1.2.0",
|
|
10
|
+
"pydantic>=2.0",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Topic :: Software Development :: Code Generators",
|
|
23
|
+
"Topic :: Text Processing :: Indexing",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Repository = "https://github.com/didi514354875/code-explore-by-sql"
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
code-source-sql = "code_source_sql:main"
|
|
31
|
+
code-source-sql-build-db = "code_source_sql.build_db:main"
|
|
32
|
+
|
|
33
|
+
[tool.hatch.build.targets.wheel]
|
|
34
|
+
packages = [
|
|
35
|
+
"src/code_source_sql",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.sdist]
|
|
39
|
+
exclude = [
|
|
40
|
+
"*.db",
|
|
41
|
+
"*.db-shm",
|
|
42
|
+
"*.db-wal",
|
|
43
|
+
"*.db.bak*",
|
|
44
|
+
"*.log",
|
|
45
|
+
".venv/",
|
|
46
|
+
"uv.lock",
|
|
47
|
+
"tests/",
|
|
48
|
+
"scripts/",
|
|
49
|
+
"ref/",
|
|
50
|
+
"references/",
|
|
51
|
+
".claude/",
|
|
52
|
+
".claude-plugin/",
|
|
53
|
+
".github/",
|
|
54
|
+
".vscode/",
|
|
55
|
+
"*.txt",
|
|
56
|
+
"simplePlan.md",
|
|
57
|
+
"problemArch.md",
|
|
58
|
+
"SKILL copy.md",
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
[tool.pytest.ini_options]
|
|
62
|
+
pythonpath = ["src"]
|
|
63
|
+
testpaths = ["tests"]
|
|
64
|
+
|
|
65
|
+
[tool.ruff]
|
|
66
|
+
line-length = 120
|
|
67
|
+
src = ["src", "tests"]
|
|
68
|
+
target-version = "py310"
|
|
69
|
+
|
|
70
|
+
[tool.ruff.lint]
|
|
71
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
72
|
+
|
|
73
|
+
[build-system]
|
|
74
|
+
requires = ["hatchling"]
|
|
75
|
+
build-backend = "hatchling.build"
|
|
76
|
+
|
|
77
|
+
[dependency-groups]
|
|
78
|
+
dev = [
|
|
79
|
+
"pytest>=9.0.3",
|
|
80
|
+
"ruff>=0.15.13",
|
|
81
|
+
]
|