code-explore-by-sql 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. code_explore_by_sql-0.1.0/.gitignore +11 -0
  2. code_explore_by_sql-0.1.0/AGENTS.md +82 -0
  3. code_explore_by_sql-0.1.0/CLAUDE.md +15 -0
  4. code_explore_by_sql-0.1.0/LICENSE +21 -0
  5. code_explore_by_sql-0.1.0/MANIFEST.in +2 -0
  6. code_explore_by_sql-0.1.0/PKG-INFO +205 -0
  7. code_explore_by_sql-0.1.0/README.md +183 -0
  8. code_explore_by_sql-0.1.0/pyproject.toml +81 -0
  9. code_explore_by_sql-0.1.0/src/code_source_sql/__init__.py +9 -0
  10. code_explore_by_sql-0.1.0/src/code_source_sql/__main__.py +5 -0
  11. code_explore_by_sql-0.1.0/src/code_source_sql/bracket_scanner.py +385 -0
  12. code_explore_by_sql-0.1.0/src/code_source_sql/build_db.py +284 -0
  13. code_explore_by_sql-0.1.0/src/code_source_sql/code_block_summary.py +522 -0
  14. code_explore_by_sql-0.1.0/src/code_source_sql/configs.py +402 -0
  15. code_explore_by_sql-0.1.0/src/code_source_sql/db.py +625 -0
  16. code_explore_by_sql-0.1.0/src/code_source_sql/edge_extractor.py +183 -0
  17. code_explore_by_sql-0.1.0/src/code_source_sql/languages/__init__.py +31 -0
  18. code_explore_by_sql-0.1.0/src/code_source_sql/languages/c.py +118 -0
  19. code_explore_by_sql-0.1.0/src/code_source_sql/languages/cpp.py +106 -0
  20. code_explore_by_sql-0.1.0/src/code_source_sql/languages/csharp.py +103 -0
  21. code_explore_by_sql-0.1.0/src/code_source_sql/languages/glsl.py +162 -0
  22. code_explore_by_sql-0.1.0/src/code_source_sql/languages/go.py +91 -0
  23. code_explore_by_sql-0.1.0/src/code_source_sql/languages/hlsl.py +155 -0
  24. code_explore_by_sql-0.1.0/src/code_source_sql/languages/java.py +98 -0
  25. code_explore_by_sql-0.1.0/src/code_source_sql/languages/javascript.py +215 -0
  26. code_explore_by_sql-0.1.0/src/code_source_sql/languages/kotlin.py +108 -0
  27. code_explore_by_sql-0.1.0/src/code_source_sql/languages/python.py +105 -0
  28. code_explore_by_sql-0.1.0/src/code_source_sql/languages/rust.py +91 -0
  29. code_explore_by_sql-0.1.0/src/code_source_sql/languages/swift.py +116 -0
  30. code_explore_by_sql-0.1.0/src/code_source_sql/server.py +264 -0
  31. code_explore_by_sql-0.1.0/src/code_source_sql/symbol_analyzer.py +487 -0
  32. code_explore_by_sql-0.1.0/src/code_source_sql/unreal_rules.py +163 -0
@@ -0,0 +1,11 @@
1
+ .venv/
2
+ __pycache__/
3
+ .pytest_cache/
4
+ .ruff_cache/
5
+ *.pyc
6
+ *.db
7
+ *.db-shm
8
+ *.db-wal
9
+ .env
10
+ .DS_Store
11
+ .mcp.json
@@ -0,0 +1,82 @@
1
+ # AGENTS.md
2
+
3
+ This repository provides a local MCP server for source code retrieval using **SQLite FTS5** (trigram tokenizer) with a **bracket skeleton structural index**.
4
+
5
+ ## Architecture
6
+
7
+ One file = one row in `source_files`. FTS5 `snippet()` extracts relevant code fragments via a **two-step deferred query** (rank first, snippet only for top-N, truncated to 300 chars), producing compact ~2,600 token responses for 20 results (95% token reduction vs full snippets).
8
+
9
+ **Bracket skeleton index**: A 6-state FSM scans C/C++ source tracking brace pairs while ignoring braces in comments, strings, and raw string literals. Each brace pair records depth, open/close line, block type, block name, and `parent_id` (hierarchical parent-child relationships). This provides structural context without AST parsing — robust against macros and incomplete syntax.
10
+
11
+ **Symbol references**: Pre-computed identifier references tracked in `symbol_references` table. Supports fast lookup of which files reference a given symbol, with enclosing block context.
12
+
13
+ **Include dependency graph**: O(1) basename hash lookup resolves 96.5% of include paths. Supports upstream/downstream traversal with configurable recursion depth.
14
+
15
+ **History-as-ranking-signal**: Past search feedback adjusts result ranking but never filters out results. This prevents confirmation bias while still accelerating relevant results.
16
+
17
+ ### Token cost quick reference
18
+
19
+ | Operation | ~Tokens | Note |
20
+ |-----------|---------|------|
21
+ | `search_code_source` (20 results) | ~2,600 | Compact 300-char snippets |
22
+ | `get_file_content(anchor=...)` | ~125 | **Always prefer** over full read |
23
+ | `get_file_content` (full file) | ~45,000 | Avoid — use anchor or line range |
24
+ | `find_references` (pre-computed) | 50–500 | **Try first** for symbol lookup |
25
+ | `find_callers` (specific symbol) | 127–3,000 | Use `scope` for common symbols |
26
+ | `find_include_graph` | 50–2,100 | Cheap — use freely |
27
+
28
+ ## Tools (7)
29
+
30
+ 1. **`search_code_source`** — FTS5 search with history ranking, scope filtering, compact snippets.
31
+ - Simple: `query="GetGBuffer"`
32
+ - Advanced: `raw_query='"GetGBuffer" AND "Emissive"'`
33
+ - `scope_filter` is a **dict**: `{"block_type": "function"}` (no JSON string needed)
34
+ - `module="Renderer"` — filter by module name
35
+
36
+ 2. **`get_file_content`** — Read file content. Prefer **anchor mode** for efficiency.
37
+ - Anchor: `anchor="Render", context_chars=500` (~125 tokens)
38
+ - Line range: `start_line=100, end_line=200`
39
+ - Auto-records feedback from search results
40
+
41
+ 3. **`log_code_query`** — Record explicit feedback (optional, only to correct automatic feedback)
42
+
43
+ 4. **`find_include_graph`** — Include dependency graph (upstream/downstream, recursive, depth control)
44
+
45
+ 5. **`find_callers`** — Dynamic caller lookup via FTS5 + full file scan + bracket matching.
46
+ - Returns exact `caller_line` per call site
47
+ - **Always use `scope`** for common symbols like "Render" (500+ callers otherwise)
48
+
49
+ 6. **`find_references`** — Pre-computed symbol references from `symbol_references` table.
50
+ - Fast lookup: `find_references("BeginPlay", limit=100)`
51
+ - Returns file paths and enclosing block info
52
+ - Returns an empty list if the `symbol_references` table has not been populated yet
53
+
54
+ 7. **`get_directory_structure`** — Discover valid module names and directory layout.
55
+ - Returns `modules` (top 30 by file count), `top_dirs`, `total_files`
56
+ - Use `module_name` as `module` param in `search_code_source` or `scope` in `find_callers`
57
+
58
+ ## Recommended flow
59
+
60
+ 1. `search_code_source` → compact snippets (~2,600 tok)
61
+ 2. `get_file_content(anchor=...)` → deep context (~125 tok each)
62
+ 3. `find_callers` / `find_include_graph` → structural exploration
63
+ 4. `log_code_query` → only to correct feedback
64
+
65
+ ## FTS5 Query Syntax (for raw_query)
66
+
67
+ | Operator | Example |
68
+ |----------|---------|
69
+ | AND | `'"A" AND "B"'` |
70
+ | OR | `'"A" OR "B"'` |
71
+ | NOT | `'"A" NOT "B"'` |
72
+ | Grouping | `'("A" OR "B") AND "C"'` |
73
+ | Column filter | `'file_path : "BasePass"'` |
74
+
75
+ All terms must be at least 3 characters long. `NEAR` and prefix (`*`) queries do NOT work with the trigram tokenizer.
76
+
77
+ ## Guidance
78
+
79
+ - Use the `code-source-lookup` skill for detailed tool documentation and search strategy
80
+ - Avoid full file reads — anchor mode is 358x cheaper in tokens
81
+ - History feedback is automatic — no need to manually log unless correcting
82
+ - If the database has not been built yet, guide the user toward indexing first
@@ -0,0 +1,15 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Working Rules
6
+
7
+ ### Plan Writing
8
+
9
+ - Plans must be **complete and directly executable**. Every step must include concrete code, file paths, and parameter values. No TODO, TBD, or vague placeholders allowed.
10
+ - Break tasks into **minimum independently executable steps**. Each step does exactly one thing, with clear structure and explicit ordering.
11
+
12
+ ### Code Refactoring
13
+
14
+ - Refactored code must preserve **identical observable behavior** to the original. No changes to external API, output, or side effects.
15
+ - Before and after refactoring, run the same tests or verification steps to confirm behavioral equivalence.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 yanwei
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ include LICENSE
2
+ include README.md
@@ -0,0 +1,205 @@
1
+ Metadata-Version: 2.4
2
+ Name: code-explore-by-sql
3
+ Version: 0.1.0
4
+ Summary: SQLite FTS5 (trigram) MCP server for code source search.
5
+ Project-URL: Repository, https://github.com/didi514354875/code-explore-by-sql
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Software Development :: Code Generators
17
+ Classifier: Topic :: Text Processing :: Indexing
18
+ Requires-Python: >=3.10
19
+ Requires-Dist: mcp[cli]>=1.2.0
20
+ Requires-Dist: pydantic>=2.0
21
+ Description-Content-Type: text/markdown
22
+
23
+ # code-explore-by-sql
24
+
25
+ Local stdio MCP server for fast source code navigation using **SQLite FTS5** (trigram tokenizer) + **bracket skeleton indexing**.
26
+
27
+ ## Features
28
+
29
+ - **Full-text search**: FTS5 with trigram tokenizer for code-symbol-precise search (`GetGBuffer`, `FMaterial`, `UE_LOG`)
30
+ - **Symbol lookup**: read code by qualified name with fuzzy matching (`Jump` → `ACharacter::Jump`)
31
+ - **Bracket skeleton index**: lightweight structural indexing via FSM brace matching (no AST parser needed)
32
+ - **Support for 12 languages**: C, C++, C#, Go, HLSL, GLSL, Java, JavaScript, Kotlin, Python, Rust, Swift
33
+ - **Multi-database**: query multiple codebases simultaneously via `CODE_SOURCE_DBS`
34
+ - **Token-efficient responses**: compact snippets (~2,600 tokens/20 results, 95% reduction vs full file reads)
35
+
36
+ ## Installation
37
+
38
+ ### From PyPI (recommended)
39
+
40
+ ```bash
41
+ # Run the MCP server directly (no clone needed)
42
+ uvx code-explore-by-sql
43
+
44
+ # Or install persistently
45
+ pip install code-explore-by-sql
46
+ ```
47
+
48
+ ### Build a database
49
+
50
+ ```bash
51
+ # Build index for your codebase
52
+ uvx code-source-sql-build-db /path/to/source /path/to/output.db
53
+
54
+ # Smoke test with limited files
55
+ uvx code-source-sql-build-db /path/to/source /path/to/output.db --limit 1000
56
+ ```
57
+
58
+ Performance: ~84,700 files indexed in ~3.3 minutes on a 2-core machine.
59
+
60
+ ### Configure in MCP clients
61
+
62
+ **Claude Code** (`.claude/mcp.json`):
63
+ ```json
64
+ {
65
+ "mcpServers": {
66
+ "code-source-sql": {
67
+ "command": "uvx",
68
+ "args": ["code-explore-by-sql"],
69
+ "env": {
70
+ "CODE_SOURCE_DB": "/path/to/your/code.db",
71
+ "CODE_SOURCE_DBS": "/path/to/your/code.db:/path/to/another.db"
72
+ }
73
+ }
74
+ }
75
+ }
76
+ ```
77
+
78
+ **VS Code** (`.vscode/mcp.json`):
79
+ ```json
80
+ {
81
+ "servers": {
82
+ "code-source-sql": {
83
+ "type": "stdio",
84
+ "command": "uvx",
85
+ "args": ["code-explore-by-sql"],
86
+ "env": {
87
+ "CODE_SOURCE_DB": "/path/to/your/code.db"
88
+ }
89
+ }
90
+ }
91
+ }
92
+ ```
93
+
94
+ **OpenAI Codex** (`~/.codex/config.toml`):
95
+ ```toml
96
+ [mcp_servers.code-source-sql]
97
+ command = "uvx"
98
+ args = ["code-explore-by-sql"]
99
+
100
+ [mcp_servers.code-source-sql.env]
101
+ CODE_SOURCE_DB = "/path/to/your/code.db"
102
+ ```
103
+
104
+ **Hermes Agent** (`~/.hermes/config.yaml`):
105
+ ```yaml
106
+ mcp_servers:
107
+ code-source-sql:
108
+ command: uvx
109
+ args:
110
+ - code-explore-by-sql
111
+ env:
112
+ CODE_SOURCE_DB: /path/to/your/code.db
113
+ ```
114
+
115
+ ## Tools (5)
116
+
117
+ | Tool | Purpose |
118
+ |------|---------|
119
+ | `list_databases` | Discover available databases with stats |
120
+ | `search_fts_tool` | FTS5 search — locate code blocks by keyword or raw FTS5 query |
121
+ | `read_symbol` | Read symbol code by qualified name (exact or fuzzy) |
122
+ | `read_file_range` | Read source code by file path and line range |
123
+ | `get_directory_structure` | Module/file counts overview |
124
+
125
+ ### Multi-database
126
+
127
+ Each tool accepts an optional `db` parameter to select a database by alias. Aliases are derived from database filenames (`unreal.db` → `"unreal"`). Use `list_databases` to discover available aliases. Default (`db=""`) uses the primary database (`CODE_SOURCE_DB`).
128
+
129
+ ### Search query modes
130
+
131
+ **Simple mode** (`keyword`):
132
+ ```
133
+ keyword="GetGBuffer"
134
+ keyword="FMaterial Render"
135
+ ```
136
+
137
+ **Advanced mode** (`raw_query`) — full FTS5 boolean:
138
+ ```
139
+ raw_query='"GetGBuffer" AND "Emissive"'
140
+ raw_query='"Material" NOT "hlsl"'
141
+ raw_query='(file_path : "BasePass") AND "roughness"'
142
+ raw_query='(module_name : "Renderer") AND "VirtualTexture"'
143
+ ```
144
+
145
+ ### Three-level funnel
146
+
147
+ 1. **`search_fts_tool(keyword)`** → file candidates + block QNs
148
+ 2. **`search_fts_tool(raw_query, file_path filter)`** → precise block in target file
149
+ 3. **`read_symbol(block QN)`** or **`read_file_range(file, line)`** → full code
150
+
151
+ ## Architecture
152
+
153
+ ```
154
+ ┌──────────────────────────────────────────────────────────────┐
155
+ │ MCP Server (FastMCP) │
156
+ ├──────────┬──────────┬──────────┬──────────┬──────────────────┤
157
+ │ search │ read │ read │ get_dir │ list │
158
+ │ fts_tool │ _symbol │ _file │ _struct │ _databases │
159
+ │ │ │ _range │ │ │
160
+ ├──────────┴──────────┴──────────┴──────────┴──────────────────┤
161
+ │ Query Pipeline │
162
+ │ FTS5 trigram → Symbol match → Edge extraction │
163
+ ├──────────────────────────────────────────────────────────────┤
164
+ │ SQLite Database │
165
+ │ file_content + FTS5 │ symbol_index │ strict_edges │
166
+ └──────────────────────────────────────────────────────────────┘
167
+ ```
168
+
169
+ ### Bracket skeleton index
170
+
171
+ A 6-state finite state machine (CODE, LINE_COMMENT, BLOCK_COMMENT, STRING, CHAR_LITERAL, RAW_STRING) scans source code tracking brace pairs while correctly ignoring braces in comments and string literals. Each matched pair records `open_line`, `close_line`, `depth`, and `is_complete`.
172
+
173
+ Top-level blocks are classified by a **symbol analyzer** producing `block_type` (namespace/class/enum/function/macro) and `block_name` (qualified name).
174
+
175
+ ### Multi-database registry
176
+
177
+ Databases are registered via environment variables at server startup:
178
+ - `CODE_SOURCE_DB` — primary database (default when `db` is omitted)
179
+ - `CODE_SOURCE_DBS` — colon-separated list of additional databases
180
+
181
+ Aliases are auto-derived from filename stems. Connections are cached with health checks.
182
+
183
+ ## Environment Variables
184
+
185
+ | Variable | Required | Description |
186
+ |----------|----------|-------------|
187
+ | `CODE_SOURCE_DB` | Yes | Path to primary SQLite database |
188
+ | `CODE_SOURCE_DBS` | No | Colon-separated paths to additional databases |
189
+
190
+ ## Development
191
+
192
+ ```bash
193
+ # Clone and setup
194
+ git clone https://github.com/didi514354875/code-explore-by-sql.git
195
+ cd code-explore-by-sql
196
+ uv sync --dev
197
+
198
+ # Run tests
199
+ uv run pytest
200
+ uv run ruff check .
201
+ ```
202
+
203
+ ## License
204
+
205
+ MIT
@@ -0,0 +1,183 @@
1
+ # code-explore-by-sql
2
+
3
+ Local stdio MCP server for fast source code navigation using **SQLite FTS5** (trigram tokenizer) + **bracket skeleton indexing**.
4
+
5
+ ## Features
6
+
7
+ - **Full-text search**: FTS5 with trigram tokenizer for code-symbol-precise search (`GetGBuffer`, `FMaterial`, `UE_LOG`)
8
+ - **Symbol lookup**: read code by qualified name with fuzzy matching (`Jump` → `ACharacter::Jump`)
9
+ - **Bracket skeleton index**: lightweight structural indexing via FSM brace matching (no AST parser needed)
10
+ - **Support for 12 languages**: C, C++, C#, Go, HLSL, GLSL, Java, JavaScript, Kotlin, Python, Rust, Swift
11
+ - **Multi-database**: query multiple codebases simultaneously via `CODE_SOURCE_DBS`
12
+ - **Token-efficient responses**: compact snippets (~2,600 tokens/20 results, 95% reduction vs full file reads)
13
+
14
+ ## Installation
15
+
16
+ ### From PyPI (recommended)
17
+
18
+ ```bash
19
+ # Run the MCP server directly (no clone needed)
20
+ uvx code-explore-by-sql
21
+
22
+ # Or install persistently
23
+ pip install code-explore-by-sql
24
+ ```
25
+
26
+ ### Build a database
27
+
28
+ ```bash
29
+ # Build index for your codebase
30
+ uvx code-source-sql-build-db /path/to/source /path/to/output.db
31
+
32
+ # Smoke test with limited files
33
+ uvx code-source-sql-build-db /path/to/source /path/to/output.db --limit 1000
34
+ ```
35
+
36
+ Performance: ~84,700 files indexed in ~3.3 minutes on a 2-core machine.
37
+
38
+ ### Configure in MCP clients
39
+
40
+ **Claude Code** (`.claude/mcp.json`):
41
+ ```json
42
+ {
43
+ "mcpServers": {
44
+ "code-source-sql": {
45
+ "command": "uvx",
46
+ "args": ["code-explore-by-sql"],
47
+ "env": {
48
+ "CODE_SOURCE_DB": "/path/to/your/code.db",
49
+ "CODE_SOURCE_DBS": "/path/to/your/code.db:/path/to/another.db"
50
+ }
51
+ }
52
+ }
53
+ }
54
+ ```
55
+
56
+ **VS Code** (`.vscode/mcp.json`):
57
+ ```json
58
+ {
59
+ "servers": {
60
+ "code-source-sql": {
61
+ "type": "stdio",
62
+ "command": "uvx",
63
+ "args": ["code-explore-by-sql"],
64
+ "env": {
65
+ "CODE_SOURCE_DB": "/path/to/your/code.db"
66
+ }
67
+ }
68
+ }
69
+ }
70
+ ```
71
+
72
+ **OpenAI Codex** (`~/.codex/config.toml`):
73
+ ```toml
74
+ [mcp_servers.code-source-sql]
75
+ command = "uvx"
76
+ args = ["code-explore-by-sql"]
77
+
78
+ [mcp_servers.code-source-sql.env]
79
+ CODE_SOURCE_DB = "/path/to/your/code.db"
80
+ ```
81
+
82
+ **Hermes Agent** (`~/.hermes/config.yaml`):
83
+ ```yaml
84
+ mcp_servers:
85
+ code-source-sql:
86
+ command: uvx
87
+ args:
88
+ - code-explore-by-sql
89
+ env:
90
+ CODE_SOURCE_DB: /path/to/your/code.db
91
+ ```
92
+
93
+ ## Tools (5)
94
+
95
+ | Tool | Purpose |
96
+ |------|---------|
97
+ | `list_databases` | Discover available databases with stats |
98
+ | `search_fts_tool` | FTS5 search — locate code blocks by keyword or raw FTS5 query |
99
+ | `read_symbol` | Read symbol code by qualified name (exact or fuzzy) |
100
+ | `read_file_range` | Read source code by file path and line range |
101
+ | `get_directory_structure` | Module/file counts overview |
102
+
103
+ ### Multi-database
104
+
105
+ Each tool accepts an optional `db` parameter to select a database by alias. Aliases are derived from database filenames (`unreal.db` → `"unreal"`). Use `list_databases` to discover available aliases. Default (`db=""`) uses the primary database (`CODE_SOURCE_DB`).
106
+
107
+ ### Search query modes
108
+
109
+ **Simple mode** (`keyword`):
110
+ ```
111
+ keyword="GetGBuffer"
112
+ keyword="FMaterial Render"
113
+ ```
114
+
115
+ **Advanced mode** (`raw_query`) — full FTS5 boolean:
116
+ ```
117
+ raw_query='"GetGBuffer" AND "Emissive"'
118
+ raw_query='"Material" NOT "hlsl"'
119
+ raw_query='(file_path : "BasePass") AND "roughness"'
120
+ raw_query='(module_name : "Renderer") AND "VirtualTexture"'
121
+ ```
122
+
123
+ ### Three-level funnel
124
+
125
+ 1. **`search_fts_tool(keyword)`** → file candidates + block QNs
126
+ 2. **`search_fts_tool(raw_query, file_path filter)`** → precise block in target file
127
+ 3. **`read_symbol(block QN)`** or **`read_file_range(file, line)`** → full code
128
+
129
+ ## Architecture
130
+
131
+ ```
132
+ ┌──────────────────────────────────────────────────────────────┐
133
+ │ MCP Server (FastMCP) │
134
+ ├──────────┬──────────┬──────────┬──────────┬──────────────────┤
135
+ │ search │ read │ read │ get_dir │ list │
136
+ │ fts_tool │ _symbol │ _file │ _struct │ _databases │
137
+ │ │ │ _range │ │ │
138
+ ├──────────┴──────────┴──────────┴──────────┴──────────────────┤
139
+ │ Query Pipeline │
140
+ │ FTS5 trigram → Symbol match → Edge extraction │
141
+ ├──────────────────────────────────────────────────────────────┤
142
+ │ SQLite Database │
143
+ │ file_content + FTS5 │ symbol_index │ strict_edges │
144
+ └──────────────────────────────────────────────────────────────┘
145
+ ```
146
+
147
+ ### Bracket skeleton index
148
+
149
+ A 6-state finite state machine (CODE, LINE_COMMENT, BLOCK_COMMENT, STRING, CHAR_LITERAL, RAW_STRING) scans source code tracking brace pairs while correctly ignoring braces in comments and string literals. Each matched pair records `open_line`, `close_line`, `depth`, and `is_complete`.
150
+
151
+ Top-level blocks are classified by a **symbol analyzer** producing `block_type` (namespace/class/enum/function/macro) and `block_name` (qualified name).
152
+
153
+ ### Multi-database registry
154
+
155
+ Databases are registered via environment variables at server startup:
156
+ - `CODE_SOURCE_DB` — primary database (default when `db` is omitted)
157
+ - `CODE_SOURCE_DBS` — colon-separated list of additional databases
158
+
159
+ Aliases are auto-derived from filename stems. Connections are cached with health checks.
160
+
161
+ ## Environment Variables
162
+
163
+ | Variable | Required | Description |
164
+ |----------|----------|-------------|
165
+ | `CODE_SOURCE_DB` | Yes | Path to primary SQLite database |
166
+ | `CODE_SOURCE_DBS` | No | Colon-separated paths to additional databases |
167
+
168
+ ## Development
169
+
170
+ ```bash
171
+ # Clone and setup
172
+ git clone https://github.com/didi514354875/code-explore-by-sql.git
173
+ cd code-explore-by-sql
174
+ uv sync --dev
175
+
176
+ # Run tests
177
+ uv run pytest
178
+ uv run ruff check .
179
+ ```
180
+
181
+ ## License
182
+
183
+ MIT
@@ -0,0 +1,81 @@
1
+ [project]
2
+ name = "code-explore-by-sql"
3
+ version = "0.1.0"
4
+ description = "SQLite FTS5 (trigram) MCP server for code source search."
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.10"
8
+ dependencies = [
9
+ "mcp[cli]>=1.2.0",
10
+ "pydantic>=2.0",
11
+ ]
12
+
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Intended Audience :: Developers",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Topic :: Software Development :: Code Generators",
23
+ "Topic :: Text Processing :: Indexing",
24
+ ]
25
+
26
+ [project.urls]
27
+ Repository = "https://github.com/didi514354875/code-explore-by-sql"
28
+
29
+ [project.scripts]
30
+ code-source-sql = "code_source_sql:main"
31
+ code-source-sql-build-db = "code_source_sql.build_db:main"
32
+
33
+ [tool.hatch.build.targets.wheel]
34
+ packages = [
35
+ "src/code_source_sql",
36
+ ]
37
+
38
+ [tool.hatch.build.targets.sdist]
39
+ exclude = [
40
+ "*.db",
41
+ "*.db-shm",
42
+ "*.db-wal",
43
+ "*.db.bak*",
44
+ "*.log",
45
+ ".venv/",
46
+ "uv.lock",
47
+ "tests/",
48
+ "scripts/",
49
+ "ref/",
50
+ "references/",
51
+ ".claude/",
52
+ ".claude-plugin/",
53
+ ".github/",
54
+ ".vscode/",
55
+ "*.txt",
56
+ "simplePlan.md",
57
+ "problemArch.md",
58
+ "SKILL copy.md",
59
+ ]
60
+
61
+ [tool.pytest.ini_options]
62
+ pythonpath = ["src"]
63
+ testpaths = ["tests"]
64
+
65
+ [tool.ruff]
66
+ line-length = 120
67
+ src = ["src", "tests"]
68
+ target-version = "py310"
69
+
70
+ [tool.ruff.lint]
71
+ select = ["E", "F", "I", "UP", "B"]
72
+
73
+ [build-system]
74
+ requires = ["hatchling"]
75
+ build-backend = "hatchling.build"
76
+
77
+ [dependency-groups]
78
+ dev = [
79
+ "pytest>=9.0.3",
80
+ "ruff>=0.15.13",
81
+ ]
@@ -0,0 +1,9 @@
1
+ """UE Semantic Search — plan.md implementation.
2
+
3
+ Three-table architecture:
4
+ file_content (FTS5) -> symbol_index (QN + UE meta) -> strict_edges (4 types)
5
+ """
6
+
7
+ from .server import main
8
+
9
+ __all__ = ["main"]
@@ -0,0 +1,5 @@
1
+ """Allow running as: python -m code_source_sql"""
2
+ from .server import main
3
+
4
+ if __name__ == "__main__":
5
+ main()