mcp-codebase-index 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ pip install -e ".[dev]"
27
+
28
+ - name: Lint with ruff
29
+ run: ruff check src/ tests/
30
+
31
+ - name: Run tests
32
+ run: pytest tests/ -v
@@ -0,0 +1,36 @@
1
+ name: Publish release assets
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.11"
20
+
21
+ - name: Install build tools
22
+ run: pip install build twine
23
+
24
+ - name: Build package
25
+ run: python -m build
26
+
27
- name: Upload assets to release
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Hand the tag to the shell through the environment rather than
    # interpolating the expression into the script text, so a tag name
    # containing shell metacharacters cannot be executed as code.
    RELEASE_TAG: ${{ github.event.release.tag_name }}
  run: gh release upload "$RELEASE_TAG" dist/*
31
+
32
+ - name: Publish to PyPI
33
+ env:
34
+ TWINE_USERNAME: __token__
35
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
36
+ run: twine upload dist/*
@@ -0,0 +1,57 @@
1
+ # ── Secrets & credentials ──────────────────────
2
+ .env
3
+ .env.*
4
+ !.env.example
5
+ *.pem
6
+ *.key
7
+ *.crt
8
+ *.p12
9
+ *.pfx
10
+ *.jks
11
+ credentials.json
12
+ service-account*.json
13
+ *secret*
14
+ !*secret*.py
15
+
16
+ # ── Python ─────────────────────────────────────
17
+ __pycache__/
18
+ *.py[cod]
19
+ *$py.class
20
+ *.egg-info/
21
+ dist/
22
+ build/
23
+ .eggs/
24
+ *.egg
25
+ *.so
26
+ *.whl
27
+
28
+ # ── Virtual environments ───────────────────────
29
+ .venv/
30
+ venv/
31
+ env/
32
+ ENV/
33
+
34
+ # ── IDE & editor ───────────────────────────────
35
+ .idea/
36
+ .vscode/
37
+ *.swp
38
+ *.swo
39
+ *~
40
+ .DS_Store
41
+ Thumbs.db
42
+
43
+ # ── Testing & linting ─────────────────────────
44
+ .mypy_cache/
45
+ .pytest_cache/
46
+ .ruff_cache/
47
+ .coverage
48
+ htmlcov/
49
+ .tox/
50
+
51
+ # ── Claude Code local state ───────────────────
52
+ .claude/
53
+
54
+ # ── Misc ───────────────────────────────────────
55
+ *.log
56
+ *.bak
57
+ *.tmp
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Michael Doyle
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,176 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-codebase-index
3
+ Version: 0.1.1
4
+ Summary: Structural codebase indexer with MCP server for AI-assisted development
5
+ Project-URL: Homepage, https://github.com/MikeRecognex/mcp-codebase-index
6
+ Project-URL: Repository, https://github.com/MikeRecognex/mcp-codebase-index
7
+ Author: Michael Doyle
8
+ License: MIT License
9
+
10
+ Copyright (c) 2026 Michael Doyle
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ License-File: LICENSE
30
+ Keywords: code-navigation,codebase,indexer,mcp,structural-analysis
31
+ Classifier: Development Status :: 3 - Alpha
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Topic :: Software Development :: Libraries
38
+ Requires-Python: >=3.11
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=8.0; extra == 'dev'
41
+ Requires-Dist: ruff>=0.5; extra == 'dev'
42
+ Provides-Extra: mcp
43
+ Requires-Dist: mcp>=1.0; extra == 'mcp'
44
+ Description-Content-Type: text/markdown
45
+
46
+ # mcp-codebase-index
47
+
48
+ A structural codebase indexer with an [MCP](https://modelcontextprotocol.io) server for AI-assisted development. Zero runtime dependencies — uses Python's `ast` module for Python analysis and regex for TypeScript/JS. Requires Python 3.11+.
49
+
50
+ ## What It Does
51
+
52
+ Indexes codebases by parsing source files into structural metadata -- functions, classes, imports, dependency graphs, and cross-file call chains -- then exposes 17 query tools via the Model Context Protocol, enabling Claude Code and other MCP clients to navigate codebases efficiently without reading entire files.
53
+
54
+ ## Language Support
55
+
56
+ | Language | Method | Extracts |
57
+ |----------|--------|----------|
58
+ | Python (`.py`) | AST parsing | Functions, classes, methods, imports, dependency graph |
59
+ | TypeScript/JS (`.ts`, `.tsx`, `.js`, `.jsx`) | Regex-based | Functions, arrow functions, classes, interfaces, type aliases, imports |
60
+ | Markdown/Text (`.md`, `.txt`, `.rst`) | Heading detection | Sections (# headings, underlines, numbered, ALL-CAPS) |
61
+ | Other | Generic | Line counts only |
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ pip install "mcp-codebase-index[mcp]"
67
+ ```
68
+
69
+ The `[mcp]` extra includes the MCP server dependency. Omit it if you only need the programmatic API.
70
+
71
+ For development (from a local clone):
72
+
73
+ ```bash
74
+ pip install -e ".[dev,mcp]"
75
+ ```
76
+
77
+ ## MCP Server
78
+
79
+ ### Running
80
+
81
+ ```bash
82
+ # As a console script
83
+ PROJECT_ROOT=/path/to/project mcp-codebase-index
84
+
85
+ # As a Python module
86
+ PROJECT_ROOT=/path/to/project python -m mcp_codebase_index.server
87
+ ```
88
+
89
+ `PROJECT_ROOT` specifies which directory to index. Defaults to the current working directory.
90
+
91
+ ### Configuring with Claude Code
92
+
93
+ Add to your project's `.mcp.json`:
94
+
95
+ ```json
96
+ {
97
+ "mcpServers": {
98
+ "codebase-index": {
99
+ "command": "mcp-codebase-index",
100
+ "env": {
101
+ "PROJECT_ROOT": "/path/to/project"
102
+ }
103
+ }
104
+ }
105
+ }
106
+ ```
107
+
108
+ Or using the Python module directly (useful if installed in a virtualenv):
109
+
110
+ ```json
111
+ {
112
+ "mcpServers": {
113
+ "codebase-index": {
114
+ "command": "/path/to/.venv/bin/python3",
115
+ "args": ["-m", "mcp_codebase_index.server"],
116
+ "env": {
117
+ "PROJECT_ROOT": "/path/to/project"
118
+ }
119
+ }
120
+ }
121
+ }
122
+ ```
123
+
124
+ ### Available Tools (17)
125
+
126
+ | Tool | Description |
127
+ |------|-------------|
128
+ | `get_project_summary` | File count, packages, top classes/functions |
129
+ | `list_files` | List indexed files with optional glob filter |
130
+ | `get_structure_summary` | Structure of a file or the whole project |
131
+ | `get_functions` | List functions with name, lines, params |
132
+ | `get_classes` | List classes with name, lines, methods, bases |
133
+ | `get_imports` | List imports with module, names, line |
134
+ | `get_function_source` | Full source of a function/method |
135
+ | `get_class_source` | Full source of a class |
136
+ | `find_symbol` | Find where a symbol is defined (file, line, type) |
137
+ | `get_dependencies` | What a symbol calls/uses |
138
+ | `get_dependents` | What calls/uses a symbol |
139
+ | `get_change_impact` | Direct + transitive dependents |
140
+ | `get_call_chain` | Shortest dependency path (BFS) |
141
+ | `get_file_dependencies` | Files imported by a given file |
142
+ | `get_file_dependents` | Files that import from a given file |
143
+ | `search_codebase` | Regex search across all files (max 100 results) |
144
+ | `reindex` | Re-index the project after file changes (MCP server only) |
145
+
146
+ ## Programmatic Usage
147
+
148
+ ```python
149
+ from mcp_codebase_index.project_indexer import ProjectIndexer
150
+ from mcp_codebase_index.query_api import create_project_query_functions
151
+
152
+ indexer = ProjectIndexer("/path/to/project", include_patterns=["**/*.py"])
153
+ index = indexer.index()
154
+ query_funcs = create_project_query_functions(index)
155
+
156
+ # Use query functions
157
+ print(query_funcs["get_project_summary"]())
158
+ print(query_funcs["find_symbol"]("MyClass"))
159
+ print(query_funcs["get_change_impact"]("some_function"))
160
+ ```
161
+
162
+ ## Development
163
+
164
+ ```bash
165
+ pip install -e ".[dev,mcp]"
166
+ pytest tests/ -v
167
+ ruff check src/ tests/
168
+ ```
169
+
170
+ ## References
171
+
172
+ The structural indexer was originally developed as part of the [RMLPlus](https://github.com/MikeRecognex/RMLPlus) project, an implementation of the [Recursive Language Models](https://arxiv.org/abs/2512.24601) framework.
173
+
174
+ ## License
175
+
176
+ MIT
@@ -0,0 +1,131 @@
1
+ # mcp-codebase-index
2
+
3
+ A structural codebase indexer with an [MCP](https://modelcontextprotocol.io) server for AI-assisted development. The core indexer has zero runtime dependencies — it uses Python's `ast` module for Python analysis and regex for TypeScript/JS; only the MCP server needs the optional `mcp` extra. Requires Python 3.11+.
4
+
5
+ ## What It Does
6
+
7
+ Indexes codebases by parsing source files into structural metadata -- functions, classes, imports, dependency graphs, and cross-file call chains -- then exposes 17 query tools via the Model Context Protocol, enabling Claude Code and other MCP clients to navigate codebases efficiently without reading entire files.
8
+
9
+ ## Language Support
10
+
11
+ | Language | Method | Extracts |
12
+ |----------|--------|----------|
13
+ | Python (`.py`, `.pyw`) | AST parsing | Functions, classes, methods, imports, dependency graph |
14
+ | TypeScript/JS (`.ts`, `.tsx`, `.js`, `.jsx`) | Regex-based | Functions, arrow functions, classes, interfaces, type aliases, imports |
15
+ | Markdown/Text (`.md`, `.txt`, `.rst`) | Heading detection | Sections (# headings, underlines, numbered, ALL-CAPS) |
16
+ | Other | Generic | Line counts only |
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install "mcp-codebase-index[mcp]"
22
+ ```
23
+
24
+ The `[mcp]` extra includes the MCP server dependency. Omit it if you only need the programmatic API.
25
+
26
+ For development (from a local clone):
27
+
28
+ ```bash
29
+ pip install -e ".[dev,mcp]"
30
+ ```
31
+
32
+ ## MCP Server
33
+
34
+ ### Running
35
+
36
+ ```bash
37
+ # As a console script
38
+ PROJECT_ROOT=/path/to/project mcp-codebase-index
39
+
40
+ # As a Python module
41
+ PROJECT_ROOT=/path/to/project python -m mcp_codebase_index.server
42
+ ```
43
+
44
+ `PROJECT_ROOT` specifies which directory to index. Defaults to the current working directory.
45
+
46
+ ### Configuring with Claude Code
47
+
48
+ Add to your project's `.mcp.json`:
49
+
50
+ ```json
51
+ {
52
+ "mcpServers": {
53
+ "codebase-index": {
54
+ "command": "mcp-codebase-index",
55
+ "env": {
56
+ "PROJECT_ROOT": "/path/to/project"
57
+ }
58
+ }
59
+ }
60
+ }
61
+ ```
62
+
63
+ Or using the Python module directly (useful if installed in a virtualenv):
64
+
65
+ ```json
66
+ {
67
+ "mcpServers": {
68
+ "codebase-index": {
69
+ "command": "/path/to/.venv/bin/python3",
70
+ "args": ["-m", "mcp_codebase_index.server"],
71
+ "env": {
72
+ "PROJECT_ROOT": "/path/to/project"
73
+ }
74
+ }
75
+ }
76
+ }
77
+ ```
78
+
79
+ ### Available Tools (17)
80
+
81
+ | Tool | Description |
82
+ |------|-------------|
83
+ | `get_project_summary` | File count, packages, top classes/functions |
84
+ | `list_files` | List indexed files with optional glob filter |
85
+ | `get_structure_summary` | Structure of a file or the whole project |
86
+ | `get_functions` | List functions with name, lines, params |
87
+ | `get_classes` | List classes with name, lines, methods, bases |
88
+ | `get_imports` | List imports with module, names, line |
89
+ | `get_function_source` | Full source of a function/method |
90
+ | `get_class_source` | Full source of a class |
91
+ | `find_symbol` | Find where a symbol is defined (file, line, type) |
92
+ | `get_dependencies` | What a symbol calls/uses |
93
+ | `get_dependents` | What calls/uses a symbol |
94
+ | `get_change_impact` | Direct + transitive dependents |
95
+ | `get_call_chain` | Shortest dependency path (BFS) |
96
+ | `get_file_dependencies` | Files imported by a given file |
97
+ | `get_file_dependents` | Files that import from a given file |
98
+ | `search_codebase` | Regex search across all files (max 100 results) |
99
+ | `reindex` | Re-index the project after file changes (MCP server only) |
100
+
101
+ ## Programmatic Usage
102
+
103
+ ```python
104
+ from mcp_codebase_index.project_indexer import ProjectIndexer
105
+ from mcp_codebase_index.query_api import create_project_query_functions
106
+
107
+ indexer = ProjectIndexer("/path/to/project", include_patterns=["**/*.py"])
108
+ index = indexer.index()
109
+ query_funcs = create_project_query_functions(index)
110
+
111
+ # Use query functions
112
+ print(query_funcs["get_project_summary"]())
113
+ print(query_funcs["find_symbol"]("MyClass"))
114
+ print(query_funcs["get_change_impact"]("some_function"))
115
+ ```
116
+
117
+ ## Development
118
+
119
+ ```bash
120
+ pip install -e ".[dev,mcp]"
121
+ pytest tests/ -v
122
+ ruff check src/ tests/
123
+ ```
124
+
125
+ ## References
126
+
127
+ The structural indexer was originally developed as part of the [RMLPlus](https://github.com/MikeRecognex/RMLPlus) project, an implementation of the [Recursive Language Models](https://arxiv.org/abs/2512.24601) framework.
128
+
129
+ ## License
130
+
131
+ MIT
@@ -0,0 +1,51 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "mcp-codebase-index"
7
+ version = "0.1.1"
8
+ description = "Structural codebase indexer with MCP server for AI-assisted development"
9
+ requires-python = ">=3.11"
10
+ readme = "README.md"
11
+ license = {file = "LICENSE"}
12
+ authors = [
13
+ {name = "Michael Doyle"},
14
+ ]
15
+ keywords = ["mcp", "codebase", "indexer", "code-navigation", "structural-analysis"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Software Development :: Libraries",
24
+ ]
25
+ dependencies = []
26
+
27
[project.optional-dependencies]
# Optional dependency needed only by the MCP server entry point.
mcp = ["mcp>=1.0"]
# pytest-timeout provides the `timeout` option set under
# [tool.pytest.ini_options]; without the plugin pytest does not recognise it.
dev = ["pytest>=8.0", "pytest-timeout>=2.0", "ruff>=0.5"]
30
+
31
+ [project.scripts]
32
+ mcp-codebase-index = "mcp_codebase_index.server:main_sync"
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/MikeRecognex/mcp-codebase-index"
36
+ Repository = "https://github.com/MikeRecognex/mcp-codebase-index"
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["src/mcp_codebase_index"]
40
+
41
+ [tool.pytest.ini_options]
42
+ testpaths = ["tests"]
43
+ timeout = 30
44
+
45
+ [tool.ruff]
46
+ target-version = "py311"
47
+ line-length = 100
48
+
49
+ [tool.mypy]
50
+ python_version = "3.11"
51
+ strict = true
@@ -0,0 +1,3 @@
1
+ """Structural codebase indexer with MCP server for AI-assisted development."""
2
+
3
+ __version__ = "0.1.1"
@@ -0,0 +1,51 @@
1
+ """Dispatch layer that selects the appropriate annotator by file type."""
2
+
3
+ from mcp_codebase_index.generic_annotator import annotate_generic
4
+ from mcp_codebase_index.models import StructuralMetadata
5
+ from mcp_codebase_index.python_annotator import annotate_python
6
+ from mcp_codebase_index.text_annotator import annotate_text
7
+ from mcp_codebase_index.typescript_annotator import annotate_typescript
8
+
9
# Lower-cased file extension (including the leading dot) -> annotator family.
# Built from per-family groups so related extensions stay side by side.
_EXTENSION_MAP: dict[str, str] = {
    extension: family
    for family, extensions in (
        ("python", (".py", ".pyw")),
        ("text", (".md", ".txt", ".rst")),
        ("typescript", (".ts", ".tsx")),
        ("javascript", (".js", ".jsx")),
    )
    for extension in extensions
}
20
+
21
+
22
def annotate(
    text: str,
    source_name: str = "<source>",
    file_type: str | None = None,
) -> StructuralMetadata:
    """Build structural metadata for ``text`` using the matching annotator.

    Args:
        text: Content to annotate.
        source_name: Filename or identifier. When ``file_type`` is None, the
            text from its last "." onward (lower-cased) is looked up in
            ``_EXTENSION_MAP`` to pick the annotator.
        file_type: Explicit annotator family; when given, extension-based
            detection is skipped.

    Returns:
        The result of the selected annotator:
        - "python" (.py, .pyw)          -> python annotator
        - "text" (.md, .txt, .rst)      -> text annotator
        - "typescript" (.ts, .tsx)      -> typescript annotator
        - "javascript" (.js, .jsx)      -> typescript annotator (close enough
          for regex-based parsing)
        - anything else                 -> generic annotator (line-only)
    """
    if file_type is None:
        # Split on the final dot; ``dot`` is empty when the name has none.
        _, dot, suffix = source_name.rpartition(".")
        if dot:
            file_type = _EXTENSION_MAP.get("." + suffix.lower())

    if file_type == "python":
        return annotate_python(text, source_name)
    if file_type == "text":
        return annotate_text(text, source_name)
    if file_type in ("typescript", "javascript"):
        return annotate_typescript(text, source_name)
    # Unknown or undetected type: fall back to line-only metadata.
    return annotate_generic(text, source_name)
@@ -0,0 +1,21 @@
1
+ """Generic fallback annotator providing line-only metadata."""
2
+
3
+ from mcp_codebase_index.models import StructuralMetadata
4
+
5
+
6
def annotate_generic(text: str, source_name: str = "<source>") -> StructuralMetadata:
    """Create minimal structural metadata with just line information.

    Args:
        text: Content to describe.
        source_name: Filename or identifier stored on the result.

    Returns:
        A ``StructuralMetadata`` carrying the split lines, the character
        offset at which each line starts in ``text``, and the total line and
        character counts. No code or text structure is populated.
    """
    lines = text.splitlines()

    # Measure each line together with its real terminator. Assuming a
    # one-character newline makes every offset after the first line drift
    # when the text uses "\r\n" line endings.
    offsets: list[int] = []
    offset = 0
    for raw_line in text.splitlines(keepends=True):
        offsets.append(offset)
        offset += len(raw_line)

    return StructuralMetadata(
        source_name=source_name,
        total_lines=len(lines),
        total_chars=len(text),
        lines=lines,
        line_char_offsets=offsets,
    )
@@ -0,0 +1,110 @@
1
+ """Structural metadata models for codebase indexing."""
2
+
3
+ from dataclasses import dataclass, field
4
+
5
+
6
@dataclass(frozen=True)
class LineRange:
    """Immutable span of lines.

    Both bounds are 1-indexed and inclusive.
    """

    start: int  # first line of the span
    end: int  # last line of the span
12
+
13
+
14
@dataclass(frozen=True)
class FunctionInfo:
    """Immutable record describing one function or method.

    Attributes:
        name: Name of the function.
        qualified_name: Dotted name, e.g. "MyClass.my_method".
        line_range: Lines covered by the definition.
        parameters: Parameter entries, as strings.
        decorators: Decorator names, without the leading "@".
        docstring: Docstring text, or None.
        is_method: Flag marking the entry as a method.
        parent_class: Name of the parent class; None for top-level functions.
    """

    name: str
    qualified_name: str
    line_range: LineRange
    parameters: list[str]
    decorators: list[str]
    docstring: str | None
    is_method: bool
    parent_class: str | None
26
+
27
+
28
@dataclass(frozen=True)
class ClassInfo:
    """Immutable record describing one class.

    Attributes:
        name: Name of the class.
        line_range: Lines covered by the definition.
        base_classes: Base class entries, as strings.
        methods: ``FunctionInfo`` records for the class's methods.
        decorators: Decorator entries, as strings.
        docstring: Docstring text, or None.
    """

    name: str
    line_range: LineRange
    base_classes: list[str]
    methods: list[FunctionInfo]
    decorators: list[str]
    docstring: str | None
38
+
39
+
40
+ @dataclass(frozen=True)
41
+ class ImportInfo:
42
+ """Metadata about an import statement."""
43
+
44
+ module: str # e.g., "os.path"
45
+ names: list[str] # e.g., ["join", "exists"] for "from os.path import join, exists"
46
+ alias: str | None # e.g., "np" for "import numpy as np"
47
+ line_number: int
48
+ is_from_import: bool # True for "from X import Y", False for "import X"
49
+
50
+
51
@dataclass(frozen=True)
class SectionInfo:
    """Immutable record describing one section of a text document.

    Attributes:
        title: Section title.
        level: Heading level (1 = top-level, 2 = subsection, etc.).
        line_range: Lines covered by the section.
    """

    title: str
    level: int
    line_range: LineRange
58
+
59
+
60
@dataclass
class StructuralMetadata:
    """Everything the indexer records about one file or text document.

    The source and line fields are required; the structure fields default to
    empty containers and are filled in according to the kind of content.
    """

    # --- Source identity and size ---
    source_name: str  # filename or identifier
    total_lines: int
    total_chars: int

    # --- Line data (always populated) ---
    lines: list[str]  # all lines; 0-indexed internally, the API uses 1-indexed
    line_char_offsets: list[int]  # character offset of each line start

    # --- Code structure (populated for code files) ---
    functions: list[FunctionInfo] = field(default_factory=list)
    classes: list[ClassInfo] = field(default_factory=list)
    imports: list[ImportInfo] = field(default_factory=list)

    # --- Text structure (populated for text/markdown files) ---
    sections: list[SectionInfo] = field(default_factory=list)

    # --- Dependency map (populated for code files) ---
    # Each function/class name maps to the names it references.
    dependency_graph: dict[str, list[str]] = field(default_factory=dict)
84
+
85
+
86
@dataclass
class ProjectIndex:
    """Structural index covering a whole codebase.

    Only ``root_path`` is required; every other field starts out empty or
    zero.
    """

    root_path: str
    files: dict[str, StructuralMetadata] = field(default_factory=dict)

    # --- Cross-file dependency graphs ---
    global_dependency_graph: dict[str, set[str]] = field(default_factory=dict)
    reverse_dependency_graph: dict[str, set[str]] = field(default_factory=dict)

    # --- File-level import graph ---
    import_graph: dict[str, set[str]] = field(default_factory=dict)
    reverse_import_graph: dict[str, set[str]] = field(default_factory=dict)

    # --- Global symbol table ---
    # symbol_name -> file_path where the symbol is defined
    symbol_table: dict[str, str] = field(default_factory=dict)

    # --- Stats ---
    total_files: int = 0
    total_lines: int = 0
    total_functions: int = 0
    total_classes: int = 0
    index_build_time_seconds: float = 0.0
    index_memory_bytes: int = 0