codebase-retrieval-context-engine 2.0.2__py3-none-any.whl → 2.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_retrieval_context_engine-2.0.5.dist-info/METADATA +103 -0
- {codebase_retrieval_context_engine-2.0.2.dist-info → codebase_retrieval_context_engine-2.0.5.dist-info}/RECORD +24 -23
- corbell/__init__.py +1 -1
- corbell/cli/commands/debug.py +305 -0
- corbell/cli/commands/index.py +13 -0
- corbell/cli/main.py +2 -0
- corbell/core/constants.py +8 -0
- corbell/core/embeddings/extractor.py +4 -1
- corbell/core/embeddings/model.py +12 -6
- corbell/core/embeddings/sqlite_store.py +71 -26
- corbell/core/gitignore.py +2 -0
- corbell/core/graph/builder.py +2 -0
- corbell/core/graph/method_graph.py +194 -15
- corbell/core/indexing/builder.py +257 -25
- corbell/core/indexing/tracker.py +2 -0
- corbell/core/llm_client.py +1 -1
- corbell/core/mcp/server.py +101 -140
- corbell/core/query/diagnostics.py +18 -1
- corbell/core/query/engine.py +484 -326
- corbell/core/query/reranker.py +98 -22
- corbell/core/workspace.py +44 -25
- codebase_retrieval_context_engine-2.0.2.dist-info/METADATA +0 -503
- {codebase_retrieval_context_engine-2.0.2.dist-info → codebase_retrieval_context_engine-2.0.5.dist-info}/WHEEL +0 -0
- {codebase_retrieval_context_engine-2.0.2.dist-info → codebase_retrieval_context_engine-2.0.5.dist-info}/entry_points.txt +0 -0
- {codebase_retrieval_context_engine-2.0.2.dist-info → codebase_retrieval_context_engine-2.0.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codebase-retrieval-context-engine
|
|
3
|
+
Version: 2.0.5
|
|
4
|
+
Summary: Code retrieval engine — hybrid embedding + graph search for LLM context injection.
|
|
5
|
+
Project-URL: Homepage, https://github.com/nullmastermind/local-context-engine
|
|
6
|
+
Project-URL: Repository, https://github.com/nullmastermind/local-context-engine
|
|
7
|
+
Project-URL: Issues, https://github.com/nullmastermind/local-context-engine/issues
|
|
8
|
+
Author: nullmastermind
|
|
9
|
+
License: Apache-2.0
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: LLM,MCP,code-search,codebase-retrieval,context,embeddings,retrieval
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Requires-Dist: google-genai>=2.7.0
|
|
19
|
+
Requires-Dist: mcp>=1.1.2
|
|
20
|
+
Requires-Dist: numpy>=2.0
|
|
21
|
+
Requires-Dist: pathspec>=0.11
|
|
22
|
+
Requires-Dist: pydantic>=2.0
|
|
23
|
+
Requires-Dist: python-dotenv>=1.0
|
|
24
|
+
Requires-Dist: rich>=13.0
|
|
25
|
+
Requires-Dist: typer>=0.12
|
|
26
|
+
Requires-Dist: voyageai>=0.3
|
|
27
|
+
Provides-Extra: anthropic
|
|
28
|
+
Requires-Dist: anthropic>=0.25; extra == 'anthropic'
|
|
29
|
+
Provides-Extra: aws
|
|
30
|
+
Requires-Dist: boto3>=1.34; extra == 'aws'
|
|
31
|
+
Provides-Extra: azure
|
|
32
|
+
Requires-Dist: openai>=1.0; extra == 'azure'
|
|
33
|
+
Provides-Extra: debug
|
|
34
|
+
Requires-Dist: gradio>=4.0; extra == 'debug'
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: httpx; extra == 'dev'
|
|
37
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
40
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: respx; extra == 'dev'
|
|
42
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
43
|
+
Provides-Extra: gcp
|
|
44
|
+
Requires-Dist: anthropic[vertex]>=0.25; extra == 'gcp'
|
|
45
|
+
Requires-Dist: google-cloud-aiplatform>=1.38; extra == 'gcp'
|
|
46
|
+
Provides-Extra: openai
|
|
47
|
+
Requires-Dist: openai>=1.0; extra == 'openai'
|
|
48
|
+
Provides-Extra: treesitter
|
|
49
|
+
Requires-Dist: tree-sitter-c-sharp>=0.21; extra == 'treesitter'
|
|
50
|
+
Requires-Dist: tree-sitter-go>=0.21; extra == 'treesitter'
|
|
51
|
+
Requires-Dist: tree-sitter-java>=0.21; extra == 'treesitter'
|
|
52
|
+
Requires-Dist: tree-sitter-javascript>=0.21; extra == 'treesitter'
|
|
53
|
+
Requires-Dist: tree-sitter-php>=0.21; extra == 'treesitter'
|
|
54
|
+
Requires-Dist: tree-sitter-python>=0.21; extra == 'treesitter'
|
|
55
|
+
Requires-Dist: tree-sitter-ruby>=0.21; extra == 'treesitter'
|
|
56
|
+
Requires-Dist: tree-sitter-rust>=0.21; extra == 'treesitter'
|
|
57
|
+
Requires-Dist: tree-sitter-typescript>=0.21; extra == 'treesitter'
|
|
58
|
+
Requires-Dist: tree-sitter>=0.21; extra == 'treesitter'
|
|
59
|
+
Description-Content-Type: text/markdown
|
|
60
|
+
|
|
61
|
+
<div align="center">
|
|
62
|
+
<h1>codebase-retrieval-context-engine</h1>
|
|
63
|
+
<p><strong>Code retrieval engine for LLM context via MCP.</strong></p>
|
|
64
|
+
<p>
|
|
65
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License"/></a>
|
|
66
|
+
</p>
|
|
67
|
+
</div>
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Add to Claude Code
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
claude mcp add-json codebase-retrieval --scope user '{"type":"stdio","command":"uvx","args":["codebase-retrieval-context-engine"],"env":{"CORBELL_LLM_PROVIDER":"google","GOOGLE_API_KEY":"your-google-api-key","GOOGLE_MODEL":"gemini-3.1-flash-lite","CORBELL_EMBEDDING_MODEL":"voyage-4-lite","VOYAGE_API_KEY":"your-voyage-api-key"}}'
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
That's it. The AI agent passes the workspace path and triggers index builds automatically.
|
|
78
|
+
|
|
79
|
+
## Remove from Claude Code
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
claude mcp remove codebase-retrieval --scope user
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
After adding, you can also edit or remove the MCP config directly in `~/.claude.json`.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Environment variables
|
|
90
|
+
|
|
91
|
+
| Variable | Description |
|
|
92
|
+
|---|---|
|
|
93
|
+
| `CORBELL_LLM_PROVIDER` | LLM provider for reranking (`google`, `anthropic`, `openai`) |
|
|
94
|
+
| `GOOGLE_API_KEY` | Google AI API key (supports multiple: `key1,key2,key3`) |
|
|
95
|
+
| `GOOGLE_MODEL` | e.g. `gemini-3.1-flash-lite` |
|
|
96
|
+
| `CORBELL_EMBEDDING_MODEL` | `voyage-4-lite`, `voyage-code-3`, or `gemini-embedding-001` |
|
|
97
|
+
| `VOYAGE_API_KEY` | Voyage AI API key (supports multiple: `key1,key2,key3`). Add a card to billing to unlock rate limits. |
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## License
|
|
102
|
+
|
|
103
|
+
Apache 2.0
|
|
@@ -1,24 +1,25 @@
|
|
|
1
|
-
corbell/__init__.py,sha256=
|
|
1
|
+
corbell/__init__.py,sha256=GUZEYd2hejlRYIyIEmvdrYMTDjIT3liSSzQaXItrOho,124
|
|
2
2
|
corbell/cli/__init__.py,sha256=5-MP6JIWgp4nDLNIhqP6Gtx97GESaIYg3NGxtRGaMv0,28
|
|
3
|
-
corbell/cli/main.py,sha256=
|
|
3
|
+
corbell/cli/main.py,sha256=CP5EHizFLaBLF1EohgVo_-XFlm4VaO6peQaSnzyfxAI,1954
|
|
4
4
|
corbell/cli/commands/__init__.py,sha256=0mAOs3RWC7XMZnGRN677hjPCHHQKDq9ASjIr_GQM3js,37
|
|
5
|
-
corbell/cli/commands/
|
|
5
|
+
corbell/cli/commands/debug.py,sha256=wdwveCeQSgcQbNg5-R5ekU_smEQKMq8WfH0obBbq3i8,10764
|
|
6
|
+
corbell/cli/commands/index.py,sha256=_nv5TC2O1xusX2gY8s2p00xPLN3wQrHEiFmc0EL6oHY,3432
|
|
6
7
|
corbell/cli/commands/query.py,sha256=Sh-xnVj4n3zAI2hTxVyMTqFEPsq3vkWucfljnCEaGyU,2310
|
|
7
8
|
corbell/core/__init__.py,sha256=VS9PnhHr4NXYlWs1TLCyllnVCNsiwVZ1Xj-AOBhZpAU,29
|
|
8
|
-
corbell/core/constants.py,sha256=
|
|
9
|
-
corbell/core/gitignore.py,sha256=
|
|
10
|
-
corbell/core/llm_client.py,sha256=
|
|
11
|
-
corbell/core/workspace.py,sha256=
|
|
9
|
+
corbell/core/constants.py,sha256=P0fCJ0J5V2Nt348ZAVH1bHd9dFPJRLtpUyQhHPAl0_8,1203
|
|
10
|
+
corbell/core/gitignore.py,sha256=UO588tAxSVv7YEGNDjzdcBys_aqMIAhXrDgToRfcnzc,2347
|
|
11
|
+
corbell/core/llm_client.py,sha256=qGKuptxMAMDwqvhGAKVjppf2p-sX-auaA26WKo6Nlkk,26221
|
|
12
|
+
corbell/core/workspace.py,sha256=qpBJNoxYmt-2OOx4K8bSsoJPgjEPDM3IKSYHMm6H54M,15130
|
|
12
13
|
corbell/core/embeddings/__init__.py,sha256=RCekvfNkFuMGEDLnls78i3znR84cTdnj4KJ_PeQrMNg,213
|
|
13
14
|
corbell/core/embeddings/base.py,sha256=udPW4XmcPhCpNQA6n8KqMcu2JXvVNv1JjdRJmFq5ZRA,2175
|
|
14
|
-
corbell/core/embeddings/extractor.py,sha256=
|
|
15
|
+
corbell/core/embeddings/extractor.py,sha256=2_BxRpsUcz-C-3HXjvlARqM3U5dzHRJcPR_hhPdMxSE,7314
|
|
15
16
|
corbell/core/embeddings/factory.py,sha256=Lonjbk8Lsxykz-2ZEgFCWoH9zZ005Qm4dXVdA6P4qJY,1817
|
|
16
|
-
corbell/core/embeddings/model.py,sha256=
|
|
17
|
+
corbell/core/embeddings/model.py,sha256=QYQy7W0iuce3ZHFXuNLHMnkqg5axQIyeYLpOBk2qpf8,14458
|
|
17
18
|
corbell/core/embeddings/search_cache.py,sha256=FHzO3mu4m4MJGy2jOFwb9GCEypcT11CcVrLts4Ib0ho,3351
|
|
18
|
-
corbell/core/embeddings/sqlite_store.py,sha256=
|
|
19
|
+
corbell/core/embeddings/sqlite_store.py,sha256=99lHU_gPYwKw9BhUMS-XimQI8vDpBbBrIc_RkrsVdOM,11676
|
|
19
20
|
corbell/core/graph/__init__.py,sha256=VaxDKeXMgMEBBMC0dglwj68A_aNYRI5O8VM6oMC1GIM,29
|
|
20
|
-
corbell/core/graph/builder.py,sha256=
|
|
21
|
-
corbell/core/graph/method_graph.py,sha256=
|
|
21
|
+
corbell/core/graph/builder.py,sha256=dXUdAhuZ4t-wuW4dFZHz6k9-wBXdYkY6dysjQIkvl3Q,32214
|
|
22
|
+
corbell/core/graph/method_graph.py,sha256=fwmkSZXiGGYZIc2iC-6hbTrb26fAwielOrJBlqaz8Oc,57594
|
|
22
23
|
corbell/core/graph/schema.py,sha256=swy1VZZpL88LPEj6zihl5bglQLrGD-ohOYjFeNC31a0,5253
|
|
23
24
|
corbell/core/graph/sqlite_store.py,sha256=B1ObNit7MXbQpst6dpuloTcFAmUim_MoP3PSCATf_4A,21116
|
|
24
25
|
corbell/core/graph/providers/__init__.py,sha256=__ZVe1uwIHSyFh_t-V4MyT5MsM5hooTOrxxkm9Txt7o,268
|
|
@@ -26,21 +27,21 @@ corbell/core/graph/providers/aws_patterns.py,sha256=w2iF5qQJcV7S6J64ZYb3IzGPdXjC
|
|
|
26
27
|
corbell/core/graph/providers/azure_patterns.py,sha256=tJ9AQQXW2xYzJ36wNOxTHHhaivaCv3RYEMJUjw8WjeQ,3515
|
|
27
28
|
corbell/core/graph/providers/gcp_patterns.py,sha256=vIofjanvRWGhFftuGdzt9YgTIGZRJz7lLG0abUNjFdA,2789
|
|
28
29
|
corbell/core/indexing/__init__.py,sha256=VczeSHUfKR3YVowGCleFjo2pIpDHfl9kl-OkEl8szow,47
|
|
29
|
-
corbell/core/indexing/builder.py,sha256=
|
|
30
|
+
corbell/core/indexing/builder.py,sha256=apF-FFz_bZ6SeBEVVZzNXMavp9zuLVMVhg4598YJfMs,33333
|
|
30
31
|
corbell/core/indexing/lock.py,sha256=uUMelIrtrp6Ww9rTfbl2OvomByc-IJyiHIMnptfA4xI,4743
|
|
31
|
-
corbell/core/indexing/tracker.py,sha256=
|
|
32
|
+
corbell/core/indexing/tracker.py,sha256=UCeKARiUMyZcg1yvbIZxibZUM2HOA-_6rNTkyPgpQhE,8571
|
|
32
33
|
corbell/core/mcp/__init__.py,sha256=DDzfuVbX_GBTM5Nqy34JVgDUMeFd2_5ZcVMVuvjOddU,32
|
|
33
|
-
corbell/core/mcp/server.py,sha256=
|
|
34
|
+
corbell/core/mcp/server.py,sha256=HzA3F02X6oqzM7vwPDRhNf7LfLcIzhcZtyqzx4aNOs4,7262
|
|
34
35
|
corbell/core/query/__init__.py,sha256=OCyVRZOyh_eLGhOxR_JYyH6zp8O7qy_-rC3fqGHm7Bc,56
|
|
35
|
-
corbell/core/query/diagnostics.py,sha256=
|
|
36
|
-
corbell/core/query/engine.py,sha256=
|
|
36
|
+
corbell/core/query/diagnostics.py,sha256=o9uIAYFQy8hHua1xLMToSaQPP6xcmnvDJMY3fVg1Dhg,2102
|
|
37
|
+
corbell/core/query/engine.py,sha256=vTFVlXqHavxcR1mIy4KbIRWXx-u_uNHDt4Jb3JRiJ78,18016
|
|
37
38
|
corbell/core/query/enhancer.py,sha256=w5mvm1B8qQZpL6RVhMuhq_rls77hakGSNUyanfkyNEU,3934
|
|
38
39
|
corbell/core/query/formatter.py,sha256=xMr8HE-oxBSEKb514aixY7aoUWGeYoK1w5wnaIlCYEc,2813
|
|
39
40
|
corbell/core/query/graph_expander.py,sha256=Y-yKnr6db-OM2Gh8ukYgVIcUZa6-wfWA-GhdvOwf_yA,9184
|
|
40
41
|
corbell/core/query/merger.py,sha256=fs6PL7X7EweXnSnDRnpzmpaU8JjwJpL0akzm4hSwLJk,6168
|
|
41
|
-
corbell/core/query/reranker.py,sha256=
|
|
42
|
-
codebase_retrieval_context_engine-2.0.
|
|
43
|
-
codebase_retrieval_context_engine-2.0.
|
|
44
|
-
codebase_retrieval_context_engine-2.0.
|
|
45
|
-
codebase_retrieval_context_engine-2.0.
|
|
46
|
-
codebase_retrieval_context_engine-2.0.
|
|
42
|
+
corbell/core/query/reranker.py,sha256=0M8Km2WEO3NX46gT0mF7ma9e0v_HOYXu-t6WgF5U2tI,7262
|
|
43
|
+
codebase_retrieval_context_engine-2.0.5.dist-info/METADATA,sha256=5-myVeI6Z9ecAJFK1FpZV08PfxtcD6VDtr7V7CaxlUk,4036
|
|
44
|
+
codebase_retrieval_context_engine-2.0.5.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
45
|
+
codebase_retrieval_context_engine-2.0.5.dist-info/entry_points.txt,sha256=vFB4a4Qb7Ty182usK8deJXiis0UYnGIUDusw0V3Jya8,115
|
|
46
|
+
codebase_retrieval_context_engine-2.0.5.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
47
|
+
codebase_retrieval_context_engine-2.0.5.dist-info/RECORD,,
|
corbell/__init__.py
CHANGED
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""CLI: corbell debug — launch a Gradio UI for inspecting query pipeline internals."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
app = typer.Typer(no_args_is_help=False, help="Query debug UI commands.")
|
|
12
|
+
console = Console()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_env_overrides(env_vars_text: str) -> dict[str, str]:
    """Parse ``KEY=VALUE`` lines into a mapping of environment overrides.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines whose key
    is empty are ignored. Keys and values are whitespace-stripped. When a key
    appears more than once, the last value wins.
    """
    overrides: dict[str, str] = {}
    for raw_line in env_vars_text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, sep, value = line.partition("=")
        key = key.strip()
        # An empty key (e.g. "=value") cannot be stored in os.environ.
        if not sep or not key:
            continue
        overrides[key] = value.strip()
    return overrides


@app.callback(invoke_without_command=True)
def debug(
    ctx: typer.Context,
    workspace: str = typer.Option(
        "",
        "--workspace",
        "-w",
        help="Path to the workspace root (default: current directory).",
    ),
    port: int = typer.Option(7860, "--port", "-p", help="Port for the Gradio server."),
    share: bool = typer.Option(False, "--share", help="Create a public Gradio share link."),
) -> None:
    """Launch the Gradio debug UI for inspecting the query pipeline.

    The UI lets you run a query against a workspace and inspect:
    - Per-phase timing
    - Final formatted results
    - Pre-rerank chunk table (file, lines, score, symbol, type)
    - LLM rerank prompts and raw response
    """
    # NOTE: the docstring above doubles as the Typer help text, so developer
    # notes live in comments rather than in the docstring.
    if ctx.invoked_subcommand is not None:
        return

    # Gradio is an optional extra; import lazily so the rest of the CLI works
    # without it.
    try:
        import gradio as gr  # type: ignore[import-untyped]
    except ImportError:
        console.print(
            "[red]Gradio is not installed. Install it with:[/red]\n"
            "  pip install 'codebase-retrieval-context-engine[debug]'"
        )
        raise typer.Exit(1)

    # Resolution order: --workspace flag, CORBELL_WORKSPACE, current directory.
    default_workspace = workspace or os.environ.get("CORBELL_WORKSPACE") or str(Path.cwd())

    def run_mcp_tool(
        env_vars_text: str,
        mcp_workspace: str,
        mcp_query: str,
    ):  # type: ignore[no-untyped-def]
        """Invoke context_engine_codebase_retrieval directly and return results.

        Returns an ``(error_text, tool_response)`` pair; exactly one of the
        two is non-empty unless the query is blank.
        """
        if not mcp_query.strip():
            return "", ""

        # Snapshot the *original* value of each overridden key exactly once,
        # before anything is applied, so that a key repeated in the textbox
        # cannot replace the backup with an already-overridden value.
        overrides = _parse_env_overrides(env_vars_text)
        env_backup: dict[str, str | None] = {
            key: os.environ.get(key) for key in overrides
        }

        try:
            # Applied inside the try block so that a rejected key/value is
            # reported in the error box and the finally clause still restores
            # whatever was already changed.
            os.environ.update(overrides)

            from corbell.core.mcp.server import context_engine_codebase_retrieval

            result = context_engine_codebase_retrieval(
                query=mcp_query.strip(),
                workspace_full_path=mcp_workspace.strip(),
            )

            if result.startswith("Error:"):
                return result, ""
            return "", result
        except Exception as exc:
            return f"Error: {exc}", ""
        finally:
            # Restore the process environment for subsequent invocations.
            for key, original in env_backup.items():
                if original is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = original

    def run_query(workspace_path: str, query: str):  # type: ignore[no-untyped-def]
        """Run the debug pipeline and return Gradio component values.

        The returned 7-tuple is ordered to match the ``outputs`` list wired to
        the "Run Query" button below.
        """
        from corbell.core.query.engine import codebase_retrieval_debug

        if not query.strip():
            return (
                "",  # error_box
                "",  # timing_md
                "",  # final_results
                [],  # pre_rerank_table
                "",  # rerank_system
                "",  # rerank_user
                "",  # rerank_response
            )

        ws = workspace_path.strip() or default_workspace
        result = codebase_retrieval_debug(query=query, workspace_path=ws)

        # --- Error banner ---
        error_text = result.error or ""

        # --- Timing table ---
        timing = result.diagnostics.timing if result.diagnostics else {}
        if timing:
            rows = "".join(
                f"| {phase} | {elapsed:.3f}s |\n"
                for phase, elapsed in timing.items()
            )
            timing_md = (
                "| Phase | Elapsed |\n"
                "|---|---|\n"
                + rows
            )
        else:
            timing_md = "_No timing data available._"

        # --- Final results ---
        final_results = result.final_output or ""

        # --- Pre-rerank table ---
        pre_rerank_rows = []
        graph_ids = set()
        if result.diagnostics and result.diagnostics.graph_chunk_ids:
            graph_ids = result.diagnostics.graph_chunk_ids
        for chunk in result.pre_rerank_chunks:
            chunk_id = getattr(chunk, "chunk_id", "")
            # A chunk id containing "+" is split into its component ids;
            # presumably it denotes a merged chunk — confirm against the merger.
            parts = chunk_id.split("+") if chunk_id else []
            has_graph = any(p in graph_ids for p in parts) if graph_ids else False
            has_embedding = any(p not in graph_ids for p in parts) if graph_ids else True
            if has_graph and has_embedding and len(parts) > 1:
                source = "embedding+graph"
            elif has_graph:
                source = "graph"
            else:
                source = "embedding"
            pre_rerank_rows.append([
                getattr(chunk, "file_path", ""),
                f"{getattr(chunk, 'start_line', '')}-{getattr(chunk, 'end_line', '')}",
                f"{getattr(chunk, 'score', 0.0):.4f}",
                getattr(chunk, "symbol", "") or "",
                getattr(chunk, "chunk_type", "") or "",
                source,
                getattr(chunk, "content", "") or "",
            ])

        # --- Rerank prompts ---
        detail = result.rerank_detail
        if detail is None or not detail.system_prompt:
            rerank_system = "_LLM not configured — reranking skipped_"
            rerank_user = ""
            rerank_response = ""
        else:
            rerank_system = detail.system_prompt
            rerank_user = detail.user_prompt
            rerank_response = detail.raw_response or "_No response (LLM call failed)_"

        return (
            error_text,
            timing_md,
            final_results,
            pre_rerank_rows,
            rerank_system,
            rerank_user,
            rerank_response,
        )

    with gr.Blocks(title="Corbell Query Debugger") as demo:
        gr.Markdown("# Corbell Query Debugger")
        gr.Markdown("Inspect query pipeline internals: timing, pre-rerank chunks, and LLM rerank prompts.")

        with gr.Row():
            workspace_input = gr.Textbox(
                label="Workspace Path",
                value=default_workspace,
                placeholder="Path to repository root",
                scale=2,
            )
            query_input = gr.Textbox(
                label="Query",
                placeholder="e.g. authentication middleware",
                scale=3,
            )

        run_btn = gr.Button("Run Query", variant="primary")

        error_box = gr.Textbox(
            label="Error",
            visible=True,
            interactive=False,
            lines=2,
        )

        timing_md = gr.Markdown(label="Timing")

        with gr.Tabs():
            with gr.Tab("Final Results"):
                final_output = gr.Code(label="Formatted Output", language=None)

            with gr.Tab("Pre-Rerank Chunks"):
                pre_rerank_table = gr.Dataframe(
                    headers=["File", "Lines", "Score", "Symbol", "Type", "Source", "Content"],
                    datatype=["str", "str", "str", "str", "str", "str", "str"],
                    label="Chunks before reranking",
                    wrap=False,
                )

            with gr.Tab("LLM Rerank"):
                rerank_system_box = gr.Textbox(
                    label="System Prompt",
                    lines=6,
                    interactive=False,
                )
                rerank_user_box = gr.Textbox(
                    label="User Prompt",
                    lines=12,
                    interactive=False,
                )
                rerank_response_box = gr.Textbox(
                    label="Raw LLM Response",
                    lines=4,
                    interactive=False,
                )

            with gr.Tab("MCP Debug"):
                gr.Markdown(
                    "### MCP Tool Tester\n"
                    "Configure environment and invoke "
                    "`context_engine_codebase_retrieval` directly."
                )

                with gr.Accordion("Environment Configuration", open=False):
                    mcp_env_vars = gr.Textbox(
                        label="Environment Variables (one per line, KEY=VALUE)",
                        placeholder=(
                            "# Example:\n"
                            "CORBELL_LLM_PROVIDER=anthropic\n"
                            "CORBELL_RERANK=true\n"
                            "ANTHROPIC_API_KEY=sk-..."
                        ),
                        lines=6,
                    )

                gr.Markdown("#### Tool Parameters")
                with gr.Row():
                    mcp_workspace_input = gr.Textbox(
                        label="workspace_full_path",
                        value=default_workspace,
                        placeholder="Path to repository root",
                        scale=3,
                    )
                mcp_query_input = gr.Textbox(
                    label="query",
                    placeholder="e.g. authentication middleware",
                )

                mcp_run_btn = gr.Button("Invoke MCP Tool", variant="primary")

                mcp_error_box = gr.Textbox(
                    label="Error",
                    visible=True,
                    interactive=False,
                    lines=2,
                )
                mcp_result_box = gr.Code(
                    label="Tool Response",
                    language=None,
                )

        # Output order must match the tuple returned by run_query.
        run_btn.click(
            fn=run_query,
            inputs=[workspace_input, query_input],
            outputs=[
                error_box,
                timing_md,
                final_output,
                pre_rerank_table,
                rerank_system_box,
                rerank_user_box,
                rerank_response_box,
            ],
        )

        # Output order must match the pair returned by run_mcp_tool.
        mcp_run_btn.click(
            fn=run_mcp_tool,
            inputs=[
                mcp_env_vars,
                mcp_workspace_input,
                mcp_query_input,
            ],
            outputs=[mcp_error_box, mcp_result_box],
        )

    console.print(f"[green]Starting Corbell debug UI on port {port}...[/green]")
    demo.launch(server_port=port, share=share)
|
corbell/cli/commands/index.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import logging
|
|
5
6
|
import os
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Optional
|
|
@@ -26,6 +27,9 @@ def build(
|
|
|
26
27
|
repo: Optional[str] = typer.Option(
|
|
27
28
|
None, "--repo", help="Only index a specific repo by ID."
|
|
28
29
|
),
|
|
30
|
+
verbose: bool = typer.Option(
|
|
31
|
+
False, "--verbose", "-v", help="Enable detailed performance logging."
|
|
32
|
+
),
|
|
29
33
|
) -> None:
|
|
30
34
|
"""Build (or incrementally update) the code search index.
|
|
31
35
|
|
|
@@ -39,6 +43,15 @@ def build(
|
|
|
39
43
|
2. CORBELL_WORKSPACE environment variable
|
|
40
44
|
3. Current working directory
|
|
41
45
|
"""
|
|
46
|
+
if verbose or os.environ.get("CORBELL_VERBOSE", ""):
|
|
47
|
+
logging.basicConfig(
|
|
48
|
+
level=logging.INFO,
|
|
49
|
+
format="%(asctime)s %(name)s %(message)s",
|
|
50
|
+
datefmt="%H:%M:%S",
|
|
51
|
+
)
|
|
52
|
+
else:
|
|
53
|
+
logging.basicConfig(level=logging.WARNING)
|
|
54
|
+
|
|
42
55
|
from corbell.core.workspace import build_config, db_path_for_workspace
|
|
43
56
|
|
|
44
57
|
# Resolve workspace path: flag → env var → cwd
|
corbell/cli/main.py
CHANGED
|
@@ -8,6 +8,7 @@ import typer
|
|
|
8
8
|
from dotenv import load_dotenv
|
|
9
9
|
from rich.console import Console
|
|
10
10
|
|
|
11
|
+
from corbell.cli.commands.debug import app as debug_app
|
|
11
12
|
from corbell.cli.commands.index import app as index_app
|
|
12
13
|
from corbell.cli.commands.query import app as query_app
|
|
13
14
|
|
|
@@ -32,6 +33,7 @@ console = Console()
|
|
|
32
33
|
|
|
33
34
|
app.add_typer(index_app, name="index", help="Code index commands.")
|
|
34
35
|
app.add_typer(query_app, name="query", help="Code search commands.")
|
|
36
|
+
app.add_typer(debug_app, name="debug", help="Query debug UI.")
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
# ---------------------------------------------------------------------------
|
corbell/core/constants.py
CHANGED
|
@@ -45,6 +45,14 @@ EXTENSION_LANG: dict[str, str] = {
|
|
|
45
45
|
".php": "php",
|
|
46
46
|
".cs": "csharp",
|
|
47
47
|
".rs": "rust",
|
|
48
|
+
".c": "c",
|
|
49
|
+
".cc": "cpp",
|
|
50
|
+
".cpp": "cpp",
|
|
51
|
+
".cxx": "cpp",
|
|
52
|
+
".h": "c",
|
|
53
|
+
".hh": "cpp",
|
|
54
|
+
".hpp": "cpp",
|
|
55
|
+
".hxx": "cpp",
|
|
48
56
|
".md": "markdown",
|
|
49
57
|
".yml": "yaml",
|
|
50
58
|
".yaml": "yaml",
|
|
@@ -83,7 +83,10 @@ class CodeChunkExtractor:
|
|
|
83
83
|
lang = _SUPPORTED.get(fp.suffix)
|
|
84
84
|
if not lang:
|
|
85
85
|
continue
|
|
86
|
-
|
|
86
|
+
rel_path = fp.relative_to(repo_path)
|
|
87
|
+
if any(part.startswith(".") for part in rel_path.parts[:-1]):
|
|
88
|
+
continue
|
|
89
|
+
rel = str(rel_path)
|
|
87
90
|
if gitignore_spec.match_file(rel.replace("\\", "/")):
|
|
88
91
|
continue
|
|
89
92
|
chunks = self._extract_file(fp, rel, lang, service_id, str(repo_path))
|
corbell/core/embeddings/model.py
CHANGED
|
@@ -339,15 +339,21 @@ class VoyageEmbeddingModel(EmbeddingModel):
|
|
|
339
339
|
key = self._api_keys[idx]
|
|
340
340
|
try:
|
|
341
341
|
vo = voyageai.Client(api_key=key)
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
)
|
|
342
|
+
kwargs: dict = {
|
|
343
|
+
"model": self.model_name,
|
|
344
|
+
"input_type": input_type,
|
|
345
|
+
}
|
|
346
|
+
import inspect
|
|
347
|
+
if "output_dimension" in inspect.signature(vo.embed).parameters:
|
|
348
|
+
kwargs["output_dimension"] = self.dimension
|
|
349
|
+
result = vo.embed(batch, **kwargs)
|
|
348
350
|
self._key_index = (idx + 1) % len(self._api_keys)
|
|
349
351
|
return result.embeddings
|
|
350
352
|
except Exception as e:
|
|
353
|
+
logger.info(
|
|
354
|
+
"Voyage API error: key[%d] %s: %s",
|
|
355
|
+
idx, type(e).__name__, e,
|
|
356
|
+
)
|
|
351
357
|
if _is_voyage_rate_limit_error(e):
|
|
352
358
|
errors.append(f"key[{idx}]: {e}")
|
|
353
359
|
continue
|