codebase-retrieval-context-engine 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: codebase-retrieval-context-engine
3
+ Version: 2.0.3
4
+ Summary: Code retrieval engine — hybrid embedding + graph search for LLM context injection.
5
+ Project-URL: Homepage, https://github.com/nullmastermind/local-context-engine
6
+ Project-URL: Repository, https://github.com/nullmastermind/local-context-engine
7
+ Project-URL: Issues, https://github.com/nullmastermind/local-context-engine/issues
8
+ Author: nullmastermind
9
+ License: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: LLM,MCP,code-search,codebase-retrieval,context,embeddings,retrieval
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Requires-Python: >=3.11
18
+ Requires-Dist: mcp>=1.1.2
19
+ Requires-Dist: numpy>=2.0
20
+ Requires-Dist: pathspec>=0.11
21
+ Requires-Dist: pydantic>=2.0
22
+ Requires-Dist: python-dotenv>=1.0
23
+ Requires-Dist: rich>=13.0
24
+ Requires-Dist: typer>=0.12
25
+ Provides-Extra: anthropic
26
+ Requires-Dist: anthropic>=0.25; extra == 'anthropic'
27
+ Provides-Extra: aws
28
+ Requires-Dist: boto3>=1.34; extra == 'aws'
29
+ Provides-Extra: azure
30
+ Requires-Dist: openai>=1.0; extra == 'azure'
31
+ Provides-Extra: debug
32
+ Requires-Dist: gradio>=4.0; extra == 'debug'
33
+ Provides-Extra: dev
34
+ Requires-Dist: httpx; extra == 'dev'
35
+ Requires-Dist: mypy; extra == 'dev'
36
+ Requires-Dist: pytest-asyncio; extra == 'dev'
37
+ Requires-Dist: pytest-cov; extra == 'dev'
38
+ Requires-Dist: pytest>=8.0; extra == 'dev'
39
+ Requires-Dist: respx; extra == 'dev'
40
+ Requires-Dist: ruff; extra == 'dev'
41
+ Provides-Extra: gcp
42
+ Requires-Dist: anthropic[vertex]>=0.25; extra == 'gcp'
43
+ Requires-Dist: google-cloud-aiplatform>=1.38; extra == 'gcp'
44
+ Provides-Extra: google
45
+ Requires-Dist: google-genai>=2.7.0; extra == 'google'
46
+ Provides-Extra: openai
47
+ Requires-Dist: openai>=1.0; extra == 'openai'
48
+ Provides-Extra: treesitter
49
+ Requires-Dist: tree-sitter-c-sharp>=0.21; extra == 'treesitter'
50
+ Requires-Dist: tree-sitter-go>=0.21; extra == 'treesitter'
51
+ Requires-Dist: tree-sitter-java>=0.21; extra == 'treesitter'
52
+ Requires-Dist: tree-sitter-javascript>=0.21; extra == 'treesitter'
53
+ Requires-Dist: tree-sitter-php>=0.21; extra == 'treesitter'
54
+ Requires-Dist: tree-sitter-python>=0.21; extra == 'treesitter'
55
+ Requires-Dist: tree-sitter-ruby>=0.21; extra == 'treesitter'
56
+ Requires-Dist: tree-sitter-rust>=0.21; extra == 'treesitter'
57
+ Requires-Dist: tree-sitter-typescript>=0.21; extra == 'treesitter'
58
+ Requires-Dist: tree-sitter>=0.21; extra == 'treesitter'
59
+ Provides-Extra: voyage
60
+ Requires-Dist: voyageai>=0.3; extra == 'voyage'
61
+ Description-Content-Type: text/markdown
62
+
63
+ <div align="center">
64
+ <h1>codebase-retrieval-context-engine</h1>
65
+ <p><strong>Code retrieval engine for LLM context via MCP.</strong></p>
66
+ <p>
67
+ <a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License"/></a>
68
+ </p>
69
+ </div>
70
+
71
+ ---
72
+
73
+ ## Add to Claude Code
74
+
75
+ ```bash
76
+ claude mcp add codebase-retrieval -e CORBELL_LLM_PROVIDER=google -e GOOGLE_API_KEY=your-google-api-key -e GOOGLE_MODEL=gemini-3.1-flash-lite -e CORBELL_EMBEDDING_MODEL=voyage-4-lite -e VOYAGE_API_KEY=your-voyage-api-key -- uvx codebase-retrieval-context-engine
77
+ ```
78
+
79
+ That's it. The AI agent passes the workspace path and triggers index builds automatically.
80
+
81
+ ---
82
+
83
+ ## Build index manually (optional)
84
+
85
+ ```bash
86
+ uvx codebase-retrieval-context-engine index build
87
+ ```
88
+
89
+ Run from your project root. Env vars (`CORBELL_LLM_PROVIDER`, `GOOGLE_API_KEY`, etc.) must be set in your shell.
90
+
91
+ ---
92
+
93
+ ## Environment variables
94
+
95
+ | Variable | Description |
96
+ |---|---|
97
+ | `CORBELL_LLM_PROVIDER` | LLM provider for reranking (`google`, `anthropic`, `openai`) |
98
+ | `GOOGLE_API_KEY` | Google AI API key (supports multiple: `key1,key2,key3`) |
99
+ | `GOOGLE_MODEL` | e.g. `gemini-3.1-flash-lite` |
100
+ | `CORBELL_EMBEDDING_MODEL` | `voyage-4-lite`, `voyage-code-3`, or `gemini-embedding-001` |
101
+ | `VOYAGE_API_KEY` | Voyage AI API key (supports multiple: `key1,key2,key3`). Add a payment method to your Voyage AI account to unlock higher rate limits. |
102
+
103
+ ---
104
+
105
+ ## License
106
+
107
+ Apache 2.0
@@ -1,24 +1,25 @@
1
- corbell/__init__.py,sha256=DK8C29me67FSOnq2v_CAPc0COnXW4plMGTNHfZvmX5Y,124
1
+ corbell/__init__.py,sha256=BaFS4Y0-zDapO7abzb83G5HBc7cB1xLntwgXWJbDixk,124
2
2
  corbell/cli/__init__.py,sha256=5-MP6JIWgp4nDLNIhqP6Gtx97GESaIYg3NGxtRGaMv0,28
3
- corbell/cli/main.py,sha256=anYpXiyQD6_1wMS0Dtef6Rxtxd0NEFe7HHnerHxf3J4,1835
3
+ corbell/cli/main.py,sha256=CP5EHizFLaBLF1EohgVo_-XFlm4VaO6peQaSnzyfxAI,1954
4
4
  corbell/cli/commands/__init__.py,sha256=0mAOs3RWC7XMZnGRN677hjPCHHQKDq9ASjIr_GQM3js,37
5
- corbell/cli/commands/index.py,sha256=pdtGr0dNy3OdxqMBwUjkhOXxxR0N966T3MqIGweZHQI,3021
5
+ corbell/cli/commands/debug.py,sha256=wdwveCeQSgcQbNg5-R5ekU_smEQKMq8WfH0obBbq3i8,10764
6
+ corbell/cli/commands/index.py,sha256=_nv5TC2O1xusX2gY8s2p00xPLN3wQrHEiFmc0EL6oHY,3432
6
7
  corbell/cli/commands/query.py,sha256=Sh-xnVj4n3zAI2hTxVyMTqFEPsq3vkWucfljnCEaGyU,2310
7
8
  corbell/core/__init__.py,sha256=VS9PnhHr4NXYlWs1TLCyllnVCNsiwVZ1Xj-AOBhZpAU,29
8
- corbell/core/constants.py,sha256=HTGYpShlp9pP2_a4WngHtTujUQfHcypFAYoaczmkBdQ,1061
9
- corbell/core/gitignore.py,sha256=VS7_s6NwZWQAwgLiaRzPHdBRIj86XdnPm_P_x_e0hvI,2266
10
- corbell/core/llm_client.py,sha256=2MDwe6kr_EyY3DFv3fNO91WCig8ER021ogzdLGH3IN8,26219
11
- corbell/core/workspace.py,sha256=NsfByxnqTbPeflXLBqXAkqVaQCQ9Qs9maUmxp2Y6n1k,14024
9
+ corbell/core/constants.py,sha256=P0fCJ0J5V2Nt348ZAVH1bHd9dFPJRLtpUyQhHPAl0_8,1203
10
+ corbell/core/gitignore.py,sha256=UO588tAxSVv7YEGNDjzdcBys_aqMIAhXrDgToRfcnzc,2347
11
+ corbell/core/llm_client.py,sha256=qGKuptxMAMDwqvhGAKVjppf2p-sX-auaA26WKo6Nlkk,26221
12
+ corbell/core/workspace.py,sha256=p24p_yJss7B3UPbv7Qx7XCUagJ2YKTrsBxDhFLCfqd4,14118
12
13
  corbell/core/embeddings/__init__.py,sha256=RCekvfNkFuMGEDLnls78i3znR84cTdnj4KJ_PeQrMNg,213
13
14
  corbell/core/embeddings/base.py,sha256=udPW4XmcPhCpNQA6n8KqMcu2JXvVNv1JjdRJmFq5ZRA,2175
14
- corbell/core/embeddings/extractor.py,sha256=hOolMX6JX3sVBf062h2zUQpr9SVt81S0hzhNCeJoV1I,7180
15
+ corbell/core/embeddings/extractor.py,sha256=2_BxRpsUcz-C-3HXjvlARqM3U5dzHRJcPR_hhPdMxSE,7314
15
16
  corbell/core/embeddings/factory.py,sha256=Lonjbk8Lsxykz-2ZEgFCWoH9zZ005Qm4dXVdA6P4qJY,1817
16
- corbell/core/embeddings/model.py,sha256=sKFjUYJ8-COth1CXjgX9Bn_oPcf1OSbbq04oSywMDSo,14128
17
+ corbell/core/embeddings/model.py,sha256=hU-SyW7YM9jGv9-_-bfxxOUh1ZZdc-8fpDK7o5j5s88,14289
17
18
  corbell/core/embeddings/search_cache.py,sha256=FHzO3mu4m4MJGy2jOFwb9GCEypcT11CcVrLts4Ib0ho,3351
18
- corbell/core/embeddings/sqlite_store.py,sha256=8rv89WOMqMm-JhJO36-FdRiC68Ija3TwHkrmRrPr1os,10158
19
+ corbell/core/embeddings/sqlite_store.py,sha256=99lHU_gPYwKw9BhUMS-XimQI8vDpBbBrIc_RkrsVdOM,11676
19
20
  corbell/core/graph/__init__.py,sha256=VaxDKeXMgMEBBMC0dglwj68A_aNYRI5O8VM6oMC1GIM,29
20
- corbell/core/graph/builder.py,sha256=_TjcKfOKObeJ3ScCMLZNHhtzmBYs1VtJEEp3UJLfoO0,32118
21
- corbell/core/graph/method_graph.py,sha256=x6X91Dz3DzNAuzld2f7ORkODt3qC5L1Fzg1bdAcIhK4,50851
21
+ corbell/core/graph/builder.py,sha256=dXUdAhuZ4t-wuW4dFZHz6k9-wBXdYkY6dysjQIkvl3Q,32214
22
+ corbell/core/graph/method_graph.py,sha256=fwmkSZXiGGYZIc2iC-6hbTrb26fAwielOrJBlqaz8Oc,57594
22
23
  corbell/core/graph/schema.py,sha256=swy1VZZpL88LPEj6zihl5bglQLrGD-ohOYjFeNC31a0,5253
23
24
  corbell/core/graph/sqlite_store.py,sha256=B1ObNit7MXbQpst6dpuloTcFAmUim_MoP3PSCATf_4A,21116
24
25
  corbell/core/graph/providers/__init__.py,sha256=__ZVe1uwIHSyFh_t-V4MyT5MsM5hooTOrxxkm9Txt7o,268
@@ -26,21 +27,21 @@ corbell/core/graph/providers/aws_patterns.py,sha256=w2iF5qQJcV7S6J64ZYb3IzGPdXjC
26
27
  corbell/core/graph/providers/azure_patterns.py,sha256=tJ9AQQXW2xYzJ36wNOxTHHhaivaCv3RYEMJUjw8WjeQ,3515
27
28
  corbell/core/graph/providers/gcp_patterns.py,sha256=vIofjanvRWGhFftuGdzt9YgTIGZRJz7lLG0abUNjFdA,2789
28
29
  corbell/core/indexing/__init__.py,sha256=VczeSHUfKR3YVowGCleFjo2pIpDHfl9kl-OkEl8szow,47
29
- corbell/core/indexing/builder.py,sha256=mxWdHqgAx6akO8vb8-tlshD4zTlmbRuR-TOt-jETDLs,23303
30
+ corbell/core/indexing/builder.py,sha256=apF-FFz_bZ6SeBEVVZzNXMavp9zuLVMVhg4598YJfMs,33333
30
31
  corbell/core/indexing/lock.py,sha256=uUMelIrtrp6Ww9rTfbl2OvomByc-IJyiHIMnptfA4xI,4743
31
- corbell/core/indexing/tracker.py,sha256=mbL1M-EeYf6KoIT5qoz7LCHwSHL6UlZNX7mjm4DczR0,8469
32
+ corbell/core/indexing/tracker.py,sha256=UCeKARiUMyZcg1yvbIZxibZUM2HOA-_6rNTkyPgpQhE,8571
32
33
  corbell/core/mcp/__init__.py,sha256=DDzfuVbX_GBTM5Nqy34JVgDUMeFd2_5ZcVMVuvjOddU,32
33
- corbell/core/mcp/server.py,sha256=nTiPQ9yyenL7uhgLCsGwEm7yyoqk1tUPTsZYFAAmPBU,7270
34
+ corbell/core/mcp/server.py,sha256=CmkqS2EYx4eRzquaJNdPPAx_G07_sJUaK1v_u_aXhTc,5380
34
35
  corbell/core/query/__init__.py,sha256=OCyVRZOyh_eLGhOxR_JYyH6zp8O7qy_-rC3fqGHm7Bc,56
35
- corbell/core/query/diagnostics.py,sha256=ObQyZWmMVRXEHFYGXBP2-EMBmM8SYr0H6cCi95uFnIk,1406
36
- corbell/core/query/engine.py,sha256=cEueZdZQcg_o5HaPaayE4hCGiCvyIxvv0OWnXWD2DzU,11855
36
+ corbell/core/query/diagnostics.py,sha256=o9uIAYFQy8hHua1xLMToSaQPP6xcmnvDJMY3fVg1Dhg,2102
37
+ corbell/core/query/engine.py,sha256=wqaZy-ACZQhLua9mlgad4boowDsRFW1TQtQpP5dTReU,17374
37
38
  corbell/core/query/enhancer.py,sha256=w5mvm1B8qQZpL6RVhMuhq_rls77hakGSNUyanfkyNEU,3934
38
39
  corbell/core/query/formatter.py,sha256=xMr8HE-oxBSEKb514aixY7aoUWGeYoK1w5wnaIlCYEc,2813
39
40
  corbell/core/query/graph_expander.py,sha256=Y-yKnr6db-OM2Gh8ukYgVIcUZa6-wfWA-GhdvOwf_yA,9184
40
41
  corbell/core/query/merger.py,sha256=fs6PL7X7EweXnSnDRnpzmpaU8JjwJpL0akzm4hSwLJk,6168
41
- corbell/core/query/reranker.py,sha256=HYckYiUVZ80mbLGHhK4IHxNI7uUqNaztwXLbYgdnoWU,4298
42
- codebase_retrieval_context_engine-2.0.2.dist-info/METADATA,sha256=20ALXtYeqFP5ZR_j0hsKhZpI1YAxRvLFm2CzM8BRHgQ,17304
43
- codebase_retrieval_context_engine-2.0.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
44
- codebase_retrieval_context_engine-2.0.2.dist-info/entry_points.txt,sha256=vFB4a4Qb7Ty182usK8deJXiis0UYnGIUDusw0V3Jya8,115
45
- codebase_retrieval_context_engine-2.0.2.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
46
- codebase_retrieval_context_engine-2.0.2.dist-info/RECORD,,
42
+ corbell/core/query/reranker.py,sha256=0M8Km2WEO3NX46gT0mF7ma9e0v_HOYXu-t6WgF5U2tI,7262
43
+ codebase_retrieval_context_engine-2.0.3.dist-info/METADATA,sha256=yb84Ich965QFp98h1XcO_uk9uhRn-OaYGG8zsZtEWp0,4089
44
+ codebase_retrieval_context_engine-2.0.3.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
45
+ codebase_retrieval_context_engine-2.0.3.dist-info/entry_points.txt,sha256=vFB4a4Qb7Ty182usK8deJXiis0UYnGIUDusw0V3Jya8,115
46
+ codebase_retrieval_context_engine-2.0.3.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
47
+ codebase_retrieval_context_engine-2.0.3.dist-info/RECORD,,
corbell/__init__.py CHANGED
@@ -2,5 +2,5 @@
2
2
  Corbell — Code retrieval engine for LLM context injection.
3
3
  """
4
4
 
5
- __version__ = "2.0.2"
5
+ __version__ = "2.0.3"
6
6
  __all__ = ["__version__"]
@@ -0,0 +1,305 @@
1
+ """CLI: corbell debug — launch a Gradio UI for inspecting query pipeline internals."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ app = typer.Typer(no_args_is_help=False, help="Query debug UI commands.")
12
+ console = Console()
13
+
14
+
15
def _parse_env_overrides(env_vars_text: str) -> dict[str, str]:
    """Parse ``KEY=VALUE`` lines into a mapping of environment overrides.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an
    empty variable name are ignored. Keys and values are stripped of
    surrounding whitespace. When a key appears more than once, the last
    value wins.
    """
    overrides: dict[str, str] = {}
    for raw_line in env_vars_text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        if not key:
            # "=value" names no variable; assigning it to os.environ would raise.
            continue
        overrides[key] = value.strip()
    return overrides


def _format_timing_markdown(timing):  # type: ignore[no-untyped-def]
    """Render a phase -> elapsed-seconds mapping as a Markdown table."""
    if not timing:
        return "_No timing data available._"
    rows = "".join(
        f"| {phase} | {elapsed:.3f}s |\n"
        for phase, elapsed in timing.items()
    )
    return "| Phase | Elapsed |\n|---|---|\n" + rows


def _chunk_source(chunk_id, graph_ids):  # type: ignore[no-untyped-def]
    """Classify a chunk as ``embedding``, ``graph`` or ``embedding+graph``.

    ``chunk_id`` may be several ids joined with ``+``; each part is looked up
    in ``graph_ids``. With no graph ids at all, every chunk is ``embedding``.
    """
    parts = chunk_id.split("+") if chunk_id else []
    has_graph = any(p in graph_ids for p in parts) if graph_ids else False
    has_embedding = any(p not in graph_ids for p in parts) if graph_ids else True
    if has_graph and has_embedding and len(parts) > 1:
        return "embedding+graph"
    if has_graph:
        return "graph"
    return "embedding"


# NOTE: the callback docstring below is kept verbatim because Typer can surface
# it as CLI help text; implementation notes therefore live in comments.
@app.callback(invoke_without_command=True)
def debug(
    ctx: typer.Context,
    workspace: str = typer.Option(
        "",
        "--workspace",
        "-w",
        help="Path to the workspace root (default: current directory).",
    ),
    port: int = typer.Option(7860, "--port", "-p", help="Port for the Gradio server."),
    share: bool = typer.Option(False, "--share", help="Create a public Gradio share link."),
) -> None:
    """Launch the Gradio debug UI for inspecting the query pipeline.

    The UI lets you run a query against a workspace and inspect:
    - Per-phase timing
    - Final formatted results
    - Pre-rerank chunk table (file, lines, score, symbol, type)
    - LLM rerank prompts and raw response
    """
    # When a subcommand was invoked, the callback must not start the UI itself.
    if ctx.invoked_subcommand is not None:
        return

    # Gradio is an optional extra; import lazily so the rest of the CLI works
    # without it and the user gets an actionable install hint.
    try:
        import gradio as gr  # type: ignore[import-untyped]
    except ImportError:
        console.print(
            "[red]Gradio is not installed. Install it with:[/red]\n"
            "  pip install 'codebase-retrieval-context-engine[debug]'"
        )
        raise typer.Exit(1)

    # Resolution order: --workspace flag, CORBELL_WORKSPACE, current directory.
    default_workspace = workspace or os.environ.get("CORBELL_WORKSPACE") or str(Path.cwd())

    def run_mcp_tool(
        env_vars_text: str,
        mcp_workspace: str,
        mcp_query: str,
    ) -> tuple[str, str]:
        """Invoke context_engine_codebase_retrieval directly and return results.

        Returns an ``(error_text, tool_response)`` pair; exactly one of the two
        is non-empty unless the query is blank, in which case both are empty.
        """
        if not mcp_query.strip():
            return "", ""

        overrides = _parse_env_overrides(env_vars_text)
        # Snapshot the originals BEFORE touching os.environ so that a key listed
        # more than once can never overwrite its own backup.
        env_backup: dict[str, str | None] = {
            key: os.environ.get(key) for key in overrides
        }

        try:
            # Applied inside the try so a failing assignment is reported in the
            # error box and the finally-block still restores the environment.
            os.environ.update(overrides)

            from corbell.core.mcp.server import context_engine_codebase_retrieval

            result = context_engine_codebase_retrieval(
                query=mcp_query.strip(),
                workspace_full_path=mcp_workspace.strip(),
            )

            if result.startswith("Error:"):
                return result, ""
            return "", result
        except Exception as exc:
            # UI boundary: show the failure in the error box instead of crashing.
            return f"Error: {exc}", ""
        finally:
            for key, original in env_backup.items():
                if original is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = original

    def run_query(
        workspace_path: str, query: str
    ) -> tuple[str, str, str, list[list[str]], str, str, str]:
        """Run the debug pipeline and return Gradio component values.

        The tuple order matches the ``outputs`` list wired to ``run_btn``:
        error, timing table, final results, pre-rerank rows, rerank system
        prompt, rerank user prompt, raw rerank response.
        """
        from corbell.core.query.engine import codebase_retrieval_debug

        if not query.strip():
            return "", "", "", [], "", "", ""

        ws = workspace_path.strip() or default_workspace
        result = codebase_retrieval_debug(query=query, workspace_path=ws)

        # --- Error banner ---
        error_text = result.error or ""

        # --- Timing table ---
        timing = result.diagnostics.timing if result.diagnostics else {}
        timing_md = _format_timing_markdown(timing)

        # --- Final results ---
        final_results = result.final_output or ""

        # --- Pre-rerank table ---
        graph_ids = set()
        if result.diagnostics and result.diagnostics.graph_chunk_ids:
            graph_ids = result.diagnostics.graph_chunk_ids
        pre_rerank_rows = [
            [
                getattr(chunk, "file_path", ""),
                f"{getattr(chunk, 'start_line', '')}-{getattr(chunk, 'end_line', '')}",
                f"{getattr(chunk, 'score', 0.0):.4f}",
                getattr(chunk, "symbol", "") or "",
                getattr(chunk, "chunk_type", "") or "",
                _chunk_source(getattr(chunk, "chunk_id", ""), graph_ids),
                getattr(chunk, "content", "") or "",
            ]
            for chunk in result.pre_rerank_chunks
        ]

        # --- Rerank prompts ---
        detail = result.rerank_detail
        if detail is None or not detail.system_prompt:
            rerank_system = "_LLM not configured — reranking skipped_"
            rerank_user = ""
            rerank_response = ""
        else:
            rerank_system = detail.system_prompt
            rerank_user = detail.user_prompt
            rerank_response = detail.raw_response or "_No response (LLM call failed)_"

        return (
            error_text,
            timing_md,
            final_results,
            pre_rerank_rows,
            rerank_system,
            rerank_user,
            rerank_response,
        )

    with gr.Blocks(title="Corbell Query Debugger") as demo:
        gr.Markdown("# Corbell Query Debugger")
        gr.Markdown("Inspect query pipeline internals: timing, pre-rerank chunks, and LLM rerank prompts.")

        with gr.Row():
            workspace_input = gr.Textbox(
                label="Workspace Path",
                value=default_workspace,
                placeholder="Path to repository root",
                scale=2,
            )
            query_input = gr.Textbox(
                label="Query",
                placeholder="e.g. authentication middleware",
                scale=3,
            )

        run_btn = gr.Button("Run Query", variant="primary")

        error_box = gr.Textbox(
            label="Error",
            visible=True,
            interactive=False,
            lines=2,
        )

        timing_md = gr.Markdown(label="Timing")

        with gr.Tabs():
            with gr.Tab("Final Results"):
                final_output = gr.Code(label="Formatted Output", language=None)

            with gr.Tab("Pre-Rerank Chunks"):
                pre_rerank_table = gr.Dataframe(
                    headers=["File", "Lines", "Score", "Symbol", "Type", "Source", "Content"],
                    datatype=["str", "str", "str", "str", "str", "str", "str"],
                    label="Chunks before reranking",
                    wrap=False,
                )

            with gr.Tab("LLM Rerank"):
                rerank_system_box = gr.Textbox(
                    label="System Prompt",
                    lines=6,
                    interactive=False,
                )
                rerank_user_box = gr.Textbox(
                    label="User Prompt",
                    lines=12,
                    interactive=False,
                )
                rerank_response_box = gr.Textbox(
                    label="Raw LLM Response",
                    lines=4,
                    interactive=False,
                )

            with gr.Tab("MCP Debug"):
                gr.Markdown(
                    "### MCP Tool Tester\n"
                    "Configure environment and invoke "
                    "`context_engine_codebase_retrieval` directly."
                )

                with gr.Accordion("Environment Configuration", open=False):
                    mcp_env_vars = gr.Textbox(
                        label="Environment Variables (one per line, KEY=VALUE)",
                        placeholder=(
                            "# Example:\n"
                            "CORBELL_LLM_PROVIDER=anthropic\n"
                            "CORBELL_RERANK=true\n"
                            "ANTHROPIC_API_KEY=sk-..."
                        ),
                        lines=6,
                    )

                gr.Markdown("#### Tool Parameters")
                with gr.Row():
                    mcp_workspace_input = gr.Textbox(
                        label="workspace_full_path",
                        value=default_workspace,
                        placeholder="Path to repository root",
                        scale=3,
                    )
                    mcp_query_input = gr.Textbox(
                        label="query",
                        placeholder="e.g. authentication middleware",
                    )

                mcp_run_btn = gr.Button("Invoke MCP Tool", variant="primary")

                mcp_error_box = gr.Textbox(
                    label="Error",
                    visible=True,
                    interactive=False,
                    lines=2,
                )
                mcp_result_box = gr.Code(
                    label="Tool Response",
                    language=None,
                )

        # Event wiring must happen inside the Blocks context.
        run_btn.click(
            fn=run_query,
            inputs=[workspace_input, query_input],
            outputs=[
                error_box,
                timing_md,
                final_output,
                pre_rerank_table,
                rerank_system_box,
                rerank_user_box,
                rerank_response_box,
            ],
        )

        mcp_run_btn.click(
            fn=run_mcp_tool,
            inputs=[
                mcp_env_vars,
                mcp_workspace_input,
                mcp_query_input,
            ],
            outputs=[mcp_error_box, mcp_result_box],
        )

    console.print(f"[green]Starting Corbell debug UI on port {port}...[/green]")
    demo.launch(server_port=port, share=share)
@@ -2,6 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import logging
5
6
  import os
6
7
  from pathlib import Path
7
8
  from typing import Optional
@@ -26,6 +27,9 @@ def build(
26
27
  repo: Optional[str] = typer.Option(
27
28
  None, "--repo", help="Only index a specific repo by ID."
28
29
  ),
30
+ verbose: bool = typer.Option(
31
+ False, "--verbose", "-v", help="Enable detailed performance logging."
32
+ ),
29
33
  ) -> None:
30
34
  """Build (or incrementally update) the code search index.
31
35
 
@@ -39,6 +43,15 @@ def build(
39
43
  2. CORBELL_WORKSPACE environment variable
40
44
  3. Current working directory
41
45
  """
46
+ if verbose or os.environ.get("CORBELL_VERBOSE", ""):
47
+ logging.basicConfig(
48
+ level=logging.INFO,
49
+ format="%(asctime)s %(name)s %(message)s",
50
+ datefmt="%H:%M:%S",
51
+ )
52
+ else:
53
+ logging.basicConfig(level=logging.WARNING)
54
+
42
55
  from corbell.core.workspace import build_config, db_path_for_workspace
43
56
 
44
57
  # Resolve workspace path: flag → env var → cwd
corbell/cli/main.py CHANGED
@@ -8,6 +8,7 @@ import typer
8
8
  from dotenv import load_dotenv
9
9
  from rich.console import Console
10
10
 
11
+ from corbell.cli.commands.debug import app as debug_app
11
12
  from corbell.cli.commands.index import app as index_app
12
13
  from corbell.cli.commands.query import app as query_app
13
14
 
@@ -32,6 +33,7 @@ console = Console()
32
33
 
33
34
  app.add_typer(index_app, name="index", help="Code index commands.")
34
35
  app.add_typer(query_app, name="query", help="Code search commands.")
36
+ app.add_typer(debug_app, name="debug", help="Query debug UI.")
35
37
 
36
38
 
37
39
  # ---------------------------------------------------------------------------
corbell/core/constants.py CHANGED
@@ -45,6 +45,14 @@ EXTENSION_LANG: dict[str, str] = {
45
45
  ".php": "php",
46
46
  ".cs": "csharp",
47
47
  ".rs": "rust",
48
+ ".c": "c",
49
+ ".cc": "cpp",
50
+ ".cpp": "cpp",
51
+ ".cxx": "cpp",
52
+ ".h": "c",
53
+ ".hh": "cpp",
54
+ ".hpp": "cpp",
55
+ ".hxx": "cpp",
48
56
  ".md": "markdown",
49
57
  ".yml": "yaml",
50
58
  ".yaml": "yaml",
@@ -83,7 +83,10 @@ class CodeChunkExtractor:
83
83
  lang = _SUPPORTED.get(fp.suffix)
84
84
  if not lang:
85
85
  continue
86
- rel = str(fp.relative_to(repo_path))
86
+ rel_path = fp.relative_to(repo_path)
87
+ if any(part.startswith(".") for part in rel_path.parts[:-1]):
88
+ continue
89
+ rel = str(rel_path)
87
90
  if gitignore_spec.match_file(rel.replace("\\", "/")):
88
91
  continue
89
92
  chunks = self._extract_file(fp, rel, lang, service_id, str(repo_path))
@@ -339,12 +339,14 @@ class VoyageEmbeddingModel(EmbeddingModel):
339
339
  key = self._api_keys[idx]
340
340
  try:
341
341
  vo = voyageai.Client(api_key=key)
342
- result = vo.embed(
343
- batch,
344
- model=self.model_name,
345
- input_type=input_type,
346
- output_dimension=self.dimension,
347
- )
342
+ kwargs: dict = {
343
+ "model": self.model_name,
344
+ "input_type": input_type,
345
+ }
346
+ import inspect
347
+ if "output_dimension" in inspect.signature(vo.embed).parameters:
348
+ kwargs["output_dimension"] = self.dimension
349
+ result = vo.embed(batch, **kwargs)
348
350
  self._key_index = (idx + 1) % len(self._api_keys)
349
351
  return result.embeddings
350
352
  except Exception as e: