codebase-retrieval-context-engine 2.0.2__tar.gz → 2.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/.env.example +121 -104
- codebase_retrieval_context_engine-2.0.4/PKG-INFO +95 -0
- codebase_retrieval_context_engine-2.0.4/README.md +35 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/__init__.py +1 -1
- codebase_retrieval_context_engine-2.0.4/corbell/cli/commands/debug.py +305 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/commands/index.py +13 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/main.py +2 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/constants.py +8 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/extractor.py +4 -1
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/model.py +8 -6
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/sqlite_store.py +71 -26
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/gitignore.py +2 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/builder.py +2 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/method_graph.py +194 -15
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/indexing/builder.py +257 -25
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/indexing/tracker.py +2 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/llm_client.py +1 -1
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/mcp/server.py +3 -54
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/diagnostics.py +18 -1
- codebase_retrieval_context_engine-2.0.4/corbell/core/query/engine.py +472 -0
- codebase_retrieval_context_engine-2.0.4/corbell/core/query/reranker.py +207 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/workspace.py +2 -1
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/pyproject.toml +4 -3
- codebase_retrieval_context_engine-2.0.4/tests/test_reranker.py +243 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/uv.lock +418 -6
- codebase_retrieval_context_engine-2.0.2/PKG-INFO +0 -503
- codebase_retrieval_context_engine-2.0.2/corbell/core/query/engine.py +0 -326
- codebase_retrieval_context_engine-2.0.2/corbell/core/query/reranker.py +0 -131
- codebase_retrieval_context_engine-2.0.2/tests/test_reranker.py +0 -147
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/.github/workflows/ci.yml +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/.gitignore +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/CONTRIBUTING.md +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/LICENSE +0 -0
- /codebase_retrieval_context_engine-2.0.2/README.md → /codebase_retrieval_context_engine-2.0.4/README.backup.md +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/assets/corbell_ui.png +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/assets/logo.png +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/assets/mermaid_diagram.png +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/assets/star_history.png +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/commands/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/commands/query.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/base.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/factory.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/search_cache.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/providers/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/providers/aws_patterns.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/providers/azure_patterns.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/providers/gcp_patterns.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/schema.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/sqlite_store.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/indexing/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/indexing/lock.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/mcp/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/enhancer.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/formatter.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/graph_expander.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/merger.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/graph.json +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/requirements.txt +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/test_regex.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/__init__.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/conftest.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_builder.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_embeddings.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_graph_expander.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_graph_sqlite_store.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_llm_client.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_mcp.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_merger.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_method_graph_improvements.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_new_language_support.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_query_engine.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_search_cache.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_tracker.py +0 -0
- {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_workspace.py +0 -0
{codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/.env.example
RENAMED
|
@@ -1,104 +1,121 @@
|
|
|
1
|
-
# ============================================================
|
|
2
|
-
# Corbell — Environment Variables
|
|
3
|
-
# Copy this file to .env and fill in the values you need.
|
|
4
|
-
# ============================================================
|
|
5
|
-
|
|
6
|
-
# ----------------------------------------------------------
|
|
7
|
-
# LLM Provider API Keys (pick one or more)
|
|
8
|
-
# ----------------------------------------------------------
|
|
9
|
-
ANTHROPIC_API_KEY=
|
|
10
|
-
OPENAI_API_KEY=
|
|
11
|
-
GOOGLE_API_KEY=
|
|
12
|
-
# Multiple Google keys for round-robin: GOOGLE_API_KEY=key1,key2,key3
|
|
13
|
-
VOYAGE_API_KEY=
|
|
14
|
-
# Multiple Voyage keys for round-robin: VOYAGE_API_KEY=pa-key1,pa-key2,pa-key3
|
|
15
|
-
|
|
16
|
-
# AWS Bedrock
|
|
17
|
-
BEDROCK_API_KEY=
|
|
18
|
-
AWS_ACCESS_KEY_ID=
|
|
19
|
-
AWS_SECRET_ACCESS_KEY=
|
|
20
|
-
AWS_REGION=us-east-1
|
|
21
|
-
|
|
22
|
-
# Azure OpenAI
|
|
23
|
-
AZURE_OPENAI_API_KEY=
|
|
24
|
-
AZURE_OPENAI_ENDPOINT=
|
|
25
|
-
AZURE_OPENAI_DEPLOYMENT=
|
|
26
|
-
AZURE_OPENAI_API_VERSION=2024-02-01
|
|
27
|
-
|
|
28
|
-
# GCP Vertex AI
|
|
29
|
-
GOOGLE_APPLICATION_CREDENTIALS=
|
|
30
|
-
GCP_PROJECT=
|
|
31
|
-
GOOGLE_CLOUD_PROJECT=
|
|
32
|
-
GCP_REGION=us-central1
|
|
33
|
-
|
|
34
|
-
# Generic fallback key (used when provider-specific key is not set)
|
|
35
|
-
CORBELL_LLM_API_KEY=
|
|
36
|
-
|
|
37
|
-
# ----------------------------------------------------------
|
|
38
|
-
# Model Overrides
|
|
39
|
-
# ----------------------------------------------------------
|
|
40
|
-
|
|
41
|
-
# Embedding model — cloud only (default: voyage-code-3)
|
|
42
|
-
# Supported: voyage-code-3, voyage-4-lite (VOYAGE_API_KEY), gemini-embedding-001 (GOOGLE_API_KEY)
|
|
43
|
-
CORBELL_EMBEDDING_MODEL=
|
|
44
|
-
|
|
45
|
-
# Embedding dimension override — overrides the model's default dimension.
|
|
46
|
-
# Voyage default: 1024, Google (gemini-embedding-001) default: 768.
|
|
47
|
-
CORBELL_EMBEDDING_DIM=
|
|
48
|
-
|
|
49
|
-
# LLM model — generic (overrides llm.model default)
|
|
50
|
-
CORBELL_LLM_MODEL=
|
|
51
|
-
|
|
52
|
-
# LLM
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
#
|
|
66
|
-
#
|
|
67
|
-
#
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
#
|
|
71
|
-
#
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
#
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
#
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
#
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
#
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
#
|
|
104
|
-
#
|
|
1
|
+
# ============================================================
|
|
2
|
+
# Corbell — Environment Variables
|
|
3
|
+
# Copy this file to .env and fill in the values you need.
|
|
4
|
+
# ============================================================
|
|
5
|
+
|
|
6
|
+
# ----------------------------------------------------------
|
|
7
|
+
# LLM Provider API Keys (pick one or more)
|
|
8
|
+
# ----------------------------------------------------------
|
|
9
|
+
ANTHROPIC_API_KEY=
|
|
10
|
+
OPENAI_API_KEY=
|
|
11
|
+
GOOGLE_API_KEY=
|
|
12
|
+
# Multiple Google keys for round-robin: GOOGLE_API_KEY=key1,key2,key3
|
|
13
|
+
VOYAGE_API_KEY=
|
|
14
|
+
# Multiple Voyage keys for round-robin: VOYAGE_API_KEY=pa-key1,pa-key2,pa-key3
|
|
15
|
+
|
|
16
|
+
# AWS Bedrock
|
|
17
|
+
BEDROCK_API_KEY=
|
|
18
|
+
AWS_ACCESS_KEY_ID=
|
|
19
|
+
AWS_SECRET_ACCESS_KEY=
|
|
20
|
+
AWS_REGION=us-east-1
|
|
21
|
+
|
|
22
|
+
# Azure OpenAI
|
|
23
|
+
AZURE_OPENAI_API_KEY=
|
|
24
|
+
AZURE_OPENAI_ENDPOINT=
|
|
25
|
+
AZURE_OPENAI_DEPLOYMENT=
|
|
26
|
+
AZURE_OPENAI_API_VERSION=2024-02-01
|
|
27
|
+
|
|
28
|
+
# GCP Vertex AI
|
|
29
|
+
GOOGLE_APPLICATION_CREDENTIALS=
|
|
30
|
+
GCP_PROJECT=
|
|
31
|
+
GOOGLE_CLOUD_PROJECT=
|
|
32
|
+
GCP_REGION=us-central1
|
|
33
|
+
|
|
34
|
+
# Generic fallback key (used when provider-specific key is not set)
|
|
35
|
+
CORBELL_LLM_API_KEY=
|
|
36
|
+
|
|
37
|
+
# ----------------------------------------------------------
|
|
38
|
+
# Model Overrides
|
|
39
|
+
# ----------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
# Embedding model — cloud only (default: voyage-code-3)
|
|
42
|
+
# Supported: voyage-code-3, voyage-4-lite (VOYAGE_API_KEY), gemini-embedding-001 (GOOGLE_API_KEY)
|
|
43
|
+
CORBELL_EMBEDDING_MODEL=
|
|
44
|
+
|
|
45
|
+
# Embedding dimension override — overrides the model's default dimension.
|
|
46
|
+
# Voyage default: 1024, Google (gemini-embedding-001) default: 768.
|
|
47
|
+
CORBELL_EMBEDDING_DIM=
|
|
48
|
+
|
|
49
|
+
# LLM model — generic (overrides llm.model default)
|
|
50
|
+
CORBELL_LLM_MODEL=
|
|
51
|
+
|
|
52
|
+
# LLM provider — which API to use for reranking
|
|
53
|
+
# Options: anthropic, openai, google, ollama, aws, azure, gcp (default: anthropic)
|
|
54
|
+
CORBELL_LLM_PROVIDER=
|
|
55
|
+
|
|
56
|
+
# LLM model — provider-specific (takes priority over CORBELL_LLM_MODEL)
|
|
57
|
+
ANTHROPIC_MODEL=
|
|
58
|
+
OPENAI_MODEL=
|
|
59
|
+
GOOGLE_MODEL=
|
|
60
|
+
OLLAMA_MODEL=
|
|
61
|
+
AWS_MODEL=
|
|
62
|
+
AZURE_MODEL=
|
|
63
|
+
GCP_MODEL=
|
|
64
|
+
|
|
65
|
+
# ----------------------------------------------------------
|
|
66
|
+
# Workspace Path
|
|
67
|
+
# ----------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
# Full path to the workspace (repository) root directory.
|
|
70
|
+
# Used as fallback when --workspace-full-path is not passed to CLI.
|
|
71
|
+
# Resolution order: --workspace-full-path flag → CORBELL_WORKSPACE → cwd
|
|
72
|
+
CORBELL_WORKSPACE=
|
|
73
|
+
|
|
74
|
+
# ----------------------------------------------------------
|
|
75
|
+
# Query Configuration (all optional, sensible defaults shown)
|
|
76
|
+
# ----------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
# Maximum number of code chunks returned per query (default: 50)
|
|
79
|
+
CORBELL_TOP_K=
|
|
80
|
+
|
|
81
|
+
# Enable LLM reranking of results (default: true)
|
|
82
|
+
CORBELL_RERANK=
|
|
83
|
+
|
|
84
|
+
# BFS depth for call-graph expansion (default: 2)
|
|
85
|
+
CORBELL_EXPAND_CALL_DEPTH=
|
|
86
|
+
|
|
87
|
+
# Maximum chunks added via graph expansion (default: 30)
|
|
88
|
+
CORBELL_EXPAND_MAX_CHUNKS=
|
|
89
|
+
|
|
90
|
+
# ----------------------------------------------------------
|
|
91
|
+
# Indexing Configuration (all optional, sensible defaults shown)
|
|
92
|
+
# ----------------------------------------------------------
|
|
93
|
+
|
|
94
|
+
# Number of lines per embedding chunk (default: 50)
|
|
95
|
+
CORBELL_CHUNK_SIZE=
|
|
96
|
+
|
|
97
|
+
# Overlap between consecutive chunks in lines (default: 10)
|
|
98
|
+
CORBELL_CHUNK_OVERLAP=
|
|
99
|
+
|
|
100
|
+
# Maximum file size to index in bytes (default: 1048576 = 1 MB)
|
|
101
|
+
CORBELL_MAX_FILE_BYTES=
|
|
102
|
+
|
|
103
|
+
# Comma-separated list of directory names to skip during indexing
|
|
104
|
+
# (in addition to built-in skip list: .git, node_modules, __pycache__, etc.)
|
|
105
|
+
CORBELL_SKIP_DIRS=
|
|
106
|
+
|
|
107
|
+
# Number of parallel workers for indexing (default: min(cpu_count, 8))
|
|
108
|
+
# CORBELL_INDEX_WORKERS=4
|
|
109
|
+
|
|
110
|
+
# Number of concurrent embedding API threads per encode call (default: provider-aware)
|
|
111
|
+
# VoyageEmbeddingModel default: 30, GoogleEmbeddingModel default: 8, other: 4
|
|
112
|
+
CORBELL_EMBED_CONCURRENCY=
|
|
113
|
+
|
|
114
|
+
# Super-batch size: number of chunks encoded and written per streaming slice (default: 6000)
|
|
115
|
+
# Sized to saturate the concurrent thread pool. Lower values reduce peak memory
|
|
116
|
+
# at the cost of throughput; higher values may exceed API rate limits.
|
|
117
|
+
CORBELL_EMBED_BATCH=
|
|
118
|
+
|
|
119
|
+
# Enable verbose performance logging during index builds (any non-empty value)
|
|
120
|
+
# Also available as --verbose / -v flag on `corbell index build`
|
|
121
|
+
CORBELL_VERBOSE=
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codebase-retrieval-context-engine
|
|
3
|
+
Version: 2.0.4
|
|
4
|
+
Summary: Code retrieval engine — hybrid embedding + graph search for LLM context injection.
|
|
5
|
+
Project-URL: Homepage, https://github.com/nullmastermind/local-context-engine
|
|
6
|
+
Project-URL: Repository, https://github.com/nullmastermind/local-context-engine
|
|
7
|
+
Project-URL: Issues, https://github.com/nullmastermind/local-context-engine/issues
|
|
8
|
+
Author: nullmastermind
|
|
9
|
+
License: Apache-2.0
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: LLM,MCP,code-search,codebase-retrieval,context,embeddings,retrieval
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Requires-Dist: google-genai>=2.7.0
|
|
19
|
+
Requires-Dist: mcp>=1.1.2
|
|
20
|
+
Requires-Dist: numpy>=2.0
|
|
21
|
+
Requires-Dist: pathspec>=0.11
|
|
22
|
+
Requires-Dist: pydantic>=2.0
|
|
23
|
+
Requires-Dist: python-dotenv>=1.0
|
|
24
|
+
Requires-Dist: rich>=13.0
|
|
25
|
+
Requires-Dist: typer>=0.12
|
|
26
|
+
Requires-Dist: voyageai>=0.3
|
|
27
|
+
Provides-Extra: anthropic
|
|
28
|
+
Requires-Dist: anthropic>=0.25; extra == 'anthropic'
|
|
29
|
+
Provides-Extra: aws
|
|
30
|
+
Requires-Dist: boto3>=1.34; extra == 'aws'
|
|
31
|
+
Provides-Extra: azure
|
|
32
|
+
Requires-Dist: openai>=1.0; extra == 'azure'
|
|
33
|
+
Provides-Extra: debug
|
|
34
|
+
Requires-Dist: gradio>=4.0; extra == 'debug'
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: httpx; extra == 'dev'
|
|
37
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
40
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: respx; extra == 'dev'
|
|
42
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
43
|
+
Provides-Extra: gcp
|
|
44
|
+
Requires-Dist: anthropic[vertex]>=0.25; extra == 'gcp'
|
|
45
|
+
Requires-Dist: google-cloud-aiplatform>=1.38; extra == 'gcp'
|
|
46
|
+
Provides-Extra: openai
|
|
47
|
+
Requires-Dist: openai>=1.0; extra == 'openai'
|
|
48
|
+
Provides-Extra: treesitter
|
|
49
|
+
Requires-Dist: tree-sitter-c-sharp>=0.21; extra == 'treesitter'
|
|
50
|
+
Requires-Dist: tree-sitter-go>=0.21; extra == 'treesitter'
|
|
51
|
+
Requires-Dist: tree-sitter-java>=0.21; extra == 'treesitter'
|
|
52
|
+
Requires-Dist: tree-sitter-javascript>=0.21; extra == 'treesitter'
|
|
53
|
+
Requires-Dist: tree-sitter-php>=0.21; extra == 'treesitter'
|
|
54
|
+
Requires-Dist: tree-sitter-python>=0.21; extra == 'treesitter'
|
|
55
|
+
Requires-Dist: tree-sitter-ruby>=0.21; extra == 'treesitter'
|
|
56
|
+
Requires-Dist: tree-sitter-rust>=0.21; extra == 'treesitter'
|
|
57
|
+
Requires-Dist: tree-sitter-typescript>=0.21; extra == 'treesitter'
|
|
58
|
+
Requires-Dist: tree-sitter>=0.21; extra == 'treesitter'
|
|
59
|
+
Description-Content-Type: text/markdown
|
|
60
|
+
|
|
61
|
+
<div align="center">
|
|
62
|
+
<h1>codebase-retrieval-context-engine</h1>
|
|
63
|
+
<p><strong>Code retrieval engine for LLM context via MCP.</strong></p>
|
|
64
|
+
<p>
|
|
65
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License"/></a>
|
|
66
|
+
</p>
|
|
67
|
+
</div>
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Add to Claude Code
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
claude mcp add codebase-retrieval -e CORBELL_LLM_PROVIDER=google -e GOOGLE_API_KEY=your-google-api-key -e GOOGLE_MODEL=gemini-3.1-flash-lite -e CORBELL_EMBEDDING_MODEL=voyage-4-lite -e VOYAGE_API_KEY=your-voyage-api-key -- uvx codebase-retrieval-context-engine
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
That's it. The AI agent passes the workspace path and triggers index builds automatically.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Environment variables
|
|
82
|
+
|
|
83
|
+
| Variable | Description |
|
|
84
|
+
|---|---|
|
|
85
|
+
| `CORBELL_LLM_PROVIDER` | LLM provider for reranking (`google`, `anthropic`, `openai`) |
|
|
86
|
+
| `GOOGLE_API_KEY` | Google AI API key (supports multiple: `key1,key2,key3`) |
|
|
87
|
+
| `GOOGLE_MODEL` | e.g. `gemini-3.1-flash-lite` |
|
|
88
|
+
| `CORBELL_EMBEDDING_MODEL` | `voyage-4-lite`, `voyage-code-3`, or `gemini-embedding-001` |
|
|
89
|
+
| `VOYAGE_API_KEY` | Voyage AI API key (supports multiple: `key1,key2,key3`). Add a card to billing to unlock rate limits. |
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
|
|
95
|
+
Apache 2.0
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<h1>codebase-retrieval-context-engine</h1>
|
|
3
|
+
<p><strong>Code retrieval engine for LLM context via MCP.</strong></p>
|
|
4
|
+
<p>
|
|
5
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License"/></a>
|
|
6
|
+
</p>
|
|
7
|
+
</div>
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Add to Claude Code
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
claude mcp add codebase-retrieval -e CORBELL_LLM_PROVIDER=google -e GOOGLE_API_KEY=your-google-api-key -e GOOGLE_MODEL=gemini-3.1-flash-lite -e CORBELL_EMBEDDING_MODEL=voyage-4-lite -e VOYAGE_API_KEY=your-voyage-api-key -- uvx codebase-retrieval-context-engine
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
That's it. The AI agent passes the workspace path and triggers index builds automatically.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Environment variables
|
|
22
|
+
|
|
23
|
+
| Variable | Description |
|
|
24
|
+
|---|---|
|
|
25
|
+
| `CORBELL_LLM_PROVIDER` | LLM provider for reranking (`google`, `anthropic`, `openai`) |
|
|
26
|
+
| `GOOGLE_API_KEY` | Google AI API key (supports multiple: `key1,key2,key3`) |
|
|
27
|
+
| `GOOGLE_MODEL` | e.g. `gemini-3.1-flash-lite` |
|
|
28
|
+
| `CORBELL_EMBEDDING_MODEL` | `voyage-4-lite`, `voyage-code-3`, or `gemini-embedding-001` |
|
|
29
|
+
| `VOYAGE_API_KEY` | Voyage AI API key (supports multiple: `key1,key2,key3`). Add a card to billing to unlock rate limits. |
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## License
|
|
34
|
+
|
|
35
|
+
Apache 2.0
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""CLI: corbell debug — launch a Gradio UI for inspecting query pipeline internals."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
app = typer.Typer(no_args_is_help=False, help="Query debug UI commands.")
|
|
12
|
+
console = Console()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_env_overrides(env_vars_text: str) -> dict[str, str]:
    """Parse ``KEY=VALUE`` lines into a mapping of environment overrides.

    Blank lines, lines starting with ``#``, lines without ``=`` and lines
    whose key is empty are ignored. Keys and values are stripped of
    surrounding whitespace. When a key is repeated, the last value wins.
    """
    overrides: dict[str, str] = {}
    for raw_line in env_vars_text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        if not key:
            # A line such as "=value" has no variable name to assign to.
            continue
        overrides[key] = value.strip()
    return overrides


def _format_timing_table(timing) -> str:  # type: ignore[no-untyped-def]
    """Render a ``{phase: seconds}`` mapping as a Markdown table.

    Returns a placeholder sentence when *timing* is empty.
    """
    if not timing:
        return "_No timing data available._"
    rows = "".join(
        f"| {phase} | {elapsed:.3f}s |\n" for phase, elapsed in timing.items()
    )
    return "| Phase | Elapsed |\n" "|---|---|\n" + rows


def _chunk_source(chunk_id: str, graph_ids) -> str:  # type: ignore[no-untyped-def]
    """Classify a chunk as ``embedding``, ``graph`` or ``embedding+graph``.

    *chunk_id* is split on ``+`` and each part is looked up in *graph_ids*.
    If some parts are in *graph_ids* and some are not, the chunk is labelled
    ``embedding+graph``. If any part is in *graph_ids* (and none is outside),
    it is labelled ``graph``. Otherwise, including when *graph_ids* is empty,
    it is labelled ``embedding``.
    """
    if not graph_ids:
        return "embedding"
    parts = chunk_id.split("+") if chunk_id else []
    has_graph = any(part in graph_ids for part in parts)
    has_embedding = any(part not in graph_ids for part in parts)
    # Both flags can only be true when there are at least two parts.
    if has_graph and has_embedding:
        return "embedding+graph"
    if has_graph:
        return "graph"
    return "embedding"


@app.callback(invoke_without_command=True)
def debug(
    ctx: typer.Context,
    workspace: str = typer.Option(
        "",
        "--workspace",
        "-w",
        help="Path to the workspace root (default: current directory).",
    ),
    port: int = typer.Option(7860, "--port", "-p", help="Port for the Gradio server."),
    share: bool = typer.Option(False, "--share", help="Create a public Gradio share link."),
) -> None:
    """Launch the Gradio debug UI for inspecting the query pipeline.

    The UI lets you run a query against a workspace and inspect:
    - Per-phase timing
    - Final formatted results
    - Pre-rerank chunk table (file, lines, score, symbol, type)
    - LLM rerank prompts and raw response
    """
    if ctx.invoked_subcommand is not None:
        return

    # Gradio is imported lazily so a missing install produces a hint
    # instead of an import traceback.
    try:
        import gradio as gr  # type: ignore[import-untyped]
    except ImportError:
        console.print(
            "[red]Gradio is not installed. Install it with:[/red]\n"
            "  pip install 'codebase-retrieval-context-engine[debug]'"
        )
        raise typer.Exit(1)

    # Resolution order: --workspace flag -> CORBELL_WORKSPACE -> cwd.
    default_workspace = workspace or os.environ.get("CORBELL_WORKSPACE") or str(Path.cwd())

    def run_mcp_tool(
        env_vars_text: str,
        mcp_workspace: str,
        mcp_query: str,
    ):  # type: ignore[no-untyped-def]
        """Invoke context_engine_codebase_retrieval directly and return results.

        The ``KEY=VALUE`` lines in *env_vars_text* are applied to
        ``os.environ`` for the duration of the call and always restored
        afterwards.

        Returns:
            ``(error_text, result_text)``; exactly one of the two is
            non-empty, or both are empty when the query is blank.
        """
        if not mcp_query.strip():
            return "", ""

        # Original value (or None if unset) of every variable we override.
        env_backup: dict[str, str | None] = {}
        try:
            # Apply overrides inside the try so a failure while setting one
            # variable still restores those already changed. The parsed
            # mapping has unique keys, so each original value is recorded
            # exactly once even if the user repeats a key.
            for key, value in _parse_env_overrides(env_vars_text).items():
                env_backup[key] = os.environ.get(key)
                os.environ[key] = value

            from corbell.core.mcp.server import context_engine_codebase_retrieval

            result = context_engine_codebase_retrieval(
                query=mcp_query.strip(),
                workspace_full_path=mcp_workspace.strip(),
            )

            if result.startswith("Error:"):
                return result, ""
            return "", result
        except Exception as exc:
            # UI boundary: show the failure in the error box.
            return f"Error: {exc}", ""
        finally:
            for key, original in env_backup.items():
                if original is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = original

    def run_query(workspace_path: str, query: str):  # type: ignore[no-untyped-def]
        """Run the debug pipeline and return Gradio component values.

        Returns a 7-tuple in the order of the ``run_btn.click`` outputs:
        error text, timing Markdown, final output, pre-rerank rows,
        rerank system prompt, rerank user prompt, raw rerank response.
        """
        from corbell.core.query.engine import codebase_retrieval_debug

        if not query.strip():
            return (
                "",  # error_box
                "",  # timing_md
                "",  # final_results
                [],  # pre_rerank_table
                "",  # rerank_system
                "",  # rerank_user
                "",  # rerank_response
            )

        ws = workspace_path.strip() or default_workspace
        result = codebase_retrieval_debug(query=query, workspace_path=ws)

        # --- Error banner ---
        error_text = result.error or ""

        # --- Timing table ---
        timing = result.diagnostics.timing if result.diagnostics else {}
        timing_md = _format_timing_table(timing)

        # --- Final results ---
        final_results = result.final_output or ""

        # --- Pre-rerank table ---
        graph_ids = set()
        if result.diagnostics and result.diagnostics.graph_chunk_ids:
            graph_ids = result.diagnostics.graph_chunk_ids
        pre_rerank_rows = []
        for chunk in result.pre_rerank_chunks:
            source = _chunk_source(getattr(chunk, "chunk_id", ""), graph_ids)
            pre_rerank_rows.append([
                getattr(chunk, "file_path", ""),
                f"{getattr(chunk, 'start_line', '')}-{getattr(chunk, 'end_line', '')}",
                f"{getattr(chunk, 'score', 0.0):.4f}",
                getattr(chunk, "symbol", "") or "",
                getattr(chunk, "chunk_type", "") or "",
                source,
                getattr(chunk, "content", "") or "",
            ])

        # --- Rerank prompts ---
        detail = result.rerank_detail
        if detail is None or not detail.system_prompt:
            rerank_system = "_LLM not configured — reranking skipped_"
            rerank_user = ""
            rerank_response = ""
        else:
            rerank_system = detail.system_prompt
            rerank_user = detail.user_prompt
            rerank_response = detail.raw_response or "_No response (LLM call failed)_"

        return (
            error_text,
            timing_md,
            final_results,
            pre_rerank_rows,
            rerank_system,
            rerank_user,
            rerank_response,
        )

    with gr.Blocks(title="Corbell Query Debugger") as demo:
        gr.Markdown("# Corbell Query Debugger")
        gr.Markdown("Inspect query pipeline internals: timing, pre-rerank chunks, and LLM rerank prompts.")

        with gr.Row():
            workspace_input = gr.Textbox(
                label="Workspace Path",
                value=default_workspace,
                placeholder="Path to repository root",
                scale=2,
            )
            query_input = gr.Textbox(
                label="Query",
                placeholder="e.g. authentication middleware",
                scale=3,
            )

        run_btn = gr.Button("Run Query", variant="primary")

        error_box = gr.Textbox(
            label="Error",
            visible=True,
            interactive=False,
            lines=2,
        )

        timing_md = gr.Markdown(label="Timing")

        with gr.Tabs():
            with gr.Tab("Final Results"):
                final_output = gr.Code(label="Formatted Output", language=None)

            with gr.Tab("Pre-Rerank Chunks"):
                pre_rerank_table = gr.Dataframe(
                    headers=["File", "Lines", "Score", "Symbol", "Type", "Source", "Content"],
                    datatype=["str", "str", "str", "str", "str", "str", "str"],
                    label="Chunks before reranking",
                    wrap=False,
                )

            with gr.Tab("LLM Rerank"):
                rerank_system_box = gr.Textbox(
                    label="System Prompt",
                    lines=6,
                    interactive=False,
                )
                rerank_user_box = gr.Textbox(
                    label="User Prompt",
                    lines=12,
                    interactive=False,
                )
                rerank_response_box = gr.Textbox(
                    label="Raw LLM Response",
                    lines=4,
                    interactive=False,
                )

            with gr.Tab("MCP Debug"):
                gr.Markdown(
                    "### MCP Tool Tester\n"
                    "Configure environment and invoke "
                    "`context_engine_codebase_retrieval` directly."
                )

                with gr.Accordion("Environment Configuration", open=False):
                    mcp_env_vars = gr.Textbox(
                        label="Environment Variables (one per line, KEY=VALUE)",
                        placeholder=(
                            "# Example:\n"
                            "CORBELL_LLM_PROVIDER=anthropic\n"
                            "CORBELL_RERANK=true\n"
                            "ANTHROPIC_API_KEY=sk-..."
                        ),
                        lines=6,
                    )

                gr.Markdown("#### Tool Parameters")
                with gr.Row():
                    mcp_workspace_input = gr.Textbox(
                        label="workspace_full_path",
                        value=default_workspace,
                        placeholder="Path to repository root",
                        scale=3,
                    )
                    mcp_query_input = gr.Textbox(
                        label="query",
                        placeholder="e.g. authentication middleware",
                    )

                mcp_run_btn = gr.Button("Invoke MCP Tool", variant="primary")

                mcp_error_box = gr.Textbox(
                    label="Error",
                    visible=True,
                    interactive=False,
                    lines=2,
                )
                mcp_result_box = gr.Code(
                    label="Tool Response",
                    language=None,
                )

        # Output order must match the tuple returned by run_query.
        run_btn.click(
            fn=run_query,
            inputs=[workspace_input, query_input],
            outputs=[
                error_box,
                timing_md,
                final_output,
                pre_rerank_table,
                rerank_system_box,
                rerank_user_box,
                rerank_response_box,
            ],
        )

        mcp_run_btn.click(
            fn=run_mcp_tool,
            inputs=[
                mcp_env_vars,
                mcp_workspace_input,
                mcp_query_input,
            ],
            outputs=[mcp_error_box, mcp_result_box],
        )

    console.print(f"[green]Starting Corbell debug UI on port {port}...[/green]")
    demo.launch(server_port=port, share=share)
|