codebase-retrieval-context-engine 2.0.2__tar.gz → 2.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/.env.example +121 -104
  2. codebase_retrieval_context_engine-2.0.4/PKG-INFO +95 -0
  3. codebase_retrieval_context_engine-2.0.4/README.md +35 -0
  4. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/__init__.py +1 -1
  5. codebase_retrieval_context_engine-2.0.4/corbell/cli/commands/debug.py +305 -0
  6. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/commands/index.py +13 -0
  7. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/main.py +2 -0
  8. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/constants.py +8 -0
  9. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/extractor.py +4 -1
  10. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/model.py +8 -6
  11. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/sqlite_store.py +71 -26
  12. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/gitignore.py +2 -0
  13. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/builder.py +2 -0
  14. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/method_graph.py +194 -15
  15. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/indexing/builder.py +257 -25
  16. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/indexing/tracker.py +2 -0
  17. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/llm_client.py +1 -1
  18. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/mcp/server.py +3 -54
  19. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/diagnostics.py +18 -1
  20. codebase_retrieval_context_engine-2.0.4/corbell/core/query/engine.py +472 -0
  21. codebase_retrieval_context_engine-2.0.4/corbell/core/query/reranker.py +207 -0
  22. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/workspace.py +2 -1
  23. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/pyproject.toml +4 -3
  24. codebase_retrieval_context_engine-2.0.4/tests/test_reranker.py +243 -0
  25. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/uv.lock +418 -6
  26. codebase_retrieval_context_engine-2.0.2/PKG-INFO +0 -503
  27. codebase_retrieval_context_engine-2.0.2/corbell/core/query/engine.py +0 -326
  28. codebase_retrieval_context_engine-2.0.2/corbell/core/query/reranker.py +0 -131
  29. codebase_retrieval_context_engine-2.0.2/tests/test_reranker.py +0 -147
  30. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/.github/workflows/ci.yml +0 -0
  31. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/.gitignore +0 -0
  32. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/CONTRIBUTING.md +0 -0
  33. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/LICENSE +0 -0
  34. /codebase_retrieval_context_engine-2.0.2/README.md → /codebase_retrieval_context_engine-2.0.4/README.backup.md +0 -0
  35. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/assets/corbell_ui.png +0 -0
  36. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/assets/logo.png +0 -0
  37. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/assets/mermaid_diagram.png +0 -0
  38. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/assets/star_history.png +0 -0
  39. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/__init__.py +0 -0
  40. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/commands/__init__.py +0 -0
  41. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/cli/commands/query.py +0 -0
  42. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/__init__.py +0 -0
  43. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/__init__.py +0 -0
  44. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/base.py +0 -0
  45. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/factory.py +0 -0
  46. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/embeddings/search_cache.py +0 -0
  47. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/__init__.py +0 -0
  48. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/providers/__init__.py +0 -0
  49. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/providers/aws_patterns.py +0 -0
  50. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/providers/azure_patterns.py +0 -0
  51. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/providers/gcp_patterns.py +0 -0
  52. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/schema.py +0 -0
  53. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/graph/sqlite_store.py +0 -0
  54. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/indexing/__init__.py +0 -0
  55. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/indexing/lock.py +0 -0
  56. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/mcp/__init__.py +0 -0
  57. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/__init__.py +0 -0
  58. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/enhancer.py +0 -0
  59. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/formatter.py +0 -0
  60. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/graph_expander.py +0 -0
  61. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/corbell/core/query/merger.py +0 -0
  62. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/graph.json +0 -0
  63. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/requirements.txt +0 -0
  64. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/test_regex.py +0 -0
  65. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/__init__.py +0 -0
  66. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/conftest.py +0 -0
  67. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_builder.py +0 -0
  68. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_embeddings.py +0 -0
  69. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_graph_expander.py +0 -0
  70. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_graph_sqlite_store.py +0 -0
  71. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_llm_client.py +0 -0
  72. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_mcp.py +0 -0
  73. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_merger.py +0 -0
  74. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_method_graph_improvements.py +0 -0
  75. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_new_language_support.py +0 -0
  76. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_query_engine.py +0 -0
  77. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_search_cache.py +0 -0
  78. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_tracker.py +0 -0
  79. {codebase_retrieval_context_engine-2.0.2 → codebase_retrieval_context_engine-2.0.4}/tests/test_workspace.py +0 -0
@@ -1,104 +1,121 @@
1
- # ============================================================
2
- # Corbell — Environment Variables
3
- # Copy this file to .env and fill in the values you need.
4
- # ============================================================
5
-
6
- # ----------------------------------------------------------
7
- # LLM Provider API Keys (pick one or more)
8
- # ----------------------------------------------------------
9
- ANTHROPIC_API_KEY=
10
- OPENAI_API_KEY=
11
- GOOGLE_API_KEY=
12
- # Multiple Google keys for round-robin: GOOGLE_API_KEY=key1,key2,key3
13
- VOYAGE_API_KEY=
14
- # Multiple Voyage keys for round-robin: VOYAGE_API_KEY=pa-key1,pa-key2,pa-key3
15
-
16
- # AWS Bedrock
17
- BEDROCK_API_KEY=
18
- AWS_ACCESS_KEY_ID=
19
- AWS_SECRET_ACCESS_KEY=
20
- AWS_REGION=us-east-1
21
-
22
- # Azure OpenAI
23
- AZURE_OPENAI_API_KEY=
24
- AZURE_OPENAI_ENDPOINT=
25
- AZURE_OPENAI_DEPLOYMENT=
26
- AZURE_OPENAI_API_VERSION=2024-02-01
27
-
28
- # GCP Vertex AI
29
- GOOGLE_APPLICATION_CREDENTIALS=
30
- GCP_PROJECT=
31
- GOOGLE_CLOUD_PROJECT=
32
- GCP_REGION=us-central1
33
-
34
- # Generic fallback key (used when provider-specific key is not set)
35
- CORBELL_LLM_API_KEY=
36
-
37
- # ----------------------------------------------------------
38
- # Model Overrides
39
- # ----------------------------------------------------------
40
-
41
- # Embedding model — cloud only (default: voyage-code-3)
42
- # Supported: voyage-code-3, voyage-4-lite (VOYAGE_API_KEY), gemini-embedding-001 (GOOGLE_API_KEY)
43
- CORBELL_EMBEDDING_MODEL=
44
-
45
- # Embedding dimension override — overrides the model's default dimension.
46
- # Voyage default: 1024, Google (gemini-embedding-001) default: 768.
47
- CORBELL_EMBEDDING_DIM=
48
-
49
- # LLM model — generic (overrides llm.model default)
50
- CORBELL_LLM_MODEL=
51
-
52
- # LLM model — provider-specific (takes priority over CORBELL_LLM_MODEL)
53
- ANTHROPIC_MODEL=
54
- OPENAI_MODEL=
55
- GOOGLE_MODEL=
56
- OLLAMA_MODEL=
57
- AWS_MODEL=
58
- AZURE_MODEL=
59
- GCP_MODEL=
60
-
61
- # ----------------------------------------------------------
62
- # Workspace Path
63
- # ----------------------------------------------------------
64
-
65
- # Full path to the workspace (repository) root directory.
66
- # Used as fallback when --workspace-full-path is not passed to CLI.
67
- # Resolution order: --workspace-full-path flag → CORBELL_WORKSPACE → cwd
68
- CORBELL_WORKSPACE=
69
-
70
- # ----------------------------------------------------------
71
- # Query Configuration (all optional, sensible defaults shown)
72
- # ----------------------------------------------------------
73
-
74
- # Maximum number of code chunks returned per query (default: 50)
75
- CORBELL_TOP_K=
76
-
77
- # Enable LLM reranking of results (default: true)
78
- CORBELL_RERANK=
79
-
80
- # BFS depth for call-graph expansion (default: 2)
81
- CORBELL_EXPAND_CALL_DEPTH=
82
-
83
- # Maximum chunks added via graph expansion (default: 30)
84
- CORBELL_EXPAND_MAX_CHUNKS=
85
-
86
- # ----------------------------------------------------------
87
- # Indexing Configuration (all optional, sensible defaults shown)
88
- # ----------------------------------------------------------
89
-
90
- # Number of lines per embedding chunk (default: 50)
91
- CORBELL_CHUNK_SIZE=
92
-
93
- # Overlap between consecutive chunks in lines (default: 10)
94
- CORBELL_CHUNK_OVERLAP=
95
-
96
- # Maximum file size to index in bytes (default: 1048576 = 1 MB)
97
- CORBELL_MAX_FILE_BYTES=
98
-
99
- # Comma-separated list of directory names to skip during indexing
100
- # (in addition to built-in skip list: .git, node_modules, __pycache__, etc.)
101
- CORBELL_SKIP_DIRS=
102
-
103
- # Number of parallel workers for indexing (default: min(cpu_count, 8))
104
- # CORBELL_INDEX_WORKERS=4
1
+ # ============================================================
2
+ # Corbell — Environment Variables
3
+ # Copy this file to .env and fill in the values you need.
4
+ # ============================================================
5
+
6
+ # ----------------------------------------------------------
7
+ # LLM Provider API Keys (pick one or more)
8
+ # ----------------------------------------------------------
9
+ ANTHROPIC_API_KEY=
10
+ OPENAI_API_KEY=
11
+ GOOGLE_API_KEY=
12
+ # Multiple Google keys for round-robin: GOOGLE_API_KEY=key1,key2,key3
13
+ VOYAGE_API_KEY=
14
+ # Multiple Voyage keys for round-robin: VOYAGE_API_KEY=pa-key1,pa-key2,pa-key3
15
+
16
+ # AWS Bedrock
17
+ BEDROCK_API_KEY=
18
+ AWS_ACCESS_KEY_ID=
19
+ AWS_SECRET_ACCESS_KEY=
20
+ AWS_REGION=us-east-1
21
+
22
+ # Azure OpenAI
23
+ AZURE_OPENAI_API_KEY=
24
+ AZURE_OPENAI_ENDPOINT=
25
+ AZURE_OPENAI_DEPLOYMENT=
26
+ AZURE_OPENAI_API_VERSION=2024-02-01
27
+
28
+ # GCP Vertex AI
29
+ GOOGLE_APPLICATION_CREDENTIALS=
30
+ GCP_PROJECT=
31
+ GOOGLE_CLOUD_PROJECT=
32
+ GCP_REGION=us-central1
33
+
34
+ # Generic fallback key (used when provider-specific key is not set)
35
+ CORBELL_LLM_API_KEY=
36
+
37
+ # ----------------------------------------------------------
38
+ # Model Overrides
39
+ # ----------------------------------------------------------
40
+
41
+ # Embedding model — cloud only (default: voyage-code-3)
42
+ # Supported: voyage-code-3, voyage-4-lite (VOYAGE_API_KEY), gemini-embedding-001 (GOOGLE_API_KEY)
43
+ CORBELL_EMBEDDING_MODEL=
44
+
45
+ # Embedding dimension override — overrides the model's default dimension.
46
+ # Voyage default: 1024, Google (gemini-embedding-001) default: 768.
47
+ CORBELL_EMBEDDING_DIM=
48
+
49
+ # LLM model — generic (overrides llm.model default)
50
+ CORBELL_LLM_MODEL=
51
+
52
+ # LLM provider — which API to use for reranking
53
+ # Options: anthropic, openai, google, ollama, aws, azure, gcp (default: anthropic)
54
+ CORBELL_LLM_PROVIDER=
55
+
56
+ # LLM model — provider-specific (takes priority over CORBELL_LLM_MODEL)
57
+ ANTHROPIC_MODEL=
58
+ OPENAI_MODEL=
59
+ GOOGLE_MODEL=
60
+ OLLAMA_MODEL=
61
+ AWS_MODEL=
62
+ AZURE_MODEL=
63
+ GCP_MODEL=
64
+
65
+ # ----------------------------------------------------------
66
+ # Workspace Path
67
+ # ----------------------------------------------------------
68
+
69
+ # Full path to the workspace (repository) root directory.
70
+ # Used as fallback when --workspace-full-path is not passed to CLI.
71
+ # Resolution order: --workspace-full-path flag → CORBELL_WORKSPACE → cwd
72
+ CORBELL_WORKSPACE=
73
+
74
+ # ----------------------------------------------------------
75
+ # Query Configuration (all optional, sensible defaults shown)
76
+ # ----------------------------------------------------------
77
+
78
+ # Maximum number of code chunks returned per query (default: 50)
79
+ CORBELL_TOP_K=
80
+
81
+ # Enable LLM reranking of results (default: true)
82
+ CORBELL_RERANK=
83
+
84
+ # BFS depth for call-graph expansion (default: 2)
85
+ CORBELL_EXPAND_CALL_DEPTH=
86
+
87
+ # Maximum chunks added via graph expansion (default: 30)
88
+ CORBELL_EXPAND_MAX_CHUNKS=
89
+
90
+ # ----------------------------------------------------------
91
+ # Indexing Configuration (all optional, sensible defaults shown)
92
+ # ----------------------------------------------------------
93
+
94
+ # Number of lines per embedding chunk (default: 50)
95
+ CORBELL_CHUNK_SIZE=
96
+
97
+ # Overlap between consecutive chunks in lines (default: 10)
98
+ CORBELL_CHUNK_OVERLAP=
99
+
100
+ # Maximum file size to index in bytes (default: 1048576 = 1 MB)
101
+ CORBELL_MAX_FILE_BYTES=
102
+
103
+ # Comma-separated list of directory names to skip during indexing
104
+ # (in addition to built-in skip list: .git, node_modules, __pycache__, etc.)
105
+ CORBELL_SKIP_DIRS=
106
+
107
+ # Number of parallel workers for indexing (default: min(cpu_count, 8))
108
+ # CORBELL_INDEX_WORKERS=4
109
+
110
+ # Number of concurrent embedding API threads per encode call (default: provider-aware)
111
+ # VoyageEmbeddingModel default: 30, GoogleEmbeddingModel default: 8, other: 4
112
+ CORBELL_EMBED_CONCURRENCY=
113
+
114
+ # Super-batch size: number of chunks encoded and written per streaming slice (default: 6000)
115
+ # Sized to saturate the concurrent thread pool. Lower values reduce peak memory
116
+ # at the cost of throughput; higher values may exceed API rate limits.
117
+ CORBELL_EMBED_BATCH=
118
+
119
+ # Enable verbose performance logging during index builds (any non-empty value)
120
+ # Also available as --verbose / -v flag on `corbell index build`
121
+ CORBELL_VERBOSE=
@@ -0,0 +1,95 @@
1
+ Metadata-Version: 2.4
2
+ Name: codebase-retrieval-context-engine
3
+ Version: 2.0.4
4
+ Summary: Code retrieval engine — hybrid embedding + graph search for LLM context injection.
5
+ Project-URL: Homepage, https://github.com/nullmastermind/local-context-engine
6
+ Project-URL: Repository, https://github.com/nullmastermind/local-context-engine
7
+ Project-URL: Issues, https://github.com/nullmastermind/local-context-engine/issues
8
+ Author: nullmastermind
9
+ License: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: LLM,MCP,code-search,codebase-retrieval,context,embeddings,retrieval
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Requires-Python: >=3.11
18
+ Requires-Dist: google-genai>=2.7.0
19
+ Requires-Dist: mcp>=1.1.2
20
+ Requires-Dist: numpy>=2.0
21
+ Requires-Dist: pathspec>=0.11
22
+ Requires-Dist: pydantic>=2.0
23
+ Requires-Dist: python-dotenv>=1.0
24
+ Requires-Dist: rich>=13.0
25
+ Requires-Dist: typer>=0.12
26
+ Requires-Dist: voyageai>=0.3
27
+ Provides-Extra: anthropic
28
+ Requires-Dist: anthropic>=0.25; extra == 'anthropic'
29
+ Provides-Extra: aws
30
+ Requires-Dist: boto3>=1.34; extra == 'aws'
31
+ Provides-Extra: azure
32
+ Requires-Dist: openai>=1.0; extra == 'azure'
33
+ Provides-Extra: debug
34
+ Requires-Dist: gradio>=4.0; extra == 'debug'
35
+ Provides-Extra: dev
36
+ Requires-Dist: httpx; extra == 'dev'
37
+ Requires-Dist: mypy; extra == 'dev'
38
+ Requires-Dist: pytest-asyncio; extra == 'dev'
39
+ Requires-Dist: pytest-cov; extra == 'dev'
40
+ Requires-Dist: pytest>=8.0; extra == 'dev'
41
+ Requires-Dist: respx; extra == 'dev'
42
+ Requires-Dist: ruff; extra == 'dev'
43
+ Provides-Extra: gcp
44
+ Requires-Dist: anthropic[vertex]>=0.25; extra == 'gcp'
45
+ Requires-Dist: google-cloud-aiplatform>=1.38; extra == 'gcp'
46
+ Provides-Extra: openai
47
+ Requires-Dist: openai>=1.0; extra == 'openai'
48
+ Provides-Extra: treesitter
49
+ Requires-Dist: tree-sitter-c-sharp>=0.21; extra == 'treesitter'
50
+ Requires-Dist: tree-sitter-go>=0.21; extra == 'treesitter'
51
+ Requires-Dist: tree-sitter-java>=0.21; extra == 'treesitter'
52
+ Requires-Dist: tree-sitter-javascript>=0.21; extra == 'treesitter'
53
+ Requires-Dist: tree-sitter-php>=0.21; extra == 'treesitter'
54
+ Requires-Dist: tree-sitter-python>=0.21; extra == 'treesitter'
55
+ Requires-Dist: tree-sitter-ruby>=0.21; extra == 'treesitter'
56
+ Requires-Dist: tree-sitter-rust>=0.21; extra == 'treesitter'
57
+ Requires-Dist: tree-sitter-typescript>=0.21; extra == 'treesitter'
58
+ Requires-Dist: tree-sitter>=0.21; extra == 'treesitter'
59
+ Description-Content-Type: text/markdown
60
+
61
+ <div align="center">
62
+ <h1>codebase-retrieval-context-engine</h1>
63
+ <p><strong>Code retrieval engine for LLM context via MCP.</strong></p>
64
+ <p>
65
+ <a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License"/></a>
66
+ </p>
67
+ </div>
68
+
69
+ ---
70
+
71
+ ## Add to Claude Code
72
+
73
+ ```bash
74
+ claude mcp add codebase-retrieval -e CORBELL_LLM_PROVIDER=google -e GOOGLE_API_KEY=your-google-api-key -e GOOGLE_MODEL=gemini-3.1-flash-lite -e CORBELL_EMBEDDING_MODEL=voyage-4-lite -e VOYAGE_API_KEY=your-voyage-api-key -- uvx codebase-retrieval-context-engine
75
+ ```
76
+
77
+ That's it. The AI agent passes workspace path and triggers index builds automatically.
78
+
79
+ ---
80
+
81
+ ## Environment variables
82
+
83
+ | Variable | Description |
84
+ |---|---|
85
+ | `CORBELL_LLM_PROVIDER` | LLM provider for reranking (`google`, `anthropic`, `openai`) |
86
+ | `GOOGLE_API_KEY` | Google AI API key (supports multiple: `key1,key2,key3`) |
87
+ | `GOOGLE_MODEL` | e.g. `gemini-3.1-flash-lite` |
88
+ | `CORBELL_EMBEDDING_MODEL` | `voyage-4-lite`, `voyage-code-3`, or `gemini-embedding-001` |
89
+ | `VOYAGE_API_KEY` | Voyage AI API key (supports multiple: `key1,key2,key3`). Add a card to billing to unlock rate limits. |
90
+
91
+ ---
92
+
93
+ ## License
94
+
95
+ Apache 2.0
@@ -0,0 +1,35 @@
1
+ <div align="center">
2
+ <h1>codebase-retrieval-context-engine</h1>
3
+ <p><strong>Code retrieval engine for LLM context via MCP.</strong></p>
4
+ <p>
5
+ <a href="LICENSE"><img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License"/></a>
6
+ </p>
7
+ </div>
8
+
9
+ ---
10
+
11
+ ## Add to Claude Code
12
+
13
+ ```bash
14
+ claude mcp add codebase-retrieval -e CORBELL_LLM_PROVIDER=google -e GOOGLE_API_KEY=your-google-api-key -e GOOGLE_MODEL=gemini-3.1-flash-lite -e CORBELL_EMBEDDING_MODEL=voyage-4-lite -e VOYAGE_API_KEY=your-voyage-api-key -- uvx codebase-retrieval-context-engine
15
+ ```
16
+
17
+ That's it. The AI agent passes workspace path and triggers index builds automatically.
18
+
19
+ ---
20
+
21
+ ## Environment variables
22
+
23
+ | Variable | Description |
24
+ |---|---|
25
+ | `CORBELL_LLM_PROVIDER` | LLM provider for reranking (`google`, `anthropic`, `openai`) |
26
+ | `GOOGLE_API_KEY` | Google AI API key (supports multiple: `key1,key2,key3`) |
27
+ | `GOOGLE_MODEL` | e.g. `gemini-3.1-flash-lite` |
28
+ | `CORBELL_EMBEDDING_MODEL` | `voyage-4-lite`, `voyage-code-3`, or `gemini-embedding-001` |
29
+ | `VOYAGE_API_KEY` | Voyage AI API key (supports multiple: `key1,key2,key3`). Add a card to billing to unlock rate limits. |
30
+
31
+ ---
32
+
33
+ ## License
34
+
35
+ Apache 2.0
@@ -2,5 +2,5 @@
2
2
  Corbell — Code retrieval engine for LLM context injection.
3
3
  """
4
4
 
5
- __version__ = "2.0.2"
5
+ __version__ = "2.0.4"
6
6
  __all__ = ["__version__"]
@@ -0,0 +1,305 @@
1
+ """CLI: corbell debug — launch a Gradio UI for inspecting query pipeline internals."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ app = typer.Typer(no_args_is_help=False, help="Query debug UI commands.")
12
+ console = Console()
13
+
14
+
15
+ @app.callback(invoke_without_command=True)
16
+ def debug(
17
+ ctx: typer.Context,
18
+ workspace: str = typer.Option(
19
+ "",
20
+ "--workspace",
21
+ "-w",
22
+ help="Path to the workspace root (default: current directory).",
23
+ ),
24
+ port: int = typer.Option(7860, "--port", "-p", help="Port for the Gradio server."),
25
+ share: bool = typer.Option(False, "--share", help="Create a public Gradio share link."),
26
+ ) -> None:
27
+ """Launch the Gradio debug UI for inspecting the query pipeline.
28
+
29
+ The UI lets you run a query against a workspace and inspect:
30
+ - Per-phase timing
31
+ - Final formatted results
32
+ - Pre-rerank chunk table (file, lines, score, symbol, type)
33
+ - LLM rerank prompts and raw response
34
+ """
35
+ if ctx.invoked_subcommand is not None:
36
+ return
37
+
38
+ try:
39
+ import gradio as gr # type: ignore[import-untyped]
40
+ except ImportError:
41
+ console.print(
42
+ "[red]Gradio is not installed. Install it with:[/red]\n"
43
+ " pip install 'codebase-retrieval-context-engine[debug]'"
44
+ )
45
+ raise typer.Exit(1)
46
+
47
+ default_workspace = workspace or os.environ.get("CORBELL_WORKSPACE") or str(Path.cwd())
48
+
49
+ def run_mcp_tool(
50
+ env_vars_text: str,
51
+ mcp_workspace: str,
52
+ mcp_query: str,
53
+ ): # type: ignore[no-untyped-def]
54
+ """Invoke context_engine_codebase_retrieval directly and return results."""
55
+ if not mcp_query.strip():
56
+ return "", ""
57
+
58
+ # Apply env var overrides for this invocation
59
+ env_backup: dict[str, str | None] = {}
60
+ if env_vars_text.strip():
61
+ for line in env_vars_text.strip().splitlines():
62
+ line = line.strip()
63
+ if not line or line.startswith("#"):
64
+ continue
65
+ if "=" not in line:
66
+ continue
67
+ key, _, value = line.partition("=")
68
+ key, value = key.strip(), value.strip()
69
+ env_backup[key] = os.environ.get(key)
70
+ os.environ[key] = value
71
+
72
+ try:
73
+ from corbell.core.mcp.server import context_engine_codebase_retrieval
74
+
75
+ result = context_engine_codebase_retrieval(
76
+ query=mcp_query.strip(),
77
+ workspace_full_path=mcp_workspace.strip(),
78
+ )
79
+
80
+ if result.startswith("Error:"):
81
+ return result, ""
82
+ return "", result
83
+ except Exception as exc:
84
+ return f"Error: {exc}", ""
85
+ finally:
86
+ for key, original in env_backup.items():
87
+ if original is None:
88
+ os.environ.pop(key, None)
89
+ else:
90
+ os.environ[key] = original
91
+
92
+ def run_query(workspace_path: str, query: str): # type: ignore[no-untyped-def]
93
+ """Run the debug pipeline and return Gradio component values."""
94
+ from corbell.core.query.engine import codebase_retrieval_debug
95
+
96
+ if not query.strip():
97
+ return (
98
+ "", # error_box
99
+ "", # timing_md
100
+ "", # final_results
101
+ [], # pre_rerank_table
102
+ "", # rerank_system
103
+ "", # rerank_user
104
+ "", # rerank_response
105
+ )
106
+
107
+ ws = workspace_path.strip() or default_workspace
108
+ result = codebase_retrieval_debug(query=query, workspace_path=ws)
109
+
110
+ # --- Error banner ---
111
+ error_text = result.error or ""
112
+
113
+ # --- Timing table ---
114
+ timing = result.diagnostics.timing if result.diagnostics else {}
115
+ if timing:
116
+ rows = "".join(
117
+ f"| {phase} | {elapsed:.3f}s |\n"
118
+ for phase, elapsed in timing.items()
119
+ )
120
+ timing_md = (
121
+ "| Phase | Elapsed |\n"
122
+ "|---|---|\n"
123
+ + rows
124
+ )
125
+ else:
126
+ timing_md = "_No timing data available._"
127
+
128
+ # --- Final results ---
129
+ final_results = result.final_output or ""
130
+
131
+ # --- Pre-rerank table ---
132
+ pre_rerank_rows = []
133
+ graph_ids = set()
134
+ if result.diagnostics and result.diagnostics.graph_chunk_ids:
135
+ graph_ids = result.diagnostics.graph_chunk_ids
136
+ for chunk in result.pre_rerank_chunks:
137
+ chunk_id = getattr(chunk, "chunk_id", "")
138
+ parts = chunk_id.split("+") if chunk_id else []
139
+ has_graph = any(p in graph_ids for p in parts) if graph_ids else False
140
+ has_embedding = any(p not in graph_ids for p in parts) if graph_ids else True
141
+ if has_graph and has_embedding and len(parts) > 1:
142
+ source = "embedding+graph"
143
+ elif has_graph:
144
+ source = "graph"
145
+ else:
146
+ source = "embedding"
147
+ pre_rerank_rows.append([
148
+ getattr(chunk, "file_path", ""),
149
+ f"{getattr(chunk, 'start_line', '')}-{getattr(chunk, 'end_line', '')}",
150
+ f"{getattr(chunk, 'score', 0.0):.4f}",
151
+ getattr(chunk, "symbol", "") or "",
152
+ getattr(chunk, "chunk_type", "") or "",
153
+ source,
154
+ getattr(chunk, "content", "") or "",
155
+ ])
156
+
157
+ # --- Rerank prompts ---
158
+ detail = result.rerank_detail
159
+ if detail is None or not detail.system_prompt:
160
+ rerank_system = "_LLM not configured — reranking skipped_"
161
+ rerank_user = ""
162
+ rerank_response = ""
163
+ else:
164
+ rerank_system = detail.system_prompt
165
+ rerank_user = detail.user_prompt
166
+ rerank_response = detail.raw_response or "_No response (LLM call failed)_"
167
+
168
+ return (
169
+ error_text,
170
+ timing_md,
171
+ final_results,
172
+ pre_rerank_rows,
173
+ rerank_system,
174
+ rerank_user,
175
+ rerank_response,
176
+ )
177
+
178
+ with gr.Blocks(title="Corbell Query Debugger") as demo:
179
+ gr.Markdown("# Corbell Query Debugger")
180
+ gr.Markdown("Inspect query pipeline internals: timing, pre-rerank chunks, and LLM rerank prompts.")
181
+
182
+ with gr.Row():
183
+ workspace_input = gr.Textbox(
184
+ label="Workspace Path",
185
+ value=default_workspace,
186
+ placeholder="Path to repository root",
187
+ scale=2,
188
+ )
189
+ query_input = gr.Textbox(
190
+ label="Query",
191
+ placeholder="e.g. authentication middleware",
192
+ scale=3,
193
+ )
194
+
195
+ run_btn = gr.Button("Run Query", variant="primary")
196
+
197
+ error_box = gr.Textbox(
198
+ label="Error",
199
+ visible=True,
200
+ interactive=False,
201
+ lines=2,
202
+ )
203
+
204
+ timing_md = gr.Markdown(label="Timing")
205
+
206
+ with gr.Tabs():
207
+ with gr.Tab("Final Results"):
208
+ final_output = gr.Code(label="Formatted Output", language=None)
209
+
210
+ with gr.Tab("Pre-Rerank Chunks"):
211
+ pre_rerank_table = gr.Dataframe(
212
+ headers=["File", "Lines", "Score", "Symbol", "Type", "Source", "Content"],
213
+ datatype=["str", "str", "str", "str", "str", "str", "str"],
214
+ label="Chunks before reranking",
215
+ wrap=False,
216
+ )
217
+
218
+ with gr.Tab("LLM Rerank"):
219
+ rerank_system_box = gr.Textbox(
220
+ label="System Prompt",
221
+ lines=6,
222
+ interactive=False,
223
+ )
224
+ rerank_user_box = gr.Textbox(
225
+ label="User Prompt",
226
+ lines=12,
227
+ interactive=False,
228
+ )
229
+ rerank_response_box = gr.Textbox(
230
+ label="Raw LLM Response",
231
+ lines=4,
232
+ interactive=False,
233
+ )
234
+
235
+ with gr.Tab("MCP Debug"):
236
+ gr.Markdown(
237
+ "### MCP Tool Tester\n"
238
+ "Configure environment and invoke "
239
+ "`context_engine_codebase_retrieval` directly."
240
+ )
241
+
242
+ with gr.Accordion("Environment Configuration", open=False):
243
+ mcp_env_vars = gr.Textbox(
244
+ label="Environment Variables (one per line, KEY=VALUE)",
245
+ placeholder=(
246
+ "# Example:\n"
247
+ "CORBELL_LLM_PROVIDER=anthropic\n"
248
+ "CORBELL_RERANK=true\n"
249
+ "ANTHROPIC_API_KEY=sk-..."
250
+ ),
251
+ lines=6,
252
+ )
253
+
254
+ gr.Markdown("#### Tool Parameters")
255
+ with gr.Row():
256
+ mcp_workspace_input = gr.Textbox(
257
+ label="workspace_full_path",
258
+ value=default_workspace,
259
+ placeholder="Path to repository root",
260
+ scale=3,
261
+ )
262
+ mcp_query_input = gr.Textbox(
263
+ label="query",
264
+ placeholder="e.g. authentication middleware",
265
+ )
266
+
267
+ mcp_run_btn = gr.Button("Invoke MCP Tool", variant="primary")
268
+
269
+ mcp_error_box = gr.Textbox(
270
+ label="Error",
271
+ visible=True,
272
+ interactive=False,
273
+ lines=2,
274
+ )
275
+ mcp_result_box = gr.Code(
276
+ label="Tool Response",
277
+ language=None,
278
+ )
279
+
280
+ run_btn.click(
281
+ fn=run_query,
282
+ inputs=[workspace_input, query_input],
283
+ outputs=[
284
+ error_box,
285
+ timing_md,
286
+ final_output,
287
+ pre_rerank_table,
288
+ rerank_system_box,
289
+ rerank_user_box,
290
+ rerank_response_box,
291
+ ],
292
+ )
293
+
294
+ mcp_run_btn.click(
295
+ fn=run_mcp_tool,
296
+ inputs=[
297
+ mcp_env_vars,
298
+ mcp_workspace_input,
299
+ mcp_query_input,
300
+ ],
301
+ outputs=[mcp_error_box, mcp_result_box],
302
+ )
303
+
304
+ console.print(f"[green]Starting Corbell debug UI on port {port}...[/green]")
305
+ demo.launch(server_port=port, share=share)