ctx-retriever 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctx_retriever-0.1.0/LICENSE +21 -0
- ctx_retriever-0.1.0/PKG-INFO +205 -0
- ctx_retriever-0.1.0/README.md +172 -0
- ctx_retriever-0.1.0/ctx_retriever.egg-info/PKG-INFO +205 -0
- ctx_retriever-0.1.0/ctx_retriever.egg-info/SOURCES.txt +43 -0
- ctx_retriever-0.1.0/ctx_retriever.egg-info/dependency_links.txt +1 -0
- ctx_retriever-0.1.0/ctx_retriever.egg-info/requires.txt +12 -0
- ctx_retriever-0.1.0/ctx_retriever.egg-info/top_level.txt +1 -0
- ctx_retriever-0.1.0/pyproject.toml +45 -0
- ctx_retriever-0.1.0/setup.cfg +4 -0
- ctx_retriever-0.1.0/src/__init__.py +0 -0
- ctx_retriever-0.1.0/src/analysis/__init__.py +0 -0
- ctx_retriever-0.1.0/src/analysis/differentiation.py +390 -0
- ctx_retriever-0.1.0/src/analysis/error_analysis.py +331 -0
- ctx_retriever-0.1.0/src/analysis/trigger_accuracy.py +408 -0
- ctx_retriever-0.1.0/src/data/__init__.py +0 -0
- ctx_retriever-0.1.0/src/data/dataset_generator.py +443 -0
- ctx_retriever-0.1.0/src/data/real_codebase_loader.py +390 -0
- ctx_retriever-0.1.0/src/evaluator/__init__.py +0 -0
- ctx_retriever-0.1.0/src/evaluator/benchmark_runner.py +530 -0
- ctx_retriever-0.1.0/src/evaluator/coir_evaluator.py +661 -0
- ctx_retriever-0.1.0/src/evaluator/doc_retrieval_eval.py +207 -0
- ctx_retriever-0.1.0/src/evaluator/downstream_quality.py +185 -0
- ctx_retriever-0.1.0/src/evaluator/hook_effectiveness_eval.py +425 -0
- ctx_retriever-0.1.0/src/evaluator/llm_quality.py +396 -0
- ctx_retriever-0.1.0/src/evaluator/metrics.py +168 -0
- ctx_retriever-0.1.0/src/evaluator/ranger_comparison.py +510 -0
- ctx_retriever-0.1.0/src/evaluator/repobench_evaluator.py +1069 -0
- ctx_retriever-0.1.0/src/evaluator/statistical_tests.py +185 -0
- ctx_retriever-0.1.0/src/retrieval/__init__.py +0 -0
- ctx_retriever-0.1.0/src/retrieval/ablation_variants.py +247 -0
- ctx_retriever-0.1.0/src/retrieval/adaptive_trigger.py +479 -0
- ctx_retriever-0.1.0/src/retrieval/bm25_retriever.py +89 -0
- ctx_retriever-0.1.0/src/retrieval/chroma_retriever.py +139 -0
- ctx_retriever-0.1.0/src/retrieval/dense_retriever.py +100 -0
- ctx_retriever-0.1.0/src/retrieval/full_context.py +67 -0
- ctx_retriever-0.1.0/src/retrieval/graph_rag.py +326 -0
- ctx_retriever-0.1.0/src/retrieval/hybrid_dense_ctx.py +290 -0
- ctx_retriever-0.1.0/src/retrieval/llamaindex_retriever.py +169 -0
- ctx_retriever-0.1.0/src/retrieval/ranger_approx.py +374 -0
- ctx_retriever-0.1.0/src/trigger/__init__.py +0 -0
- ctx_retriever-0.1.0/src/trigger/trigger_classifier.py +296 -0
- ctx_retriever-0.1.0/src/visualizer/__init__.py +0 -0
- ctx_retriever-0.1.0/src/visualizer/report.py +224 -0
- ctx_retriever-0.1.0/tests/test_trigger_classifier_ko.py +169 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 jaytoone
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ctx-retriever
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Trigger-Driven Dynamic Context Loading for Code-Aware LLM Agents
|
|
5
|
+
Author: jaytoone
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jaytoone/CTX
|
|
8
|
+
Project-URL: Repository, https://github.com/jaytoone/CTX
|
|
9
|
+
Project-URL: HuggingFace Demo, https://huggingface.co/spaces/jaytoone/ctx-demo
|
|
10
|
+
Keywords: retrieval,llm,code-search,context,rag,claude-code
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: rank_bm25>=0.2.2
|
|
23
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
24
|
+
Requires-Dist: numpy>=1.24.0
|
|
25
|
+
Requires-Dist: networkx>=3.0
|
|
26
|
+
Provides-Extra: dense
|
|
27
|
+
Requires-Dist: chromadb>=0.4.0; extra == "dense"
|
|
28
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "dense"
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# CTX: Trigger-Driven Dynamic Context Loading for Code-Aware LLM Agents
|
|
35
|
+
|
|
36
|
+
[PyPI](https://pypi.org/project/ctx-retriever/)
|
|
37
|
+
[License: MIT](LICENSE)
|
|
38
|
+
[HuggingFace Demo](https://huggingface.co/spaces/Be2Jay/ctx-demo)
|
|
39
|
+
|
|
40
|
+
CTX classifies developer queries into four trigger types and routes each to a specialized retrieval pipeline. For dependency-sensitive queries, CTX traverses the codebase import graph to resolve transitive relationships that keyword and embedding methods miss. It achieves a **1.9x higher Token-Efficiency Score** than BM25 while using only **5.2% of tokens**.
|
|
41
|
+
|
|
42
|
+
> **Key insight**: code import graphs encode structural dependency information that text-based RAG cannot capture. CTX achieves Recall@5 = 1.0 on implicit dependency queries vs 0.4 for BM25.
|
|
43
|
+
|
|
44
|
+
## Install
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install ctx-retriever
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Or from source:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
git clone https://github.com/jaytoone/CTX
|
|
54
|
+
cd CTX
|
|
55
|
+
pip install -e .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from src.retrieval.adaptive_trigger import AdaptiveTriggerRetriever
|
|
62
|
+
|
|
63
|
+
# Point at any codebase directory
|
|
64
|
+
retriever = AdaptiveTriggerRetriever("/path/to/your/project")
|
|
65
|
+
|
|
66
|
+
# Retrieve relevant files for any natural-language query
|
|
67
|
+
result = retriever.retrieve(
|
|
68
|
+
query_id="my_query",
|
|
69
|
+
query_text="how does authentication work?",
|
|
70
|
+
k=5
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
for filepath in result.retrieved_files:
|
|
74
|
+
print(filepath, result.scores[filepath])
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Claude Code Hook (Recommended)
|
|
78
|
+
|
|
79
|
+
CTX works best as a **live hook** that automatically injects relevant files into every Claude Code prompt:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# 1. Copy the hook to Claude Code hooks directory
|
|
83
|
+
cp hooks/ctx_real_loader.py ~/.claude/hooks/
|
|
84
|
+
|
|
85
|
+
# 2. Register in ~/.claude/settings.json
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"hooks": {
|
|
91
|
+
"UserPromptSubmit": [
|
|
92
|
+
{ "hooks": [{ "type": "command", "command": "python3 $HOME/.claude/hooks/ctx_real_loader.py" }] }
|
|
93
|
+
]
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
After setup, CTX automatically injects relevant files as context on every prompt. See [`docs/claude_code_integration.md`](docs/claude_code_integration.md) for the full setup guide.
|
|
99
|
+
|
|
100
|
+
**What you get in each prompt:**
|
|
101
|
+
```
|
|
102
|
+
[CTX] Trigger: EXPLICIT_SYMBOL | Query: AuthService | Confidence: 0.70 | Intent: judge from prompt
|
|
103
|
+
Code files (3/847 total):
|
|
104
|
+
• src/auth/service.py [score=1.000]
|
|
105
|
+
• src/auth/middleware.py [score=0.823]
|
|
106
|
+
• tests/test_auth.py [score=0.741]
|
|
107
|
+
(Use the prompt intent to decide how to treat this context.)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Trigger Types
|
|
111
|
+
|
|
112
|
+
| Trigger | When Used | Mechanism |
|
|
113
|
+
|---------|-----------|-----------|
|
|
114
|
+
| `EXPLICIT_SYMBOL` | Query names a class/function | Symbol index lookup |
|
|
115
|
+
| `SEMANTIC_CONCEPT` | Query describes a concept | BM25 keyword scoring |
|
|
116
|
+
| `IMPLICIT_CONTEXT` | Dependency queries ("what uses X") | BFS import graph traversal |
|
|
117
|
+
| `TEMPORAL_HISTORY` | Recent changes / history | Session file tracker |
|
|
118
|
+
|
|
119
|
+
## Results
|
|
120
|
+
|
|
121
|
+
### Synthetic Benchmark (50 files, 166 queries)
|
|
122
|
+
|
|
123
|
+
| Strategy | Recall@5 | Token Usage | TES |
|
|
124
|
+
|----------|----------|-------------|-----|
|
|
125
|
+
| Full Context | 0.075 | 100.0% | 0.019 |
|
|
126
|
+
| BM25 | 0.982 | 18.7% | 0.410 |
|
|
127
|
+
| Dense TF-IDF | 0.973 | 21.0% | 0.406 |
|
|
128
|
+
| GraphRAG-lite | 0.523 | 24.0% | 0.218 |
|
|
129
|
+
| LlamaIndex | 0.972 | 20.1% | 0.405 |
|
|
130
|
+
| Chroma Dense | 0.829 | 19.3% | 0.346 |
|
|
131
|
+
| Hybrid Dense+CTX | 0.725 | 23.6% | 0.303 |
|
|
132
|
+
| **CTX (Ours)** | **0.874** | **5.2%** | **0.776** |
|
|
133
|
+
|
|
134
|
+
**TES** = Recall@5 / ln(1 + files_loaded). Higher = better token efficiency.
|
|
135
|
+
|
|
136
|
+
### COIR External Benchmark (CodeSearchNet Python)
|
|
137
|
+
|
|
138
|
+
| Strategy | Recall@1 | Recall@5 | MRR |
|
|
139
|
+
|----------|----------|----------|-----|
|
|
140
|
+
| Dense Embedding (MiniLM) | 0.960 | 1.000 | 0.978 |
|
|
141
|
+
| Hybrid Dense+CTX | 0.930 | 0.950 | 0.940 |
|
|
142
|
+
| BM25 | 0.920 | 0.980 | 0.946 |
|
|
143
|
+
| CTX Adaptive Trigger | 0.210 | 0.380 | 0.293 |
|
|
144
|
+
|
|
145
|
+
### Key Findings
|
|
146
|
+
|
|
147
|
+
- CTX achieves **1.9x higher TES** than BM25 with only 5.2% token usage
|
|
148
|
+
- CTX achieves **perfect Recall@5 (1.0)** on IMPLICIT_CONTEXT dependency queries
|
|
149
|
+
- Hybrid Dense+CTX achieves R@5=0.950 on COIR — close to BM25 (0.980) and Dense Embedding (1.000), and far above CTX alone (0.380)
|
|
150
|
+
- No single strategy dominates all dimensions — workload determines optimal choice
|
|
151
|
+
|
|
152
|
+
## Running Experiments
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Synthetic benchmark
|
|
156
|
+
python run_experiment.py --dataset-size small --strategy all
|
|
157
|
+
|
|
158
|
+
# Real codebase
|
|
159
|
+
python run_experiment.py --dataset-source real --project-path /path/to/project --strategy all
|
|
160
|
+
|
|
161
|
+
# COIR external benchmark
|
|
162
|
+
python run_coir_eval.py --n-queries 100
|
|
163
|
+
|
|
164
|
+
# Ablation study
|
|
165
|
+
python run_experiment.py --dataset-size small --mode ablation
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Results are written to `benchmarks/results/`.
|
|
169
|
+
|
|
170
|
+
## Project Structure
|
|
171
|
+
|
|
172
|
+
```
|
|
173
|
+
CTX/
|
|
174
|
+
src/
|
|
175
|
+
retrieval/ # Retrieval strategies (8 total)
|
|
176
|
+
adaptive_trigger.py # CTX core: trigger-driven retrieval
|
|
177
|
+
hybrid_dense_ctx.py # Hybrid: dense seed + graph expansion
|
|
178
|
+
bm25_retriever.py # BM25 sparse retrieval
|
|
179
|
+
dense_retriever.py # TF-IDF dense retrieval
|
|
180
|
+
chroma_retriever.py # ChromaDB + sentence-transformers
|
|
181
|
+
graph_rag.py # GraphRAG-lite baseline
|
|
182
|
+
llamaindex_retriever.py # LlamaIndex AST-aware chunking
|
|
183
|
+
full_context.py # Full context baseline
|
|
184
|
+
trigger/ # Trigger classifier (4 types)
|
|
185
|
+
evaluator/ # Benchmark runner, metrics, COIR
|
|
186
|
+
data/ # Dataset generation, real codebase loader
|
|
187
|
+
hooks/
|
|
188
|
+
ctx_real_loader.py # Claude Code UserPromptSubmit hook
|
|
189
|
+
ctx_session_tracker.py # PostToolUse session tracker
|
|
190
|
+
benchmarks/
|
|
191
|
+
results/ # Experiment results and reports
|
|
192
|
+
docs/
|
|
193
|
+
claude_code_integration.md # Claude Code setup guide
|
|
194
|
+
paper/ # Paper draft (markdown + LaTeX)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Paper
|
|
198
|
+
|
|
199
|
+
- Paper draft: [`docs/paper/CTX_paper_draft.md`](docs/paper/CTX_paper_draft.md)
|
|
200
|
+
- arXiv: TBD
|
|
201
|
+
- EMNLP 2026 submission: TBD
|
|
202
|
+
|
|
203
|
+
## License
|
|
204
|
+
|
|
205
|
+
MIT
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# CTX: Trigger-Driven Dynamic Context Loading for Code-Aware LLM Agents
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/ctx-retriever/)
|
|
4
|
+
[License: MIT](LICENSE)
|
|
5
|
+
[HuggingFace Demo](https://huggingface.co/spaces/Be2Jay/ctx-demo)
|
|
6
|
+
|
|
7
|
+
CTX classifies developer queries into four trigger types and routes each to a specialized retrieval pipeline. For dependency-sensitive queries, CTX traverses the codebase import graph to resolve transitive relationships that keyword and embedding methods miss. It achieves a **1.9x higher Token-Efficiency Score** than BM25 while using only **5.2% of tokens**.
|
|
8
|
+
|
|
9
|
+
> **Key insight**: code import graphs encode structural dependency information that text-based RAG cannot capture. CTX achieves Recall@5 = 1.0 on implicit dependency queries vs 0.4 for BM25.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install ctx-retriever
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Or from source:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
git clone https://github.com/jaytoone/CTX
|
|
21
|
+
cd CTX
|
|
22
|
+
pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
from src.retrieval.adaptive_trigger import AdaptiveTriggerRetriever
|
|
29
|
+
|
|
30
|
+
# Point at any codebase directory
|
|
31
|
+
retriever = AdaptiveTriggerRetriever("/path/to/your/project")
|
|
32
|
+
|
|
33
|
+
# Retrieve relevant files for any natural-language query
|
|
34
|
+
result = retriever.retrieve(
|
|
35
|
+
query_id="my_query",
|
|
36
|
+
query_text="how does authentication work?",
|
|
37
|
+
k=5
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
for filepath in result.retrieved_files:
|
|
41
|
+
print(filepath, result.scores[filepath])
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Claude Code Hook (Recommended)
|
|
45
|
+
|
|
46
|
+
CTX works best as a **live hook** that automatically injects relevant files into every Claude Code prompt:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# 1. Copy the hook to Claude Code hooks directory
|
|
50
|
+
cp hooks/ctx_real_loader.py ~/.claude/hooks/
|
|
51
|
+
|
|
52
|
+
# 2. Register in ~/.claude/settings.json
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
{
|
|
57
|
+
"hooks": {
|
|
58
|
+
"UserPromptSubmit": [
|
|
59
|
+
{ "hooks": [{ "type": "command", "command": "python3 $HOME/.claude/hooks/ctx_real_loader.py" }] }
|
|
60
|
+
]
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
After setup, CTX automatically injects relevant files as context on every prompt. See [`docs/claude_code_integration.md`](docs/claude_code_integration.md) for the full setup guide.
|
|
66
|
+
|
|
67
|
+
**What you get in each prompt:**
|
|
68
|
+
```
|
|
69
|
+
[CTX] Trigger: EXPLICIT_SYMBOL | Query: AuthService | Confidence: 0.70 | Intent: judge from prompt
|
|
70
|
+
Code files (3/847 total):
|
|
71
|
+
• src/auth/service.py [score=1.000]
|
|
72
|
+
• src/auth/middleware.py [score=0.823]
|
|
73
|
+
• tests/test_auth.py [score=0.741]
|
|
74
|
+
(Use the prompt intent to decide how to treat this context.)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Trigger Types
|
|
78
|
+
|
|
79
|
+
| Trigger | When Used | Mechanism |
|
|
80
|
+
|---------|-----------|-----------|
|
|
81
|
+
| `EXPLICIT_SYMBOL` | Query names a class/function | Symbol index lookup |
|
|
82
|
+
| `SEMANTIC_CONCEPT` | Query describes a concept | BM25 keyword scoring |
|
|
83
|
+
| `IMPLICIT_CONTEXT` | Dependency queries ("what uses X") | BFS import graph traversal |
|
|
84
|
+
| `TEMPORAL_HISTORY` | Recent changes / history | Session file tracker |
|
|
85
|
+
|
|
86
|
+
## Results
|
|
87
|
+
|
|
88
|
+
### Synthetic Benchmark (50 files, 166 queries)
|
|
89
|
+
|
|
90
|
+
| Strategy | Recall@5 | Token Usage | TES |
|
|
91
|
+
|----------|----------|-------------|-----|
|
|
92
|
+
| Full Context | 0.075 | 100.0% | 0.019 |
|
|
93
|
+
| BM25 | 0.982 | 18.7% | 0.410 |
|
|
94
|
+
| Dense TF-IDF | 0.973 | 21.0% | 0.406 |
|
|
95
|
+
| GraphRAG-lite | 0.523 | 24.0% | 0.218 |
|
|
96
|
+
| LlamaIndex | 0.972 | 20.1% | 0.405 |
|
|
97
|
+
| Chroma Dense | 0.829 | 19.3% | 0.346 |
|
|
98
|
+
| Hybrid Dense+CTX | 0.725 | 23.6% | 0.303 |
|
|
99
|
+
| **CTX (Ours)** | **0.874** | **5.2%** | **0.776** |
|
|
100
|
+
|
|
101
|
+
**TES** = Recall@5 / ln(1 + files_loaded). Higher = better token efficiency.
|
|
102
|
+
|
|
103
|
+
### COIR External Benchmark (CodeSearchNet Python)
|
|
104
|
+
|
|
105
|
+
| Strategy | Recall@1 | Recall@5 | MRR |
|
|
106
|
+
|----------|----------|----------|-----|
|
|
107
|
+
| Dense Embedding (MiniLM) | 0.960 | 1.000 | 0.978 |
|
|
108
|
+
| Hybrid Dense+CTX | 0.930 | 0.950 | 0.940 |
|
|
109
|
+
| BM25 | 0.920 | 0.980 | 0.946 |
|
|
110
|
+
| CTX Adaptive Trigger | 0.210 | 0.380 | 0.293 |
|
|
111
|
+
|
|
112
|
+
### Key Findings
|
|
113
|
+
|
|
114
|
+
- CTX achieves **1.9x higher TES** than BM25 with only 5.2% token usage
|
|
115
|
+
- CTX achieves **perfect Recall@5 (1.0)** on IMPLICIT_CONTEXT dependency queries
|
|
116
|
+
- Hybrid Dense+CTX achieves R@5=0.950 on COIR — close to BM25 (0.980) and Dense Embedding (1.000), and far above CTX alone (0.380)
|
|
117
|
+
- No single strategy dominates all dimensions — workload determines optimal choice
|
|
118
|
+
|
|
119
|
+
## Running Experiments
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
# Synthetic benchmark
|
|
123
|
+
python run_experiment.py --dataset-size small --strategy all
|
|
124
|
+
|
|
125
|
+
# Real codebase
|
|
126
|
+
python run_experiment.py --dataset-source real --project-path /path/to/project --strategy all
|
|
127
|
+
|
|
128
|
+
# COIR external benchmark
|
|
129
|
+
python run_coir_eval.py --n-queries 100
|
|
130
|
+
|
|
131
|
+
# Ablation study
|
|
132
|
+
python run_experiment.py --dataset-size small --mode ablation
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Results are written to `benchmarks/results/`.
|
|
136
|
+
|
|
137
|
+
## Project Structure
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
CTX/
|
|
141
|
+
src/
|
|
142
|
+
retrieval/ # Retrieval strategies (8 total)
|
|
143
|
+
adaptive_trigger.py # CTX core: trigger-driven retrieval
|
|
144
|
+
hybrid_dense_ctx.py # Hybrid: dense seed + graph expansion
|
|
145
|
+
bm25_retriever.py # BM25 sparse retrieval
|
|
146
|
+
dense_retriever.py # TF-IDF dense retrieval
|
|
147
|
+
chroma_retriever.py # ChromaDB + sentence-transformers
|
|
148
|
+
graph_rag.py # GraphRAG-lite baseline
|
|
149
|
+
llamaindex_retriever.py # LlamaIndex AST-aware chunking
|
|
150
|
+
full_context.py # Full context baseline
|
|
151
|
+
trigger/ # Trigger classifier (4 types)
|
|
152
|
+
evaluator/ # Benchmark runner, metrics, COIR
|
|
153
|
+
data/ # Dataset generation, real codebase loader
|
|
154
|
+
hooks/
|
|
155
|
+
ctx_real_loader.py # Claude Code UserPromptSubmit hook
|
|
156
|
+
ctx_session_tracker.py # PostToolUse session tracker
|
|
157
|
+
benchmarks/
|
|
158
|
+
results/ # Experiment results and reports
|
|
159
|
+
docs/
|
|
160
|
+
claude_code_integration.md # Claude Code setup guide
|
|
161
|
+
paper/ # Paper draft (markdown + LaTeX)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Paper
|
|
165
|
+
|
|
166
|
+
- Paper draft: [`docs/paper/CTX_paper_draft.md`](docs/paper/CTX_paper_draft.md)
|
|
167
|
+
- arXiv: TBD
|
|
168
|
+
- EMNLP 2026 submission: TBD
|
|
169
|
+
|
|
170
|
+
## License
|
|
171
|
+
|
|
172
|
+
MIT
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ctx-retriever
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Trigger-Driven Dynamic Context Loading for Code-Aware LLM Agents
|
|
5
|
+
Author: jaytoone
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jaytoone/CTX
|
|
8
|
+
Project-URL: Repository, https://github.com/jaytoone/CTX
|
|
9
|
+
Project-URL: HuggingFace Demo, https://huggingface.co/spaces/jaytoone/ctx-demo
|
|
10
|
+
Keywords: retrieval,llm,code-search,context,rag,claude-code
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: rank_bm25>=0.2.2
|
|
23
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
24
|
+
Requires-Dist: numpy>=1.24.0
|
|
25
|
+
Requires-Dist: networkx>=3.0
|
|
26
|
+
Provides-Extra: dense
|
|
27
|
+
Requires-Dist: chromadb>=0.4.0; extra == "dense"
|
|
28
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "dense"
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# CTX: Trigger-Driven Dynamic Context Loading for Code-Aware LLM Agents
|
|
35
|
+
|
|
36
|
+
[PyPI](https://pypi.org/project/ctx-retriever/)
|
|
37
|
+
[License: MIT](LICENSE)
|
|
38
|
+
[HuggingFace Demo](https://huggingface.co/spaces/Be2Jay/ctx-demo)
|
|
39
|
+
|
|
40
|
+
CTX classifies developer queries into four trigger types and routes each to a specialized retrieval pipeline. For dependency-sensitive queries, CTX traverses the codebase import graph to resolve transitive relationships that keyword and embedding methods miss. It achieves a **1.9x higher Token-Efficiency Score** than BM25 while using only **5.2% of tokens**.
|
|
41
|
+
|
|
42
|
+
> **Key insight**: code import graphs encode structural dependency information that text-based RAG cannot capture. CTX achieves Recall@5 = 1.0 on implicit dependency queries vs 0.4 for BM25.
|
|
43
|
+
|
|
44
|
+
## Install
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install ctx-retriever
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Or from source:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
git clone https://github.com/jaytoone/CTX
|
|
54
|
+
cd CTX
|
|
55
|
+
pip install -e .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from src.retrieval.adaptive_trigger import AdaptiveTriggerRetriever
|
|
62
|
+
|
|
63
|
+
# Point at any codebase directory
|
|
64
|
+
retriever = AdaptiveTriggerRetriever("/path/to/your/project")
|
|
65
|
+
|
|
66
|
+
# Retrieve relevant files for any natural-language query
|
|
67
|
+
result = retriever.retrieve(
|
|
68
|
+
query_id="my_query",
|
|
69
|
+
query_text="how does authentication work?",
|
|
70
|
+
k=5
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
for filepath in result.retrieved_files:
|
|
74
|
+
print(filepath, result.scores[filepath])
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Claude Code Hook (Recommended)
|
|
78
|
+
|
|
79
|
+
CTX works best as a **live hook** that automatically injects relevant files into every Claude Code prompt:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# 1. Copy the hook to Claude Code hooks directory
|
|
83
|
+
cp hooks/ctx_real_loader.py ~/.claude/hooks/
|
|
84
|
+
|
|
85
|
+
# 2. Register in ~/.claude/settings.json
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"hooks": {
|
|
91
|
+
"UserPromptSubmit": [
|
|
92
|
+
{ "hooks": [{ "type": "command", "command": "python3 $HOME/.claude/hooks/ctx_real_loader.py" }] }
|
|
93
|
+
]
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
After setup, CTX automatically injects relevant files as context on every prompt. See [`docs/claude_code_integration.md`](docs/claude_code_integration.md) for the full setup guide.
|
|
99
|
+
|
|
100
|
+
**What you get in each prompt:**
|
|
101
|
+
```
|
|
102
|
+
[CTX] Trigger: EXPLICIT_SYMBOL | Query: AuthService | Confidence: 0.70 | Intent: judge from prompt
|
|
103
|
+
Code files (3/847 total):
|
|
104
|
+
• src/auth/service.py [score=1.000]
|
|
105
|
+
• src/auth/middleware.py [score=0.823]
|
|
106
|
+
• tests/test_auth.py [score=0.741]
|
|
107
|
+
(Use the prompt intent to decide how to treat this context.)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Trigger Types
|
|
111
|
+
|
|
112
|
+
| Trigger | When Used | Mechanism |
|
|
113
|
+
|---------|-----------|-----------|
|
|
114
|
+
| `EXPLICIT_SYMBOL` | Query names a class/function | Symbol index lookup |
|
|
115
|
+
| `SEMANTIC_CONCEPT` | Query describes a concept | BM25 keyword scoring |
|
|
116
|
+
| `IMPLICIT_CONTEXT` | Dependency queries ("what uses X") | BFS import graph traversal |
|
|
117
|
+
| `TEMPORAL_HISTORY` | Recent changes / history | Session file tracker |
|
|
118
|
+
|
|
119
|
+
## Results
|
|
120
|
+
|
|
121
|
+
### Synthetic Benchmark (50 files, 166 queries)
|
|
122
|
+
|
|
123
|
+
| Strategy | Recall@5 | Token Usage | TES |
|
|
124
|
+
|----------|----------|-------------|-----|
|
|
125
|
+
| Full Context | 0.075 | 100.0% | 0.019 |
|
|
126
|
+
| BM25 | 0.982 | 18.7% | 0.410 |
|
|
127
|
+
| Dense TF-IDF | 0.973 | 21.0% | 0.406 |
|
|
128
|
+
| GraphRAG-lite | 0.523 | 24.0% | 0.218 |
|
|
129
|
+
| LlamaIndex | 0.972 | 20.1% | 0.405 |
|
|
130
|
+
| Chroma Dense | 0.829 | 19.3% | 0.346 |
|
|
131
|
+
| Hybrid Dense+CTX | 0.725 | 23.6% | 0.303 |
|
|
132
|
+
| **CTX (Ours)** | **0.874** | **5.2%** | **0.776** |
|
|
133
|
+
|
|
134
|
+
**TES** = Recall@5 / ln(1 + files_loaded). Higher = better token efficiency.
|
|
135
|
+
|
|
136
|
+
### COIR External Benchmark (CodeSearchNet Python)
|
|
137
|
+
|
|
138
|
+
| Strategy | Recall@1 | Recall@5 | MRR |
|
|
139
|
+
|----------|----------|----------|-----|
|
|
140
|
+
| Dense Embedding (MiniLM) | 0.960 | 1.000 | 0.978 |
|
|
141
|
+
| Hybrid Dense+CTX | 0.930 | 0.950 | 0.940 |
|
|
142
|
+
| BM25 | 0.920 | 0.980 | 0.946 |
|
|
143
|
+
| CTX Adaptive Trigger | 0.210 | 0.380 | 0.293 |
|
|
144
|
+
|
|
145
|
+
### Key Findings
|
|
146
|
+
|
|
147
|
+
- CTX achieves **1.9x higher TES** than BM25 with only 5.2% token usage
|
|
148
|
+
- CTX achieves **perfect Recall@5 (1.0)** on IMPLICIT_CONTEXT dependency queries
|
|
149
|
+
- Hybrid Dense+CTX achieves R@5=0.950 on COIR — close to BM25 (0.980) and Dense Embedding (1.000), and far above CTX alone (0.380)
|
|
150
|
+
- No single strategy dominates all dimensions — workload determines optimal choice
|
|
151
|
+
|
|
152
|
+
## Running Experiments
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Synthetic benchmark
|
|
156
|
+
python run_experiment.py --dataset-size small --strategy all
|
|
157
|
+
|
|
158
|
+
# Real codebase
|
|
159
|
+
python run_experiment.py --dataset-source real --project-path /path/to/project --strategy all
|
|
160
|
+
|
|
161
|
+
# COIR external benchmark
|
|
162
|
+
python run_coir_eval.py --n-queries 100
|
|
163
|
+
|
|
164
|
+
# Ablation study
|
|
165
|
+
python run_experiment.py --dataset-size small --mode ablation
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Results are written to `benchmarks/results/`.
|
|
169
|
+
|
|
170
|
+
## Project Structure
|
|
171
|
+
|
|
172
|
+
```
|
|
173
|
+
CTX/
|
|
174
|
+
src/
|
|
175
|
+
retrieval/ # Retrieval strategies (8 total)
|
|
176
|
+
adaptive_trigger.py # CTX core: trigger-driven retrieval
|
|
177
|
+
hybrid_dense_ctx.py # Hybrid: dense seed + graph expansion
|
|
178
|
+
bm25_retriever.py # BM25 sparse retrieval
|
|
179
|
+
dense_retriever.py # TF-IDF dense retrieval
|
|
180
|
+
chroma_retriever.py # ChromaDB + sentence-transformers
|
|
181
|
+
graph_rag.py # GraphRAG-lite baseline
|
|
182
|
+
llamaindex_retriever.py # LlamaIndex AST-aware chunking
|
|
183
|
+
full_context.py # Full context baseline
|
|
184
|
+
trigger/ # Trigger classifier (4 types)
|
|
185
|
+
evaluator/ # Benchmark runner, metrics, COIR
|
|
186
|
+
data/ # Dataset generation, real codebase loader
|
|
187
|
+
hooks/
|
|
188
|
+
ctx_real_loader.py # Claude Code UserPromptSubmit hook
|
|
189
|
+
ctx_session_tracker.py # PostToolUse session tracker
|
|
190
|
+
benchmarks/
|
|
191
|
+
results/ # Experiment results and reports
|
|
192
|
+
docs/
|
|
193
|
+
claude_code_integration.md # Claude Code setup guide
|
|
194
|
+
paper/ # Paper draft (markdown + LaTeX)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Paper
|
|
198
|
+
|
|
199
|
+
- Paper draft: [`docs/paper/CTX_paper_draft.md`](docs/paper/CTX_paper_draft.md)
|
|
200
|
+
- arXiv: TBD
|
|
201
|
+
- EMNLP 2026 submission: TBD
|
|
202
|
+
|
|
203
|
+
## License
|
|
204
|
+
|
|
205
|
+
MIT
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
ctx_retriever.egg-info/PKG-INFO
|
|
5
|
+
ctx_retriever.egg-info/SOURCES.txt
|
|
6
|
+
ctx_retriever.egg-info/dependency_links.txt
|
|
7
|
+
ctx_retriever.egg-info/requires.txt
|
|
8
|
+
ctx_retriever.egg-info/top_level.txt
|
|
9
|
+
src/__init__.py
|
|
10
|
+
src/analysis/__init__.py
|
|
11
|
+
src/analysis/differentiation.py
|
|
12
|
+
src/analysis/error_analysis.py
|
|
13
|
+
src/analysis/trigger_accuracy.py
|
|
14
|
+
src/data/__init__.py
|
|
15
|
+
src/data/dataset_generator.py
|
|
16
|
+
src/data/real_codebase_loader.py
|
|
17
|
+
src/evaluator/__init__.py
|
|
18
|
+
src/evaluator/benchmark_runner.py
|
|
19
|
+
src/evaluator/coir_evaluator.py
|
|
20
|
+
src/evaluator/doc_retrieval_eval.py
|
|
21
|
+
src/evaluator/downstream_quality.py
|
|
22
|
+
src/evaluator/hook_effectiveness_eval.py
|
|
23
|
+
src/evaluator/llm_quality.py
|
|
24
|
+
src/evaluator/metrics.py
|
|
25
|
+
src/evaluator/ranger_comparison.py
|
|
26
|
+
src/evaluator/repobench_evaluator.py
|
|
27
|
+
src/evaluator/statistical_tests.py
|
|
28
|
+
src/retrieval/__init__.py
|
|
29
|
+
src/retrieval/ablation_variants.py
|
|
30
|
+
src/retrieval/adaptive_trigger.py
|
|
31
|
+
src/retrieval/bm25_retriever.py
|
|
32
|
+
src/retrieval/chroma_retriever.py
|
|
33
|
+
src/retrieval/dense_retriever.py
|
|
34
|
+
src/retrieval/full_context.py
|
|
35
|
+
src/retrieval/graph_rag.py
|
|
36
|
+
src/retrieval/hybrid_dense_ctx.py
|
|
37
|
+
src/retrieval/llamaindex_retriever.py
|
|
38
|
+
src/retrieval/ranger_approx.py
|
|
39
|
+
src/trigger/__init__.py
|
|
40
|
+
src/trigger/trigger_classifier.py
|
|
41
|
+
src/visualizer/__init__.py
|
|
42
|
+
src/visualizer/report.py
|
|
43
|
+
tests/test_trigger_classifier_ko.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
src
|