contextpilot 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. contextpilot-0.3.2/PKG-INFO +239 -0
  2. contextpilot-0.3.2/README.md +198 -0
  3. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/__init__.py +4 -1
  4. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/context_index/index_construction.py +81 -9
  5. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/context_ordering/inter_scheduler.py +10 -9
  6. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/retriever/mem0_retriever.py +1 -1
  7. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/server/http_client.py +37 -82
  8. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/server/http_server.py +175 -112
  9. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/server/live_index.py +324 -55
  10. contextpilot-0.3.2/contextpilot.egg-info/PKG-INFO +239 -0
  11. {contextpilot-0.3.0 → contextpilot-0.3.2}/pyproject.toml +1 -1
  12. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_context_ordering.py +23 -0
  13. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_cpu_distances.py +0 -38
  14. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_multi_turn_e2e.py +41 -36
  15. contextpilot-0.3.0/PKG-INFO +0 -180
  16. contextpilot-0.3.0/README.md +0 -139
  17. contextpilot-0.3.0/contextpilot.egg-info/PKG-INFO +0 -180
  18. {contextpilot-0.3.0 → contextpilot-0.3.2}/LICENSE +0 -0
  19. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/context_index/__init__.py +0 -0
  20. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/context_index/compute_distance_cpu.py +0 -0
  21. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/context_index/compute_distance_gpu.py +0 -0
  22. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/context_index/tree_nodes.py +0 -0
  23. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/context_ordering/__init__.py +0 -0
  24. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/context_ordering/intra_ordering.py +0 -0
  25. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/pipeline/__init__.py +0 -0
  26. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/pipeline/components.py +0 -0
  27. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/pipeline/multi_turn.py +0 -0
  28. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/pipeline/rag_pipeline.py +0 -0
  29. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/retriever/__init__.py +0 -0
  30. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/retriever/bm25.py +0 -0
  31. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/retriever/faiss_embedding.py +0 -0
  32. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/retriever/pageindex_retriever.py +0 -0
  33. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/server/__init__.py +0 -0
  34. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/server/conversation_tracker.py +0 -0
  35. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/server/eviction_heap.py +0 -0
  36. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/server/metadata.py +0 -0
  37. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/utils/__init__.py +0 -0
  38. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/utils/eval_metrics.py +0 -0
  39. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/utils/prompt_generator.py +0 -0
  40. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot/utils/tools.py +0 -0
  41. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot.egg-info/SOURCES.txt +0 -0
  42. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot.egg-info/dependency_links.txt +0 -0
  43. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot.egg-info/requires.txt +0 -0
  44. {contextpilot-0.3.0 → contextpilot-0.3.2}/contextpilot.egg-info/top_level.txt +0 -0
  45. {contextpilot-0.3.0 → contextpilot-0.3.2}/requirements.txt +0 -0
  46. {contextpilot-0.3.0 → contextpilot-0.3.2}/setup.cfg +0 -0
  47. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_context_index.py +0 -0
  48. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_deduplication.py +0 -0
  49. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_gpu_distance_performance.py +0 -0
  50. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_gpu_distances.py +0 -0
  51. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_group_prefix_sharing.py +0 -0
  52. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_incremental_build.py +0 -0
  53. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_live_index.py +0 -0
  54. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_mem0_integration.py +0 -0
  55. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_multi_turn.py +0 -0
  56. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_pageindex_integration.py +0 -0
  57. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_performance.py +0 -0
  58. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_pipeline.py +0 -0
  59. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_server_integration.py +0 -0
  60. {contextpilot-0.3.0 → contextpilot-0.3.2}/tests/test_utils.py +0 -0
@@ -0,0 +1,239 @@
1
+ Metadata-Version: 2.4
2
+ Name: contextpilot
3
+ Version: 0.3.2
4
+ Summary: Efficient Retrieval-Augmented Generation with Accuracy-Preserving Context Reuse
5
+ Author: Yinsicheng Jiang, Chivier Humber
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/SecretSettler/ContextPilot
8
+ Project-URL: Repository, https://github.com/SecretSettler/ContextPilot
9
+ Project-URL: Issues, https://github.com/SecretSettler/ContextPilot/issues
10
+ Keywords: rag,llm,context-reuse,kv-cache,retrieval-augmented-generation
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: datasets
24
+ Requires-Dist: transformers
25
+ Requires-Dist: elasticsearch==8.18.1
26
+ Requires-Dist: aiohttp
27
+ Requires-Dist: ujson
28
+ Requires-Dist: scipy
29
+ Requires-Dist: fastapi[all]
30
+ Requires-Dist: cupy-cuda12x
31
+ Requires-Dist: pytest
32
+ Provides-Extra: dev
33
+ Requires-Dist: black; extra == "dev"
34
+ Requires-Dist: bumpver; extra == "dev"
35
+ Requires-Dist: isort; extra == "dev"
36
+ Requires-Dist: pip-tools; extra == "dev"
37
+ Requires-Dist: pytest; extra == "dev"
38
+ Requires-Dist: pytest-cov; extra == "dev"
39
+ Requires-Dist: ipython; extra == "dev"
40
+ Dynamic: license-file
41
+
42
+ <div align="center">
43
+ <img src="assets/about.png" alt="ContextPilot Logo" width="800"/>
44
+
45
+ <h1><strong>ContextPilot: Efficient Long Context Inference with Context Reuse</strong></h1>
46
+
47
+ [![Python](https://img.shields.io/badge/python-≥3.10-blue)](https://www.python.org/)
48
+ [![PyPI](https://img.shields.io/pypi/v/contextpilot)](https://pypi.org/project/contextpilot/)
49
+ [![License](https://img.shields.io/badge/license-Apache%202.0-green)](LICENSE)
50
+
51
+ </div>
52
+
53
+ --------------------------------------------------------------------------------
54
+
55
+ | [**Documentation**](docs/README.md) | [**Examples**](examples/) | [**Benchmarks**](docs/reference/benchmarks.md) |
56
+
57
+ ## News
58
+
59
+ - [2026/02] ContextPilot v0.3.2 released, supporting [PageIndex](https://github.com/VectifyAI/PageIndex) and [Mem0](https://github.com/mem0ai/mem0).
60
+ - [2026/01] ContextPilot has been accepted to MLSys 2026 🎉! See you in Bellevue, WA, USA.
61
+ - [2025/12] ContextPilot v0.2.0 released.
62
+
63
+ ## About
64
+
65
+ ContextPilot is a fast optimization system on the context engineering layer for agentic workloads:
66
+ 1. **High Throughput & Cache Hit Ratio**: Boosting prefill throughput and prefix cache hit ratio with intelligent context reuse.
67
+ 2. **Strong Compatibility**: Compatible with existing popular RAG libraries (PageIndex), Agentic memory layer (Mem0), KV cache optimization engine (LMCache), and Inference engines (vLLM and SGLang).
68
+ 3. **Negligible Accuracy Loss**: Achieving significant performance improvements with minimal to no accuracy degradation across various benchmarks.
69
+ 4. **Widely Tested**: Tested with a wide range of RAG and Agentic AI applications.
70
+
71
+ ## Target Workloads
72
+
73
+ 1. **Trending Topic QA** — Search and generation for breaking news and hot topics beyond model knowledge
74
+ 2. **Closed-Domain Long-Context QA** — QA over specialized corpora (novels, financial reports, legal documents) with retrieval or in-context search
75
+ 3. **Large-Batch Long-Context Execution** — High-throughput inference where many requests share overlapping contexts; ContextPilot maximizes prefix reuse regardless of the search method
76
+ 4. **Multi-Turn Conversations with Long-Term Memory** — Persistent context reuse across turns (e.g. [Mem0](https://github.com/mem0ai/mem0))
77
+
78
+ ## Benchmark and Performance
79
+
80
+ ### System Performance
81
+
82
+ <div align="center">
83
+ <img src="assets/deepseek_r1_results.png" alt="Benchmark Results" width="600"/>
84
+ </div>
85
+
86
+ ContextPilot (Stateless) on DeepSeek-R1 maintains accuracy compared to SGLang, achieving 64.68% vs 64.15% F1 on MultihopRAG and 41.08% vs 40.20% F1 on NarrativeQA.
87
+
88
+ ### Accuracy on MT-RAG Benchmark (Online Scheduling)
89
+
90
+ <div align="center">
91
+
92
+ | Method | Qwen3-4B | Llama3.1-8B | Qwen3-30B-A3B |
93
+ |--------|----------|-------------|-----------|
94
+ | LMCache | 62.56 | **68.46** | 75.12 |
95
+ | CacheBlend | 50.33 | 56.52 | X |
96
+ | RadixCache | 62.56 | **68.46** | 75.12 |
97
+ | **ContextPilot** | **64.27** | 68.12 | **75.81** |
98
+
99
+ </div>
100
+
101
+ ContextPilot delivers **4-13x** improvements in cache hit rates and **1.5-3.5x** reductions in prefill latency for large-batch RAG workloads, while maintaining or improving accuracy.
102
+
103
+ **Furthermore**, ContextPilot has been tested to reduce input token costs by around **36%** with GPT-5.2.
104
+
105
+ See [Benchmarks](docs/reference/benchmarks.md) in the documentation for GPU vs CPU performance analysis and detailed benchmark methodology.
106
+
107
+ ## Getting Started
108
+
109
+ ### Installation
110
+
111
+ **Requirements:** Python >= 3.10
112
+
113
+ ```bash
114
+ pip install contextpilot
115
+ ```
116
+
117
+ Or install from source:
118
+ ```bash
119
+ git clone https://github.com/Edinburgh-AgenticAI/ContextPilot.git
120
+ cd ContextPilot
121
+ pip install -e .
122
+ ```
123
+
124
+ More [detailed installation instructions](docs/getting_started/installation.md) are available in the docs.
125
+
126
+ ### Quick Start
127
+
128
+ **Offline / Online Stateless** — build index & schedule in one shot:
129
+
130
+ ```python
131
+ from openai import OpenAI
132
+ import contextpilot as cp
133
+
134
+ client = OpenAI(base_url="http://localhost:30000/v1", api_key="...") # Your inference engine URL and API key
135
+
136
+ queries = ["What is AI?", "Explain neural networks", "What is deep learning?"]
137
+ all_contexts = [
138
+ ["Doc about AI", "Doc about ML", "Doc about computing"],
139
+ ["Doc about neural nets", "Doc about deep learning"],
140
+ ["Doc about ML", "Doc about AI", "Doc about deep learning basics"],
141
+ ]
142
+
143
+ # Build index and schedule for prefix sharing
144
+ index = cp.build_context_index(all_contexts, use_gpu=False)
145
+ reordered, _, order, _ = cp.InterContextScheduler().schedule_contexts(index)
146
+
147
+ # Send in optimized order — shared prefixes hit KV cache
148
+ for ctx, orig_idx in zip(reordered, order):
149
+ docs_section = "\n".join(f"[{i+1}] {doc}" for i, doc in enumerate(ctx))
150
+ # Importance ranking restores original retrieval order for the model
151
+ importance_ranking = ">".join(
152
+ str(ctx.index(doc) + 1) for doc in all_contexts[orig_idx] if doc in ctx
153
+ )
154
+ response = client.chat.completions.create(
155
+ model="Qwen/Qwen3-4B",
156
+ messages=[
157
+ {"role": "system", "content": (
158
+ f"Answer the question based on the provided documents.\n\n"
159
+ f"<documents>\n{docs_section}\n</documents>\n\n"
160
+ f"Read the documents in this importance ranking: {importance_ranking}\n"
161
+ f"Prioritize information from higher-ranked documents."
162
+ )},
163
+ {"role": "user", "content": queries[orig_idx]},
164
+ ],
165
+ )
166
+ print(f"Q: {queries[orig_idx]}\nA: {response.choices[0].message.content}\n")
167
+ ```
168
+
169
+ > For online stateless scheduling via HTTP server, see the [online usage guide](docs/guides/online_usage.md).
170
+
171
+ **Stateful** — `LiveContextIndex` tracks cached state:
172
+
173
+ ```python
174
+ from openai import OpenAI
175
+ import contextpilot as cp
176
+
177
+ client = OpenAI(base_url="http://localhost:30000/v1", api_key="...")
178
+ live = cp.LiveContextIndex(use_gpu=False)
179
+
180
+ # Simulate multi-turn: each turn has batch_size=1
181
+ turns = [
182
+ {
183
+ "query": "What is AI?",
184
+ "contexts": [["Doc about AI", "Doc about ML", "Doc about computing"]],
185
+ },
186
+ {
187
+ "query": "Compare supervised and unsupervised learning",
188
+ # 2 of 3 docs overlap with Turn 1 ("Doc about AI", "Doc about ML"), different order + 1 new doc
189
+ "contexts": [["Doc about ML", "Doc about clustering", "Doc about AI"]],
190
+ },
191
+ ]
192
+
193
+ for turn_idx, turn in enumerate(turns):
194
+ contexts = turn["contexts"]
195
+ query = turn["query"]
196
+
197
+ # build_incremental handles both cold start and incremental turns
198
+ result = live.build_incremental(contexts)
199
+ reordered = result['reordered_contexts']
200
+ # Turn 2: reordered to ["Doc about AI", "Doc about ML", "Doc about clustering"]
201
+ # ^— shared prefix from Turn 1 —^ ^— new doc appended
202
+
203
+ ctx = reordered[0]
204
+ docs_section = "\n".join(f"[{i+1}] {doc}" for i, doc in enumerate(ctx))
205
+ importance_ranking = ">".join(
206
+ str(ctx.index(doc) + 1) for doc in contexts[0] if doc in ctx
207
+ )
208
+ response = client.chat.completions.create(
209
+ model="Qwen/Qwen3-4B",
210
+ messages=[
211
+ {"role": "system", "content": (
212
+ f"Answer the question based on the provided documents.\n\n"
213
+ f"<documents>\n{docs_section}\n</documents>\n\n"
214
+ f"Read the documents in this importance ranking: {importance_ranking}\n"
215
+ f"Prioritize information from higher-ranked documents."
216
+ )},
217
+ {"role": "user", "content": query},
218
+ ],
219
+ )
220
+ print(f"[Turn {turn_idx+1}] Q: {query}")
221
+ print(f"A: {response.choices[0].message.content}\n")
222
+ ```
223
+
224
+ > **Note:** Stateful mode works without eviction sync — `LiveContextIndex` tracks the previous ordering and reorders new contexts to maximize prefix cache hits. For production deployments with limited storage size where the KV cache may evict entries, install the [SGLang eviction patch](docs/guides/online_usage.md#sglang-integration) to keep the index in sync. See the [online usage guide](docs/guides/online_usage.md) for HTTP server setup.
225
+
226
+ ## Documentation
227
+
228
+ Check out the ContextPilot [documentation](docs/README.md) for comprehensive guides.
229
+
230
+ ## Examples
231
+
232
+ Go hands-on with our [examples](examples/), demonstrating how to address different use cases with ContextPilot.
233
+
234
+ ## Contributing
235
+
236
+ We welcome and value all contributions! Please feel free to submit issues and pull requests.
237
+
238
+ ## Citation
239
+ We will include the paper citation soon!
@@ -0,0 +1,198 @@
1
+ <div align="center">
2
+ <img src="assets/about.png" alt="ContextPilot Logo" width="800"/>
3
+
4
+ <h1><strong>ContextPilot: Efficient Long Context Inference with Context Reuse</strong></h1>
5
+
6
+ [![Python](https://img.shields.io/badge/python-≥3.10-blue)](https://www.python.org/)
7
+ [![PyPI](https://img.shields.io/pypi/v/contextpilot)](https://pypi.org/project/contextpilot/)
8
+ [![License](https://img.shields.io/badge/license-Apache%202.0-green)](LICENSE)
9
+
10
+ </div>
11
+
12
+ --------------------------------------------------------------------------------
13
+
14
+ | [**Documentation**](docs/README.md) | [**Examples**](examples/) | [**Benchmarks**](docs/reference/benchmarks.md) |
15
+
16
+ ## News
17
+
18
+ - [2026/02] ContextPilot v0.3.2 released, supporting [PageIndex](https://github.com/VectifyAI/PageIndex) and [Mem0](https://github.com/mem0ai/mem0).
19
+ - [2026/01] ContextPilot has been accepted to MLSys 2026 🎉! See you in Bellevue, WA, USA.
20
+ - [2025/12] ContextPilot v0.2.0 released.
21
+
22
+ ## About
23
+
24
+ ContextPilot is a fast optimization system on context engineering layer for agentic workloads:
25
+ 1. **High Throughput & Cache Hit Ratio**: Boosting prefill throughput and prefix cache hit ratio with intelligent context reuse.
26
+ 2. **Strong Compatibility**: Strong compatibility with existing popular RAG libraries (PageIndex), Agentic memory layer (Mem0), KV cache optimization engine (LMCache), and Inference engines (vLLM and SGLang).
27
+ 3. **Negligible Accuracy Loss**: Achieving significant performance improvements with minimal to no accuracy degradation across various benchmarks.
28
+ 3. **Widely Tested**: Tested with a wide range of RAG and Agentic AI applications.
29
+
30
+ ## Target Workloads
31
+
32
+ 1. **Trending Topic QA** — Search and generation for breaking news and hot topics beyond model knowledge
33
+ 2. **Closed-Domain Long-Context QA** — QA over specialized corpora (novels, financial reports, legal documents) with retrieval or in-context search
34
+ 3. **Large-Batch Long-Context Execution** — High-throughput inference where many requests share overlapping contexts; ContextPilot maximizes prefix reuse regardless of the search method
35
+ 4. **Multi-Turn Conversations with Long-Term Memory** — Persistent context reuse across turns (e.g. [Mem0](https://github.com/mem0ai/mem0))
36
+
37
+ ## Benchmark and Performance
38
+
39
+ ### System Performance
40
+
41
+ <div align="center">
42
+ <img src="assets/deepseek_r1_results.png" alt="Benchmark Results" width="600"/>
43
+ </div>
44
+
45
+ ContextPilot (Stateless) on DeepSeek-R1 maintains accuracy compared to SGLang, achieving 64.68% vs 64.15% F1 on MultihopRAG and 41.08% vs 40.20% F1 on NarrativeQA.
46
+
47
+ ### Accuracy on MT-RAG Benchmark (Online Scheduling)
48
+
49
+ <div align="center">
50
+
51
+ | Method | Qwen3-4B | Llama3.1-8B | Qwen3-30B-A3B |
52
+ |--------|----------|-------------|-----------|
53
+ | LMCache | 62.56 | **68.46** | 75.12 |
54
+ | CacheBlend | 50.33 | 56.52 | X |
55
+ | RadixCache | 62.56 | **68.46** | 75.12 |
56
+ | **ContextPilot** | **64.27** | 68.12 | **75.81** |
57
+
58
+ </div>
59
+
60
+ ContextPilot delivers **4-13x** improvements in cache hit rates and **1.5-3.5x** reductions in prefill latency for large-batch RAG workloads, while maintaining or improving accuracy.
61
+
62
+ **Furthermore**, ContextPilot has been tested to reduce input token costs by around **36%** with GPT-5.2.
63
+
64
+ See [Benchmarks](docs/reference/benchmarks.md) in the documentation for GPU vs CPU performance analysis and detailed benchmark methodology.
65
+
66
+ ## Getting Started
67
+
68
+ ### Installation
69
+
70
+ **Requirements:** Python >= 3.10
71
+
72
+ ```bash
73
+ pip install contextpilot
74
+ ```
75
+
76
+ Or install from source:
77
+ ```bash
78
+ git clone https://github.com/Edinburgh-AgenticAI/ContextPilot.git
79
+ cd ContextPilot
80
+ pip install -e .
81
+ ```
82
+
83
+ More [detailed installation instructions](docs/getting_started/installation.md) are available in the docs.
84
+
85
+ ### Quick Start
86
+
87
+ **Offline / Online Stateless** — build index & schedule in one shot:
88
+
89
+ ```python
90
+ from openai import OpenAI
91
+ import contextpilot as cp
92
+
93
+ client = OpenAI(base_url="http://localhost:30000/v1", api_key="...") # Your inference engine URL and API key
94
+
95
+ queries = ["What is AI?", "Explain neural networks", "What is deep learning?"]
96
+ all_contexts = [
97
+ ["Doc about AI", "Doc about ML", "Doc about computing"],
98
+ ["Doc about neural nets", "Doc about deep learning"],
99
+ ["Doc about ML", "Doc about AI", "Doc about deep learning basics"],
100
+ ]
101
+
102
+ # Build index and schedule for prefix sharing
103
+ index = cp.build_context_index(all_contexts, use_gpu=False)
104
+ reordered, _, order, _ = cp.InterContextScheduler().schedule_contexts(index)
105
+
106
+ # Send in optimized order — shared prefixes hit KV cache
107
+ for ctx, orig_idx in zip(reordered, order):
108
+ docs_section = "\n".join(f"[{i+1}] {doc}" for i, doc in enumerate(ctx))
109
+ # Importance ranking restores original retrieval order for the model
110
+ importance_ranking = ">".join(
111
+ str(ctx.index(doc) + 1) for doc in all_contexts[orig_idx] if doc in ctx
112
+ )
113
+ response = client.chat.completions.create(
114
+ model="Qwen/Qwen3-4B",
115
+ messages=[
116
+ {"role": "system", "content": (
117
+ f"Answer the question based on the provided documents.\n\n"
118
+ f"<documents>\n{docs_section}\n</documents>\n\n"
119
+ f"Read the documents in this importance ranking: {importance_ranking}\n"
120
+ f"Prioritize information from higher-ranked documents."
121
+ )},
122
+ {"role": "user", "content": queries[orig_idx]},
123
+ ],
124
+ )
125
+ print(f"Q: {queries[orig_idx]}\nA: {response.choices[0].message.content}\n")
126
+ ```
127
+
128
+ > For online stateless scheduling via HTTP server, see the [online usage guide](docs/guides/online_usage.md).
129
+
130
+ **Stateful** — `LiveContextIndex` tracks cached state:
131
+
132
+ ```python
133
+ from openai import OpenAI
134
+ import contextpilot as cp
135
+
136
+ client = OpenAI(base_url="http://localhost:30000/v1", api_key="...")
137
+ live = cp.LiveContextIndex(use_gpu=False)
138
+
139
+ # Simulate multi-turn: each turn has batch_size=1
140
+ turns = [
141
+ {
142
+ "query": "What is AI?",
143
+ "contexts": [["Doc about AI", "Doc about ML", "Doc about computing"]],
144
+ },
145
+ {
146
+ "query": "Compare supervised and unsupervised learning",
147
+ # 2 of 3 docs overlap with Turn 1 ("Doc about AI", "Doc about ML"), different order + 1 new doc
148
+ "contexts": [["Doc about ML", "Doc about clustering", "Doc about AI"]],
149
+ },
150
+ ]
151
+
152
+ for turn_idx, turn in enumerate(turns):
153
+ contexts = turn["contexts"]
154
+ query = turn["query"]
155
+
156
+ # build_incremental handles both cold start and incremental turns
157
+ result = live.build_incremental(contexts)
158
+ reordered = result['reordered_contexts']
159
+ # Turn 2: reordered to ["Doc about AI", "Doc about ML", "Doc about clustering"]
160
+ # ^— shared prefix from Turn 1 —^ ^— new doc appended
161
+
162
+ ctx = reordered[0]
163
+ docs_section = "\n".join(f"[{i+1}] {doc}" for i, doc in enumerate(ctx))
164
+ importance_ranking = ">".join(
165
+ str(ctx.index(doc) + 1) for doc in contexts[0] if doc in ctx
166
+ )
167
+ response = client.chat.completions.create(
168
+ model="Qwen/Qwen3-4B",
169
+ messages=[
170
+ {"role": "system", "content": (
171
+ f"Answer the question based on the provided documents.\n\n"
172
+ f"<documents>\n{docs_section}\n</documents>\n\n"
173
+ f"Read the documents in this importance ranking: {importance_ranking}\n"
174
+ f"Prioritize information from higher-ranked documents."
175
+ )},
176
+ {"role": "user", "content": query},
177
+ ],
178
+ )
179
+ print(f"[Turn {turn_idx+1}] Q: {query}")
180
+ print(f"A: {response.choices[0].message.content}\n")
181
+ ```
182
+
183
+ > **Note:** Stateful mode works without eviction sync — `LiveContextIndex` tracks the previous ordering and reorders new contexts to maximize prefix cache hits. For production deployments with limited storage size where the KV cache may evict entries, install the [SGLang eviction patch](docs/guides/online_usage.md#sglang-integration) to keep the index in sync. See the [online usage guide](docs/guides/online_usage.md) for HTTP server setup.
184
+
185
+ ## Documentation
186
+
187
+ Check out the ContextPilot [documentation](docs/README.md) for comprehensive guides.
188
+
189
+ ## Examples
190
+
191
+ Go hands-on with our [examples](examples/), demonstrating how to address different use cases with ContextPilot.
192
+
193
+ ## Contributing
194
+
195
+ We welcome and value all contributions! Please feel free to submit issues and pull requests.
196
+
197
+ ## Citation
198
+ We will include the paper citation soon!
@@ -38,6 +38,8 @@ from .context_ordering import (
38
38
  InterContextScheduler,
39
39
  )
40
40
 
41
+ from .server.live_index import LiveContextIndex
42
+
41
43
  from .retriever import (
42
44
  BM25Retriever,
43
45
  FAISSRetriever,
@@ -47,7 +49,7 @@ from .retriever import (
47
49
  MEM0_AVAILABLE,
48
50
  )
49
51
 
50
- __version__ = "0.2.0"
52
+ __version__ = "0.3.2"
51
53
 
52
54
  __all__ = [
53
55
  # High-level pipeline API
@@ -63,6 +65,7 @@ __all__ = [
63
65
  'build_context_index',
64
66
  'IntraContextOrderer',
65
67
  'InterContextScheduler',
68
+ 'LiveContextIndex',
66
69
 
67
70
  # Retrievers
68
71
  'BM25Retriever',
@@ -96,6 +96,12 @@ class ContextIndex:
96
96
  self.node_manager = NodeManager()
97
97
  self.context_orderer = IntraContextOrderer()
98
98
 
99
+ # String-to-int mapping (auto-populated when string inputs are given)
100
+ self._str_to_id: dict = {}
101
+ self._id_to_str: dict = {}
102
+ self._next_str_id: int = 0
103
+ self._is_string_input: bool = False
104
+
99
105
  if self.use_gpu:
100
106
  print("Using GPU for distance computation")
101
107
  else:
@@ -104,16 +110,48 @@ class ContextIndex:
104
110
  else:
105
111
  print("Using CPU for distance computation")
106
112
 
107
- def fit_transform(self, contexts: List[List[int]]) -> IndexResult:
113
+ def _convert_to_int(self, contexts):
114
+ """Convert string contexts to integer IDs if needed."""
115
+ if not contexts or not contexts[0]:
116
+ return contexts
117
+ if isinstance(contexts[0][0], str):
118
+ self._is_string_input = True
119
+ converted = []
120
+ for ctx in contexts:
121
+ converted_ctx = []
122
+ for item in ctx:
123
+ sid = self._str_to_id.get(item)
124
+ if sid is None:
125
+ sid = self._next_str_id
126
+ self._str_to_id[item] = sid
127
+ self._id_to_str[sid] = item
128
+ self._next_str_id += 1
129
+ converted_ctx.append(sid)
130
+ converted.append(converted_ctx)
131
+ return converted
132
+ return contexts
133
+
134
+ def _convert_to_str(self, contexts):
135
+ """Convert integer contexts back to strings if input was strings."""
136
+ if not self._is_string_input or not contexts:
137
+ return contexts
138
+ # Skip if already converted (e.g. from fit_transform output)
139
+ if contexts[0] and isinstance(contexts[0][0], str):
140
+ return contexts
141
+ return [[self._id_to_str[i] for i in ctx] for ctx in contexts]
142
+
143
+ def fit_transform(self, contexts) -> IndexResult:
108
144
  """
109
145
  Perform clustering and return results.
110
146
 
111
147
  Args:
112
- contexts: List of contexts, where each prompt is a list of chunk IDs
148
+ contexts: List of contexts, where each context is a list of chunk IDs (int) or strings.
149
+ String inputs are automatically converted to integer IDs.
113
150
 
114
151
  Returns:
115
152
  IndexResult object containing clustering results
116
153
  """
154
+ contexts = self._convert_to_int(contexts)
117
155
  n = len(contexts)
118
156
 
119
157
  if n < 2:
@@ -194,14 +232,41 @@ class ContextIndex:
194
232
  )
195
233
 
196
234
  def _handle_single_prompt(self, contexts: List[List[int]]) -> IndexResult:
197
- """Handle case with less than 2 contexts."""
235
+ """Handle case with less than 2 contexts.
236
+
237
+ Always creates an empty root node above the leaf(s) so that
238
+ leaf.is_root is never True. This prevents the root-exclusion
239
+ guard in build_incremental from skipping legitimate matches.
240
+ """
198
241
  for i, prompt in enumerate(contexts):
199
- self.node_manager.create_leaf_node(i, prompt)
242
+ node = self.node_manager.create_leaf_node(i, prompt)
243
+ # ClusterNode.__init__ sets doc_ids = sorted(content).
244
+ # Override to preserve the original context order so that
245
+ # build_incremental can use it as a correct prefix for Turn 2.
246
+ node.doc_ids = list(prompt)
247
+
248
+ # Wrap leaf node(s) under an empty root so that no leaf is the root.
249
+ # This mirrors the virtual-root logic in update_search_paths for forests,
250
+ # but applies it even for a single leaf.
251
+ leaf_ids = list(self.node_manager.unique_nodes.keys())
252
+ virtual_root_id = max(leaf_ids) + 1 if leaf_ids else 0
253
+ virtual_root = ClusterNode(
254
+ node_id=virtual_root_id,
255
+ content=set(),
256
+ original_indices=set(),
257
+ distance=0.0,
258
+ children=leaf_ids,
259
+ parent=None,
260
+ frequency=sum(self.node_manager.unique_nodes[nid].frequency for nid in leaf_ids)
261
+ )
262
+ self.node_manager.unique_nodes[virtual_root_id] = virtual_root
263
+ for nid in leaf_ids:
264
+ self.node_manager.unique_nodes[nid].parent = virtual_root_id
200
265
 
201
- # Update search paths even for single nodes
266
+ # Update search paths (now a proper rooted tree)
202
267
  self.node_manager.update_search_paths()
203
268
 
204
- # For single context, extract search paths (will be empty for root-only tree)
269
+ # For single context, extract search paths
205
270
  search_paths = self.context_orderer.extract_search_paths(
206
271
  self.node_manager.unique_nodes, len(contexts)
207
272
  )
@@ -233,7 +298,7 @@ class ContextIndex:
233
298
 
234
299
 
235
300
  # Convenience function for backward compatibility
236
- def build_context_index(contexts: List[List[int]],
301
+ def build_context_index(contexts,
237
302
  linkage_method: str = "average",
238
303
  use_gpu: bool = True,
239
304
  alpha: float = 0.005,
@@ -243,7 +308,7 @@ def build_context_index(contexts: List[List[int]],
243
308
  Convenience function for building a context index.
244
309
 
245
310
  Args:
246
- contexts: List of contexts, where each prompt is a list of chunk IDs
311
+ contexts: List of contexts, where each context is a list of chunk IDs (int) or strings
247
312
  linkage_method: Linkage method for hierarchical clustering
248
313
  use_gpu: Whether to use GPU for distance computation
249
314
  alpha: Weight for position term in distance calculation
@@ -260,4 +325,11 @@ def build_context_index(contexts: List[List[int]],
260
325
  num_workers=num_workers,
261
326
  batch_size=batch_size
262
327
  )
263
- return indexer.fit_transform(contexts)
328
+ result = indexer.fit_transform(contexts)
329
+ # Convert back to strings at the API boundary if input was strings
330
+ if indexer._is_string_input:
331
+ result.reordered_contexts = indexer._convert_to_str(result.reordered_contexts)
332
+ result.original_contexts = indexer._convert_to_str(result.original_contexts)
333
+ result.reordered_prompts = result.reordered_contexts
334
+ result.original_prompts = result.original_contexts
335
+ return result
@@ -4,7 +4,7 @@ Inter-Context Scheduler (ContextPilot Paper Algorithm)
4
4
  This module implements the scheduling algorithm described in the ContextPilot paper:
5
5
  1. Reuses search paths obtained during context ordering (no redundant tree lookups)
6
6
  2. Groups contexts by the first element of their search path, naturally separating cache regions
7
- 3. Sorts contexts within each group by path length in descending order
7
+ 3. Sorts contexts within each group by path length descending, with lexicographic tiebreaker
8
8
 
9
9
  This avoids the O(N log M) tree rescanning overhead of existing methods.
10
10
  """
@@ -20,7 +20,7 @@ class InterContextScheduler:
20
20
  This scheduler:
21
21
  - Reuses search paths obtained during context ordering (no redundant tree lookups)
22
22
  - Groups contexts by the first element of their search path
23
- - Sorts contexts within each group by path length descending
23
+ - Sorts contexts within each group by path length descending, lex tiebreaker
24
24
 
25
25
  Time complexity: O(N) grouping + O(N log N) sorting over N contexts
26
26
  (Independent of tree size M, unlike traditional O(N log M) + O(N log N) methods)
@@ -109,17 +109,19 @@ class InterContextScheduler:
109
109
  contexts: List[List[int]]
110
110
  ) -> List[List[int]]:
111
111
  """
112
- Sort contexts within each group by path length in descending order.
112
+ Sort contexts within each group by path length descending,
113
+ with lexicographic tiebreaker for equal-length paths.
113
114
 
114
- This ensures longer prefix matches execute before shorter ones,
115
- maximizing cache reuse under tight KV budgets, as described in the paper.
115
+ Primary key (length descending): longer prefix matches first.
116
+ Secondary key (lexicographic): among equal-length paths, groups
117
+ contexts sharing deeper path prefixes adjacently to maximize LCP.
116
118
 
117
- Total complexity: O(N log N) across all groups
119
+ Total complexity: O(N * L * log N) across all groups (L = tree depth)
118
120
 
119
121
  Args:
120
122
  groups_by_root: Groups of context indices by root prefix
121
123
  search_paths: Search paths for each context
122
- contexts: Reordered contexts (unused in simplified version)
124
+ contexts: Reordered contexts (unused)
123
125
 
124
126
  Returns:
125
127
  List of sorted groups
@@ -127,10 +129,9 @@ class InterContextScheduler:
127
129
  sorted_groups = []
128
130
 
129
131
  for root_prefix, group_indices in groups_by_root.items():
130
- # Sort by path length in descending order, with index as tiebreaker
131
132
  sorted_group = sorted(
132
133
  group_indices,
133
- key=lambda idx: (-len(search_paths[idx]), idx)
134
+ key=lambda idx: (-len(search_paths[idx]), search_paths[idx], idx)
134
135
  )
135
136
  sorted_groups.append(sorted_group)
136
137
 
@@ -349,7 +349,7 @@ class Mem0Retriever:
349
349
  }
350
350
 
351
351
  # Add metadata fields
352
- if "metadata" in mem:
352
+ if mem.get("metadata"):
353
353
  doc.update(mem["metadata"])
354
354
 
355
355
  # Add user/agent/run info if present