context-compress 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. context_compress-0.1.0/.gitignore +6 -0
  2. context_compress-0.1.0/ARCHITECTURE.md +597 -0
  3. context_compress-0.1.0/CODEX_PROMPT.md +220 -0
  4. context_compress-0.1.0/FEATURE_PLANS.md +3110 -0
  5. context_compress-0.1.0/LICENSE +21 -0
  6. context_compress-0.1.0/PKG-INFO +250 -0
  7. context_compress-0.1.0/README.md +216 -0
  8. context_compress-0.1.0/REAL_DATA_RESULTS.md +152 -0
  9. context_compress-0.1.0/RESEARCH-BRIEF.md +17 -0
  10. context_compress-0.1.0/RESEARCH.md +238 -0
  11. context_compress-0.1.0/benchmarks/__init__.py +1 -0
  12. context_compress-0.1.0/benchmarks/datasets/conversations.json +262 -0
  13. context_compress-0.1.0/benchmarks/datasets/documents.json +132 -0
  14. context_compress-0.1.0/benchmarks/datasets/rag_chunks.json +194 -0
  15. context_compress-0.1.0/benchmarks/datasets.py +36 -0
  16. context_compress-0.1.0/benchmarks/metrics.py +139 -0
  17. context_compress-0.1.0/benchmarks/reporter.py +88 -0
  18. context_compress-0.1.0/benchmarks/results/2026-03-15-baseline.md +31 -0
  19. context_compress-0.1.0/benchmarks/results/full_benchmark_2026-03-16.json +26 -0
  20. context_compress-0.1.0/benchmarks/runner.py +184 -0
  21. context_compress-0.1.0/cctx/__init__.py +8 -0
  22. context_compress-0.1.0/cctx/cache.py +225 -0
  23. context_compress-0.1.0/cctx/cli.py +195 -0
  24. context_compress-0.1.0/cctx/client.py +303 -0
  25. context_compress-0.1.0/cctx/compressor.py +601 -0
  26. context_compress-0.1.0/cctx/conversation.py +330 -0
  27. context_compress-0.1.0/cctx/entity_extractor.py +193 -0
  28. context_compress-0.1.0/cctx/exceptions.py +21 -0
  29. context_compress-0.1.0/cctx/llm_summarizer.py +265 -0
  30. context_compress-0.1.0/cctx/pricing.py +13 -0
  31. context_compress-0.1.0/cctx/protocol.py +247 -0
  32. context_compress-0.1.0/cctx/proxy.py +217 -0
  33. context_compress-0.1.0/cctx/scorer.py +769 -0
  34. context_compress-0.1.0/cctx/server.py +444 -0
  35. context_compress-0.1.0/cctx/tokenizer.py +64 -0
  36. context_compress-0.1.0/cctx/types.py +206 -0
  37. context_compress-0.1.0/docs/plans/2026-03-15-entity-aware-scorer.md +400 -0
  38. context_compress-0.1.0/docs/specs/2026-03-15-benchmark-datasets-design.md +41 -0
  39. context_compress-0.1.0/docs/specs/2026-03-15-entity-aware-scorer-design.md +112 -0
  40. context_compress-0.1.0/examples/basic_compression.py +35 -0
  41. context_compress-0.1.0/examples/conversation_demo.py +45 -0
  42. context_compress-0.1.0/examples/middleware_demo.py +55 -0
  43. context_compress-0.1.0/prototype/README.md +37 -0
  44. context_compress-0.1.0/prototype/RESULTS.md +42 -0
  45. context_compress-0.1.0/prototype/benchmark.py +255 -0
  46. context_compress-0.1.0/prototype/compressor.py +286 -0
  47. context_compress-0.1.0/pyproject.toml +63 -0
  48. context_compress-0.1.0/tests/conftest.py +156 -0
  49. context_compress-0.1.0/tests/real_data/agentforce_rag.txt +45 -0
  50. context_compress-0.1.0/tests/real_data/research_document.txt +238 -0
  51. context_compress-0.1.0/tests/real_data/tesla_conversation.txt +28 -0
  52. context_compress-0.1.0/tests/test_additional_coverage.py +364 -0
  53. context_compress-0.1.0/tests/test_benchmarks.py +185 -0
  54. context_compress-0.1.0/tests/test_cache.py +214 -0
  55. context_compress-0.1.0/tests/test_client_cli.py +90 -0
  56. context_compress-0.1.0/tests/test_compressor.py +101 -0
  57. context_compress-0.1.0/tests/test_conversation.py +105 -0
  58. context_compress-0.1.0/tests/test_conversation_scorer.py +167 -0
  59. context_compress-0.1.0/tests/test_entity_extractor.py +153 -0
  60. context_compress-0.1.0/tests/test_entity_scorer_boost.py +167 -0
  61. context_compress-0.1.0/tests/test_llm_scorer.py +217 -0
  62. context_compress-0.1.0/tests/test_llm_summarizer.py +215 -0
  63. context_compress-0.1.0/tests/test_protocol.py +192 -0
  64. context_compress-0.1.0/tests/test_proxy.py +374 -0
  65. context_compress-0.1.0/tests/test_real_data.py +377 -0
  66. context_compress-0.1.0/tests/test_scorer.py +80 -0
  67. context_compress-0.1.0/tests/test_server.py +98 -0
  68. context_compress-0.1.0/tests/test_tokenizer.py +20 -0
@@ -0,0 +1,6 @@
1
+ .env
2
+ dist/
3
+ *.egg-info/
4
+ __pycache__/
5
+ .coverage
6
+ .pytest_cache/
@@ -0,0 +1,597 @@
1
+ # Context Compression Middleware — Architecture Spec
2
+
3
+ > **Purpose:** This document is the complete build spec for a coding agent (Codex/Claude Code). Everything needed to implement is here. No ambiguity, no hand-waving.
4
+
5
+ ---
6
+
7
+ ## What This Is
8
+
9
+ A Python library + CLI + HTTP API that compresses LLM context into hierarchical layers, supports incremental conversation compression, and works as drop-in middleware between any application and any LLM API.
10
+
11
+ **One sentence:** You send us a big context, we give you back a smaller one that works just as well.
12
+
13
+ ---
14
+
15
+ ## Project Structure
16
+
17
+ ```
18
+ context-compression/
19
+ ├── pyproject.toml # Package config (use hatchling)
20
+ ├── README.md
21
+ ├── cctx/ # Package name: cctx (context compression)
22
+ │ ├── __init__.py # Exports: Compressor, ConversationManager, CCTXClient
23
+ │ ├── compressor.py # Core hierarchical compression engine
24
+ │ ├── conversation.py # Incremental delta compression for conversations
25
+ │ ├── scorer.py # Sentence/chunk importance scoring
26
+ │ ├── tokenizer.py # Token counting (wraps tiktoken)
27
+ │ ├── client.py # HTTP client for the API server
28
+ │ ├── server.py # FastAPI server (the middleware API)
29
+ │ ├── cli.py # CLI entry point (click)
30
+ │ └── types.py # Shared dataclasses/types
31
+ ├── tests/
32
+ │ ├── test_compressor.py
33
+ │ ├── test_conversation.py
34
+ │ ├── test_scorer.py
35
+ │ └── test_server.py
36
+ └── examples/
37
+ ├── basic_compression.py
38
+ ├── conversation_demo.py
39
+ └── middleware_demo.py # Shows drop-in usage between app and OpenAI
40
+ ```
41
+
42
+ ---
43
+
44
+ ## Core Types (`cctx/types.py`)
45
+
46
+ ```python
47
+ from dataclasses import dataclass, field
48
+ from enum import Enum
49
+ from typing import Optional
50
+
51
+ class Layer(Enum):
52
+ RAW = 0 # Full text, no compression
53
+ FACTS = 1 # Key facts & relationships extracted
54
+ SUMMARY = 2 # Executive summary
55
+
56
+ @dataclass
57
+ class CompressedResult:
58
+ """Output of compression at all 3 layers."""
59
+ layers: dict[Layer, str] # Layer -> compressed text
60
+ tokens: dict[Layer, int] # Layer -> token count
61
+ entities: list[str] # Named entities found
62
+ sections: list[Section] # Drill-down sections
63
+ metadata: dict # Timing, scores, etc.
64
+
65
+ @dataclass
66
+ class Section:
67
+ """A chunk of the original text, addressable for drill-down."""
68
+ id: str # e.g. "s0", "s1", "s2"
69
+ text: str # Full raw text of this section
70
+ tokens: int # Token count
71
+ summary: str # One-sentence summary
72
+ importance: float # 0.0 - 1.0 score
73
+
74
+ @dataclass
75
+ class ConversationState:
76
+ """Persistent state for an ongoing conversation."""
77
+ id: str # Conversation identifier
78
+ compressed_history: str # Compressed older messages
79
+ compressed_tokens: int
80
+ recent_messages: list[Message] # Last N messages (raw)
81
+ total_raw_tokens: int # Running total of all tokens ever added
82
+ compression_events: int # How many times compression has fired
83
+
84
+ @dataclass
85
+ class Message:
86
+ role: str # "user", "assistant", "system"
87
+ content: str
88
+ tokens: int
89
+
90
+ @dataclass
91
+ class CompressRequest:
92
+ """API request body."""
93
+ text: str
94
+ layer: Layer = Layer.FACTS # Target compression layer
95
+ l1_ratio: float = 0.3 # Fraction of content to keep at Layer 1
96
+ l2_max_sentences: int = 3 # Max sentences for Layer 2
97
+ sections: bool = True # Whether to generate drill-down sections
98
+
99
+ @dataclass
100
+ class ConversationRequest:
101
+ """API request for conversation compression."""
102
+ conversation_id: str
103
+ message: Message
104
+ compress_threshold: int = 1000 # Tokens before compression triggers
105
+ keep_recent: int = 5 # Number of recent messages to keep raw
106
+
107
+ @dataclass
108
+ class DrillDownRequest:
109
+ """API request to expand a section."""
110
+ section_id: str
111
+ target_layer: Layer = Layer.RAW # What layer to return
112
+ ```
113
+
114
+ ---
115
+
116
+ ## Scoring Engine (`cctx/scorer.py`)
117
+
118
+ This is the brain — decides what's important. Keep it pluggable.
119
+
120
+ ```python
121
+ from abc import ABC, abstractmethod
122
+
123
+ class Scorer(ABC):
124
+ """Base class for importance scoring strategies."""
125
+
126
+ @abstractmethod
127
+ def score_sentences(self, sentences: list[str], query: str | None = None) -> list[float]:
128
+ """Return importance score (0.0-1.0) for each sentence."""
129
+ ...
130
+
131
+ class TextRankScorer(Scorer):
132
+ """Graph-based sentence ranking. No external dependencies."""
133
+ # Implementation: sentence similarity matrix -> power iteration
134
+ # This is the DEFAULT scorer. Works offline, fast, no API calls.
135
+
136
+ class TFIDFScorer(Scorer):
137
+ """TF-IDF based scoring. Better for document-style content."""
138
+ # Uses scikit-learn TfidfVectorizer
139
+ # Score = cosine similarity to document centroid (or query if provided)
140
+
141
+ class QueryAwareScorer(Scorer):
142
+ """Scores sentences by relevance to a specific query/question."""
143
+ # Wraps another scorer but boosts sentences matching query terms
144
+ # Critical for RAG contexts where you know what question is being answered
145
+
146
+ class CompositeScorer(Scorer):
147
+ """Combines multiple scorers with configurable weights."""
148
+ def __init__(self, scorers: list[tuple[Scorer, float]]):
149
+ # scorers = [(TextRankScorer(), 0.6), (TFIDFScorer(), 0.4)]
150
+ ...
151
+ ```
152
+
153
+ **Why pluggable:** Later we can add an `LLMScorer` that uses a cheap model (GPT-4o-mini) to score importance. For now, keep it algorithmic and free.
154
+
155
+ ---
156
+
157
+ ## Compressor (`cctx/compressor.py`)
158
+
159
+ ```python
160
+ class Compressor:
161
+ def __init__(self, scorer: Scorer | None = None):
162
+ """Default scorer is TextRankScorer."""
163
+ self.scorer = scorer or TextRankScorer()
164
+
165
+ def compress(self, request: CompressRequest) -> CompressedResult:
166
+ """
167
+ Compress text into hierarchical layers.
168
+
169
+ Algorithm:
170
+ 1. Split text into sections (by paragraph/double-newline)
171
+ 2. Split each section into sentences
172
+ 3. Score all sentences with self.scorer
173
+ 4. Layer 1: Keep top `l1_ratio` sentences, in original order
174
+ 5. Layer 2: Keep top `l2_max_sentences` sentences, in original order
175
+ 6. Extract entities (regex-based NER)
176
+ 7. Generate section summaries (top sentence per section)
177
+ 8. Return CompressedResult with all layers
178
+ """
179
+
180
+ def drill_down(self, result: CompressedResult, section_id: str,
181
+ layer: Layer = Layer.RAW) -> str:
182
+ """
183
+ Retrieve a specific section at a specific layer.
184
+ Layer.RAW = full text, Layer.FACTS = compressed section.
185
+ """
186
+ ```
187
+
188
+ **Key design decision:** The compressor is stateless. Give it text, get back compressed text. Conversation state management is separate.
189
+
190
+ ---
191
+
192
+ ## Conversation Manager (`cctx/conversation.py`)
193
+
194
+ ```python
195
+ class ConversationManager:
196
+ """Manages incremental compression for ongoing conversations."""
197
+
198
+ def __init__(self, compressor: Compressor | None = None,
199
+ compress_threshold: int = 1000,
200
+ keep_recent: int = 5):
201
+ self.compressor = compressor or Compressor()
202
+ self.conversations: dict[str, ConversationState] = {}
203
+
204
+ def add_message(self, conversation_id: str, message: Message) -> dict:
205
+ """
206
+ Add a message to a conversation. Compress if threshold exceeded.
207
+
208
+ Algorithm:
209
+ 1. Append message to conversation's recent_messages
210
+ 2. Count total tokens in (compressed_history + recent_messages)
211
+ 3. If total > compress_threshold AND len(recent_messages) > keep_recent:
212
+ a. Take messages[:-keep_recent] (older ones)
213
+ b. Combine with existing compressed_history
214
+ c. Compress using self.compressor at Layer.FACTS
215
+ d. Replace compressed_history with result
216
+ e. Keep only messages[-keep_recent:] as recent
217
+ 4. Return stats dict: {action, tokens_before, tokens_after, ...}
218
+ """
219
+
220
+ def get_context(self, conversation_id: str) -> str:
221
+ """
222
+ Get the current context for LLM consumption.
223
+
224
+ Format:
225
+ [Prior context summary]
226
+ {compressed_history}
227
+ [Recent messages]
228
+ {role}: {content}
229
+ {role}: {content}
230
+ ...
231
+ """
232
+
233
+ def get_context_tokens(self, conversation_id: str) -> int:
234
+ """Token count of get_context() output."""
235
+
236
+ def reset(self, conversation_id: str):
237
+ """Clear conversation state."""
238
+ ```
239
+
240
+ **How conversations connect to compression:**
241
+ - `ConversationManager` owns a `Compressor` instance
242
+ - When compression triggers, it calls `compressor.compress()` on the older messages
243
+ - The compressed output becomes the new `compressed_history`
244
+ - Recent messages stay raw for full fidelity
245
+ - This is the "rolling summary" approach — aged messages are folded into the compressed history, which is re-summarized as a whole each cycle, while recent messages keep full raw fidelity
246
+
247
+ ---
248
+
249
+ ## HTTP API Server (`cctx/server.py`)
250
+
251
+ FastAPI server. Three endpoints. That's it.
252
+
253
+ ```python
254
+ from fastapi import FastAPI
255
+
256
+ app = FastAPI(title="cctx", version="0.1.0")
257
+
258
+ # Shared instances
259
+ compressor = Compressor()
260
+ conversation_mgr = ConversationManager(compressor)
261
+
262
+ @app.post("/v1/compress")
263
+ async def compress(request: CompressRequest) -> CompressedResult:
264
+ """
265
+ Compress text into hierarchical layers.
266
+
267
+ Request:
268
+ {
269
+ "text": "Your long text...",
270
+ "layer": 1, # 0=raw, 1=facts, 2=summary (default: 1)
271
+ "l1_ratio": 0.3, # optional
272
+ "l2_max_sentences": 3, # optional
273
+ "sections": true # optional, include drill-down sections
274
+ }
275
+
276
+ Response:
277
+ {
278
+ "layers": {
279
+ "0": {"text": "...", "tokens": 1674},
280
+ "1": {"text": "...", "tokens": 689},
281
+ "2": {"text": "...", "tokens": 45}
282
+ },
283
+ "entities": ["Tesla", "Salesforce", ...],
284
+ "sections": [
285
+ {"id": "s0", "summary": "...", "tokens": 120, "importance": 0.92},
286
+ ...
287
+ ],
288
+ "metadata": {
289
+ "compression_ms": 12,
290
+ "scorer": "textrank"
291
+ }
292
+ }
293
+ """
294
+
295
+ @app.post("/v1/conversation")
296
+ async def conversation(request: ConversationRequest) -> dict:
297
+ """
298
+ Add a message to a conversation with automatic incremental compression.
299
+
300
+ Request:
301
+ {
302
+ "conversation_id": "conv_123",
303
+ "message": {"role": "user", "content": "..."},
304
+ "compress_threshold": 1000, # optional
305
+ "keep_recent": 5 # optional
306
+ }
307
+
308
+ Response:
309
+ {
310
+ "context": "the full context string to send to your LLM",
311
+ "context_tokens": 569,
312
+ "total_raw_tokens": 1674,
313
+ "compression_ratio": 2.9,
314
+ "action": "compressed", # or "appended"
315
+ "stats": { ... }
316
+ }
317
+ """
318
+
319
+ @app.post("/v1/drill-down")
320
+ async def drill_down(request: DrillDownRequest) -> dict:
321
+ """
322
+ Expand a section from a previous compression result.
323
+
324
+ Request:
325
+ {
326
+ "section_id": "s0",
327
+ "target_layer": 0 # 0=raw, 1=facts
328
+ }
329
+
330
+ Response:
331
+ {
332
+ "section_id": "s0",
333
+ "layer": 0,
334
+ "text": "full raw text of this section...",
335
+ "tokens": 245
336
+ }
337
+ """
338
+ ```
339
+
340
+ ---
341
+
342
+ ## CLI (`cctx/cli.py`)
343
+
344
+ ```python
345
+ import click
346
+
347
+ @click.group()
348
+ def cli():
349
+ """cctx — context compression toolkit"""
350
+
351
+ @cli.command()
352
+ @click.argument("file", type=click.Path(exists=True))
353
+ @click.option("--layer", "-l", type=int, default=1, help="Target layer (0/1/2)")
354
+ @click.option("--ratio", "-r", type=float, default=0.3, help="L1 keep ratio")
355
+ @click.option("--output", "-o", type=click.Path(), help="Output file")
356
+ def compress(file, layer, ratio, output):
357
+ """Compress a text file."""
358
+ # Read file -> compress -> print/write result
359
+
360
+ @cli.command()
361
+ @click.option("--port", "-p", type=int, default=8420)
362
+ def serve(port):
363
+ """Start the HTTP API server."""
364
+ import uvicorn
365
+ uvicorn.run("cctx.server:app", host="0.0.0.0", port=port)
366
+
367
+ @cli.command()
368
+ @click.argument("file", type=click.Path(exists=True))
369
+ def benchmark(file):
370
+ """Run compression benchmarks on a file."""
371
+ # Compress at all layers, print stats table
372
+ ```
373
+
374
+ ---
375
+
376
+ ## Tokenizer (`cctx/tokenizer.py`)
377
+
378
+ ```python
379
+ import tiktoken
380
+
381
+ # Single shared encoder instance
382
+ _enc = tiktoken.get_encoding("cl100k_base")
383
+
384
+ def count_tokens(text: str) -> int:
385
+ """Count tokens using cl100k_base (GPT-4/Claude compatible)."""
386
+ return len(_enc.encode(text))
387
+
388
+ def truncate_to_tokens(text: str, max_tokens: int) -> str:
389
+ """Truncate text to fit within max_tokens."""
390
+ tokens = _enc.encode(text)
391
+ if len(tokens) <= max_tokens:
392
+ return text
393
+ return _enc.decode(tokens[:max_tokens])
394
+ ```
395
+
396
+ ---
397
+
398
+ ## How It All Connects
399
+
400
+ ```
401
+ User's App
402
+
403
+
404
+ ┌─────────────────┐
405
+ │ cctx Client │ ← Python SDK or HTTP calls
406
+ │ (client.py) │
407
+ └────────┬────────┘
408
+
409
+
410
+ ┌─────────────────┐
411
+ │ cctx Server │ ← FastAPI, 3 endpoints
412
+ │ (server.py) │
413
+ └────────┬────────┘
414
+
415
+ ┌────┴────┐
416
+ ▼ ▼
417
+ ┌────────┐ ┌──────────────┐
418
+ │Compress│ │Conversation │
419
+ │ or │ │ Manager │
420
+ └───┬────┘ └──────┬───────┘
421
+ │ │
422
+ ▼ ▼
423
+ ┌─────────────────────┐
424
+ │ Compressor │
425
+ │ (compressor.py) │
426
+ └─────────┬───────────┘
427
+
428
+
429
+ ┌─────────────────────┐
430
+ │ Scorer (pluggable) │
431
+ │ (scorer.py) │
432
+ └─────────┬───────────┘
433
+
434
+
435
+ ┌─────────────────────┐
436
+ │ Tokenizer │
437
+ │ (tokenizer.py) │
438
+ └─────────────────────┘
439
+ ```
440
+
441
+ **Data flow for `/v1/compress`:**
442
+ 1. Request comes in with text + options
443
+ 2. Server passes to `Compressor.compress()`
444
+ 3. Compressor splits text into sections → sentences
445
+ 4. Compressor calls `Scorer.score_sentences()` on all sentences
446
+ 5. Compressor builds Layer 1 (top N% by score, original order)
447
+ 6. Compressor builds Layer 2 (top 3 by score, original order)
448
+ 7. Compressor extracts entities, generates section summaries
449
+ 8. Returns `CompressedResult` → server serializes to JSON
450
+
451
+ **Data flow for `/v1/conversation`:**
452
+ 1. Request comes in with conversation_id + new message
453
+ 2. Server passes to `ConversationManager.add_message()`
454
+ 3. ConversationManager checks if threshold exceeded
455
+ 4. If yes: takes older messages, calls `Compressor.compress()` on them
456
+ 5. Stores compressed output as new `compressed_history`
457
+ 6. Returns current context (compressed_history + recent raw messages)
458
+
459
+ **Data flow for `/v1/drill-down`:**
460
+ 1. Request comes in with section_id
461
+ 2. Server looks up section in the last `CompressedResult` (held in memory)
462
+ 3. Returns the section at the requested layer
463
+
464
+ ---
465
+
466
+ ## Dependencies (keep minimal)
467
+
468
+ ```toml
469
+ [project]
470
+ name = "cctx"
471
+ version = "0.1.0"
472
+ requires-python = ">=3.11"
473
+ dependencies = [
474
+ "tiktoken>=0.7",
475
+ "fastapi>=0.110",
476
+ "uvicorn>=0.29",
477
+ "click>=8.1",
478
+ "scikit-learn>=1.4", # For TF-IDF scorer
479
+ "httpx>=0.27", # For client.py
480
+ ]
481
+
482
+ [project.scripts]
483
+ cctx = "cctx.cli:cli"
484
+ ```
485
+
486
+ ---
487
+
488
+ ## Client SDK (`cctx/client.py`)
489
+
490
+ ```python
491
+ import httpx
492
+
493
+ class CCTXClient:
494
+ """Python client for the cctx API."""
495
+
496
+ def __init__(self, base_url: str = "http://localhost:8420"):
497
+ self.base_url = base_url
498
+ self.http = httpx.Client(base_url=base_url)
499
+
500
+ def compress(self, text: str, layer: int = 1, **kwargs) -> CompressedResult:
501
+ """Compress text. Returns CompressedResult."""
502
+ resp = self.http.post("/v1/compress", json={
503
+ "text": text, "layer": layer, **kwargs
504
+ })
505
+ resp.raise_for_status()
506
+ return CompressedResult(**resp.json())
507
+
508
+ def add_message(self, conversation_id: str, role: str,
509
+ content: str, **kwargs) -> dict:
510
+ """Add a message to a conversation."""
511
+ resp = self.http.post("/v1/conversation", json={
512
+ "conversation_id": conversation_id,
513
+ "message": {"role": role, "content": content},
514
+ **kwargs
515
+ })
516
+ resp.raise_for_status()
517
+ return resp.json()
518
+
519
+ def drill_down(self, section_id: str, layer: int = 0) -> dict:
520
+ """Get full text of a section."""
521
+ resp = self.http.post("/v1/drill-down", json={
522
+ "section_id": section_id, "target_layer": layer
523
+ })
524
+ resp.raise_for_status()
525
+ return resp.json()
526
+ ```
527
+
528
+ ---
529
+
530
+ ## Tests
531
+
532
+ Each test file tests one module. Use pytest.
533
+
534
+ **`test_compressor.py`:**
535
+ - Test that Layer 1 has fewer tokens than Layer 0
536
+ - Test that Layer 2 has fewer tokens than Layer 1
537
+ - Test that sections are generated and addressable
538
+ - Test with empty string, single sentence, very long text
539
+ - Test that sentence order is preserved (compressed text doesn't reorder)
540
+
541
+ **`test_conversation.py`:**
542
+ - Test adding messages below threshold (no compression)
543
+ - Test that compression triggers at threshold
544
+ - Test that recent messages are always raw
545
+ - Test that compressed_history grows correctly over multiple compression cycles
546
+ - Test get_context() format
547
+
548
+ **`test_scorer.py`:**
549
+ - Test TextRankScorer returns scores for each sentence
550
+ - Test every score falls within 0.0–1.0 (normalized per sentence)
551
+ - Test TFIDFScorer with a query boosts relevant sentences
552
+ - Test CompositeScorer combines weights correctly
553
+
554
+ **`test_server.py`:**
555
+ - Test all 3 endpoints return 200
556
+ - Test compress with different layers
557
+ - Test conversation flow (add 10 messages, verify compression happens)
558
+ - Test drill-down returns section text
559
+
560
+ ---
561
+
562
+ ## What NOT To Build (yet)
563
+
564
+ - No LLM-based scoring (keep it algorithmic for v0.1)
565
+ - No persistence (conversations live in memory — Redis/SQLite is v0.2)
566
+ - No auth on the API (add later)
567
+ - No async compression (single-threaded is fine for prototype)
568
+ - No streaming (compress the whole thing, return it)
569
+ - No Token Company API integration (we have the stub, plug it in later)
570
+
571
+ ---
572
+
573
+ ## Build Order
574
+
575
+ 1. `types.py` — all dataclasses
576
+ 2. `tokenizer.py` — token counting
577
+ 3. `scorer.py` — TextRankScorer + TFIDFScorer + CompositeScorer
578
+ 4. `compressor.py` — uses scorer + tokenizer
579
+ 5. `conversation.py` — uses compressor
580
+ 6. `server.py` — FastAPI wrapping compressor + conversation
581
+ 7. `client.py` — HTTP client
582
+ 8. `cli.py` — click CLI
583
+ 9. Tests (in parallel with each module)
584
+ 10. `examples/` — demo scripts
585
+
586
+ **Each module depends only on the ones above it.** No circular imports. No god objects.
587
+
588
+ ---
589
+
590
+ ## Success Criteria
591
+
592
+ The prototype is done when:
593
+ 1. `cctx compress long_document.txt` prints a compression summary with all 3 layers
594
+ 2. `cctx serve` starts an API on port 8420
595
+ 3. The `/v1/conversation` endpoint handles 50 messages and keeps context under a configurable token budget
596
+ 4. `pytest` passes all tests
597
+ 5. The middleware demo shows: App → cctx → OpenAI, with measurable token savings