contextops 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,355 @@
1
+ """
2
+ ContextOps Core Engine.
3
+
4
+ The single orchestrator that:
5
+ 1. Runs all analyzers (tokens, redundancy, structure, density)
6
+ 2. Computes the 4-axis penalty score (100 - total penalty)
7
+ 3. Generates actionable recommendations (Next Best Action)
8
+ 4. Returns the final AnalysisResult (JSON-primary API contract)
9
+
10
+ This is intentionally ONE module, not a framework. V0.1 should feel
11
+ like one coherent system. We can modularize later.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import math
17
+ from collections import defaultdict
18
+
19
+ from contextops.analyzers.density import compute_density_signal
20
+ from contextops.analyzers.redundancy import analyze_redundancy
21
+ from contextops.analyzers.structure import analyze_structure
22
+ from contextops.analyzers.tokens import analyze_tokens
23
+ from contextops.core.config import ContextOpsConfig
24
+ from contextops.core.models import (
25
+ AnalysisResult,
26
+ ContextBundle,
27
+ ContextType,
28
+ DensitySignal,
29
+ FindingSeverity,
30
+ Recommendation,
31
+ RedundancyClassification,
32
+ RedundancyFinding,
33
+ ScoreBreakdown,
34
+ StructureFinding,
35
+ TokenBreakdown,
36
+ )
37
+
38
+
39
def analyze(
    bundle: ContextBundle,
    model: str = "gpt-4o",
    cost_per_1k: float = 0.005,
    config: ContextOpsConfig | None = None,
) -> AnalysisResult:
    """
    Analyze a context bundle end to end and return the assembled result.

    The analyzers run in a fixed order (tokens, redundancy, structure,
    density). Their outputs feed the penalty score and the recommendation
    list, and everything is packed into one AnalysisResult.

    Args:
        bundle: Normalized context bundle to analyze.
        model: Model name forwarded to the token analyzer.
        cost_per_1k: Cost per 1K input tokens in USD, forwarded to the
            token analyzer.
        config: Thresholds and mode configuration. When falsy,
            ``ContextOpsConfig.default()`` is used.

    Returns:
        AnalysisResult holding the score, both breakdowns, all findings,
        the recommendations and run metadata.
    """
    if not config:
        config = ContextOpsConfig.default()

    # Token accounting comes first.
    tokens = analyze_tokens(bundle, model=model, cost_per_1k=cost_per_1k)

    # The redundancy analyzer returns its findings plus the wasted-token
    # total; the total is recorded on the token breakdown so the scorer
    # and the serialized output both see it.
    redundancy, wasted = analyze_redundancy(bundle)
    tokens.wasted_tokens = wasted

    structure = analyze_structure(bundle, config=config)
    density = compute_density_signal(bundle)

    breakdown = _compute_score(bundle, tokens, redundancy, structure, density)
    advice = _generate_recommendations(bundle, redundancy, structure, breakdown)

    # NOTE(review): density_effect is not passed, so the result keeps
    # AnalysisResult's default ("shadow") even though the density penalty
    # contributes to the score — confirm this is intended.
    return AnalysisResult(
        score=breakdown.score,
        mode=config.mode,
        config_version=config.version,
        density_signal=density,
        score_breakdown=breakdown,
        token_breakdown=tokens,
        redundancy_findings=redundancy,
        structure_findings=structure,
        recommendations=advice,
        metadata={
            "item_count": bundle.item_count,
            "model": model,
            "version": "0.1.0",
        },
    )
101
+
102
+ # ── Scoring ─────────────────────────────────────────────────────────────
103
+
104
+
105
def _compute_score(
    bundle: ContextBundle,
    token_breakdown: TokenBreakdown,
    redundancy_findings: list[RedundancyFinding],
    structure_findings: list[StructureFinding],
    density_signal: DensitySignal,
) -> ScoreBreakdown:
    """
    Build the 4-axis penalty breakdown for a bundle.

    Score = 100 - (redundancy + density + structure + concentration).
    Each axis is computed by its own helper, which applies that axis's cap.

    Signal contract: every axis helper receives only its designated inputs;
    no helper is handed another axis's signal.

    Returns:
        ScoreBreakdown populated with the four penalties.
    """
    # Keyword arguments are evaluated top to bottom, so the helpers run in
    # the order redundancy → density → structure → concentration.
    return ScoreBreakdown(
        redundancy_penalty=_calc_redundancy_penalty(
            bundle, redundancy_findings, token_breakdown
        ),
        density_penalty=_calc_density_penalty(density_signal),  # DensitySignal only
        structure_penalty=_calc_structure_penalty(structure_findings),
        concentration_penalty=_calc_concentration_penalty(bundle),
    )
132
+
133
+
134
def _calc_redundancy_penalty(
    bundle: ContextBundle,
    findings: list[RedundancyFinding],
    token_breakdown: TokenBreakdown,
) -> float:
    """
    Redundancy penalty (0–30 pts).

    Formula:
        (waste_penalty_ratio × 0.6 + similarity_cluster_score × 0.4) × 30

    - waste_penalty_ratio: ``1 - exp(-0.001 × wasted_tokens)``
    - similarity_cluster_score: fraction of bundle items that appear in at
      least one finding not classified as EXPECTED_OVERLAP, clamped to 1.0

    Signal contract: reads only redundancy analyzer outputs (wasted_tokens,
    findings). Must NOT read density_signal or any structural analyzer output.

    Args:
        bundle: Bundle whose ``item_count`` normalizes the cluster score.
        findings: Redundancy findings for the bundle.
        token_breakdown: Supplies ``wasted_tokens``.

    Returns:
        Penalty rounded to 2 decimals, always within [0.0, 30.0].
    """
    wasted = token_breakdown.wasted_tokens
    # A non-positive waste total means nothing to penalize. Guarding with
    # <= (not ==) keeps a negative total from producing a negative penalty.
    if wasted <= 0:
        return 0.0

    waste_penalty_ratio = 1 - math.exp(-0.001 * wasted)

    # Cluster score: what fraction of items are involved in redundancy?
    involved_ids: set[str] = {
        item_id
        for f in findings
        if f.classification != RedundancyClassification.EXPECTED_OVERLAP
        for item_id in (f.item_a_id, f.item_b_id)
    }

    # Clamp to 1.0: findings may reference more ids than the bundle reports
    # (e.g. an empty or mismatched bundle), and a "fraction" above 1 would
    # overweight this term.
    cluster_score = min(1.0, len(involved_ids) / max(1, bundle.item_count))

    penalty = (waste_penalty_ratio * 0.6 + cluster_score * 0.4) * 30.0
    return min(30.0, round(penalty, 2))
168
+
169
+
170
def _calc_density_penalty(density_signal: DensitySignal) -> float:
    """
    Structural density penalty (capped at 30 pts).

    The penalty is ``total_density_signal × 30``, rounded to 2 decimals.
    ``total_density_signal`` is documented on the signal as a value in
    [0.0, 1.0] combining format overhead (FO), whitespace waste (WL) and
    entropy compression (EC) as ``0.4 * FO + 0.2 * WL + 0.4 * EC``; that
    combination is computed by the density analyzer, not here.

    Signal contract: reads ONLY DensitySignal.
    Must NOT read wasted_tokens or any redundancy analyzer output.
    """
    scaled = round(density_signal.total_density_signal * 30.0, 2)
    # Only the upper bound is enforced here.
    return scaled if scaled < 30.0 else 30.0
186
+
187
+
188
def _calc_structure_penalty(findings: list[StructureFinding]) -> float:
    """
    Structure imbalance penalty (capped at 20 pts).

    Each finding contributes base points scaled by a severity weight:
    - threshold > 0: 5 points per 0.10 by which ``actual_ratio`` exceeds
      ``threshold`` (nothing when it does not exceed it)
    - otherwise: a flat 3 points (low-diversity style findings)

    Returns:
        Sum of contributions rounded to 2 decimals, at most 20.0;
        0.0 when there are no findings.
    """
    if not findings:
        return 0.0

    weight_by_severity = {
        FindingSeverity.LOW: 0.5,
        FindingSeverity.MEDIUM: 1.0,
        FindingSeverity.HIGH: 1.5,
        FindingSeverity.CRITICAL: 2.0,
    }

    def _points(finding: StructureFinding) -> float:
        """Severity-weighted contribution of a single finding."""
        if finding.threshold > 0:
            # e.g. 0.80 actual vs 0.70 threshold → 0.10 overshoot → ~5 points
            overshoot = max(0.0, finding.actual_ratio - finding.threshold)
            base = (overshoot / 0.10) * 5
        else:
            base = 3.0
        # Unknown severities fall back to a neutral weight of 1.0.
        return base * weight_by_severity.get(finding.severity, 1.0)

    accumulated = 0.0
    for finding in findings:
        accumulated += _points(finding)

    return min(20.0, round(accumulated, 2))
219
+
220
+
221
def _calc_concentration_penalty(bundle: ContextBundle) -> float:
    """
    Concentration penalty (capped at 20 pts).

    Looks only at RETRIEVAL items and blends two token-weighted signals:
      1. Source dominance (P_dom): share of retrieval tokens held by the
         largest single source.
      2. Entropy imbalance (P_ent): 1 minus the normalized Shannon entropy
         of the per-source token shares.

    P_con = 0.6 * P_dom + 0.4 * P_ent; the penalty is P_con * 20.
    Items without a source are grouped under "unknown".

    Returns:
        Penalty rounded to 2 decimals; 0.0 when there is at most one
        retrieval item or the retrieval items carry no tokens.
    """
    chunks = bundle.items_by_type(ContextType.RETRIEVAL)

    # A single-chunk lookup ("gold answer" RAG) is never penalized.
    if len(chunks) <= 1:
        return 0.0

    # Token totals per source, plus the overall retrieval total.
    per_source: dict[str, int] = {}
    grand_total = 0
    for chunk in chunks:
        key = chunk.source or "unknown"
        per_source[key] = per_source.get(key, 0) + chunk.token_count
        grand_total += chunk.token_count

    if grand_total == 0:
        return 0.0

    source_count = len(per_source)

    # Signal A: dominance of the heaviest source.
    dominance = max(per_source.values()) / grand_total

    # Signal B: entropy imbalance. With one source the normalizer
    # log2(1) is zero, so the signal is defined as 0 instead.
    imbalance = 0.0
    if source_count > 1:
        shannon = 0.0
        for count in per_source.values():
            share = count / grand_total
            if not share > 0:
                continue  # zero-token sources add nothing to the entropy
            shannon -= share * math.log2(share)
        imbalance = 1.0 - (shannon / math.log2(source_count))

    # Dominance is weighted slightly higher than imbalance.
    blended = (0.6 * dominance) + (0.4 * imbalance)

    return min(20.0, round(blended * 20.0, 2))
269
+
270
+
271
+ # ── Recommendations ─────────────────────────────────────────────────────
272
+
273
+
274
def _generate_recommendations(
    bundle: ContextBundle,
    redundancy_findings: list[RedundancyFinding],
    structure_findings: list[StructureFinding],
    score_breakdown: ScoreBreakdown,
) -> list[Recommendation]:
    """
    Generate actionable recommendations from findings.

    Three groups are produced:
      - one recommendation per REDUNDANT_CONTEXT finding, limited to the
        three findings with the largest estimated waste
      - a single aggregate recommendation covering all BOILERPLATE findings
      - one recommendation per structure finding

    Every recommendation carries the issue, an estimated score gain, the
    estimated token savings and a concrete fix instruction.

    Args:
        bundle: Bundle under analysis; its ``total_tokens`` scales the
            estimated score gains.
        redundancy_findings: Output of the redundancy analyzer.
        structure_findings: Output of the structure analyzer.
        score_breakdown: Supplies ``structure_penalty`` for structure
            recommendations.

    Returns:
        Recommendations sorted by ``impact_score``, highest first.
    """
    recs: list[Recommendation] = []

    # total_tokens is a property that sums every item; read it once.
    total_tokens = bundle.total_tokens

    # ── Redundancy recommendations ──────────────────────────────────
    # Keep only real redundancy (expected overlaps and boilerplate are
    # handled separately) and order by waste so the slice below really is
    # the top three. sorted() is stable, so ties keep analyzer order.
    real_redundancy = sorted(
        (
            f for f in redundancy_findings
            if f.classification == RedundancyClassification.REDUNDANT_CONTEXT
        ),
        key=lambda f: f.estimated_waste_tokens,
        reverse=True,
    )

    for finding in real_redundancy[:3]:
        # Estimated gain: this finding's share of all tokens, scaled to
        # the 30-point redundancy axis.
        if total_tokens > 0:
            estimated_score_gain = finding.estimated_waste_tokens / total_tokens * 30
        else:
            estimated_score_gain = 0.0

        recs.append(Recommendation(
            issue=f"Redundant context: {finding.detail}",
            impact_score=round(estimated_score_gain, 1),
            token_savings=finding.estimated_waste_tokens,
            fix=f"Remove the duplicate item ('{finding.item_b_id}') → save {finding.estimated_waste_tokens} tokens",
            severity=FindingSeverity.HIGH if finding.similarity_score > 0.85 else FindingSeverity.MEDIUM,
        ))

    # ── Boilerplate recommendation (aggregated) ─────────────────────
    boilerplate = [
        f for f in redundancy_findings
        if f.classification == RedundancyClassification.BOILERPLATE
    ]
    if boilerplate:
        total_bp_waste = sum(f.estimated_waste_tokens for f in boilerplate)
        recs.append(Recommendation(
            issue=f"Boilerplate repetition detected ({len(boilerplate)} pairs)",
            impact_score=round(total_bp_waste / max(1, total_tokens) * 15, 1),
            token_savings=total_bp_waste,
            fix="Consolidate repeated instructions into the system prompt",
            severity=FindingSeverity.MEDIUM,
        ))

    # ── Structure recommendations ───────────────────────────────────
    # Known issue labels map to the leading verb phrase of the fix text.
    action_by_issue = {
        "Retrieval dominance": "Reduce retrieval chunks",
        "System prompt bloat": "Trim system prompt",
        "Memory explosion": "Prune old memories",
        "Tool output sprawl": "Summarize tool outputs",
    }
    for struct_finding in structure_findings:
        pct = f"{struct_finding.actual_ratio * 100:.0f}%"
        threshold_pct = f"{struct_finding.threshold * 100:.0f}%"

        action = action_by_issue.get(struct_finding.issue)
        if action is not None:
            fix = f"{action} — currently {pct} of context (threshold: {threshold_pct})"
        else:
            fix = f"Improve context composition — {struct_finding.issue}"

        recs.append(Recommendation(
            issue=struct_finding.issue,
            # Every structure finding is credited with half of the whole
            # structure penalty.
            impact_score=round(score_breakdown.structure_penalty * 0.5, 1),
            token_savings=0,  # structure fixes don't always save tokens directly
            fix=fix,
            severity=struct_finding.severity,
        ))

    # Sort by impact (highest first)
    recs.sort(key=lambda r: r.impact_score, reverse=True)
    return recs
@@ -0,0 +1,245 @@
1
+ """
2
+ ContextOps Core Data Models.
3
+
4
+ These are the canonical data structures that every module in the system
5
+ operates on. ContextItem and ContextBundle are the internal representation
6
+ of LLM context — everything gets normalized into this form.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import uuid
12
+ from dataclasses import dataclass, field
13
+ from enum import Enum
14
+ from typing import Any, Literal
15
+
16
+
17
class ContextType(str, Enum):
    """Origin category of a context item; members compare equal to their string values."""

    SYSTEM = "system"
    MEMORY = "memory"
    RETRIEVAL = "retrieval"
    TOOL = "tool"
    MESSAGE = "message"
24
+
25
+
26
class RedundancyClassification(str, Enum):
    """Category assigned to a detected overlap between two context items."""

    # Adjacent chunks overlapping — normal RAG behavior.
    EXPECTED_OVERLAP = "expected_overlap"
    # Unnecessary duplication — real waste.
    REDUNDANT_CONTEXT = "redundant_context"
    # Repeated template text or instructions.
    BOILERPLATE = "boilerplate"
31
+
32
+
33
class FindingSeverity(str, Enum):
    """How serious an analysis finding is, listed from mildest to most severe."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
39
+
40
+
41
@dataclass
class ContextItem:
    """
    One unit of context handed to an LLM.

    Chunks, messages, system prompts, memory entries and tool outputs are
    all represented as ContextItem instances — this is the atomic unit the
    rest of the system works with.
    """

    type: ContextType  # a plain string is coerced to ContextType on init
    content: str
    token_count: int = 0
    # Defaults to the first 8 hex characters of a random UUID4.
    id: str = field(default_factory=lambda: uuid.uuid4().hex[:8])
    source: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Coerce a string ``type`` into its ContextType member."""
        raw_type = self.type
        if isinstance(raw_type, str):
            self.type = ContextType(raw_type)
59
+
60
+
61
@dataclass
class ContextBundle:
    """
    Everything being sent to an LLM, as an ordered list of ContextItems.

    Analyzers, the scoring engine and the recommendation engine all take a
    ContextBundle as their input.
    """

    items: list[ContextItem] = field(default_factory=list)

    @property
    def total_tokens(self) -> int:
        """Sum of ``token_count`` over every item (recomputed on each access)."""
        return sum(entry.token_count for entry in self.items)

    @property
    def item_count(self) -> int:
        """How many items the bundle holds."""
        return len(self.items)

    def items_by_type(self, context_type: ContextType) -> list[ContextItem]:
        """Return a new list with the items whose ``type`` equals *context_type*, in bundle order."""
        return list(filter(lambda entry: entry.type == context_type, self.items))
84
+
85
+
86
+ # ── Analysis Result Models ──────────────────────────────────────────────
87
+
88
+
89
@dataclass
class RedundancyFinding:
    """One detected overlap between a pair of context items."""

    item_a_id: str
    item_b_id: str
    # Similarity of the pair, 0.0 to 1.0.
    similarity_score: float
    classification: RedundancyClassification
    estimated_waste_tokens: int
    detail: str = ""
98
+
99
+
100
@dataclass
class StructureFinding:
    """One detected imbalance in how context is distributed across types."""

    issue: str
    context_type: ContextType
    # Observed share, 0.0 to 1.0.
    actual_ratio: float
    # The threshold that was exceeded.
    threshold: float
    severity: FindingSeverity = FindingSeverity.MEDIUM
108
+
109
+
110
@dataclass
class Recommendation:
    """A concrete fix the user can apply to improve their context."""

    issue: str
    # Estimated score improvement.
    impact_score: float
    # Estimated number of tokens saved.
    token_savings: int
    # Human-readable instruction describing the fix.
    fix: str
    severity: FindingSeverity = FindingSeverity.MEDIUM
118
+
119
+
120
@dataclass
class DensitySignal:
    """Shadow metric describing structural token waste; every field is in 0.0–1.0."""

    format_overhead: float
    whitespace_waste: float
    entropy_compression: float
    total_density_signal: float
127
+
128
+
129
@dataclass
class ScoreBreakdown:
    """The four penalties that are subtracted from 100 to form the context score."""

    redundancy_penalty: float = 0.0     # 0–30
    density_penalty: float = 0.0        # 0–30
    structure_penalty: float = 0.0      # 0–20
    concentration_penalty: float = 0.0  # 0–20

    @property
    def total_penalty(self) -> float:
        """Sum of the four penalties, added in declaration order."""
        subtotal = self.redundancy_penalty + self.density_penalty
        subtotal = subtotal + self.structure_penalty
        return subtotal + self.concentration_penalty

    @property
    def score(self) -> int:
        """Final context score: ``100 - total_penalty`` rounded, then clamped to 0–100."""
        rounded = round(100 - self.total_penalty)
        if rounded > 100:
            return 100
        if rounded < 0:
            return 0
        return rounded
150
+
151
+
152
@dataclass
class TokenBreakdown:
    """Token totals for a bundle, overall and split by context type."""

    total_tokens: int = 0
    # Token count keyed by context-type string.
    by_type: dict[str, int] = field(default_factory=dict)
    estimated_cost_usd: float = 0.0
    wasted_tokens: int = 0
159
+
160
+
161
@dataclass
class AnalysisResult:
    """
    The complete output of a ContextOps analysis.

    This is the JSON-primary API contract. The CLI renderer reads this.
    CI mode reads this. Everything derives from this object.
    """

    score: int
    score_breakdown: ScoreBreakdown
    token_breakdown: TokenBreakdown
    redundancy_findings: list[RedundancyFinding] = field(default_factory=list)
    structure_findings: list[StructureFinding] = field(default_factory=list)
    recommendations: list[Recommendation] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
    mode: str = "strict"
    config_version: str = "1.0"
    # Optional; the density keys are omitted from to_dict() when this is None.
    density_signal: DensitySignal | None = None
    density_effect: Literal["shadow", "active"] = "shadow"

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize to a plain dict suitable for JSON output.

        Penalties are rounded to 2 decimals, cost to 6, ratios and
        similarity to 3. ``metadata`` and ``by_type`` are shallow-copied,
        so editing the returned dict does not mutate this result.
        ``density_signal`` and ``density_effect`` are included only when a
        density signal is present.

        Returns:
            A dict with keys ``score``, ``mode``, ``config_version``,
            ``score_breakdown``, ``token_breakdown``, ``findings``,
            ``recommendations``, ``metadata`` and, optionally,
            ``density_signal`` / ``density_effect``.
        """
        penalties = self.score_breakdown
        tokens = self.token_breakdown

        res: dict[str, Any] = {
            "score": self.score,
            "mode": self.mode,
            "config_version": self.config_version,
            "score_breakdown": {
                "redundancy_penalty": round(penalties.redundancy_penalty, 2),
                "density_penalty": round(penalties.density_penalty, 2),
                "structure_penalty": round(penalties.structure_penalty, 2),
                "concentration_penalty": round(penalties.concentration_penalty, 2),
                "total_penalty": round(penalties.total_penalty, 2),
            },
            "token_breakdown": {
                "total_tokens": tokens.total_tokens,
                # Copy so callers cannot mutate the breakdown via the output.
                "by_type": dict(tokens.by_type),
                "estimated_cost_usd": round(tokens.estimated_cost_usd, 6),
                "wasted_tokens": tokens.wasted_tokens,
            },
            "findings": {
                "redundancy": [
                    {
                        "item_a": f.item_a_id,
                        "item_b": f.item_b_id,
                        "similarity": round(f.similarity_score, 3),
                        "classification": f.classification.value,
                        "waste_tokens": f.estimated_waste_tokens,
                        "detail": f.detail,
                    }
                    for f in self.redundancy_findings
                ],
                "structure": [
                    {
                        "issue": f.issue,
                        "type": f.context_type.value,
                        "actual_ratio": round(f.actual_ratio, 3),
                        "threshold": f.threshold,
                        "severity": f.severity.value,
                    }
                    for f in self.structure_findings
                ],
            },
            "recommendations": [
                {
                    "issue": r.issue,
                    "impact": f"+{round(r.impact_score, 1)} points",
                    "token_savings": r.token_savings,
                    "fix": r.fix,
                    "severity": r.severity.value,
                }
                for r in self.recommendations
            ],
            # Copy for the same reason as by_type above.
            "metadata": dict(self.metadata),
        }

        signal = self.density_signal
        # Explicit None check: presence, not truthiness, decides inclusion.
        if signal is not None:
            res["density_signal"] = {
                "format_overhead": round(signal.format_overhead, 3),
                "whitespace_waste": round(signal.whitespace_waste, 3),
                "entropy_compression": round(signal.entropy_compression, 3),
                "total_density_signal": round(signal.total_density_signal, 3),
            }
            res["density_effect"] = self.density_effect

        return res