contextops 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ """
2
+ Token Analyzer.
3
+
4
+ Uses tiktoken to count tokens per ContextItem and estimate costs.
5
+ Nothing fancy — just reliable counting and cost math.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import tiktoken
11
+
12
+ from contextops.core.models import ContextBundle, TokenBreakdown
13
+
14
+
15
+ # ── Pricing per 1K input tokens (USD) — GPT-4o as default reference ─────
16
+ # Users can override this; these are just sensible defaults for estimation.
17
+ DEFAULT_COST_PER_1K_TOKENS: float = 0.005 # $5 per 1M input tokens
18
+
19
+
20
def count_tokens(text: str, model: str = "gpt-4o") -> int:
    """
    Count tokens for a given text using tiktoken.

    Args:
        text: The text to tokenize.
        model: The model name used to select the encoding. Defaults to gpt-4o.
            Names tiktoken does not recognize fall back to cl100k_base.

    Returns:
        The number of tokens.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Fallback to cl100k_base (covers GPT-4, GPT-3.5, etc.)
        encoding = tiktoken.get_encoding("cl100k_base")

    # Context payloads are arbitrary text and may contain strings that look
    # like special tokens (e.g. "<|endoftext|>"). tiktoken's default
    # disallowed_special="all" makes encode() raise ValueError on them, which
    # would abort the whole analysis. Disabling the check encodes such strings
    # as ordinary text, so counting never fails on user-supplied content.
    return len(encoding.encode(text, disallowed_special=()))
38
+
39
+
40
def analyze_tokens(
    bundle: ContextBundle,
    model: str = "gpt-4o",
    cost_per_1k: float = DEFAULT_COST_PER_1K_TOKENS,
) -> TokenBreakdown:
    """
    Tokenize every item in a bundle and summarize the counts.

    Side effect: each item's ``token_count`` attribute is overwritten with
    the count measured here.

    Args:
        bundle: The context bundle whose items are counted.
        model: Model name passed to ``count_tokens`` for encoding selection.
        cost_per_1k: Price in USD per 1,000 input tokens.

    Returns:
        A TokenBreakdown holding the overall total, the total per item type,
        and the estimated cost. ``wasted_tokens`` is always 0 here.
    """
    tokens_per_type: dict[str, int] = {}
    grand_total = 0

    for entry in bundle.items:
        n_tokens = count_tokens(entry.content, model=model)
        entry.token_count = n_tokens
        grand_total += n_tokens

        # Accumulate under the enum's value so the keys are plain strings.
        kind = entry.type.value
        if kind in tokens_per_type:
            tokens_per_type[kind] += n_tokens
        else:
            tokens_per_type[kind] = n_tokens

    return TokenBreakdown(
        total_tokens=grand_total,
        by_type=tokens_per_type,
        estimated_cost_usd=(grand_total / 1000) * cost_per_1k,
        wasted_tokens=0,  # filled in later by the engine after redundancy analysis
    )
@@ -0,0 +1 @@
1
+ # API subpackage
contextops/api/diff.py ADDED
@@ -0,0 +1,124 @@
1
+ """
2
+ ContextOps Diff API.
3
+
4
+ Computes a deterministic delta between two context payloads.
5
+ Provides the data model and logic for `contextops diff`.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import re
12
+ from dataclasses import dataclass, field
13
+ from typing import Any
14
+
15
+ from contextops.api.inspect import inspect_context
16
+ from contextops.core.models import AnalysisResult, Recommendation
17
+
18
+
19
@dataclass
class ContextDiffResult:
    """Delta between two context analysis results, always computed as B minus A."""

    # The two analysis results that were compared.
    result_a: AnalysisResult
    result_b: AnalysisResult

    # Scalar changes (B - A).
    score_delta: int
    token_delta: int
    waste_delta: int
    cost_delta: float

    # Per-penalty changes (B - A), keyed by penalty name.
    structure_delta: dict[str, float]

    # Recommendations that disappeared in B, appeared in B, or exist in both.
    resolved_recommendations: list[Recommendation] = field(default_factory=list)
    new_recommendations: list[Recommendation] = field(default_factory=list)
    persisting_recommendations: list[Recommendation] = field(default_factory=list)

    @property
    def net_impact(self) -> str:
        """Label the change by the sign of ``score_delta``."""
        if self.score_delta > 0:
            return "IMPROVEMENT"
        if self.score_delta < 0:
            return "DEGRADATION"
        return "NEUTRAL"
49
+
50
+
51
def get_recommendation_id(rec: Recommendation) -> str:
    """
    Derive a stable, deterministic identifier from a recommendation's issue text.

    The set-based diff logic keys recommendations by this value, so it must
    be purely textual: the issue string is normalized and hashed.
    Do NOT use fuzzy matching or ML embeddings here.
    """
    # Case-insensitive, with surrounding whitespace removed.
    issue_text = rec.issue.lower().strip()
    # Any run of whitespace counts as a single space.
    canonical = re.sub(r'\s+', ' ', issue_text)
    digest = hashlib.md5(canonical.encode("utf-8"))
    # The first 12 hex characters serve as the ID.
    return digest.hexdigest()[:12]
61
+
62
+
63
def diff_contexts(
    raw_input_a: str | list[dict[str, Any]] | dict[str, Any],
    raw_input_b: str | list[dict[str, Any]] | dict[str, Any],
    **inspect_options: Any,
) -> ContextDiffResult:
    """
    Compare two context payloads and return a deterministic diff result.

    Both payloads are analyzed with ``inspect_context`` using the same
    options, then compared with ``diff_analysis_results``.

    Args:
        raw_input_a: The baseline context payload (side A).
        raw_input_b: The changed context payload (side B).
        **inspect_options: Optional keyword arguments forwarded unchanged to
            ``inspect_context`` for both payloads (e.g. ``model``,
            ``cost_per_1k``, ``config``). When omitted, ``inspect_context``
            uses its own defaults, which matches the previous behavior.

    Returns:
        A ContextDiffResult whose deltas are computed as B minus A.
    """
    # The same options are applied to both sides so the deltas reflect the
    # payload change only, never a difference in analysis settings.
    result_a = inspect_context(raw_input_a, **inspect_options)
    result_b = inspect_context(raw_input_b, **inspect_options)

    return diff_analysis_results(result_a, result_b)
74
+
75
+
76
def diff_analysis_results(result_a: AnalysisResult, result_b: AnalysisResult) -> ContextDiffResult:
    """
    Build the diff between two already-computed analysis results.

    All numeric deltas are B minus A. Recommendations are matched across the
    two results by ``get_recommendation_id`` and split into resolved (only in
    A), new (only in B) and persisting (in both, taken from B).
    """

    def ranked(recs: list[Recommendation]) -> list[Recommendation]:
        # Highest impact first, then largest savings, then issue text.
        return sorted(recs, key=lambda rec: (-rec.impact_score, -rec.token_savings, rec.issue))

    # Scalar deltas.
    score_change = result_b.score - result_a.score
    tokens_a, tokens_b = result_a.token_breakdown, result_b.token_breakdown
    token_change = tokens_b.total_tokens - tokens_a.total_tokens
    waste_change = tokens_b.wasted_tokens - tokens_a.wasted_tokens
    cost_change = tokens_b.estimated_cost_usd - tokens_a.estimated_cost_usd

    # Per-penalty deltas.
    penalties_a, penalties_b = result_a.score_breakdown, result_b.score_breakdown
    penalty_changes = {
        "redundancy": penalties_b.redundancy_penalty - penalties_a.redundancy_penalty,
        "density": penalties_b.density_penalty - penalties_a.density_penalty,
        "structure_imbalance": penalties_b.structure_penalty - penalties_a.structure_penalty,
        "concentration": penalties_b.concentration_penalty - penalties_a.concentration_penalty,
    }

    # Recommendation lifecycle, keyed by the stable recommendation ID.
    recs_a = {get_recommendation_id(rec): rec for rec in result_a.recommendations}
    recs_b = {get_recommendation_id(rec): rec for rec in result_b.recommendations}
    ids_a, ids_b = set(recs_a), set(recs_b)

    gone = ranked([recs_a[rec_id] for rec_id in ids_a - ids_b])
    added = ranked([recs_b[rec_id] for rec_id in ids_b - ids_a])
    # For recommendations present on both sides, B's version is reported.
    kept = ranked([recs_b[rec_id] for rec_id in ids_a & ids_b])

    return ContextDiffResult(
        result_a=result_a,
        result_b=result_b,
        score_delta=score_change,
        token_delta=token_change,
        waste_delta=waste_change,
        cost_delta=cost_change,
        structure_delta=penalty_changes,
        resolved_recommendations=gone,
        new_recommendations=added,
        persisting_recommendations=kept,
    )
@@ -0,0 +1,52 @@
1
+ """
2
+ ContextOps Programmatic API.
3
+
4
+ This is the primary interface for using ContextOps as a library.
5
+ Import and call `inspect_context()` with any supported input format.
6
+
7
+ Example:
8
+ import json
9
+ from contextops.api.inspect import inspect_context
10
+ result = inspect_context({
11
+ "system": "You are a helpful assistant.",
12
+ "chunks": ["chunk 1", "chunk 2"],
13
+ })
14
+ print(result.score)
15
+ print(json.dumps(result.to_dict(), indent=2))
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from typing import Any
21
+
22
+ from contextops.core.config import ContextOpsConfig
23
+ from contextops.core.engine import analyze
24
+ from contextops.core.models import AnalysisResult
25
+ from contextops.core.normalizer import normalize
26
+
27
+
28
def inspect_context(
    raw_input: str | list[dict[str, Any]] | dict[str, Any],
    model: str = "gpt-4o",
    cost_per_1k: float = 0.005,
    config: ContextOpsConfig | None = None,
) -> AnalysisResult:
    """
    Normalize a raw LLM context and run the analysis engine on it.

    This is the main entry point for the ContextOps library.

    Args:
        raw_input: Raw LLM context in any supported format:
            - str: treated as a system prompt
            - list[dict]: OpenAI-style message list
            - dict: structured dict with system/messages/chunks/memory/tools
        model: Model name for tiktoken encoding.
        cost_per_1k: Cost per 1K input tokens in USD.
        config: Optional custom threshold configuration.

    Returns:
        The AnalysisResult produced by the engine for the normalized bundle.
    """
    return analyze(
        normalize(raw_input),
        model=model,
        cost_per_1k=cost_per_1k,
        config=config,
    )
@@ -0,0 +1,264 @@
1
+ """
2
+ ContextOps Stability API.
3
+
4
+ Runs deterministic perturbations against a context bundle to verify the scoring
5
+ engine behaves logically. Testing properties and invariants over specific scores.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import copy
11
+ import random
12
+ from dataclasses import dataclass, field
13
+ from typing import Any
14
+
15
+ from contextops.core.engine import analyze
16
+ from contextops.core.models import ContextItem, ContextType, RedundancyClassification
17
+ from contextops.core.normalizer import normalize
18
+
19
+
20
@dataclass
class InvariantResult:
    """Result record for one stability invariant check."""

    # Human-readable name of the invariant.
    name: str
    # Whether the invariant held.
    passed: bool
    # Severity label; the checks in this module use "critical" or "important".
    severity: str = "critical"
    # Free-form details explaining the outcome; empty when there is nothing to add.
    diagnostic_info: dict[str, Any] = field(default_factory=dict)
27
+
28
+
29
@dataclass
class StabilityReport:
    """Complete stability report containing all invariant checks."""

    # Score, total tokens and wasted tokens of the unmodified input.
    base_score: int = 0
    base_tokens: int = 0
    base_waste_tokens: int = 0
    # One entry per invariant check that was run.
    invariants: list[InvariantResult] = field(default_factory=list)

    @property
    def score_percentage(self) -> int:
        """
        Percentage of invariants that passed, rounded down.

        Returns 0 when the report contains no invariants.
        """
        if not self.invariants:
            return 0
        passed = sum(1 for inv in self.invariants if inv.passed)
        # Integer arithmetic avoids float truncation errors: with floats,
        # int((29 / 100) * 100) evaluates to 28 instead of 29.
        return (passed * 100) // len(self.invariants)
44
+
45
+
46
# Largest absolute score change tolerated when one item is split into two.
_SPLIT_TOLERANCE = 10

# Pairs of sentences that say the same thing in different words. The guard
# check expects the engine NOT to flag them as redundant.
_SEMANTIC_BLINDNESS_CASES = (
    (
        "The startup raised one million dollars.",
        "The company secured $1M in funding.",
    ),
    (
        "The API request timed out after thirty seconds.",
        "The endpoint exceeded its 30-second timeout threshold.",
    ),
    (
        "The quick brown fox jumps over the lazy dog.",
        "A fast dark-colored canine leaped above a resting dog.",
    ),
)


def _check_shuffle(base_bundle: Any, base_score: int) -> InvariantResult:
    """Reordering the items must leave the score unchanged."""
    shuffled_bundle = copy.deepcopy(base_bundle)
    # A reverse sort by id stands in for a shuffle so the check is reproducible.
    shuffled_bundle.items = sorted(shuffled_bundle.items, key=lambda item: item.id, reverse=True)
    shuffle_result = analyze(shuffled_bundle)
    return InvariantResult(
        name="Shuffle Invariant",
        passed=(shuffle_result.score == base_score),
        severity="critical",
    )


def _check_duplicate_injection(base_bundle: Any, base_score: int) -> InvariantResult:
    """Appending an exact copy of an existing item must lower the score."""
    if not base_bundle.items:
        # Nothing to duplicate: the check passes vacuously.
        return InvariantResult(
            name="Duplicate Injection",
            passed=True,
            severity="critical",
            diagnostic_info={"Note": "No items to duplicate"},
        )

    # Prefer a retrieval item; fall back to any item when there is none.
    candidates = base_bundle.items_by_type(ContextType.RETRIEVAL) or base_bundle.items

    dup_bundle = copy.deepcopy(base_bundle)
    duplicate = copy.deepcopy(candidates[0])
    duplicate.id = duplicate.id + "_dup"
    dup_bundle.items.append(duplicate)

    dup_result = analyze(dup_bundle)
    score_delta = dup_result.score - base_score
    return InvariantResult(
        name="Duplicate Injection",
        passed=(dup_result.score < base_score),
        severity="critical",
        diagnostic_info={
            "Score Delta": f"{score_delta:+d}",
            "Expected Direction": "Decrease",
        },
    )


def _check_noise_injection(base_bundle: Any, base_score: int) -> InvariantResult:
    """Appending pure synthetic noise must not raise the score."""
    noise_bundle = copy.deepcopy(base_bundle)
    noise_content = " ".join(f"TOKEN_{i:04d}" for i in range(1, 101))
    noise_bundle.items.append(
        ContextItem(
            type=ContextType.RETRIEVAL,
            content=noise_content,
            source="synthetic_noise",
        )
    )

    noise_result = analyze(noise_bundle)
    noise_score_delta = noise_result.score - base_score
    return InvariantResult(
        name="Noise Injection",
        passed=(noise_result.score <= base_score),
        severity="important",
        diagnostic_info={
            "Score Delta": f"{noise_score_delta:+d}",
            "Expected Direction": "<= 0",
        },
    )


def _check_chunk_split(base_bundle: Any, base_score: int) -> InvariantResult:
    """Splitting the longest non-system item in half must keep the score within tolerance."""
    if not base_bundle.items:
        return InvariantResult(
            name="Chunk Split Invariant",
            passed=True,
            severity="important",
            diagnostic_info={"Note": "No items to split"},
        )

    split_bundle = copy.deepcopy(base_bundle)
    non_system_indices = [
        i for i, item in enumerate(split_bundle.items)
        if item.type != ContextType.SYSTEM
    ]
    if not non_system_indices:
        return InvariantResult(
            name="Chunk Split Invariant",
            passed=True,
            severity="important",
            diagnostic_info={"Note": "No non-system items to split"},
        )

    longest_idx = max(non_system_indices, key=lambda i: len(split_bundle.items[i].content))
    longest_item = split_bundle.items.pop(longest_idx)

    # Replace the removed item by its two halves, appended at the end.
    mid = len(longest_item.content) // 2
    for part in (longest_item.content[:mid], longest_item.content[mid:]):
        split_bundle.items.append(
            ContextItem(type=longest_item.type, content=part, source=longest_item.source)
        )

    split_result = analyze(split_bundle)
    split_delta = split_result.score - base_score
    return InvariantResult(
        name="Chunk Split Invariant",
        passed=(abs(split_delta) <= _SPLIT_TOLERANCE),
        severity="important",
        diagnostic_info={
            "Base Score": base_score,
            "Split Score": split_result.score,
            "Delta": f"{split_delta:+d}",
        },
    )


def _check_boilerplate(base_bundle: Any) -> InvariantResult:
    """A repeated system prompt must be classified as boilerplate, not as redundant context."""
    bp_bundle = copy.deepcopy(base_bundle)
    bp_item = ContextItem(
        type=ContextType.SYSTEM,
        content="You are a helpful assistant. Please follow all instructions carefully.",
        source="system",
    )
    bp_item_dup = copy.deepcopy(bp_item)
    bp_item_dup.id = bp_item.id + "_dup"

    bp_bundle.items.extend([bp_item, bp_item_dup])
    bp_result = analyze(bp_bundle)

    # Only findings between the two injected items are relevant here.
    bp_ids = {bp_item.id, bp_item_dup.id}
    bp_pair_findings = [
        f for f in bp_result.redundancy_findings
        if f.item_a_id in bp_ids and f.item_b_id in bp_ids
    ]

    detected_bp = any(
        f.classification == RedundancyClassification.BOILERPLATE
        for f in bp_pair_findings
    )
    detected_redundant = any(
        f.classification == RedundancyClassification.REDUNDANT_CONTEXT
        for f in bp_pair_findings
    )

    return InvariantResult(
        name="Boilerplate Invariant",
        passed=(detected_bp and not detected_redundant),
        severity="critical",
        diagnostic_info={
            "Detected as BOILERPLATE": detected_bp,
            "Detected as REDUNDANT_CONTEXT": detected_redundant,
        },
    )


def _check_semantic_blindness(base_bundle: Any) -> InvariantResult:
    """Paraphrased sentence pairs must not be reported as redundant context."""
    sb_diagnostics = []

    for case_idx, (text1, text2) in enumerate(_SEMANTIC_BLINDNESS_CASES):
        # Each case is tested in isolation on a fresh copy of the base bundle.
        sb_bundle = copy.deepcopy(base_bundle)
        item1 = ContextItem(type=ContextType.RETRIEVAL, content=text1, source=f"sb_a_{case_idx}")
        item2 = ContextItem(type=ContextType.RETRIEVAL, content=text2, source=f"sb_b_{case_idx}")
        sb_bundle.items.extend([item1, item2])

        sb_result = analyze(sb_bundle)

        # Look for a redundancy finding linking exactly the two injected items,
        # in either order.
        has_redundancy = any(
            f.classification == RedundancyClassification.REDUNDANT_CONTEXT
            and ((f.item_a_id == item1.id and f.item_b_id == item2.id) or
                 (f.item_a_id == item2.id and f.item_b_id == item1.id))
            for f in sb_result.redundancy_findings
        )
        if has_redundancy:
            sb_diagnostics.append(f"Case {case_idx+1} triggered redundancy")

    sb_passed = not sb_diagnostics
    return InvariantResult(
        name="Semantic Blindness Guard",
        passed=sb_passed,
        severity="important",
        diagnostic_info={
            "Redundancy Detected": not sb_passed,
            "Details": ", ".join(sb_diagnostics) if sb_diagnostics else "Clean across all cases",
        },
    )


def run_stability_report(raw_input: str | list[dict] | dict) -> StabilityReport:
    """
    Run the formal sanity-check layer for the scoring engine.

    The input is normalized and analyzed once to obtain a baseline. Six
    deterministic mutations of the normalized bundle are then analyzed, each
    on its own deep copy, and compared against that baseline.

    Args:
        raw_input: Raw context payload, passed unchanged to ``normalize``.

    Returns:
        A StabilityReport with the baseline score, token and waste counts and
        one InvariantResult per check, in the order the checks are run.
    """
    base_bundle = normalize(raw_input)
    base_result = analyze(base_bundle)
    base_score = base_result.score

    invariants = [
        # 1. ContextOps should care about content, not ordering.
        _check_shuffle(base_bundle, base_score),
        # 2. Injecting an exact duplicate must be detected and penalized.
        _check_duplicate_injection(base_bundle, base_score),
        # 3. Pure synthetic noise shouldn't magically improve the score.
        _check_noise_injection(base_bundle, base_score),
        # 4. Splitting content shouldn't dramatically alter conclusions.
        _check_chunk_split(base_bundle, base_score),
        # 5. Expected repetition (boilerplate) vs real waste.
        _check_boilerplate(base_bundle),
        # 6. Semantic blindness is a feature, not a bug. Verify it stays that way.
        _check_semantic_blindness(base_bundle),
    ]

    return StabilityReport(
        base_score=base_score,
        base_tokens=base_result.token_breakdown.total_tokens,
        base_waste_tokens=base_result.token_breakdown.wasted_tokens,
        invariants=invariants,
    )
@@ -0,0 +1 @@
1
+ # CLI subpackage