contextrot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
contextrot/__init__.py ADDED
@@ -0,0 +1,3 @@
1
"""contextrot — personal context-rot analytics for coding agents."""

# Version string of the contextrot package.
__version__ = "0.1.0"
@@ -0,0 +1,12 @@
1
+ """Adapter registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contextrot.adapters.base import SessionAdapter
6
+ from contextrot.adapters.claude_code import ClaudeCodeAdapter
7
+
8
# Registry of ready-to-use adapter instances, keyed by each adapter's ``name``.
# To register another adapter, add an instance to the tuple below.
ADAPTERS: dict[str, SessionAdapter] = {
    adapter.name: adapter for adapter in (ClaudeCodeAdapter(),)
}

__all__ = ["ADAPTERS", "SessionAdapter", "ClaudeCodeAdapter"]
@@ -0,0 +1,28 @@
1
+ """Adapter interface.
2
+
3
+ To add support for a new agent CLI, subclass SessionAdapter, implement
4
+ discover() and parse(), and register it in adapters/__init__.py. Adapters
5
+ must be tolerant: unknown fields are ignored, malformed lines are skipped,
6
+ and a partially parsed session is better than a crash.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from abc import ABC, abstractmethod
12
+ from pathlib import Path
13
+
14
+ from contextrot.models import Session
15
+
16
+
17
class SessionAdapter(ABC):
    """Abstract base for adapters that turn native transcripts into Sessions.

    Concrete adapters set ``name`` (the registry key) and implement both
    ``discover`` and ``parse``.
    """

    # Identifier for the adapter; subclasses override it.
    name: str = "base"

    @abstractmethod
    def discover(self, data_dir: Path | None = None) -> list[Path]:
        """List the transcript files found on this machine.

        ``data_dir`` optionally points the search at a specific directory.
        """

    @abstractmethod
    def parse(self, path: Path) -> Session | None:
        """Parse a single transcript file.

        Returns ``None`` when the file contains no usable steps.
        """
@@ -0,0 +1,197 @@
1
+ """Claude Code transcript adapter.
2
+
3
+ Claude Code stores each session as a JSONL file under
4
+ ``~/.claude/projects/<project-slug>/<session-uuid>.jsonl``. Relevant entry
5
+ types:
6
+
7
+ - ``assistant``: one model API call. ``message.usage`` carries token
8
+ accounting (``input_tokens``, ``cache_creation_input_tokens``,
9
+ ``cache_read_input_tokens``, ``output_tokens``); ``message.content`` is a
10
+ list of blocks (``text``, ``thinking``, ``tool_use``).
11
+ - ``user``: either a human prompt (string content) or tool results
12
+ (``tool_result`` blocks with ``tool_use_id`` and ``is_error``).
13
+ - ``isSidechain: true`` marks sub-agent traffic; it is counted but excluded
14
+ from the main step list so fill percentages reflect the primary context
15
+ window.
16
+
17
+ Format observed on Claude Code 2.x. Parsing is tolerant by design: fields
18
+ we don't recognize are ignored, lines that fail to decode are skipped.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ from datetime import datetime
25
+ from pathlib import Path
26
+
27
+ from contextrot.adapters.base import SessionAdapter
28
+ from contextrot.models import Session, Step, ToolCall
29
+
30
# Tool input keys used as the retry/re-read "target", in priority order:
# the first key holding a non-empty string value is used.
_TARGET_KEYS = ("file_path", "path", "url", "pattern", "command", "query")
32
+
33
+
34
+ def _parse_ts(raw: object) -> datetime | None:
35
+ if not isinstance(raw, str):
36
+ return None
37
+ try:
38
+ return datetime.fromisoformat(raw.replace("Z", "+00:00"))
39
+ except ValueError:
40
+ return None
41
+
42
+
43
def _target_of(name: str, tool_input: dict) -> str | None:
    """Pick the string that identifies what a tool call was aimed at.

    Keys are tried in ``_TARGET_KEYS`` order; the first one holding a
    non-empty string wins. Only the first line is kept, capped at 300
    characters. Returns ``None`` when no key qualifies. ``name`` is accepted
    but not consulted.
    """
    candidates = (tool_input.get(key) for key in _TARGET_KEYS)
    for candidate in candidates:
        if not isinstance(candidate, str) or not candidate:
            continue
        # For shell commands, the first line is enough to identify a retry.
        first_line = candidate.split("\n", 1)[0]
        return first_line[:300]
    return None
50
+
51
+
52
+ def _result_text(content: object) -> str:
53
+ if isinstance(content, str):
54
+ return content
55
+ if isinstance(content, list):
56
+ parts = []
57
+ for block in content:
58
+ if isinstance(block, dict) and isinstance(block.get("text"), str):
59
+ parts.append(block["text"])
60
+ return "\n".join(parts)
61
+ return ""
62
+
63
+
64
class ClaudeCodeAdapter(SessionAdapter):
    """Adapter that parses Claude Code JSONL transcripts into Sessions.

    Parsing is tolerant: undecodable lines, non-dict entries, and malformed
    fields are skipped instead of raising.
    """

    name = "claude-code"

    # (Step attribute, usage key) pairs for the token counters.
    _USAGE_FIELDS = (
        ("input_tokens", "input_tokens"),
        ("cache_creation_tokens", "cache_creation_input_tokens"),
        ("cache_read_tokens", "cache_read_input_tokens"),
        ("output_tokens", "output_tokens"),
    )

    def discover(self, data_dir: Path | None = None) -> list[Path]:
        """Return the sorted ``<root>/*/*.jsonl`` transcript files.

        ``root`` is ``data_dir`` when given, otherwise ``~/.claude/projects``.
        An empty list is returned when the root is not a directory.
        """
        root = data_dir or Path.home() / ".claude" / "projects"
        if not root.is_dir():
            return []
        return sorted(root.glob("*/*.jsonl"))

    def parse(self, path: Path) -> Session | None:
        """Parse one transcript file into a Session.

        Returns ``None`` when no step could be extracted (including when the
        file cannot be opened). If reading fails part-way through, the steps
        gathered so far are kept rather than discarded.
        """
        session = Session(
            session_id=path.stem,
            source=self.name,
            project=path.parent.name,
        )
        # tool_use_id -> ToolCall, so results (which arrive in later user
        # entries) can be attached to the step that made the call.
        open_calls: dict[str, ToolCall] = {}
        # requestId -> Step, so entries belonging to one API call are merged
        # into a single step instead of being counted once per entry.
        request_steps: dict[str, Step] = {}

        try:
            with path.open(encoding="utf-8", errors="replace") as fh:
                for line in fh:
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if isinstance(entry, dict):
                        self._consume(entry, session, open_calls, request_steps)
        except OSError:
            # A partially parsed session is better than none: fall through
            # and return whatever was collected before the read failed.
            pass

        # ``session.project`` already holds the working directory when any
        # entry carried a ``cwd`` (see ``_consume``).
        return session if session.steps else None

    def _consume(
        self,
        entry: dict,
        session: Session,
        open_calls: dict[str, ToolCall],
        request_steps: dict[str, Step] | None = None,
    ) -> None:
        """Route one transcript entry to its handler and record ``cwd``.

        Sidechain assistant entries are only counted; sidechain user entries
        are ignored. ``request_steps`` is the optional requestId -> Step map
        used to merge entries of one API call.
        """
        etype = entry.get("type")
        if etype == "assistant":
            if entry.get("isSidechain"):
                session.sidechain_steps += 1
                return
            self._consume_assistant(entry, session, open_calls, request_steps)
        elif etype == "user" and not entry.get("isSidechain"):
            self._consume_user(entry, session, open_calls)

        # Use the real working directory as the project name when present.
        cwd = entry.get("cwd")
        if isinstance(cwd, str) and cwd:
            session.project = cwd

    @staticmethod
    def _token_count(usage: dict, key: str) -> int:
        """Read ``usage[key]`` as an int, treating bad or missing values as 0."""
        try:
            return int(usage.get(key) or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    @staticmethod
    def _fold_content(step: Step, calls: list[ToolCall], texts: list[str]) -> None:
        """Append tool calls and assistant text to an existing step."""
        step.tool_calls.extend(calls)
        if texts:
            step.assistant_text = (step.assistant_text + "\n" + "\n".join(texts)).strip()

    def _consume_assistant(
        self,
        entry: dict,
        session: Session,
        open_calls: dict[str, ToolCall],
        request_steps: dict[str, Step] | None = None,
    ) -> None:
        """Turn an assistant entry into a new step or fold it into an old one.

        Tool-use blocks are registered in ``open_calls`` so later results can
        be attached. When ``request_steps`` is omitted no requestId merging
        happens: every entry carrying usage starts its own step.
        """
        message = entry.get("message")
        if not isinstance(message, dict):
            return
        usage = message.get("usage")
        ts = _parse_ts(entry.get("timestamp"))

        content = message.get("content")
        texts: list[str] = []
        calls: list[ToolCall] = []
        if isinstance(content, list):
            for block in content:
                if not isinstance(block, dict):
                    continue
                btype = block.get("type")
                if btype == "text" and isinstance(block.get("text"), str):
                    texts.append(block["text"])
                elif btype == "tool_use":
                    name = block.get("name") or "unknown"
                    raw_input = block.get("input")
                    tool_input: dict = raw_input if isinstance(raw_input, dict) else {}
                    call = ToolCall(
                        name=str(name),
                        tool_use_id=str(block.get("id") or ""),
                        target=_target_of(str(name), tool_input),
                    )
                    calls.append(call)
                    if call.tool_use_id:
                        open_calls[call.tool_use_id] = call

        # Claude Code streams one API call across several assistant entries
        # that share a requestId; usage rides on each. Merge by requestId so
        # a single call isn't double counted: the first entry of a request
        # starts the step, later entries of that request are folded in.
        request_id = entry.get("requestId")
        if not (isinstance(request_id, str) and request_id):
            request_id = None
        known = None
        if request_steps is not None and request_id is not None:
            known = request_steps.get(request_id)

        if known is not None:
            self._fold_content(known, calls, texts)
            if isinstance(usage, dict):
                # NOTE(review): assumes usage repeated for one requestId is
                # identical or cumulative, so the per-field maximum is the
                # call's final accounting — confirm against real transcripts.
                for attr, key in self._USAGE_FIELDS:
                    latest = self._token_count(usage, key)
                    if latest > getattr(known, attr):
                        setattr(known, attr, latest)
            if ts is not None:
                session.ended_at = ts
        elif isinstance(usage, dict):
            step = Step(
                timestamp=ts,
                model=str(message.get("model") or "unknown"),
                input_tokens=self._token_count(usage, "input_tokens"),
                cache_creation_tokens=self._token_count(usage, "cache_creation_input_tokens"),
                cache_read_tokens=self._token_count(usage, "cache_read_input_tokens"),
                output_tokens=self._token_count(usage, "output_tokens"),
                tool_calls=calls,
                assistant_text="\n".join(texts),
            )
            session.steps.append(step)
            if request_steps is not None and request_id is not None:
                request_steps[request_id] = step
            if session.started_at is None:
                session.started_at = ts
            if ts is not None:
                session.ended_at = ts
        elif session.steps:
            # No usage and no known request: attach to the most recent step.
            self._fold_content(session.steps[-1], calls, texts)

    def _consume_user(self, entry: dict, session: Session, open_calls: dict[str, ToolCall]) -> None:
        """Record human prompt sizes and attach tool results to their calls.

        String content and ``text`` blocks add to ``user_message_chars``.
        ``tool_result`` blocks are matched to the pending ToolCall by
        ``tool_use_id``; results with no pending call are ignored.
        """
        message = entry.get("message")
        if not isinstance(message, dict):
            return
        content = message.get("content")
        if isinstance(content, str):
            session.user_message_chars += len(content)
            return
        if not isinstance(content, list):
            return
        for block in content:
            if not isinstance(block, dict):
                continue
            if block.get("type") == "tool_result":
                call = open_calls.pop(str(block.get("tool_use_id") or ""), None)
                if call is None:
                    continue
                text = _result_text(block.get("content"))
                call.result_chars = len(text)
                if block.get("is_error"):
                    call.is_error = True
                    call.error_text = text[:500]
            elif block.get("type") == "text" and isinstance(block.get("text"), str):
                session.user_message_chars += len(block["text"])
@@ -0,0 +1,106 @@
1
+ """Analysis orchestrator: transcripts in, AnalysisResult out."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import UTC, datetime, timedelta
7
+ from pathlib import Path
8
+
9
+ from contextrot.adapters import ADAPTERS
10
+ from contextrot.analysis.composition import Composition, estimate_composition
11
+ from contextrot.analysis.prescriptions import Prescription, prescribe
12
+ from contextrot.analysis.rot import RotCurve, build_rot_curve
13
+ from contextrot.models import Session
14
+ from contextrot.pricing import DEFAULT_CONTEXT_WINDOW, context_window_for
15
+ from contextrot.signals import StepSignals, extract_signals
16
+
17
+
18
@dataclass
class AnalysisResult:
    """Everything produced by one ``analyze`` run."""

    sessions: list[Session]  # sessions that passed loading and filtering
    steps: list[StepSignals]  # per-step signals across all sessions
    curve: RotCurve  # rot curve built from ``steps``
    composition: Composition  # estimated context composition
    prescriptions: list[Prescription]  # recommendations derived from the above
    context_window: int  # largest window used across sessions (or override/default)
    total_cost_usd: float  # summed cost of all steps
    rework_cost_usd: float  # cost of degraded steps + their outputs
    steps_past_knee: int  # steps whose fill % is at/above the curve's knee
    days: int | None  # look-back window passed to ``analyze``
    skipped_sessions: int = 0  # sessions dropped as unparseable or too short
    signal_rates: dict[str, float] = field(default_factory=dict)  # signal count / step count
32
+
33
+
34
def load_sessions(
    data_dir: Path | None = None,
    project_filter: str | None = None,
    days: int | None = None,
    min_steps: int = 3,
) -> tuple[list[Session], int]:
    """Discover and parse sessions across all adapters.

    Args:
        data_dir: Passed to each adapter's ``discover``.
        project_filter: Case-insensitive substring that ``session.project``
            must contain; falsy disables the filter.
        days: Only files modified within this many days are parsed; falsy
            disables the cutoff.
        min_steps: Sessions with fewer steps are counted as skipped.

    Returns:
        ``(sessions, skipped)`` — sessions sorted by start time (sessions
        without one sort first) and the number of files that parsed to
        nothing or to fewer than ``min_steps`` steps.
    """
    sessions: list[Session] = []
    skipped = 0
    cutoff = datetime.now(UTC) - timedelta(days=days) if days else None

    for adapter in ADAPTERS.values():
        for path in adapter.discover(data_dir):
            if cutoff is not None:
                try:
                    mtime = datetime.fromtimestamp(path.stat().st_mtime, tz=UTC)
                except OSError:
                    # File vanished or is unreadable: ignore it.
                    continue
                if mtime < cutoff:
                    continue
            session = adapter.parse(path)
            if session is None or len(session.steps) < min_steps:
                skipped += 1
                continue
            if project_filter and project_filter.lower() not in session.project.lower():
                continue
            sessions.append(session)

    earliest = datetime.min.replace(tzinfo=UTC)

    def _start_key(session: Session) -> datetime:
        # Naive and aware datetimes cannot be compared, so coerce everything
        # to timezone-aware before sorting.
        started = session.started_at
        if not started:
            return earliest
        if started.utcoffset() is None:
            # NOTE(review): assumes offset-less start times are UTC — confirm.
            return started.replace(tzinfo=UTC)
        return started

    sessions.sort(key=_start_key)
    return sessions, skipped
64
+
65
+
66
def analyze(
    data_dir: Path | None = None,
    project_filter: str | None = None,
    days: int | None = 30,
    window_override: int | None = None,
) -> AnalysisResult:
    """Run the full pipeline: load sessions, extract signals, summarize.

    Each session is scored against the context window resolved for the model
    of its first step. The reported window is the largest one encountered,
    starting from ``window_override`` or the default.
    """
    loaded, skipped_count = load_sessions(data_dir, project_filter, days)

    widest_window = window_override or DEFAULT_CONTEXT_WINDOW
    step_signals: list[StepSignals] = []
    for session in loaded:
        first_model = session.steps[0].model if session.steps else ""
        own_window = context_window_for(first_model, window_override)
        widest_window = max(widest_window, own_window)
        step_signals.extend(extract_signals(session, own_window).steps)

    rot_curve = build_rot_curve(step_signals)
    composition = estimate_composition(loaded, widest_window)

    spend = sum(sig.cost_usd for sig in step_signals)
    degraded_spend = sum(sig.cost_usd for sig in step_signals if sig.degraded)

    knee_pct = rot_curve.knee_pct
    if knee_pct is None:
        beyond_knee = 0
    else:
        beyond_knee = sum(1 for sig in step_signals if sig.fill_pct >= knee_pct)

    # Avoid dividing by zero when there are no steps at all.
    denominator = max(len(step_signals), 1)
    rates = {
        signal: hits / denominator
        for signal, hits in rot_curve.signal_totals.items()
    }

    advice = prescribe(rot_curve, composition, degraded_spend, beyond_knee)
    return AnalysisResult(
        sessions=loaded,
        steps=step_signals,
        curve=rot_curve,
        composition=composition,
        prescriptions=advice,
        context_window=widest_window,
        total_cost_usd=spend,
        rework_cost_usd=degraded_spend,
        steps_past_knee=beyond_knee,
        days=days,
        skipped_sessions=skipped_count,
        signal_rates=rates,
    )
@@ -0,0 +1,72 @@
1
+ """Context-composition estimation.
2
+
3
+ Transcripts don't record the literal context content, but token accounting
4
+ lets us attribute where the window goes with useful accuracy:
5
+
6
+ - session overhead: prompt tokens of the *first* API call — system prompt,
7
+ tool schemas, MCP schemas, CLAUDE.md — everything loaded before the user
8
+ typed a word
9
+ - tool outputs: estimated from result characters (chars/4 heuristic)
10
+ - conversation: user + assistant text, same heuristic
11
+ - other growth: whatever remains of peak prompt size (thinking, file
12
+ snapshots, framework bookkeeping)
13
+
14
+ Figures are labeled as estimates in every report.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass
20
+
21
+ from contextrot.models import Session
22
+
23
# Characters-per-token divisor for the chars/4 size heuristic.
CHARS_PER_TOKEN = 4
24
+
25
+
26
@dataclass
class Composition:
    """Estimated breakdown of where the context window goes."""

    overhead_tokens: int  # loaded before the first user word
    tool_output_tokens: int
    conversation_tokens: int
    other_growth_tokens: int
    peak_prompt_tokens: int
    context_window: int

    @property
    def overhead_pct_of_window(self) -> float:
        """Overhead as a percentage of the window (window floored at 1)."""
        window = max(self.context_window, 1)
        return 100.0 * self.overhead_tokens / window
38
+
39
+
40
def estimate_composition(sessions: list[Session], context_window: int) -> Composition:
    """Average the context composition over the given sessions.

    Overhead is the first step's prompt size. Tool output and conversation
    are character counts converted with ``CHARS_PER_TOKEN``. Those two count
    everything that *flowed through* a session, so they may exceed the
    window. "Other growth" is the unexplained remainder of the average peak
    prompt, floored at zero. An empty session list yields all-zero figures.
    """
    session_count = max(len(sessions), 1)

    overhead_total = sum(s.steps[0].prompt_tokens for s in sessions if s.steps)
    peak_total = sum(s.peak_prompt_tokens for s in sessions)
    tool_total = sum(
        call.result_chars // CHARS_PER_TOKEN
        for s in sessions
        for step in s.steps
        for call in step.tool_calls
    )
    assistant_total = sum(
        len(step.assistant_text) // CHARS_PER_TOKEN
        for s in sessions
        for step in s.steps
    )
    user_total = sum(s.user_message_chars // CHARS_PER_TOKEN for s in sessions)

    mean_overhead = overhead_total // session_count
    mean_peak = peak_total // session_count
    mean_tool = tool_total // session_count
    mean_convo = (assistant_total + user_total) // session_count
    remainder = mean_peak - mean_overhead - mean_tool - mean_convo

    return Composition(
        overhead_tokens=mean_overhead,
        tool_output_tokens=mean_tool,
        conversation_tokens=mean_convo,
        other_growth_tokens=max(0, remainder),
        peak_prompt_tokens=mean_peak,
        context_window=context_window,
    )
@@ -0,0 +1,101 @@
1
+ """Prescription engine v1.
2
+
3
+ Rule-based recommendations, each quantified from the user's own data.
4
+ A prescription is only emitted when its evidence threshold is met — an
5
+ empty list is a valid, honest output.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+
12
+ from contextrot.analysis.composition import Composition
13
+ from contextrot.analysis.rot import RotCurve
14
+
15
+
16
@dataclass
class Prescription:
    """One recommendation shown to the user."""

    title: str
    detail: str
    impact: str  # quantified expected benefit
    priority: int  # lower = more important


def prescribe(
    curve: RotCurve,
    comp: Composition,
    rework_cost_usd: float,
    steps_past_knee: int,
) -> list[Prescription]:
    """Build the list of recommendations whose evidence thresholds are met.

    Args:
        curve: Rot-curve statistics (knee, zone rates, signal totals).
        comp: Estimated context composition.
        rework_cost_usd: Spend attributed to degraded steps.
        steps_past_knee: Number of steps at or beyond the knee.

    Returns:
        Prescriptions sorted by ascending ``priority``; possibly empty.
    """
    out: list[Prescription] = []

    low_rate = curve.low_fill_rate
    high_rate = curve.high_fill_rate
    # Only claim a rise when the deep-context rate really is above the
    # fresh-context rate. A knee can exist (one mid-range bucket spiking)
    # while the aggregate deep-context rate is no worse than the baseline.
    if curve.knee_pct is not None and high_rate and low_rate and high_rate > low_rate:
        out.append(
            Prescription(
                title=f"Compact or restart sessions before ~{curve.knee_pct}% context fill",
                detail=(
                    # The two rates come from the fixed fresh/deep zones, not
                    # from either side of the knee, so they are labeled by zone.
                    f"Your failure-signal rate rises from "
                    f"{low_rate:.1%} in fresh context to "
                    f"{high_rate:.1%} in deep context, and first climbs "
                    f"around {int(curve.knee_pct)}% fill. "
                    f"{steps_past_knee} of your recent steps ran past that threshold."
                ),
                impact=(
                    f"Estimated ${rework_cost_usd:.2f} of recent spend went to degraded "
                    "steps and their retries; most of it is concentrated past the knee."
                ),
                priority=1,
            )
        )

    if comp.overhead_pct_of_window >= 15:
        out.append(
            Prescription(
                title="Audit your session startup overhead",
                detail=(
                    f"On average, ~{comp.overhead_tokens:,} tokens "
                    f"({comp.overhead_pct_of_window:.0f}% of the context window) are "
                    "loaded before your first word — system prompt, MCP tool schemas, "
                    "CLAUDE.md. Disable MCP servers you don't use in this project and "
                    "trim stale CLAUDE.md sections."
                ),
                impact=(
                    "Every point of startup overhead is a point of working context "
                    "you never get back, in every session."
                ),
                priority=2,
            )
        )

    total_growth = comp.tool_output_tokens + comp.conversation_tokens + comp.other_growth_tokens
    if total_growth > 0 and comp.tool_output_tokens / total_growth >= 0.5:
        out.append(
            Prescription(
                title="Tool outputs dominate your context growth",
                detail=(
                    f"~{comp.tool_output_tokens:,} tokens (est.) of context growth come "
                    "from tool results. Prefer targeted reads (offsets, limits), "
                    "narrower searches, and quieter commands over dumping whole files "
                    "and logs into the window."
                ),
                impact="Slower fill means more steps before the degradation zone.",
                priority=3,
            )
        )

    reread_total = curve.signal_totals.get("reread", 0)
    if curve.total_steps and reread_total / curve.total_steps >= 0.08:
        out.append(
            Prescription(
                title="Your agent frequently re-reads files it already read",
                detail=(
                    f"Re-reads fired on {reread_total} steps "
                    f"({reread_total / curve.total_steps:.0%}). That usually means the "
                    "original content scrolled out of effective attention. Splitting "
                    "large tasks into shorter sessions keeps files 'fresh'."
                ),
                impact="Fewer re-reads is both cheaper and a direct rot symptom removed.",
                priority=4,
            )
        )

    return sorted(out, key=lambda p: p.priority)
@@ -0,0 +1,132 @@
1
+ """Rot-curve statistics.
2
+
3
+ Buckets steps by context-fill percentage and measures how often failure
4
+ signals fire in each bucket. All statistics are observational: this is a
5
+ diagnostic on your own sessions, not a controlled experiment, and the
6
+ report says so. Wilson score intervals are used because bucket counts are
7
+ often small and rates are near the boundary.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import math
13
+ from dataclasses import dataclass, field
14
+
15
+ from contextrot.signals import SIGNAL_NAMES, StepSignals
16
+
17
# Tuning constants for the rot curve. Fill values are percentages of the
# context window.
BUCKET_WIDTH = 10  # percent
LOW_FILL_MAX = 40.0  # "fresh context" zone
HIGH_FILL_MIN = 60.0  # "deep context" zone
MIN_BUCKET_N = 15  # buckets below this are shown but marked low-confidence
KNEE_RATIO = 1.5  # bucket rate vs baseline that marks the degradation knee
22
+
23
+
24
def wilson_interval(successes: int, n: int, z: float = 1.96) -> tuple[float, float]:
    """Return the Wilson score interval for ``successes`` out of ``n`` trials.

    ``z`` is the normal quantile (1.96 by default). With ``n == 0`` the
    uninformative interval ``(0.0, 1.0)`` is returned. Bounds are clamped to
    ``[0, 1]``.
    """
    if n == 0:
        return (0.0, 1.0)

    proportion = successes / n
    scale = 1 + z * z / n
    midpoint = (proportion + z * z / (2 * n)) / scale
    spread = math.sqrt(proportion * (1 - proportion) / n + z * z / (4 * n * n))
    half_width = (z / scale) * spread

    lower = max(0.0, midpoint - half_width)
    upper = min(1.0, midpoint + half_width)
    return (lower, upper)
32
+
33
+
34
@dataclass
class Bucket:
    """Step counts for one slice of the context-fill range."""

    lo: int  # inclusive fill %
    hi: int  # exclusive fill %
    n: int = 0
    degraded: int = 0
    by_signal: dict[str, int] = field(default_factory=dict)

    @property
    def rate(self) -> float:
        """Share of steps in this bucket that were degraded (0.0 if empty)."""
        if not self.n:
            return 0.0
        return self.degraded / self.n

    @property
    def ci(self) -> tuple[float, float]:
        """Wilson score interval for ``rate``."""
        return wilson_interval(self.degraded, self.n)

    @property
    def low_confidence(self) -> bool:
        """True when the bucket holds fewer than ``MIN_BUCKET_N`` steps."""
        return not self.n >= MIN_BUCKET_N
53
+
54
+
55
@dataclass
class RotCurve:
    """Aggregate failure-signal statistics across context-fill levels."""

    buckets: list[Bucket]
    total_steps: int
    total_degraded: int
    low_fill_rate: float | None  # rate below LOW_FILL_MAX
    high_fill_rate: float | None  # rate at/above HIGH_FILL_MIN
    low_fill_n: int
    high_fill_n: int
    degradation_ratio: float | None  # high / low
    ratio_significant: bool  # Wilson CIs of the two zones don't overlap
    knee_pct: int | None  # start of first bucket where rate >= KNEE_RATIO * baseline
    signal_totals: dict[str, int] = field(default_factory=dict)

    @property
    def overall_rate(self) -> float:
        """Degraded share of all steps, or 0.0 when there are no steps."""
        if not self.total_steps:
            return 0.0
        return self.total_degraded / self.total_steps
72
+
73
+
74
def build_rot_curve(steps: list[StepSignals]) -> RotCurve:
    """Bucket steps by fill percentage and summarize degradation by zone.

    Steps at or beyond the last bucket's range are placed in the last bucket.
    The knee is the start of the first bucket at or beyond the fresh-context
    zone that has enough samples and a degraded rate of at least
    ``KNEE_RATIO`` times the fresh-context rate. It stays ``None`` when the
    fresh-context rate is zero or unknown.
    """
    buckets = [
        Bucket(start, start + BUCKET_WIDTH) for start in range(0, 100, BUCKET_WIDTH)
    ]
    last_index = len(buckets) - 1
    signal_totals = {name: 0 for name in SIGNAL_NAMES}

    low_n = low_d = high_n = high_d = 0
    total_degraded = 0

    for step in steps:
        bucket = buckets[min(int(step.fill_pct // BUCKET_WIDTH), last_index)]
        bucket.n += 1
        hit = 1 if step.degraded else 0
        bucket.degraded += hit
        total_degraded += hit

        for name in SIGNAL_NAMES:
            if getattr(step, name):
                bucket.by_signal[name] = bucket.by_signal.get(name, 0) + 1
                signal_totals[name] += 1

        # Steps in the band between the two zones count toward neither.
        if step.fill_pct < LOW_FILL_MAX:
            low_n += 1
            low_d += hit
        elif step.fill_pct >= HIGH_FILL_MIN:
            high_n += 1
            high_d += hit

    low_rate = low_d / low_n if low_n else None
    high_rate = high_d / high_n if high_n else None

    ratio = None
    significant = False
    if low_rate is not None and high_rate is not None and low_n and high_n:
        if low_rate > 0:
            ratio = high_rate / low_rate
        elif high_rate > 0:
            ratio = float("inf")
        else:
            ratio = 1.0
        low_ci = wilson_interval(low_d, low_n)
        high_ci = wilson_interval(high_d, high_n)
        # Significant when the high zone's floor is above the low zone's ceiling.
        significant = high_ci[0] > low_ci[1]

    knee = None
    if low_rate is not None and low_rate > 0:
        threshold = KNEE_RATIO * low_rate
        knee = next(
            (
                b.lo
                for b in buckets
                if not (b.lo < LOW_FILL_MAX or b.low_confidence) and b.rate >= threshold
            ),
            None,
        )

    return RotCurve(
        buckets=buckets,
        total_steps=len(steps),
        total_degraded=total_degraded,
        low_fill_rate=low_rate,
        high_fill_rate=high_rate,
        low_fill_n=low_n,
        high_fill_n=high_n,
        degradation_ratio=ratio,
        ratio_significant=significant,
        knee_pct=knee,
        signal_totals=signal_totals,
    )