contextrot 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. contextrot-0.1.0/.github/ISSUE_TEMPLATE/adapter_request.yml +29 -0
  2. contextrot-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +38 -0
  3. contextrot-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +14 -0
  4. contextrot-0.1.0/.github/workflows/ci.yml +28 -0
  5. contextrot-0.1.0/.github/workflows/release.yml +24 -0
  6. contextrot-0.1.0/.gitignore +25 -0
  7. contextrot-0.1.0/CHANGELOG.md +19 -0
  8. contextrot-0.1.0/CONTRIBUTING.md +51 -0
  9. contextrot-0.1.0/LICENSE +21 -0
  10. contextrot-0.1.0/PKG-INFO +133 -0
  11. contextrot-0.1.0/README.md +102 -0
  12. contextrot-0.1.0/docs/methodology.md +53 -0
  13. contextrot-0.1.0/pyproject.toml +70 -0
  14. contextrot-0.1.0/src/contextrot/__init__.py +3 -0
  15. contextrot-0.1.0/src/contextrot/adapters/__init__.py +12 -0
  16. contextrot-0.1.0/src/contextrot/adapters/base.py +28 -0
  17. contextrot-0.1.0/src/contextrot/adapters/claude_code.py +197 -0
  18. contextrot-0.1.0/src/contextrot/analysis/__init__.py +106 -0
  19. contextrot-0.1.0/src/contextrot/analysis/composition.py +72 -0
  20. contextrot-0.1.0/src/contextrot/analysis/prescriptions.py +101 -0
  21. contextrot-0.1.0/src/contextrot/analysis/rot.py +132 -0
  22. contextrot-0.1.0/src/contextrot/cli.py +165 -0
  23. contextrot-0.1.0/src/contextrot/models.py +78 -0
  24. contextrot-0.1.0/src/contextrot/pricing.py +69 -0
  25. contextrot-0.1.0/src/contextrot/report/__init__.py +4 -0
  26. contextrot-0.1.0/src/contextrot/report/html.py +161 -0
  27. contextrot-0.1.0/src/contextrot/report/template.html.j2 +169 -0
  28. contextrot-0.1.0/src/contextrot/report/terminal.py +164 -0
  29. contextrot-0.1.0/src/contextrot/signals/__init__.py +140 -0
  30. contextrot-0.1.0/tests/fixtures/demo-project/11111111-2222-3333-4444-555555555555.jsonl +12 -0
  31. contextrot-0.1.0/tests/test_adapter_claude_code.py +66 -0
  32. contextrot-0.1.0/tests/test_cli.py +52 -0
  33. contextrot-0.1.0/tests/test_rot.py +55 -0
  34. contextrot-0.1.0/tests/test_signals.py +58 -0
@@ -0,0 +1,29 @@
1
+ name: Adapter request
2
+ description: Ask for (or offer to build) support for another agent CLI
3
+ labels: [adapter, good-first-issue]
4
+ body:
5
+ - type: input
6
+ id: agent
7
+ attributes:
8
+ label: Agent CLI
9
+ placeholder: "Codex CLI / OpenCode / Gemini CLI / ..."
10
+ validations:
11
+ required: true
12
+ - type: input
13
+ id: location
14
+ attributes:
15
+ label: Where transcripts live on disk
16
+ placeholder: "~/.codex/sessions/*.jsonl"
17
+ - type: textarea
18
+ id: format
19
+ attributes:
20
+ label: Format notes
21
+ description: >
22
+ Anything you know about the file format — a sanitized sample entry is
23
+ gold. See CONTRIBUTING.md if you want to build it yourself (it's one file).
24
+ - type: checkboxes
25
+ id: build
26
+ attributes:
27
+ label: Willing to build it?
28
+ options:
29
+ - label: I'd like to implement this adapter myself with guidance
@@ -0,0 +1,38 @@
1
+ name: Bug report
2
+ description: Something broke or produced wrong output
3
+ labels: [bug]
4
+ body:
5
+ - type: input
6
+ id: version
7
+ attributes:
8
+ label: contextrot version
9
+ placeholder: "0.1.0 (contextrot --version)"
10
+ validations:
11
+ required: true
12
+ - type: input
13
+ id: agent
14
+ attributes:
15
+ label: Agent CLI and version
16
+ placeholder: "Claude Code 2.1.92"
17
+ validations:
18
+ required: true
19
+ - type: input
20
+ id: os
21
+ attributes:
22
+ label: OS
23
+ placeholder: "Windows 11 / macOS 15 / Ubuntu 24.04"
24
+ - type: textarea
25
+ id: what
26
+ attributes:
27
+ label: What happened
28
+ description: Command you ran, what you expected, what you got. Paste tracebacks in full.
29
+ validations:
30
+ required: true
31
+ - type: textarea
32
+ id: transcript
33
+ attributes:
34
+ label: Sanitized transcript snippet (parsing bugs only)
35
+ description: >
36
+ If parsing failed, paste the offending JSONL line(s) with file paths,
37
+ code, and personal data replaced by placeholders. Never paste raw
38
+ transcript content.
@@ -0,0 +1,14 @@
1
+ ## What
2
+
3
+ <!-- One or two sentences: what does this change do? -->
4
+
5
+ ## Why
6
+
7
+ <!-- Link the issue, or explain the motivation. -->
8
+
9
+ ## Checklist
10
+
11
+ - [ ] `pytest`, `ruff check src tests`, and `mypy src` pass locally
12
+ - [ ] New behavior has tests (adapters: sanitized fixture + test file)
13
+ - [ ] Any fixture data is fully sanitized (no real paths, code, or personal data)
14
+ - [ ] Signal/statistics changes are reflected in `docs/methodology.md`
@@ -0,0 +1,28 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ os: [ubuntu-latest, windows-latest, macos-latest]
14
+ python: ["3.11", "3.12", "3.13"]
15
+ runs-on: ${{ matrix.os }}
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - uses: actions/setup-python@v5
19
+ with:
20
+ python-version: ${{ matrix.python }}
21
+ - name: Install
22
+ run: pip install -e ".[dev]"
23
+ - name: Lint
24
+ run: ruff check src tests
25
+ - name: Types
26
+ run: mypy src
27
+ - name: Tests
28
+ run: pytest -q
@@ -0,0 +1,24 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ environment: pypi
11
+ permissions:
12
+ id-token: write # PyPI trusted publishing
13
+ contents: read
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.12"
19
+ - name: Build
20
+ run: |
21
+ pip install build
22
+ python -m build
23
+ - name: Publish to PyPI
24
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,25 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .venv/
8
+ venv/
9
+
10
+ # Tooling
11
+ .pytest_cache/
12
+ .mypy_cache/
13
+ .ruff_cache/
14
+ .coverage
15
+ htmlcov/
16
+
17
+ # Generated reports
18
+ *.html
19
+ !src/contextrot/report/template.html.j2
20
+
21
+ # OS / editor
22
+ .DS_Store
23
+ Thumbs.db
24
+ .idea/
25
+ .vscode/
@@ -0,0 +1,19 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. Format follows
4
+ [Keep a Changelog](https://keepachangelog.com/); versioning follows
5
+ [SemVer](https://semver.org/).
6
+
7
+ ## [0.1.0] - 2026-07-02
8
+
9
+ ### Added
10
+
11
+ - Claude Code transcript adapter (`~/.claude/projects/**/*.jsonl`), tolerant parsing, sidechain exclusion
12
+ - Outcome-signal extraction: tool errors, edit failures, retry loops, file re-reads, self-corrections
13
+ - Rot curve: failure-signal rate by context-fill bucket with Wilson 95% confidence intervals
14
+ - Degradation threshold (knee) detection and fresh-vs-deep context ratio with significance check
15
+ - Context composition estimate (startup overhead, tool outputs, conversation)
16
+ - Cost accounting per step with per-model pricing; cost-of-degraded-steps estimate
17
+ - Quantified prescriptions engine
18
+ - Terminal report (Rich), self-contained HTML report (inline SVG, dark mode, tooltips, data table), `--json` output
19
+ - `contextrot sessions` listing command
@@ -0,0 +1,51 @@
1
+ # Contributing to contextrot
2
+
3
+ Thanks for considering a contribution. This project is deliberately structured so the highest-impact contribution is also the easiest one.
4
+
5
+ ## The paved path: write an adapter
6
+
7
+ contextrot supports any agent CLI whose transcripts can be parsed into the normalized session model. Each adapter is **one self-contained file** — no changes to analysis or reporting code needed.
8
+
9
+ 1. Copy `src/contextrot/adapters/claude_code.py` as a starting point.
10
+ 2. Implement the two methods of `SessionAdapter`:
11
+ - `discover()` — find transcript files on disk
12
+ - `parse(path)` — convert one file into a `Session` of `Step`s and `ToolCall`s
13
+ 3. Register it in `src/contextrot/adapters/__init__.py`.
14
+ 4. Add a small sanitized fixture transcript under `tests/fixtures/` and a test file modeled on `tests/test_adapter_claude_code.py`.
15
+
16
+ Adapter ground rules:
17
+
18
+ - **Tolerant parsing.** Skip malformed lines, ignore unknown fields, never crash on a weird file. A partial session beats an exception.
19
+ - **No network calls.** contextrot is local-only; adapters read files, period.
20
+ - **Sanitize fixtures.** Strip real file paths, code, and personal data from any transcript you commit.
21
+
22
+ Wanted adapters: Codex CLI, OpenCode, Gemini CLI, OpenClaw, Cursor CLI, OpenTelemetry GenAI spans.
23
+
24
+ ## Other contributions
25
+
26
+ - **New outcome signals** (`src/contextrot/signals/`): each signal must be independently testable, documented in `docs/methodology.md`, and reported separately in output. Open an issue first to discuss the heuristic.
27
+ - **Prescription rules** (`src/contextrot/analysis/prescriptions.py`): must be quantified from the user's own data, with an explicit evidence threshold.
28
+ - **Bug reports**: include your contextrot version, agent CLI version, OS, and — if it's a parsing bug — a *sanitized* snippet of the offending transcript line.
29
+
30
+ ## Development setup
31
+
32
+ ```bash
33
+ git clone https://github.com/Priyanshu-byte-coder/contextrot
34
+ cd contextrot
35
+ pip install -e ".[dev]"
36
+ pytest
37
+ ruff check src tests
38
+ mypy src
39
+ ```
40
+
41
+ All three must pass in CI. Python 3.11+ supported.
42
+
43
+ ## Style
44
+
45
+ - Match the existing code: type hints everywhere, docstrings explain *why* and document format assumptions.
46
+ - Conventional commits (`feat:`, `fix:`, `docs:`, `test:`, `chore:`).
47
+ - Keep dependencies minimal — new runtime deps need a strong justification (install speed is a feature).
48
+
49
+ ## Statistical honesty
50
+
51
+ This tool's credibility rests on not overclaiming. Reports must always carry n-counts, confidence intervals, and the observational-diagnostic caveat. PRs that trade rigor for a scarier headline will be declined.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Priyanshu Doshi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,133 @@
1
+ Metadata-Version: 2.4
2
+ Name: contextrot
3
+ Version: 0.1.0
4
+ Summary: Find out where your coding agent starts degrading. Personal context-rot analytics from your own agent sessions.
5
+ Project-URL: Homepage, https://github.com/Priyanshu-byte-coder/contextrot
6
+ Project-URL: Repository, https://github.com/Priyanshu-byte-coder/contextrot
7
+ Project-URL: Issues, https://github.com/Priyanshu-byte-coder/contextrot/issues
8
+ Author: Priyanshu Doshi
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: agents,claude-code,context-engineering,context-rot,developer-tools,llm
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Debuggers
21
+ Classifier: Topic :: Software Development :: Quality Assurance
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: jinja2>=3.1
24
+ Requires-Dist: rich>=13.7
25
+ Requires-Dist: typer>=0.12
26
+ Provides-Extra: dev
27
+ Requires-Dist: mypy>=1.10; extra == 'dev'
28
+ Requires-Dist: pytest>=8.0; extra == 'dev'
29
+ Requires-Dist: ruff>=0.6; extra == 'dev'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # contextrot
33
+
34
+ **Your coding agent gets worse as its context fills. contextrot proves it on your own sessions — and tells you exactly what to change.**
35
+
36
+ [![CI](https://github.com/Priyanshu-byte-coder/contextrot/actions/workflows/ci.yml/badge.svg)](https://github.com/Priyanshu-byte-coder/contextrot/actions/workflows/ci.yml)
37
+ [![PyPI](https://img.shields.io/pypi/v/contextrot)](https://pypi.org/project/contextrot/)
38
+ [![Python](https://img.shields.io/pypi/pyversions/contextrot)](https://pypi.org/project/contextrot/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
40
+
41
+ ```
42
+ uvx contextrot
43
+ ```
44
+
45
+ No config. No API keys. No uploads. contextrot reads the agent transcripts already sitting on your disk and answers a question no other tool answers:
46
+
47
+ > **At what context fill does *my* agent start failing, what's causing it, and what is it costing me?**
48
+
49
+ ```
50
+ ╭──────────────── contextrot — your context rot report ────────────────╮
51
+ │ │
52
+ │ Deep-context failure rate: 31.4% vs 14.9% in fresh context │
53
+ │ (2.1×, statistically separated) │
54
+ │ Your degradation threshold: ~60% context fill │
55
+ │ Est. spend on degraded steps: $23.40 of $148.02 total │
56
+ │ │
57
+ ╰────────────────────────────────────────────────────────────────────╯
58
+
59
+ Failure-signal rate by context fill
60
+ Fill Rate n 95% CI
61
+ 0–10% 9% ████████ 214 6%–13%
62
+ 10–20% 12% ███████████ 308 9%–16%
63
+ 20–30% 14% █████████████ 257 10%–18%
64
+ ...
65
+ 60–70% 29% ███████████████████████████ 121 22%–37%
66
+ 70–80% 34% ████████████████████████████████ 87 25%–44%
67
+ ```
68
+
69
+ ## What "context rot" is — and why a benchmark can't tell you
70
+
71
+ Research ([Chroma's context-rot report](https://www.trychroma.com/research/context-rot), several 2026 papers) shows LLM output quality degrades as input context grows — even far below the window limit. But that research runs synthetic tasks in lab conditions. Your degradation point depends on *your* projects, *your* MCP setup, *your* model, *your* prompting style.
72
+
73
+ contextrot measures it where it actually matters: in your own sessions.
74
+
75
+ ## How it works
76
+
77
+ Agent CLIs like Claude Code log every session to local JSONL transcripts. Each step in those transcripts carries token accounting *and* behavioral evidence:
78
+
79
+ - **edit failures** — the agent tried to edit code and missed
80
+ - **retry loops** — the same tool call repeated after an error
81
+ - **re-reads** — the agent re-reading files it already read (it lost track)
82
+ - **self-corrections** — "I apologize, let me fix that"
83
+ - **tool errors** — any failed tool call
84
+
85
+ contextrot extracts these signals per step, computes context fill at that moment, and correlates the two — with Wilson 95% confidence intervals, per-signal breakdowns, and honest n-counts. Then it estimates what degraded steps cost you and emits prescriptions quantified from your own data.
86
+
87
+ Full method: [docs/methodology.md](docs/methodology.md).
88
+
89
+ ## What contextrot is not
90
+
91
+ Be suspicious of any tool that won't tell you this, so:
92
+
93
+ - **Not a spend meter.** [ccusage](https://github.com/ryoppippi/ccusage) is excellent at "how much did I spend" — use it, it's complementary. contextrot answers "where does my agent *degrade* and why."
94
+ - **Not Claude Code's `/context`.** That's a point-in-time composition snapshot. contextrot correlates fill with *outcomes* across your whole history.
95
+ - **Not an observability platform.** Langfuse/Phoenix/MLflow instrument apps you build. contextrot needs zero instrumentation and analyzes the agent you *use*.
96
+ - **Not a controlled experiment.** It's an observational diagnostic on your own data, with the statistical caveats printed right on the report.
97
+
98
+ ## Install & use
99
+
100
+ ```bash
101
+ uvx contextrot # zero-install run
102
+ # or
103
+ pip install contextrot
104
+ ```
105
+
106
+ ```bash
107
+ contextrot # full report, last 30 days
108
+ contextrot --days 90 # widen the range
109
+ contextrot -p myproject # one project only
110
+ contextrot --html report.html # shareable single-file report (still local)
111
+ contextrot --json # machine-readable
112
+ contextrot sessions # list what was parsed
113
+ ```
114
+
115
+ Supported agents: **Claude Code** (today). Codex CLI, OpenCode, Gemini CLI, and OpenTelemetry GenAI spans are next — an adapter is one small file, and [writing one is the paved first-contribution path](CONTRIBUTING.md).
116
+
117
+ ## Privacy
118
+
119
+ contextrot makes **zero network calls**. It reads local transcript files, prints to your terminal, and optionally writes a local HTML file. Nothing leaves your machine. Grep the codebase for `http` — you won't find a client.
120
+
121
+ ## Roadmap
122
+
123
+ - `contextrot fix` — apply prescriptions interactively (disable unused MCP servers, trim CLAUDE.md) with before/after measurement
124
+ - More agent adapters + OTel ingestion
125
+ - Opt-in, anonymized aggregate stats → the **State of Context Rot** report: real-workload degradation curves across the community (off by default, documented schema, aggregate-only)
126
+
127
+ ## Contributing
128
+
129
+ See [CONTRIBUTING.md](CONTRIBUTING.md). The most valuable first PR: an adapter for the agent CLI you use.
130
+
131
+ ## License
132
+
133
+ [MIT](LICENSE)
@@ -0,0 +1,102 @@
1
+ # contextrot
2
+
3
+ **Your coding agent gets worse as its context fills. contextrot proves it on your own sessions — and tells you exactly what to change.**
4
+
5
+ [![CI](https://github.com/Priyanshu-byte-coder/contextrot/actions/workflows/ci.yml/badge.svg)](https://github.com/Priyanshu-byte-coder/contextrot/actions/workflows/ci.yml)
6
+ [![PyPI](https://img.shields.io/pypi/v/contextrot)](https://pypi.org/project/contextrot/)
7
+ [![Python](https://img.shields.io/pypi/pyversions/contextrot)](https://pypi.org/project/contextrot/)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
9
+
10
+ ```
11
+ uvx contextrot
12
+ ```
13
+
14
+ No config. No API keys. No uploads. contextrot reads the agent transcripts already sitting on your disk and answers a question no other tool answers:
15
+
16
+ > **At what context fill does *my* agent start failing, what's causing it, and what is it costing me?**
17
+
18
+ ```
19
+ ╭──────────────── contextrot — your context rot report ────────────────╮
20
+ │ │
21
+ │ Deep-context failure rate: 31.4% vs 14.9% in fresh context │
22
+ │ (2.1×, statistically separated) │
23
+ │ Your degradation threshold: ~60% context fill │
24
+ │ Est. spend on degraded steps: $23.40 of $148.02 total │
25
+ │ │
26
+ ╰────────────────────────────────────────────────────────────────────╯
27
+
28
+ Failure-signal rate by context fill
29
+ Fill Rate n 95% CI
30
+ 0–10% 9% ████████ 214 6%–13%
31
+ 10–20% 12% ███████████ 308 9%–16%
32
+ 20–30% 14% █████████████ 257 10%–18%
33
+ ...
34
+ 60–70% 29% ███████████████████████████ 121 22%–37%
35
+ 70–80% 34% ████████████████████████████████ 87 25%–44%
36
+ ```
37
+
38
+ ## What "context rot" is — and why a benchmark can't tell you
39
+
40
+ Research ([Chroma's context-rot report](https://www.trychroma.com/research/context-rot), several 2026 papers) shows LLM output quality degrades as input context grows — even far below the window limit. But that research runs synthetic tasks in lab conditions. Your degradation point depends on *your* projects, *your* MCP setup, *your* model, *your* prompting style.
41
+
42
+ contextrot measures it where it actually matters: in your own sessions.
43
+
44
+ ## How it works
45
+
46
+ Agent CLIs like Claude Code log every session to local JSONL transcripts. Each step in those transcripts carries token accounting *and* behavioral evidence:
47
+
48
+ - **edit failures** — the agent tried to edit code and missed
49
+ - **retry loops** — the same tool call repeated after an error
50
+ - **re-reads** — the agent re-reading files it already read (it lost track)
51
+ - **self-corrections** — "I apologize, let me fix that"
52
+ - **tool errors** — any failed tool call
53
+
54
+ contextrot extracts these signals per step, computes context fill at that moment, and correlates the two — with Wilson 95% confidence intervals, per-signal breakdowns, and honest n-counts. Then it estimates what degraded steps cost you and emits prescriptions quantified from your own data.
55
+
56
+ Full method: [docs/methodology.md](docs/methodology.md).
57
+
58
+ ## What contextrot is not
59
+
60
+ Be suspicious of any tool that won't tell you this, so:
61
+
62
+ - **Not a spend meter.** [ccusage](https://github.com/ryoppippi/ccusage) is excellent at "how much did I spend" — use it, it's complementary. contextrot answers "where does my agent *degrade* and why."
63
+ - **Not Claude Code's `/context`.** That's a point-in-time composition snapshot. contextrot correlates fill with *outcomes* across your whole history.
64
+ - **Not an observability platform.** Langfuse/Phoenix/MLflow instrument apps you build. contextrot needs zero instrumentation and analyzes the agent you *use*.
65
+ - **Not a controlled experiment.** It's an observational diagnostic on your own data, with the statistical caveats printed right on the report.
66
+
67
+ ## Install & use
68
+
69
+ ```bash
70
+ uvx contextrot # zero-install run
71
+ # or
72
+ pip install contextrot
73
+ ```
74
+
75
+ ```bash
76
+ contextrot # full report, last 30 days
77
+ contextrot --days 90 # widen the range
78
+ contextrot -p myproject # one project only
79
+ contextrot --html report.html # shareable single-file report (still local)
80
+ contextrot --json # machine-readable
81
+ contextrot sessions # list what was parsed
82
+ ```
83
+
84
+ Supported agents: **Claude Code** (today). Codex CLI, OpenCode, Gemini CLI, and OpenTelemetry GenAI spans are next — an adapter is one small file, and [writing one is the paved first-contribution path](CONTRIBUTING.md).
85
+
86
+ ## Privacy
87
+
88
+ contextrot makes **zero network calls**. It reads local transcript files, prints to your terminal, and optionally writes a local HTML file. Nothing leaves your machine. Grep the codebase for `http` — you won't find a client.
89
+
90
+ ## Roadmap
91
+
92
+ - `contextrot fix` — apply prescriptions interactively (disable unused MCP servers, trim CLAUDE.md) with before/after measurement
93
+ - More agent adapters + OTel ingestion
94
+ - Opt-in, anonymized aggregate stats → the **State of Context Rot** report: real-workload degradation curves across the community (off by default, documented schema, aggregate-only)
95
+
96
+ ## Contributing
97
+
98
+ See [CONTRIBUTING.md](CONTRIBUTING.md). The most valuable first PR: an adapter for the agent CLI you use.
99
+
100
+ ## License
101
+
102
+ [MIT](LICENSE)
@@ -0,0 +1,53 @@
1
+ # Methodology
2
+
3
+ This page documents exactly how contextrot computes what it shows, including the limitations. If you're evaluating whether to trust the numbers, read this end to end — it's short.
4
+
5
+ ## Data source
6
+
7
+ contextrot reads agent transcripts already on your disk (for Claude Code: `~/.claude/projects/<project>/<session>.jsonl`). Each transcript records every model API call with token accounting, every tool invocation, and every tool result. Nothing is instrumented and nothing is uploaded; analysis is a pure local read.
8
+
9
+ Sub-agent ("sidechain") traffic runs in its own context window, so it is excluded from the main analysis and counted separately. Sessions with fewer than 3 steps are skipped.
10
+
11
+ ## Context fill
12
+
13
+ For each model call ("step"), context fill is the prompt-side token count — `input_tokens + cache_read_input_tokens + cache_creation_input_tokens` — divided by the model's context window (200k default, `--window` to override). This is the exact size of what the model had to read at that moment, taken from the agent's own accounting, not an estimate.
14
+
15
+ ## Outcome signals
16
+
17
+ Five per-step signals, each an independent heuristic, each reported separately as well as combined:
18
+
19
+ | Signal | Definition | Rationale |
20
+ |---|---|---|
21
+ | `tool_error` | any tool call in the step returned an error | direct failure evidence |
22
+ | `edit_failure` | an editing tool (Edit/Write/MultiEdit/...) returned an error | for coding agents, the clearest "model lost track of file state" event |
23
+ | `retry` | the step repeats a (tool, target) pair that errored within the previous 6 steps | rework: paying twice for the same action |
24
+ | `reread` | the step re-Reads a file already read earlier in the session | proxy for content scrolled out of effective attention |
25
+ | `self_correction` | assistant text matches apology/correction phrases ("I apologize", "my mistake", "let me fix that", ...) | linguistic marker of a recognized error |
26
+
27
+ A step is **degraded** if any signal fired. Signals are deliberately simple and inspectable — every one can be verified by grepping your own transcript. Known noise sources: `reread` can be legitimate (file changed on disk); `self_correction` matches politeness patterns imperfectly. This is why per-signal counts are always shown: a conclusion driven by one noisy signal is visible as such.
28
+
29
+ ## The rot curve
30
+
31
+ Steps are bucketed by fill percentage (10-point buckets). Per bucket, contextrot reports the degraded-step rate with a **Wilson 95% score interval** (chosen over normal approximation because bucket counts can be small and rates sit near 0). Buckets with fewer than 15 steps are flagged low-confidence.
32
+
33
+ Two summary zones: **fresh** (< 40% fill) and **deep** (≥ 60% fill); steps between 40% and 60% fill belong to neither zone. The headline ratio is deep rate / fresh rate; it is labeled *statistically separated* only when the two zones' Wilson intervals don't overlap — a conservative test.
34
+
35
+ The **degradation threshold (knee)** is the start of the first non-low-confidence bucket at ≥ 40% fill whose rate reaches 1.5× the fresh-zone rate. If no bucket qualifies, no knee is reported — a flat curve is a valid result and contextrot will happily tell you your setup shows no measurable rot.
36
+
37
+ ## Cost figures
38
+
39
+ Per-step cost uses published API list prices per model (input, output, cache read, cache write). For subscription users this is the *API-equivalent value*, not a bill. "Spend on degraded steps" sums the cost of steps where a failure signal fired — a lower bound on rework cost, since it excludes the follow-up work those failures caused. Unknown models fall back to conservative defaults and are marked estimated.
40
+
41
+ ## Composition estimate
42
+
43
+ Startup overhead is the prompt size of each session's *first* API call (system prompt + tool schemas + project instructions — everything loaded before your first word), averaged per session and exact from token accounting. Tool-output and conversation figures use a 4-characters-per-token heuristic and are labeled estimates. With compaction, flow-through figures can exceed the window size; that flow is precisely what fills the window.
44
+
45
+ ## What this is not
46
+
47
+ - **Not causal.** contextrot measures association between context fill and failure signals in observational data. Deep-context steps also tend to be later in harder tasks; some of the association is task difficulty, not rot. The report never claims otherwise.
48
+ - **Not a benchmark.** Results describe *your* sessions with *your* configuration. They will differ from lab results ([Chroma's context-rot report](https://www.trychroma.com/research/context-rot)) and from other users — that's the point.
49
+ - **Not ground truth on quality.** Signals are proxies with false positives and negatives. They are useful because they are consistent proxies: the same heuristics applied at every fill level, so *differences across fill levels* are meaningful even when absolute rates are noisy.
50
+
51
+ ## Reproducibility
52
+
53
+ `contextrot --json` emits every per-step signal record and per-bucket statistic, so any number in the report can be recomputed independently.
@@ -0,0 +1,70 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "contextrot"
7
+ version = "0.1.0"
8
+ description = "Find out where your coding agent starts degrading. Personal context-rot analytics from your own agent sessions."
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.11"
12
+ authors = [{ name = "Priyanshu Doshi" }]
13
+ keywords = [
14
+ "llm",
15
+ "claude-code",
16
+ "context-engineering",
17
+ "context-rot",
18
+ "agents",
19
+ "developer-tools",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 4 - Beta",
23
+ "Environment :: Console",
24
+ "Intended Audience :: Developers",
25
+ "License :: OSI Approved :: MIT License",
26
+ "Operating System :: OS Independent",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Programming Language :: Python :: 3.13",
30
+ "Topic :: Software Development :: Debuggers",
31
+ "Topic :: Software Development :: Quality Assurance",
32
+ ]
33
+ dependencies = [
34
+ "typer>=0.12",
35
+ "rich>=13.7",
36
+ "jinja2>=3.1",
37
+ ]
38
+
39
+ [project.urls]
40
+ Homepage = "https://github.com/Priyanshu-byte-coder/contextrot"
41
+ Repository = "https://github.com/Priyanshu-byte-coder/contextrot"
42
+ Issues = "https://github.com/Priyanshu-byte-coder/contextrot/issues"
43
+
44
+ [project.scripts]
45
+ contextrot = "contextrot.cli:app"
46
+
47
+ [project.optional-dependencies]
48
+ dev = [
49
+ "pytest>=8.0",
50
+ "ruff>=0.6",
51
+ "mypy>=1.10",
52
+ ]
53
+
54
+ [tool.hatch.build.targets.wheel]
55
+ packages = ["src/contextrot"]
56
+
57
+ [tool.ruff]
58
+ line-length = 100
59
+ target-version = "py311"
60
+
61
+ [tool.ruff.lint]
62
+ select = ["E", "F", "I", "UP", "B", "SIM"]
63
+
64
+ [tool.mypy]
65
+ python_version = "3.11"
66
+ ignore_missing_imports = true
67
+ check_untyped_defs = true
68
+
69
+ [tool.pytest.ini_options]
70
+ testpaths = ["tests"]
@@ -0,0 +1,3 @@
1
+ """contextrot — personal context-rot analytics for coding agents."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,12 @@
1
+ """Adapter registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contextrot.adapters.base import SessionAdapter
6
+ from contextrot.adapters.claude_code import ClaudeCodeAdapter
7
+
8
+ ADAPTERS: dict[str, SessionAdapter] = {
9
+ ClaudeCodeAdapter.name: ClaudeCodeAdapter(),
10
+ }
11
+
12
+ __all__ = ["ADAPTERS", "SessionAdapter", "ClaudeCodeAdapter"]