PyPI - contextrot - Versions diffs - 0.1.0__tar.gz - Mend

contextrot 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

contextrot-0.1.0/.github/ISSUE_TEMPLATE/adapter_request.yml +29 -0
contextrot-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +38 -0
contextrot-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +14 -0
contextrot-0.1.0/.github/workflows/ci.yml +28 -0
contextrot-0.1.0/.github/workflows/release.yml +24 -0
contextrot-0.1.0/.gitignore +25 -0
contextrot-0.1.0/CHANGELOG.md +19 -0
contextrot-0.1.0/CONTRIBUTING.md +51 -0
contextrot-0.1.0/LICENSE +21 -0
contextrot-0.1.0/PKG-INFO +133 -0
contextrot-0.1.0/README.md +102 -0
contextrot-0.1.0/docs/methodology.md +53 -0
contextrot-0.1.0/pyproject.toml +70 -0
contextrot-0.1.0/src/contextrot/__init__.py +3 -0
contextrot-0.1.0/src/contextrot/adapters/__init__.py +12 -0
contextrot-0.1.0/src/contextrot/adapters/base.py +28 -0
contextrot-0.1.0/src/contextrot/adapters/claude_code.py +197 -0
contextrot-0.1.0/src/contextrot/analysis/__init__.py +106 -0
contextrot-0.1.0/src/contextrot/analysis/composition.py +72 -0
contextrot-0.1.0/src/contextrot/analysis/prescriptions.py +101 -0
contextrot-0.1.0/src/contextrot/analysis/rot.py +132 -0
contextrot-0.1.0/src/contextrot/cli.py +165 -0
contextrot-0.1.0/src/contextrot/models.py +78 -0
contextrot-0.1.0/src/contextrot/pricing.py +69 -0
contextrot-0.1.0/src/contextrot/report/__init__.py +4 -0
contextrot-0.1.0/src/contextrot/report/html.py +161 -0
contextrot-0.1.0/src/contextrot/report/template.html.j2 +169 -0
contextrot-0.1.0/src/contextrot/report/terminal.py +164 -0
contextrot-0.1.0/src/contextrot/signals/__init__.py +140 -0
contextrot-0.1.0/tests/fixtures/demo-project/11111111-2222-3333-4444-555555555555.jsonl +12 -0
contextrot-0.1.0/tests/test_adapter_claude_code.py +66 -0
contextrot-0.1.0/tests/test_cli.py +52 -0
contextrot-0.1.0/tests/test_rot.py +55 -0
contextrot-0.1.0/tests/test_signals.py +58 -0

contextrot-0.1.0/.github/ISSUE_TEMPLATE/adapter_request.yml ADDED Viewed

@@ -0,0 +1,29 @@
+name: Adapter request
+description: Ask for (or offer to build) support for another agent CLI
+labels: [adapter, good-first-issue]
+body:
+  - type: input
+    id: agent
+    attributes:
+      label: Agent CLI
+      placeholder: "Codex CLI / OpenCode / Gemini CLI / ..."
+    validations:
+      required: true
+  - type: input
+    id: location
+    attributes:
+      label: Where transcripts live on disk
+      placeholder: "~/.codex/sessions/*.jsonl"
+  - type: textarea
+    id: format
+    attributes:
+      label: Format notes
+      description: >
+        Anything you know about the file format — a sanitized sample entry is
+        gold. See CONTRIBUTING.md if you want to build it yourself (it's one file).
+  - type: checkboxes
+    id: build
+    attributes:
+      label: Willing to build it?
+      options:
+        - label: I'd like to implement this adapter myself with guidance

contextrot-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml ADDED Viewed

@@ -0,0 +1,38 @@
+name: Bug report
+description: Something broke or produced wrong output
+labels: [bug]
+body:
+  - type: input
+    id: version
+    attributes:
+      label: contextrot version
+      placeholder: "0.1.0 (contextrot --version)"
+    validations:
+      required: true
+  - type: input
+    id: agent
+    attributes:
+      label: Agent CLI and version
+      placeholder: "Claude Code 2.1.92"
+    validations:
+      required: true
+  - type: input
+    id: os
+    attributes:
+      label: OS
+      placeholder: "Windows 11 / macOS 15 / Ubuntu 24.04"
+  - type: textarea
+    id: what
+    attributes:
+      label: What happened
+      description: Command you ran, what you expected, what you got. Paste tracebacks in full.
+    validations:
+      required: true
+  - type: textarea
+    id: transcript
+    attributes:
+      label: Sanitized transcript snippet (parsing bugs only)
+      description: >
+        If parsing failed, paste the offending JSONL line(s) with file paths,
+        code, and personal data replaced by placeholders. Never paste raw
+        transcript content.

contextrot-0.1.0/.github/PULL_REQUEST_TEMPLATE.md ADDED Viewed

@@ -0,0 +1,14 @@
+## What
+<!-- One or two sentences: what does this change do? -->
+## Why
+<!-- Link the issue, or explain the motivation. -->
+## Checklist
+- [ ] `pytest`, `ruff check src tests`, and `mypy src` pass locally
+- [ ] New behavior has tests (adapters: sanitized fixture + test file)
+- [ ] Any fixture data is fully sanitized (no real paths, code, or personal data)
+- [ ] Signal/statistics changes are reflected in `docs/methodology.md`

contextrot-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,28 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+jobs:
+  test:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python: ["3.11", "3.12", "3.13"]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Install
+        run: pip install -e ".[dev]"
+      - name: Lint
+        run: ruff check src tests
+      - name: Types
+        run: mypy src
+      - name: Tests
+        run: pytest -q

contextrot-0.1.0/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,24 @@
+name: Release
+on:
+  push:
+    tags: ["v*"]
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      id-token: write  # PyPI trusted publishing
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Build
+        run: |
+          pip install build
+          python -m build
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

contextrot-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,25 @@
+# Python
+__pycache__/
+*.py[cod]
+*.egg-info/
+dist/
+build/
+.venv/
+venv/
+# Tooling
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.coverage
+htmlcov/
+# Generated reports
+*.html
+!src/contextrot/report/template.html.j2
+# OS / editor
+.DS_Store
+Thumbs.db
+.idea/
+.vscode/

contextrot-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Changelog
+All notable changes to this project are documented here. Format follows
+[Keep a Changelog](https://keepachangelog.com/); versioning follows
+[SemVer](https://semver.org/).
+## [0.1.0] - 2026-07-02
+### Added
+- Claude Code transcript adapter (`~/.claude/projects/**/*.jsonl`), tolerant parsing, sidechain exclusion
+- Outcome-signal extraction: tool errors, edit failures, retry loops, file re-reads, self-corrections
+- Rot curve: failure-signal rate by context-fill bucket with Wilson 95% confidence intervals
+- Degradation threshold (knee) detection and fresh-vs-deep context ratio with significance check
+- Context composition estimate (startup overhead, tool outputs, conversation)
+- Cost accounting per step with per-model pricing; cost-of-degraded-steps estimate
+- Quantified prescriptions engine
+- Terminal report (Rich), self-contained HTML report (inline SVG, dark mode, tooltips, data table), `--json` output
+- `contextrot sessions` listing command

contextrot-0.1.0/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,51 @@
+# Contributing to contextrot
+Thanks for considering a contribution. This project is deliberately structured so the highest-impact contribution is also the easiest one.
+## The paved path: write an adapter
+contextrot supports any agent CLI whose transcripts can be parsed into the normalized session model. Each adapter is **one self-contained file** — no changes to analysis or reporting code needed.
+1. Copy `src/contextrot/adapters/claude_code.py` as a starting point.
+2. Implement the two methods of `SessionAdapter`:
+   - `discover()` — find transcript files on disk
+   - `parse(path)` — convert one file into a `Session` of `Step`s and `ToolCall`s
+3. Register it in `src/contextrot/adapters/__init__.py`.
+4. Add a small sanitized fixture transcript under `tests/fixtures/` and a test file modeled on `tests/test_adapter_claude_code.py`.
+Adapter ground rules:
+- **Tolerant parsing.** Skip malformed lines, ignore unknown fields, never crash on a weird file. A partial session beats an exception.
+- **No network calls.** contextrot is local-only; adapters read files, period.
+- **Sanitize fixtures.** Strip real file paths, code, and personal data from any transcript you commit.
+Wanted adapters: Codex CLI, OpenCode, Gemini CLI, OpenClaw, Cursor CLI, OpenTelemetry GenAI spans.
+## Other contributions
+- **New outcome signals** (`src/contextrot/signals/`): each signal must be independently testable, documented in `docs/methodology.md`, and reported separately in output. Open an issue first to discuss the heuristic.
+- **Prescription rules** (`src/contextrot/analysis/prescriptions.py`): must be quantified from the user's own data, with an explicit evidence threshold.
+- **Bug reports**: include your contextrot version, agent CLI version, OS, and — if it's a parsing bug — a *sanitized* snippet of the offending transcript line.
+## Development setup
+```bash
+git clone https://github.com/Priyanshu-byte-coder/contextrot
+cd contextrot
+pip install -e ".[dev]"
+pytest
+ruff check src tests
+mypy src
+```
+All three must pass in CI. Python 3.11+ is supported.
+## Style
+- Match the existing code: type hints everywhere, docstrings explain *why* and document format assumptions.
+- Conventional commits (`feat:`, `fix:`, `docs:`, `test:`, `chore:`).
+- Keep dependencies minimal — new runtime deps need a strong justification (install speed is a feature).
+## Statistical honesty
+This tool's credibility rests on not overclaiming. Reports must always carry n-counts, confidence intervals, and the observational-diagnostic caveat. PRs that trade rigor for a scarier headline will be declined.

contextrot-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Priyanshu Doshi
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

contextrot-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,133 @@
+Metadata-Version: 2.4
+Name: contextrot
+Version: 0.1.0
+Summary: Find out where your coding agent starts degrading. Personal context-rot analytics from your own agent sessions.
+Project-URL: Homepage, https://github.com/Priyanshu-byte-coder/contextrot
+Project-URL: Repository, https://github.com/Priyanshu-byte-coder/contextrot
+Project-URL: Issues, https://github.com/Priyanshu-byte-coder/contextrot/issues
+Author: Priyanshu Doshi
+License: MIT
+License-File: LICENSE
+Keywords: agents,claude-code,context-engineering,context-rot,developer-tools,llm
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development :: Debuggers
+Classifier: Topic :: Software Development :: Quality Assurance
+Requires-Python: >=3.11
+Requires-Dist: jinja2>=3.1
+Requires-Dist: rich>=13.7
+Requires-Dist: typer>=0.12
+Provides-Extra: dev
+Requires-Dist: mypy>=1.10; extra == 'dev'
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Requires-Dist: ruff>=0.6; extra == 'dev'
+Description-Content-Type: text/markdown
+# contextrot
+**Your coding agent gets worse as its context fills. contextrot proves it on your own sessions — and tells you exactly what to change.**
+[![CI](https://github.com/Priyanshu-byte-coder/contextrot/actions/workflows/ci.yml/badge.svg)](https://github.com/Priyanshu-byte-coder/contextrot/actions/workflows/ci.yml)
+[![PyPI](https://img.shields.io/pypi/v/contextrot)](https://pypi.org/project/contextrot/)
+[![Python](https://img.shields.io/pypi/pyversions/contextrot)](https://pypi.org/project/contextrot/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
+```
+uvx contextrot
+```
+No config. No API keys. No uploads. contextrot reads the agent transcripts already sitting on your disk and answers a question no other tool answers:
+> **At what context fill does *my* agent start failing, what's causing it, and what is it costing me?**
+```
+╭──────────────── contextrot — your context rot report ────────────────╮
+│                                                                    │
+│  Deep-context failure rate: 31.4% vs 14.9% in fresh context        │
+│  (2.1×, statistically separated)                                   │
+│  Your degradation threshold: ~60% context fill                     │
+│  Est. spend on degraded steps: $23.40 of $148.02 total             │
+│                                                                    │
+╰────────────────────────────────────────────────────────────────────╯
+           Failure-signal rate by context fill
+   Fill    Rate                                            n   95% CI
+  0–10%     9%  ████████                                 214   6%–13%
+ 10–20%    12%  ███████████                              308   9%–16%
+ 20–30%    14%  █████████████                            257  10%–18%
+ ...
+ 60–70%    29%  ███████████████████████████              121  22%–37%
+ 70–80%    34%  ████████████████████████████████          87  25%–44%
+```
+## What "context rot" is — and why a benchmark can't tell you
+Research ([Chroma's context-rot report](https://www.trychroma.com/research/context-rot), several 2026 papers) shows LLM output quality degrades as input context grows — even far below the window limit. But that research runs synthetic tasks in lab conditions. Your degradation point depends on *your* projects, *your* MCP setup, *your* model, *your* prompting style.
+contextrot measures it where it actually matters: in your own sessions.
+## How it works
+Agent CLIs like Claude Code log every session to local JSONL transcripts. Each step in those transcripts carries token accounting *and* behavioral evidence:
+- **edit failures** — the agent tried to edit code and missed
+- **retry loops** — the same tool call repeated after an error
+- **re-reads** — the agent re-reading files it already read (it lost track)
+- **self-corrections** — "I apologize, let me fix that"
+- **tool errors** — any failed tool call
+contextrot extracts these signals per step, computes context fill at that moment, and correlates the two — with Wilson 95% confidence intervals, per-signal breakdowns, and honest n-counts. Then it estimates what degraded steps cost you and emits prescriptions quantified from your own data.
+Full method: [docs/methodology.md](docs/methodology.md).
+## What contextrot is not
+Be suspicious of any tool that won't tell you this, so:
+- **Not a spend meter.** [ccusage](https://github.com/ryoppippi/ccusage) is excellent at "how much did I spend" — use it, it's complementary. contextrot answers "where does my agent *degrade* and why."
+- **Not Claude Code's `/context`.** That's a point-in-time composition snapshot. contextrot correlates fill with *outcomes* across your whole history.
+- **Not an observability platform.** Langfuse/Phoenix/MLflow instrument apps you build. contextrot needs zero instrumentation and analyzes the agent you *use*.
+- **Not a controlled experiment.** It's an observational diagnostic on your own data, with the statistical caveats printed right on the report.
+## Install & use
+```bash
+uvx contextrot            # zero-install run
+# or
+pip install contextrot
+```
+```bash
+contextrot                        # full report, last 30 days
+contextrot --days 90              # widen the range
+contextrot -p myproject           # one project only
+contextrot --html report.html     # shareable single-file report (still local)
+contextrot --json                 # machine-readable
+contextrot sessions               # list what was parsed
+```
+Supported agents: **Claude Code** (today). Codex CLI, OpenCode, Gemini CLI, and OpenTelemetry GenAI spans are next — an adapter is one small file, and [writing one is the paved first-contribution path](CONTRIBUTING.md).
+## Privacy
+contextrot makes **zero network calls**. It reads local transcript files, prints to your terminal, and optionally writes a local HTML file. Nothing leaves your machine. Grep the codebase for `http` — you won't find a client.
+## Roadmap
+- `contextrot fix` — apply prescriptions interactively (disable unused MCP servers, trim CLAUDE.md) with before/after measurement
+- More agent adapters + OTel ingestion
+- Opt-in, anonymized aggregate stats → the **State of Context Rot** report: real-workload degradation curves across the community (off by default, documented schema, aggregate-only)
+## Contributing
+See [CONTRIBUTING.md](CONTRIBUTING.md). The most valuable first PR: an adapter for the agent CLI you use.
+## License
+[MIT](LICENSE)

contextrot-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,102 @@
+# contextrot
+**Your coding agent gets worse as its context fills. contextrot proves it on your own sessions — and tells you exactly what to change.**
+[![CI](https://github.com/Priyanshu-byte-coder/contextrot/actions/workflows/ci.yml/badge.svg)](https://github.com/Priyanshu-byte-coder/contextrot/actions/workflows/ci.yml)
+[![PyPI](https://img.shields.io/pypi/v/contextrot)](https://pypi.org/project/contextrot/)
+[![Python](https://img.shields.io/pypi/pyversions/contextrot)](https://pypi.org/project/contextrot/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
+```
+uvx contextrot
+```
+No config. No API keys. No uploads. contextrot reads the agent transcripts already sitting on your disk and answers a question no other tool answers:
+> **At what context fill does *my* agent start failing, what's causing it, and what is it costing me?**
+```
+╭──────────────── contextrot — your context rot report ────────────────╮
+│                                                                    │
+│  Deep-context failure rate: 31.4% vs 14.9% in fresh context        │
+│  (2.1×, statistically separated)                                   │
+│  Your degradation threshold: ~60% context fill                     │
+│  Est. spend on degraded steps: $23.40 of $148.02 total             │
+│                                                                    │
+╰────────────────────────────────────────────────────────────────────╯
+           Failure-signal rate by context fill
+   Fill    Rate                                            n   95% CI
+  0–10%     9%  ████████                                 214   6%–13%
+ 10–20%    12%  ███████████                              308   9%–16%
+ 20–30%    14%  █████████████                            257  10%–18%
+ ...
+ 60–70%    29%  ███████████████████████████              121  22%–37%
+ 70–80%    34%  ████████████████████████████████          87  25%–44%
+```
+## What "context rot" is — and why a benchmark can't tell you
+Research ([Chroma's context-rot report](https://www.trychroma.com/research/context-rot), several 2026 papers) shows LLM output quality degrades as input context grows — even far below the window limit. But that research runs synthetic tasks in lab conditions. Your degradation point depends on *your* projects, *your* MCP setup, *your* model, *your* prompting style.
+contextrot measures it where it actually matters: in your own sessions.
+## How it works
+Agent CLIs like Claude Code log every session to local JSONL transcripts. Each step in those transcripts carries token accounting *and* behavioral evidence:
+- **edit failures** — the agent tried to edit code and missed
+- **retry loops** — the same tool call repeated after an error
+- **re-reads** — the agent re-reading files it already read (it lost track)
+- **self-corrections** — "I apologize, let me fix that"
+- **tool errors** — any failed tool call
+contextrot extracts these signals per step, computes context fill at that moment, and correlates the two — with Wilson 95% confidence intervals, per-signal breakdowns, and honest n-counts. Then it estimates what degraded steps cost you and emits prescriptions quantified from your own data.
+Full method: [docs/methodology.md](docs/methodology.md).
+## What contextrot is not
+Be suspicious of any tool that won't tell you this, so:
+- **Not a spend meter.** [ccusage](https://github.com/ryoppippi/ccusage) is excellent at "how much did I spend" — use it, it's complementary. contextrot answers "where does my agent *degrade* and why."
+- **Not Claude Code's `/context`.** That's a point-in-time composition snapshot. contextrot correlates fill with *outcomes* across your whole history.
+- **Not an observability platform.** Langfuse/Phoenix/MLflow instrument apps you build. contextrot needs zero instrumentation and analyzes the agent you *use*.
+- **Not a controlled experiment.** It's an observational diagnostic on your own data, with the statistical caveats printed right on the report.
+## Install & use
+```bash
+uvx contextrot            # zero-install run
+# or
+pip install contextrot
+```
+```bash
+contextrot                        # full report, last 30 days
+contextrot --days 90              # widen the range
+contextrot -p myproject           # one project only
+contextrot --html report.html     # shareable single-file report (still local)
+contextrot --json                 # machine-readable
+contextrot sessions               # list what was parsed
+```
+Supported agents: **Claude Code** (today). Codex CLI, OpenCode, Gemini CLI, and OpenTelemetry GenAI spans are next — an adapter is one small file, and [writing one is the paved first-contribution path](CONTRIBUTING.md).
+## Privacy
+contextrot makes **zero network calls**. It reads local transcript files, prints to your terminal, and optionally writes a local HTML file. Nothing leaves your machine. Grep the codebase for `http` — you won't find a client.
+## Roadmap
+- `contextrot fix` — apply prescriptions interactively (disable unused MCP servers, trim CLAUDE.md) with before/after measurement
+- More agent adapters + OTel ingestion
+- Opt-in, anonymized aggregate stats → the **State of Context Rot** report: real-workload degradation curves across the community (off by default, documented schema, aggregate-only)
+## Contributing
+See [CONTRIBUTING.md](CONTRIBUTING.md). The most valuable first PR: an adapter for the agent CLI you use.
+## License
+[MIT](LICENSE)

contextrot-0.1.0/docs/methodology.md ADDED Viewed

@@ -0,0 +1,53 @@
+# Methodology
+This page documents exactly how contextrot computes what it shows, including the limitations. If you're evaluating whether to trust the numbers, read this end to end — it's short.
+## Data source
+contextrot reads agent transcripts already on your disk (for Claude Code: `~/.claude/projects/<project>/<session>.jsonl`). Each transcript records every model API call with token accounting, every tool invocation, and every tool result. Nothing is instrumented and nothing is uploaded; analysis is a pure local read.
+Sub-agent ("sidechain") traffic runs in its own context window, so it is excluded from the main analysis and counted separately. Sessions with fewer than 3 steps are skipped.
+## Context fill
+For each model call ("step"), context fill is the prompt-side token count — `input_tokens + cache_read_input_tokens + cache_creation_input_tokens` — divided by the model's context window (200k default, `--window` to override). This is the exact size of what the model had to read at that moment, taken from the agent's own accounting, not an estimate.
+## Outcome signals
+Five per-step signals, each an independent heuristic, each reported separately as well as combined:
+| Signal | Definition | Rationale |
+|---|---|---|
+| `tool_error` | any tool call in the step returned an error | direct failure evidence |
+| `edit_failure` | an editing tool (Edit/Write/MultiEdit/...) returned an error | for coding agents, the clearest "model lost track of file state" event |
+| `retry` | the step repeats a (tool, target) pair that errored within the previous 6 steps | rework: paying twice for the same action |
+| `reread` | the step re-Reads a file already read earlier in the session | proxy for content scrolled out of effective attention |
+| `self_correction` | assistant text matches apology/correction phrases ("I apologize", "my mistake", "let me fix that", ...) | linguistic marker of a recognized error |
+A step is **degraded** if any signal fired. Signals are deliberately simple and inspectable — every one can be verified by grepping your own transcript. Known noise sources: `reread` can be legitimate (file changed on disk); `self_correction` matches politeness patterns imperfectly. This is why per-signal counts are always shown: a conclusion driven by one noisy signal is visible as such.
+## The rot curve
+Steps are bucketed by fill percentage (10-point buckets). Per bucket, contextrot reports the degraded-step rate with a **Wilson 95% score interval** (chosen over normal approximation because bucket counts can be small and rates sit near 0). Buckets with fewer than 15 steps are flagged low-confidence.
+Two summary zones: **fresh** (< 40% fill) and **deep** (≥ 60%). The headline ratio is deep rate / fresh rate; it is labeled *statistically separated* only when the two zones' Wilson intervals don't overlap — a conservative test.
+The **degradation threshold (knee)** is the start of the first non-low-confidence bucket at ≥ 40% fill whose rate reaches 1.5× the fresh-zone rate. If no bucket qualifies, no knee is reported — a flat curve is a valid result and contextrot will happily tell you your setup shows no measurable rot.
+## Cost figures
+Per-step cost uses published API list prices per model (input, output, cache read, cache write). For subscription users this is the *API-equivalent value*, not a bill. "Spend on degraded steps" sums the cost of steps where a failure signal fired — a lower bound on rework cost, since it excludes the follow-up work those failures caused. Unknown models fall back to conservative defaults and are marked estimated.
+## Composition estimate
+Startup overhead is the prompt size of each session's *first* API call (system prompt + tool schemas + project instructions — everything loaded before your first word), averaged per session and exact from token accounting. Tool-output and conversation figures use a 4-characters-per-token heuristic and are labeled estimates. These figures measure flow through the context, so with compaction they can exceed the window size; that flow is precisely what fills the window.
+## What this is not
+- **Not causal.** contextrot measures association between context fill and failure signals in observational data. Deep-context steps also tend to be later in harder tasks; some of the association is task difficulty, not rot. The report never claims otherwise.
+- **Not a benchmark.** Results describe *your* sessions with *your* configuration. They will differ from lab results ([Chroma's context-rot report](https://www.trychroma.com/research/context-rot)) and from other users — that's the point.
+- **Not ground truth on quality.** Signals are proxies with false positives and negatives. They are useful because they are consistent proxies: the same heuristics applied at every fill level, so *differences across fill levels* are meaningful even when absolute rates are noisy.
+## Reproducibility
+`contextrot --json` emits every per-step signal record and per-bucket statistic, so any number in the report can be recomputed independently.

contextrot-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,70 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "contextrot"
+version = "0.1.0"
+description = "Find out where your coding agent starts degrading. Personal context-rot analytics from your own agent sessions."
+readme = "README.md"
+license = { text = "MIT" }
+requires-python = ">=3.11"
+authors = [{ name = "Priyanshu Doshi" }]
+keywords = [
+    "llm",
+    "claude-code",
+    "context-engineering",
+    "context-rot",
+    "agents",
+    "developer-tools",
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Software Development :: Debuggers",
+    "Topic :: Software Development :: Quality Assurance",
+]
+dependencies = [
+    "typer>=0.12",
+    "rich>=13.7",
+    "jinja2>=3.1",
+]
+[project.urls]
+Homepage = "https://github.com/Priyanshu-byte-coder/contextrot"
+Repository = "https://github.com/Priyanshu-byte-coder/contextrot"
+Issues = "https://github.com/Priyanshu-byte-coder/contextrot/issues"
+[project.scripts]
+contextrot = "contextrot.cli:app"
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0",
+    "ruff>=0.6",
+    "mypy>=1.10",
+]
+[tool.hatch.build.targets.wheel]
+packages = ["src/contextrot"]
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP", "B", "SIM"]
+[tool.mypy]
+python_version = "3.11"
+ignore_missing_imports = true
+check_untyped_defs = true
+[tool.pytest.ini_options]
+testpaths = ["tests"]

contextrot-0.1.0/src/contextrot/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""contextrot — personal context-rot analytics for coding agents."""
+__version__ = "0.1.0"

contextrot-0.1.0/src/contextrot/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""Adapter registry."""
+from __future__ import annotations
+from contextrot.adapters.base import SessionAdapter
+from contextrot.adapters.claude_code import ClaudeCodeAdapter
+ADAPTERS: dict[str, SessionAdapter] = {
+    ClaudeCodeAdapter.name: ClaudeCodeAdapter(),
+}
+__all__ = ["ADAPTERS", "SessionAdapter", "ClaudeCodeAdapter"]