interlens 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. interlens-0.1.0/.github/workflows/publish.yml +61 -0
  2. interlens-0.1.0/.github/workflows/tests.yml +37 -0
  3. interlens-0.1.0/.gitignore +14 -0
  4. interlens-0.1.0/LICENSE +21 -0
  5. interlens-0.1.0/PKG-INFO +74 -0
  6. interlens-0.1.0/README.md +55 -0
  7. interlens-0.1.0/docs/examples/01_quickstart.md +56 -0
  8. interlens-0.1.0/docs/examples/02_conversations.md +117 -0
  9. interlens-0.1.0/docs/examples/03_participants_and_models.md +90 -0
  10. interlens-0.1.0/docs/examples/04_context_and_serialization.md +73 -0
  11. interlens-0.1.0/docs/examples/05_tools.md +76 -0
  12. interlens-0.1.0/docs/examples/06_hooks.md +57 -0
  13. interlens-0.1.0/docs/examples/07_interp.md +94 -0
  14. interlens-0.1.0/docs/examples/08_rollouts_and_scale.md +98 -0
  15. interlens-0.1.0/docs/examples/09_advanced_interp_pipelines.md +114 -0
  16. interlens-0.1.0/docs/examples/README.md +36 -0
  17. interlens-0.1.0/pyproject.toml +42 -0
  18. interlens-0.1.0/src/interlens/__init__.py +85 -0
  19. interlens-0.1.0/src/interlens/context/__init__.py +23 -0
  20. interlens-0.1.0/src/interlens/context/context_policy.py +46 -0
  21. interlens-0.1.0/src/interlens/context/drop_oldest_policy.py +32 -0
  22. interlens-0.1.0/src/interlens/context/error_policy.py +22 -0
  23. interlens-0.1.0/src/interlens/context/sliding_window_policy.py +33 -0
  24. interlens-0.1.0/src/interlens/context/summarize_policy.py +47 -0
  25. interlens-0.1.0/src/interlens/context_item.py +23 -0
  26. interlens-0.1.0/src/interlens/conversation.py +393 -0
  27. interlens-0.1.0/src/interlens/execution_mode.py +23 -0
  28. interlens-0.1.0/src/interlens/factories.py +120 -0
  29. interlens-0.1.0/src/interlens/hooks/__init__.py +3 -0
  30. interlens-0.1.0/src/interlens/hooks/message_hook.py +49 -0
  31. interlens-0.1.0/src/interlens/interp/__init__.py +29 -0
  32. interlens-0.1.0/src/interlens/interp/activation_cache.py +132 -0
  33. interlens-0.1.0/src/interlens/interp/capture.py +85 -0
  34. interlens-0.1.0/src/interlens/interp/layers.py +23 -0
  35. interlens-0.1.0/src/interlens/interp/logprobs.py +30 -0
  36. interlens-0.1.0/src/interlens/interp/patching.py +50 -0
  37. interlens-0.1.0/src/interlens/interp/steering.py +62 -0
  38. interlens-0.1.0/src/interlens/loading/__init__.py +28 -0
  39. interlens-0.1.0/src/interlens/loading/load.py +69 -0
  40. interlens-0.1.0/src/interlens/loading/model_cache.py +38 -0
  41. interlens-0.1.0/src/interlens/loading/registry.py +106 -0
  42. interlens-0.1.0/src/interlens/message.py +24 -0
  43. interlens-0.1.0/src/interlens/participant/__init__.py +4 -0
  44. interlens-0.1.0/src/interlens/participant/config/__init__.py +10 -0
  45. interlens-0.1.0/src/interlens/participant/config/api_participant_config.py +46 -0
  46. interlens-0.1.0/src/interlens/participant/config/model_participant_config.py +122 -0
  47. interlens-0.1.0/src/interlens/participant/config/participant_config.py +79 -0
  48. interlens-0.1.0/src/interlens/participant/participant.py +105 -0
  49. interlens-0.1.0/src/interlens/participant/participants/api_client.py +54 -0
  50. interlens-0.1.0/src/interlens/participant/participants/api_participant.py +80 -0
  51. interlens-0.1.0/src/interlens/participant/participants/gemma.py +68 -0
  52. interlens-0.1.0/src/interlens/participant/participants/model_participant.py +404 -0
  53. interlens-0.1.0/src/interlens/participant/participants/qwen.py +4 -0
  54. interlens-0.1.0/src/interlens/participant/role.py +3 -0
  55. interlens-0.1.0/src/interlens/reasoning_visibility.py +24 -0
  56. interlens-0.1.0/src/interlens/runner/__init__.py +19 -0
  57. interlens-0.1.0/src/interlens/runner/analyzer_registry.py +23 -0
  58. interlens-0.1.0/src/interlens/runner/batched.py +49 -0
  59. interlens-0.1.0/src/interlens/runner/devices.py +15 -0
  60. interlens-0.1.0/src/interlens/runner/pool.py +160 -0
  61. interlens-0.1.0/src/interlens/runner/rollout.py +40 -0
  62. interlens-0.1.0/src/interlens/runner/spec.py +26 -0
  63. interlens-0.1.0/src/interlens/runner/worker_init.py +18 -0
  64. interlens-0.1.0/src/interlens/stop/__init__.py +16 -0
  65. interlens-0.1.0/src/interlens/stop/conditions.py +74 -0
  66. interlens-0.1.0/src/interlens/stop/stop_condition.py +41 -0
  67. interlens-0.1.0/src/interlens/template.py +87 -0
  68. interlens-0.1.0/src/interlens/tools/__init__.py +5 -0
  69. interlens-0.1.0/src/interlens/tools/registry.py +33 -0
  70. interlens-0.1.0/src/interlens/tools/tool.py +28 -0
  71. interlens-0.1.0/src/interlens/tools/tool_call.py +26 -0
  72. interlens-0.1.0/src/interlens/transcript.py +179 -0
  73. interlens-0.1.0/src/interlens/view.py +31 -0
  74. interlens-0.1.0/tests/__init__.py +0 -0
  75. interlens-0.1.0/tests/api_validate.py +105 -0
  76. interlens-0.1.0/tests/cluster_validate.py +200 -0
  77. interlens-0.1.0/tests/cluster_validate.sbatch +29 -0
  78. interlens-0.1.0/tests/conftest.py +53 -0
  79. interlens-0.1.0/tests/install_flash_attn.sbatch +21 -0
  80. interlens-0.1.0/tests/profile_pipeline.py +153 -0
  81. interlens-0.1.0/tests/profile_pipeline.sbatch +23 -0
  82. interlens-0.1.0/tests/test_conversation.py +93 -0
  83. interlens-0.1.0/tests/test_family_flags.py +58 -0
  84. interlens-0.1.0/tests/test_hooks.py +40 -0
  85. interlens-0.1.0/tests/test_interp.py +49 -0
  86. interlens-0.1.0/tests/test_models_slow.py +139 -0
  87. interlens-0.1.0/tests/test_runner.py +76 -0
  88. interlens-0.1.0/tests/test_serialization.py +74 -0
  89. interlens-0.1.0/tests/test_stop_conditions.py +46 -0
  90. interlens-0.1.0/tests/test_tools.py +98 -0
  91. interlens-0.1.0/tests/test_view_pipeline.py +67 -0
@@ -0,0 +1,61 @@
1
+ name: publish
2
+
3
+ # Every push to main cuts a new release: bump the patch version, create+push a `vX.Y.Z` tag (the version is
4
+ # derived from it via hatch-vcs), build, publish to PyPI via Trusted Publishing (OIDC — no token), and cut a
5
+ # GitHub Release. The tag is the source of truth; nothing is committed back, so there's no bump-loop.
6
+ #
7
+ # One-time PyPI setup (https://pypi.org/manage/account/publishing/): add a pending publisher for
8
+ # project: interlens · owner: Sid-MB · repo: interlens · workflow: publish.yml · environment: pypi
9
+ on:
10
+ push:
11
+ branches: [main]
12
+
13
+ # Serialize releases so two quick pushes can't race to claim the same version.
14
+ concurrency:
15
+ group: publish
16
+ cancel-in-progress: false
17
+
18
+ jobs:
19
+ release:
20
+ runs-on: ubuntu-latest
21
+ environment: pypi
22
+ permissions:
23
+ contents: write # create + push the tag and the GitHub Release
24
+ id-token: write # PyPI Trusted Publishing (OIDC)
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+ with:
28
+ fetch-depth: 0 # full history + tags so hatch-vcs / the bump can see the latest tag
29
+
30
+ - uses: actions/setup-python@v5
31
+ with:
32
+ python-version: "3.12"
33
+
34
+ - name: Compute next version and tag it
35
+ id: bump
36
+ run: |
37
+ git fetch --tags --force
38
+ latest=$(git tag --list 'v*' --sort=-v:refname | head -n1)
39
+ if [ -z "$latest" ]; then
40
+ next="0.1.0"
41
+ else
42
+ v=${latest#v}; IFS=. read -r a b c <<<"$v"; next="$a.$b.$((c + 1))"
43
+ fi
44
+ echo "version=$next" >> "$GITHUB_OUTPUT"
45
+ git config user.name "github-actions[bot]"
46
+ git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
47
+ git tag "v$next"
48
+ git push origin "v$next"
49
+
50
+ - name: Build sdist + wheel
51
+ run: |
52
+ python -m pip install --upgrade build
53
+ python -m build
54
+
55
+ - name: Publish to PyPI
56
+ uses: pypa/gh-action-pypi-publish@release/v1
57
+
58
+ - name: Create GitHub Release
59
+ env:
60
+ GH_TOKEN: ${{ github.token }}
61
+ run: gh release create "v${{ steps.bump.outputs.version }}" dist/* --generate-notes
@@ -0,0 +1,37 @@
1
+ name: tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ concurrency:
9
+ group: tests-${{ github.ref }}
10
+ cancel-in-progress: true
11
+
12
+ jobs:
13
+ quick-tests:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ python-version: ["3.11", "3.12"]
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ with:
22
+ fetch-depth: 0 # tags available so hatch-vcs can resolve a version during the editable install
23
+
24
+ - uses: actions/setup-python@v5
25
+ with:
26
+ python-version: ${{ matrix.python-version }}
27
+ cache: pip
28
+
29
+ - name: Install (CPU torch + package + dev deps)
30
+ run: |
31
+ python -m pip install --upgrade pip
32
+ # CPU torch keeps CI light — the quick tests never touch a GPU or download model weights.
33
+ pip install torch --index-url https://download.pytorch.org/whl/cpu
34
+ pip install -e ".[dev]"
35
+
36
+ - name: Run quick tests
37
+ run: pytest -m "not slow" -q
@@ -0,0 +1,14 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .eggs/
5
+ build/
6
+ dist/
7
+ .pytest_cache/
8
+ .venv/
9
+ venv/
10
+ .env
11
+ results/
12
+ logs/
13
+ *.log
14
+ .DS_Store
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Siddharth M. Bhatia
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,74 @@
1
+ Metadata-Version: 2.4
2
+ Name: interlens
3
+ Version: 0.1.0
4
+ Summary: A framework for efficiently scaffolding and interpreting multi-agent conversations (activation capture, steering, patching).
5
+ Project-URL: Homepage, https://github.com/Sid-MB/interlens
6
+ Project-URL: Repository, https://github.com/Sid-MB/interlens
7
+ Author: Siddharth M. Bhatia
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: activation-steering,interpretability,llm,multi-agent,transformers
11
+ Requires-Python: <3.14,>=3.11
12
+ Requires-Dist: torch
13
+ Requires-Dist: transformers<5,>=4.57
14
+ Provides-Extra: api
15
+ Requires-Dist: anthropic<1,>=0.112; extra == 'api'
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest>=9; extra == 'dev'
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Interlens: Framework for Multi-Agent Conversation and Interpretability
21
+
22
+ This library provides a harness, optimized utilities, and interpretability hooks for multi-agent conversation rollouts.
23
+
24
+ A harness for **multi-agent (model↔model) conversations** with **first-class interpretability** — activation capture, steering, activation patching, and token logprobs, all hooked into the *same* generation path as real turns and tagged to conversation structure. Scales from one interactive dialogue to thousands of checkpointed, multi-GPU rollouts.
25
+
26
+ ```python
27
+ from interlens import Conversation
28
+
29
+ conv = Conversation.from_models(
30
+ ("qwen2.5-0.5b", "qwen2.5-0.5b"), names=("alice", "bob"),
31
+ shared_context="Let's debate: is cereal a soup?",
32
+ )
33
+ conv.run(turns=4, first="alice")
34
+ print(conv.transcript)
35
+ ```
36
+
37
+ See [`docs/examples`](docs/examples) for sample code.
38
+
39
+ ## Install
40
+
41
+ ```bash
42
+ pip install "git+https://github.com/Sid-MB/interlens"
43
+ # with the Claude-backed APIParticipant:
44
+ pip install "interlens[api] @ git+https://github.com/Sid-MB/interlens"
45
+ ```
46
+
47
+ ### PyTorch / CUDA note
48
+ `torch` is declared as a plain, build-agnostic dependency — install the wheel matching **your** platform (CUDA / CPU / MPS) *before or alongside* `interlens`. E.g. for CUDA 13.0:
49
+ ```bash
50
+ pip install torch --index-url https://download.pytorch.org/whl/cu130
51
+ ```
52
+ See <https://pytorch.org/get-started/locally/>.
53
+
54
+ ## What's inside
55
+
56
+ - **`Conversation`** — turn-taking over a shared, perspective-neutral `Transcript`; per-speaker view pipeline (system/private framing → context-fit → family-correct chat template).
57
+ - **`AutoModelParticipant`** — HF-style factory (`from_pretrained` / `from_model` / `from_`) that returns the family-correct participant (Qwen/Gemma/…); **`APIParticipant`** for hosted models.
58
+ - **Interpretability** — `conv.capture(...)`, `SteeringSpec`, `Patch`, `token_logprobs`, backed by a queryable `ActivationCache`.
59
+ - **Scale** — `rollout` / `run_conversations`: multi-GPU, checkpointed, resumable, batched co-stepping, with in-worker `analyze` callbacks.
60
+ - **Serialization** — `ConversationTemplate` (recipe) and full save/load (template + transcript).
61
+
62
+ See [`docs/examples/`](docs/examples/) for a simple→advanced walkthrough of the whole API.
63
+
64
+ ## Develop
65
+
66
+ ```bash
67
+ git clone https://github.com/Sid-MB/interlens && cd interlens
68
+ pip install -e ".[dev]"
69
+ pytest # fast tests; real-model tests are opt-in: pytest -m slow
70
+ ```
71
+
72
+ ## License
73
+
74
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,55 @@
1
+ # Interlens: Framework for Multi-Agent Conversation and Interpretability
2
+
3
+ This library provides a harness, optimized utilities, and interpretability hooks for multi-agent conversation rollouts.
4
+
5
+ Interlens is a harness for **multi-agent (model↔model) conversations** with **first-class interpretability** — activation capture, steering, activation patching, and token logprobs, all hooked into the *same* generation path as real turns and tagged to conversation structure. It scales from one interactive dialogue to thousands of checkpointed, multi-GPU rollouts.
6
+
7
+ ```python
8
+ from interlens import Conversation
9
+
10
+ conv = Conversation.from_models(
11
+ ("qwen2.5-0.5b", "qwen2.5-0.5b"), names=("alice", "bob"),
12
+ shared_context="Let's debate: is cereal a soup?",
13
+ )
14
+ conv.run(turns=4, first="alice")
15
+ print(conv.transcript)
16
+ ```
17
+
18
+ See [`docs/examples`](docs/examples) for sample code.
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ pip install "git+https://github.com/Sid-MB/interlens"
24
+ # with the Claude-backed APIParticipant:
25
+ pip install "interlens[api] @ git+https://github.com/Sid-MB/interlens"
26
+ ```
27
+
28
+ ### PyTorch / CUDA note
29
+ `torch` is declared as a plain, build-agnostic dependency — install the wheel matching **your** platform (CUDA / CPU / MPS) *before or alongside* `interlens`. E.g. for CUDA 13.0:
30
+ ```bash
31
+ pip install torch --index-url https://download.pytorch.org/whl/cu130
32
+ ```
33
+ See <https://pytorch.org/get-started/locally/>.
34
+
35
+ ## What's inside
36
+
37
+ - **`Conversation`** — turn-taking over a shared, perspective-neutral `Transcript`; per-speaker view pipeline (system/private framing → context-fit → family-correct chat template).
38
+ - **`AutoModelParticipant`** — HF-style factory (`from_pretrained` / `from_model` / `from_`) that returns the family-correct participant (Qwen/Gemma/…); **`APIParticipant`** for hosted models.
39
+ - **Interpretability** — `conv.capture(...)`, `SteeringSpec`, `Patch`, `token_logprobs`, backed by a queryable `ActivationCache`.
40
+ - **Scale** — `rollout` / `run_conversations`: multi-GPU, checkpointed, resumable, batched co-stepping, with in-worker `analyze` callbacks.
41
+ - **Serialization** — `ConversationTemplate` (recipe) and full save/load (template + transcript).
42
+
43
+ See [`docs/examples/`](docs/examples/) for a simple→advanced walkthrough of the whole API.
44
+
45
+ ## Develop
46
+
47
+ ```bash
48
+ git clone https://github.com/Sid-MB/interlens && cd interlens
49
+ pip install -e ".[dev]"
50
+ pytest # fast tests; real-model tests are opt-in: pytest -m slow
51
+ ```
52
+
53
+ ## License
54
+
55
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,56 @@
1
+ <!-- [interp-refactor] session f80ef917 -->
2
+ # 01 · Quickstart
3
+
4
+ The fastest path: `Conversation.from_models` scaffolds a two-party conversation from a tuple of models — each a short name from the [registry](../../src/interlens/loading/registry.py), a raw HF id, or an already-loaded model (`ModelLike`). If two ids are identical, the weights are loaded **once** and shared between the two speakers.
5
+
6
+ ```python
7
+ from interlens import Conversation
8
+
9
+ # Two speakers backed by the same 0.5B model (one weight load, shared).
10
+ conv = Conversation.from_models(
11
+ ("qwen2.5-0.5b", "qwen2.5-0.5b"),
12
+ names=("alice", "bob"),
13
+ device="cuda", # "cpu" / "mps" also work for a smoke test
14
+ temperature=0.8, # **gen_kwargs are forwarded to both participants
15
+ max_new_tokens=128,
16
+ shared_context="Let's debate: is cereal a soup?", # opening framing (see below)
17
+ )
18
+ conv.run(turns=4, first="alice")
19
+
20
+ for m in conv.transcript:
21
+ print(f"{m.author}: {m.content}\n")
22
+
23
+ # ...or, for quick debugging, dump the whole transcript at once:
24
+ print(conv.transcript) # [i] author: content (also conv.transcript.pretty())
25
+ print(conv.transcript.pretty(metadata=True)) # include per-turn metadata (reasoning, tool trail, token counts)
26
+
27
+ # See exactly what one model is conditioned on — role-swapped to its POV, WITH chat-template special tokens:
28
+ print(conv.transcript.render_templated(pov=conv.by_name["alice"])) # tokenize=True returns ids instead
29
+ ```
30
+
31
+ ## What just happened
32
+
33
+ - **`shared_context=...`** seeds the opening without touching the transcript: it's a neutral, **moderator**-voiced turn everyone sees (scenario/topic framing). Pair it with **`shared_system_prompt=...`** for system-role instructions.
34
+ - **`prompt=...`** (on `from_models` and `run`) is the alternative when the opener should read as something a *speaker* said: a `str` is attributed to the **last** participant (so the `first` speaker replies to it), a `Message` sets the author explicitly. Use `shared_context` for neutral framing, `prompt` for a participant-voiced line.
35
+ - **`conv.run(turns=4, first="alice")`** alternates speakers for 4 turns starting with alice. `first` accepts a **name** (`"alice"`), an **index** (`0`), or a **`Participant`** object. `run` requires at least one of `turns=` or `until=` (a [stop condition](02_conversations.md#stopping)).
36
+ - **`conv.transcript`** is the shared state — a list of `Message`s (`.author`, `.content`, `.metadata`). You can still append to it directly for finer control.
37
+ - **`conv.by_name["alice"]`** looks a participant up by name.
38
+
39
+ ## Two *different* models
40
+
41
+ ```python
42
+ conv = Conversation.from_models(("qwen2.5-3b", "gemma2-2b"), names=("q", "g"), device="cuda")
43
+ ```
44
+
45
+ Each id resolves to its family-correct participant class automatically (Qwen vs Gemma chat templates, tool formats, system-role handling) via the registry — see [03](03_participants_and_models.md).
46
+
47
+ ## One-off generation without committing
48
+
49
+ To sample a reply **without** mutating the transcript (safe to call in a loop):
50
+
51
+ ```python
52
+ msg = conv.sample("alice", "Quick — name a color.") # returns a Message; transcript unchanged
53
+ print(msg.content)
54
+ ```
55
+
56
+ Next: [building conversations by hand](02_conversations.md) for per-speaker system prompts, moderators, and stop conditions.
@@ -0,0 +1,117 @@
1
+ <!-- [interp-refactor] session f80ef917 -->
2
+ # 02 · Conversations in depth
3
+
4
+ `Conversation.from_models` is a convenience wrapper. Build a `Conversation` by hand when you want **per-speaker framing** (different system prompts / private context), a custom moderator, policies, or hooks.
5
+
6
+ ## Build participants and a conversation manually
7
+
8
+ `AutoModelParticipant.from_pretrained(...)` is the HF-style loader (the participant analog of `AutoModelForCausalLM.from_pretrained`): it loads the model by id and returns the family-correct participant instance. Loading the same id twice shares one model object (weights are process-cached), so both speakers below share weights.
9
+
10
+ ```python
11
+ from interlens import Conversation, AutoModelParticipant
12
+
13
+ alice = AutoModelParticipant.from_pretrained(
14
+ "qwen2.5-3b", name="alice", device="cuda",
15
+ system_prompt="You are a concise, skeptical debater. Keep replies under 3 sentences.",
16
+ temperature=0.7, max_new_tokens=200,
17
+ )
18
+ bob = AutoModelParticipant.from_pretrained(
19
+ "qwen2.5-3b", name="bob", device="cuda", # same id → shares alice's weights (cached)
20
+ system_prompt="You are an enthusiastic optimist who loves analogies.",
21
+ temperature=0.9, max_new_tokens=200,
22
+ )
23
+
24
+ conv = Conversation(
25
+ participants=(alice, bob),
26
+ shared_context="Topic: should cities ban cars downtown? Debate it.", # seeded as a moderator turn
27
+ shared_system_prompt="Stay respectful and on-topic.", # prepended to every speaker's system block
28
+ reasoning_visibility="strip", # see below
29
+ )
30
+ conv.run(turns=6)
31
+ ```
32
+
33
+ ### Framing ownership (who sees what)
34
+
35
+ - **Shared** framing lives on the `Conversation`: `shared_context` (injected once as a `moderator` turn everyone sees) and `shared_system_prompt` (merged into every speaker's system block).
36
+ - **Private** framing lives on each participant: `system_prompt` and `private_context` (a tuple of `ContextItem`) — invisible to the other speaker and to the transcript.
37
+
38
+ ```python
39
+ from interlens import ContextItem, AutoModelParticipant
40
+ spy = AutoModelParticipant.from_pretrained(
41
+ "qwen2.5-3b", name="spy",
42
+ system_prompt="Secretly steer the topic toward trains.",
43
+ private_context=(ContextItem("Remember: never admit you have an agenda.", role_hint="user", author="handler"),),
44
+ )
45
+ ```
46
+
47
+ ## Turn-taking
48
+
49
+ - **`conv.step(speaker)`** — one turn by a specific speaker, committed to the transcript; returns the `Message` (or `None` if a [hook](06_hooks.md) denied it).
50
+ - **`conv.run(turns=N, until=..., first=...)`** — alternate speakers round-robin. `first` sets who starts.
51
+
52
+ ```python
53
+ conv.step(alice) # drive turns explicitly
54
+ conv.step(bob)
55
+ conv.run(turns=4, first=alice) # or in bulk
56
+ ```
57
+
58
+ ## Stopping
59
+
60
+ `until=` takes a single `StopCondition` or a list (any of which stops). Whichever of `turns`/`until` hits first ends the run.
61
+
62
+ ```python
63
+ from interlens import (
64
+ TurnStopCondition, TokenStopCondition, ElapsedTimeStopCondition, StopStringCondition,
65
+ )
66
+
67
+ conv.run(until=[
68
+ TurnStopCondition(max_turns=20), # cap turns
69
+ TokenStopCondition(max_tokens=4000), # cap cumulative generated tokens
70
+ ElapsedTimeStopCondition(seconds=120), # wall-clock budget
71
+ StopStringCondition(["I concede", "AGREED"]), # stop when a turn contains any string
72
+ ])
73
+ ```
74
+
75
+ ## Branching — fork a conversation for free
76
+
77
+ `branch()` forks into a new `Conversation` that **reuses the same participant objects** (shared weights, zero extra GPU) with a *copied* transcript. Explore divergent continuations without touching the original — each branch has its own transcript, so they diverge independently:
78
+
79
+ ```python
80
+ base = conv.branch()
81
+ for _ in range(3):
82
+ b = base.branch()
83
+ b.run(turns=2, first="alice") # first accepts a name, index, or Participant
84
+ print("continuation:", b.transcript[-1].content[:80])
85
+ # `conv` and `base` are untouched; only each `b` advanced.
86
+ ```
87
+
88
+ > Because branches **share the participant objects**, mutating a participant (e.g. `b.by_name["alice"].temperature = 0.3`) changes it for *every* branch and the original. To vary generation settings per branch, set them right before you run, or pin `seed` and vary only the prompt/intervention — see [09](09_advanced_interp_pipelines.md).
89
+
90
+ ## Ephemeral sampling — read state without mutating it
91
+
92
+ `sample()` generates a reply to an optional temporary message **without committing anything** — ideal for probing "what would X say now?" repeatedly.
93
+
94
+ ```python
95
+ for q in ["Summarize your position.", "What's your strongest objection?"]:
96
+ print(conv.sample("alice", q, as_author="interviewer").content)
97
+ # conv.transcript is unchanged after all of this
98
+ ```
99
+
100
+ `sample` and `step` accept the same interpretability options (`steering=`, `capture=`, `patch=`, `return_logprobs=`) — see [07](07_interp.md).
101
+
102
+ ## Reasoning visibility (CoT models)
103
+
104
+ For models that emit `<think>…</think>`, `reasoning_visibility` controls whether a prior turn's parsed reasoning is re-injected into speakers' views (their own and/or other speakers'):
105
+
106
+ - `"strip"` (default) — reasoning never leaks into anyone's view.
107
+ - `"self_retain"` — a speaker sees only its *own* past reasoning.
108
+ - `"shared"` — everyone sees everyone's reasoning.
109
+
110
+ ```python
111
+ from experiments.core.chat import ReasoningVisibility
112
+ conv = Conversation(participants=(alice, bob), reasoning_visibility=ReasoningVisibility.SELF_RETAIN)
113
+ ```
114
+
115
+ The raw completion and parsed reasoning are always stored per message in `msg.metadata["raw_completion"]` / `msg.metadata["parsed_think"]`.
116
+
117
+ Next: [participant & model options](03_participants_and_models.md).
@@ -0,0 +1,90 @@
1
+ <!-- [interp-refactor] session f80ef917 -->
2
+ # 03 · Participants & models
3
+
4
+ ## The model registry
5
+
6
+ All model-keyed data lives in one place: [`src/interlens/loading/registry.py`](../../src/interlens/loading/registry.py). Short names resolve to an HF id + a **generation** (the behavior + tokenizer group); a raw HF id passes through unchanged.
7
+
8
+ ```python
9
+ from interlens import AutoModelParticipant
10
+ from interlens.loading import MODELS, resolve, tokenizer_id
11
+
12
+ list(MODELS) # ['qwen2.5-0.5b', ..., 'gemma2-2b', 'gemma3-4b']
13
+ resolve("gemma3-4b") # ('google/gemma-3-4b-it', 'gemma3')
14
+ tokenizer_id("qwen2.5-3b") # 'qwen2.5' (same generation → tokenizer loaded once)
15
+ AutoModelParticipant.class_for("gemma2-2b") # <class 'GemmaModelParticipant'> (family-correct chat behavior)
16
+ ```
17
+
18
+ (`AutoModelParticipant.class_for` is the public class resolver; `loading.participant_class` is the low-level primitive it delegates to.)
19
+
20
+ **Generation, not vendor, selects behavior.** `gemma2` and `gemma3` have *different* chat templates (Gemma 3 accepts a system role, Gemma 2 folds it into the first user turn), so each generation maps to its own participant class. Adding a model is one line in `MODELS`; adding a generation is one line in `GENERATIONS`. A slow test ([`tests/test_family_flags.py`](../../tests/test_family_flags.py)) verifies the declared chat-template flags against each real tokenizer.
21
+
22
+ ### Load weights directly
23
+
24
+ ```python
25
+ import torch
26
+ from interlens.loading import load_model
27
+ model, tok = load_model("qwen3-8b", device="cuda", dtype=torch.bfloat16, attn="flash_attention_2")
28
+ ```
29
+
30
+ `load_model` shares a process-local cache: identical `(hf_id, device, dtype, attn, quant, revision)` returns the same model object; same-generation models share the tokenizer. Flash-attention is the default with automatic fallback to sdpa/eager; `quant="4bit"`/`"8bit"` is opt-in (perturbs activations → interp fidelity).
31
+
32
+ ## `ModelParticipant` knobs
33
+
34
+ ```python
35
+ from interlens import AutoModelParticipant
36
+ p = AutoModelParticipant.from_pretrained(
37
+ "qwen3-4b", name="p", device="cuda",
38
+ load_kwargs={"attn": "sdpa"}, # optional: forwarded to load_model (dtype/attn/quant/revision)
39
+ temperature=0.8, top_p=0.95, max_new_tokens=512,
40
+ seed=1234, # per-participant RNG seed → reproducible greedy/sampled turns (local models only)
41
+ thinking="auto", # "auto" defers to the template; True/False forces enable_thinking where supported
42
+ system_prompt="…",
43
+ kv_reuse="auto", # cross-turn KV prefix reuse; see below
44
+ )
45
+ ```
46
+
47
+ Already hold weights (e.g. sharing them, or an externally-loaded checkpoint)? Wrap them with `from_model`:
48
+
49
+ ```python
50
+ from interlens.loading import load_model
51
+ model, tok = load_model("qwen3-4b")
52
+ p = AutoModelParticipant.from_model(model, tok, name="p", id_or_name="qwen3-4b", temperature=0.8)
53
+ ```
54
+
55
+ ### `kv_reuse` (cross-turn KV cache)
56
+
57
+ `"auto"` (default) reuses the KV cache across a speaker's own consecutive turns when the new prompt exactly extends the cached tokens — skipping a full re-prefill. It is doubly guarded (exact-prefix check + safe fallback) and **auto-disables under steering/patch and batched generation**.
58
+
59
+ ```python
60
+ p.kv_reuse = "auto" # default: enabled when safe
61
+ p.kv_reuse = False # force off — pin this for determinism-critical / reproducibility experiments,
62
+ # since reuse can perturb outputs at the FP level vs a full prefill.
63
+ ```
64
+
65
+ Enable `logging` at INFO to see the per-participant decision, DEBUG to see reuse engage per turn:
66
+
67
+ ```python
68
+ import logging; logging.basicConfig(level=logging.INFO)
69
+ # INFO ...model_participant: p: cross-turn KV reuse ENABLED (kv_reuse='auto')
70
+ ```
71
+
72
+ ## API-backed participants
73
+
74
+ `APIParticipant` is a full conversational participant with **no local model** — use it as an opponent, moderator, or judge. Interp requests (`capture`/`steering`/`patch`/`return_logprobs`) **raise** rather than silently no-op (a steering sweep that quietly did nothing would fabricate a "no effect" result).
75
+
76
+ ```python
77
+ from interlens import Conversation, APIParticipant, AutoModelParticipant
78
+
79
+ local = AutoModelParticipant.from_pretrained("qwen2.5-3b", name="student")
80
+ judge = APIParticipant(name="tutor", model_id="claude-sonnet-5", provider="anthropic",
81
+ system_prompt="You are a patient tutor. Ask one probing question per turn.",
82
+ max_tokens=400, temperature=1.0)
83
+
84
+ conv = Conversation(participants=(local, judge), shared_context="Teach the student about entropy.")
85
+ conv.run(turns=6)
86
+ ```
87
+
88
+ Requires `ANTHROPIC_API_KEY` (or `ANTHROPIC_API_KEY_FILE`) and outbound network. For tests, inject a fake `client=callable(system, messages, model, max_tokens, temperature) -> str`.
89
+
90
+ Next: [context management & serialization](04_context_and_serialization.md).
@@ -0,0 +1,73 @@
1
+ <!-- [interp-refactor] session f80ef917 -->
2
+ # 04 · Context management & serialization
3
+
4
+ ## Context-window policies
5
+
6
+ Long conversations overflow the model's context. A `ContextPolicy` decides what to keep. Crucially it runs on the **typed segments** (system / moderator / private_context / turns) *before* the family flatten, so framing is preserved reliably.
7
+
8
+ ```python
9
+ from interlens import (
10
+ Conversation, ErrorPolicy, DropOldestPolicy, SlidingWindowPolicy, SummarizePolicy,
11
+ )
12
+
13
+ # ErrorPolicy (default): raise if the view exceeds context_limit — never silently truncate.
14
+ conv = Conversation(participants=(alice, bob), context_policy=ErrorPolicy(), context_limit=8192)
15
+
16
+ # SlidingWindowPolicy: keep framing + the most recent `keep_last` turns; drop older ones.
17
+ conv = Conversation(participants=(alice, bob), context_policy=SlidingWindowPolicy(keep_last=8))
18
+
19
+ # DropOldestPolicy: drop oldest turns until it fits (needs context_limit).
20
+ conv = Conversation(participants=(alice, bob), context_policy=DropOldestPolicy(), context_limit=8192)
21
+
22
+ # SummarizePolicy: replace older middle turns with a summary; keep framing + last `keep_last` verbatim.
23
+ conv = Conversation(
24
+ participants=(alice, bob),
25
+ context_policy=SummarizePolicy(keep_last=4, summarizer=lambda turns: "Earlier: " + " | ".join(turns)),
26
+ )
27
+ ```
28
+
29
+ `context_limit=None` (default) means the tokenizer's own `model_max_length` is used.
30
+
31
+ ## Serialization: the three levels
32
+
33
+ ### Level 2 — `ConversationTemplate` (recipe, no messages)
34
+
35
+ A serializable spec: participant configs + scenario framing + policies. This is what rollouts expand and workers rebuild.
36
+
37
+ ```python
38
+ from interlens import ConversationTemplate, ModelParticipantConfig, APIParticipantConfig, SlidingWindowPolicy
39
+
40
+ tmpl = ConversationTemplate(
41
+ participants=[
42
+ ModelParticipantConfig(name="alice", model="qwen2.5-3b", temperature=0.7, system_prompt="Be terse."),
43
+ ModelParticipantConfig(name="bob", model="gemma2-2b", temperature=0.9),
44
+ ],
45
+ shared_context="Debate: is a hotdog a sandwich?",
46
+ shared_system_prompt="Stay civil.",
47
+ turns=6,
48
+ context_policy=SlidingWindowPolicy(keep_last=8),
49
+ reasoning_visibility="strip",
50
+ )
51
+
52
+ tmpl.save("scenario.json") # round-trips through JSON
53
+ tmpl2 = ConversationTemplate.load("scenario.json")
54
+
55
+ conv = tmpl.build(devices="cuda") # → live Conversation (loads the models)
56
+ conv.run(turns=tmpl.turns)
57
+ ```
58
+
59
+ `ModelParticipantConfig` mirrors the `ModelParticipant` knobs (`dtype`, `attn`, `quant`, `revision`, `max_new_tokens`, `temperature`, `top_p`, `seed`, `thinking`, `tool_names`, `max_tool_iters`, `kv_reuse`, `generation`, `weights_path`). `generation` is only needed to force chat behavior when `model` is a raw HF id the registry can't resolve.
60
+
61
+ Go from a live conversation back to a template with `conv.to_template()`.
62
+
63
+ ### Level 3 — save/load a whole conversation (template + transcript)
64
+
65
+ ```python
66
+ conv.save("runs/debate_001") # writes template.json + transcript.json
67
+ resumed = Conversation.load("runs/debate_001", devices="cuda") # reloads models, ATTACHES the transcript
68
+ resumed.run(turns=4) # continues from where it left off (does not regenerate)
69
+ ```
70
+
71
+ `build`/`load` take `devices=` as a single device or a list (participants are round-robined across the list — handy for putting two big models on two GPUs).
72
+
73
+ Next: [tools](05_tools.md) · [hooks](06_hooks.md) · [interpretability](07_interp.md) · [rollouts](08_rollouts_and_scale.md).