interlens 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- interlens-0.1.0/.github/workflows/publish.yml +61 -0
- interlens-0.1.0/.github/workflows/tests.yml +37 -0
- interlens-0.1.0/.gitignore +14 -0
- interlens-0.1.0/LICENSE +21 -0
- interlens-0.1.0/PKG-INFO +74 -0
- interlens-0.1.0/README.md +55 -0
- interlens-0.1.0/docs/examples/01_quickstart.md +56 -0
- interlens-0.1.0/docs/examples/02_conversations.md +117 -0
- interlens-0.1.0/docs/examples/03_participants_and_models.md +90 -0
- interlens-0.1.0/docs/examples/04_context_and_serialization.md +73 -0
- interlens-0.1.0/docs/examples/05_tools.md +76 -0
- interlens-0.1.0/docs/examples/06_hooks.md +57 -0
- interlens-0.1.0/docs/examples/07_interp.md +94 -0
- interlens-0.1.0/docs/examples/08_rollouts_and_scale.md +98 -0
- interlens-0.1.0/docs/examples/09_advanced_interp_pipelines.md +114 -0
- interlens-0.1.0/docs/examples/README.md +36 -0
- interlens-0.1.0/pyproject.toml +42 -0
- interlens-0.1.0/src/interlens/__init__.py +85 -0
- interlens-0.1.0/src/interlens/context/__init__.py +23 -0
- interlens-0.1.0/src/interlens/context/context_policy.py +46 -0
- interlens-0.1.0/src/interlens/context/drop_oldest_policy.py +32 -0
- interlens-0.1.0/src/interlens/context/error_policy.py +22 -0
- interlens-0.1.0/src/interlens/context/sliding_window_policy.py +33 -0
- interlens-0.1.0/src/interlens/context/summarize_policy.py +47 -0
- interlens-0.1.0/src/interlens/context_item.py +23 -0
- interlens-0.1.0/src/interlens/conversation.py +393 -0
- interlens-0.1.0/src/interlens/execution_mode.py +23 -0
- interlens-0.1.0/src/interlens/factories.py +120 -0
- interlens-0.1.0/src/interlens/hooks/__init__.py +3 -0
- interlens-0.1.0/src/interlens/hooks/message_hook.py +49 -0
- interlens-0.1.0/src/interlens/interp/__init__.py +29 -0
- interlens-0.1.0/src/interlens/interp/activation_cache.py +132 -0
- interlens-0.1.0/src/interlens/interp/capture.py +85 -0
- interlens-0.1.0/src/interlens/interp/layers.py +23 -0
- interlens-0.1.0/src/interlens/interp/logprobs.py +30 -0
- interlens-0.1.0/src/interlens/interp/patching.py +50 -0
- interlens-0.1.0/src/interlens/interp/steering.py +62 -0
- interlens-0.1.0/src/interlens/loading/__init__.py +28 -0
- interlens-0.1.0/src/interlens/loading/load.py +69 -0
- interlens-0.1.0/src/interlens/loading/model_cache.py +38 -0
- interlens-0.1.0/src/interlens/loading/registry.py +106 -0
- interlens-0.1.0/src/interlens/message.py +24 -0
- interlens-0.1.0/src/interlens/participant/__init__.py +4 -0
- interlens-0.1.0/src/interlens/participant/config/__init__.py +10 -0
- interlens-0.1.0/src/interlens/participant/config/api_participant_config.py +46 -0
- interlens-0.1.0/src/interlens/participant/config/model_participant_config.py +122 -0
- interlens-0.1.0/src/interlens/participant/config/participant_config.py +79 -0
- interlens-0.1.0/src/interlens/participant/participant.py +105 -0
- interlens-0.1.0/src/interlens/participant/participants/api_client.py +54 -0
- interlens-0.1.0/src/interlens/participant/participants/api_participant.py +80 -0
- interlens-0.1.0/src/interlens/participant/participants/gemma.py +68 -0
- interlens-0.1.0/src/interlens/participant/participants/model_participant.py +404 -0
- interlens-0.1.0/src/interlens/participant/participants/qwen.py +4 -0
- interlens-0.1.0/src/interlens/participant/role.py +3 -0
- interlens-0.1.0/src/interlens/reasoning_visibility.py +24 -0
- interlens-0.1.0/src/interlens/runner/__init__.py +19 -0
- interlens-0.1.0/src/interlens/runner/analyzer_registry.py +23 -0
- interlens-0.1.0/src/interlens/runner/batched.py +49 -0
- interlens-0.1.0/src/interlens/runner/devices.py +15 -0
- interlens-0.1.0/src/interlens/runner/pool.py +160 -0
- interlens-0.1.0/src/interlens/runner/rollout.py +40 -0
- interlens-0.1.0/src/interlens/runner/spec.py +26 -0
- interlens-0.1.0/src/interlens/runner/worker_init.py +18 -0
- interlens-0.1.0/src/interlens/stop/__init__.py +16 -0
- interlens-0.1.0/src/interlens/stop/conditions.py +74 -0
- interlens-0.1.0/src/interlens/stop/stop_condition.py +41 -0
- interlens-0.1.0/src/interlens/template.py +87 -0
- interlens-0.1.0/src/interlens/tools/__init__.py +5 -0
- interlens-0.1.0/src/interlens/tools/registry.py +33 -0
- interlens-0.1.0/src/interlens/tools/tool.py +28 -0
- interlens-0.1.0/src/interlens/tools/tool_call.py +26 -0
- interlens-0.1.0/src/interlens/transcript.py +179 -0
- interlens-0.1.0/src/interlens/view.py +31 -0
- interlens-0.1.0/tests/__init__.py +0 -0
- interlens-0.1.0/tests/api_validate.py +105 -0
- interlens-0.1.0/tests/cluster_validate.py +200 -0
- interlens-0.1.0/tests/cluster_validate.sbatch +29 -0
- interlens-0.1.0/tests/conftest.py +53 -0
- interlens-0.1.0/tests/install_flash_attn.sbatch +21 -0
- interlens-0.1.0/tests/profile_pipeline.py +153 -0
- interlens-0.1.0/tests/profile_pipeline.sbatch +23 -0
- interlens-0.1.0/tests/test_conversation.py +93 -0
- interlens-0.1.0/tests/test_family_flags.py +58 -0
- interlens-0.1.0/tests/test_hooks.py +40 -0
- interlens-0.1.0/tests/test_interp.py +49 -0
- interlens-0.1.0/tests/test_models_slow.py +139 -0
- interlens-0.1.0/tests/test_runner.py +76 -0
- interlens-0.1.0/tests/test_serialization.py +74 -0
- interlens-0.1.0/tests/test_stop_conditions.py +46 -0
- interlens-0.1.0/tests/test_tools.py +98 -0
- interlens-0.1.0/tests/test_view_pipeline.py +67 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
name: publish
|
|
2
|
+
|
|
3
|
+
# Every push to main cuts a new release: bump the patch version, create+push a `vX.Y.Z` tag (the version is
|
|
4
|
+
# derived from it via hatch-vcs), build, publish to PyPI via Trusted Publishing (OIDC — no token), and cut a
|
|
5
|
+
# GitHub Release. The tag is the source of truth; nothing is committed back, so there's no bump-loop.
|
|
6
|
+
#
|
|
7
|
+
# One-time PyPI setup (https://pypi.org/manage/account/publishing/): add a pending publisher for
|
|
8
|
+
# project: interlens · owner: Sid-MB · repo: interlens · workflow: publish.yml · environment: pypi
|
|
9
|
+
on:
|
|
10
|
+
push:
|
|
11
|
+
branches: [main]
|
|
12
|
+
|
|
13
|
+
# Serialize releases so two quick pushes can't race to claim the same version.
|
|
14
|
+
concurrency:
|
|
15
|
+
group: publish
|
|
16
|
+
cancel-in-progress: false
|
|
17
|
+
|
|
18
|
+
jobs:
|
|
19
|
+
release:
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
environment: pypi
|
|
22
|
+
permissions:
|
|
23
|
+
contents: write # create + push the tag and the GitHub Release
|
|
24
|
+
id-token: write # PyPI Trusted Publishing (OIDC)
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
with:
|
|
28
|
+
fetch-depth: 0 # full history + tags so hatch-vcs / the bump can see the latest tag
|
|
29
|
+
|
|
30
|
+
- uses: actions/setup-python@v5
|
|
31
|
+
with:
|
|
32
|
+
python-version: "3.12"
|
|
33
|
+
|
|
34
|
+
- name: Compute next version and tag it
|
|
35
|
+
id: bump
|
|
36
|
+
run: |
|
|
37
|
+
git fetch --tags --force
|
|
38
|
+
latest=$(git tag --list 'v*' --sort=-v:refname | head -n1)
|
|
39
|
+
if [ -z "$latest" ]; then
|
|
40
|
+
next="0.1.0"
|
|
41
|
+
else
|
|
42
|
+
v=${latest#v}; IFS=. read -r a b c <<<"$v"; next="$a.$b.$((c + 1))"
|
|
43
|
+
fi
|
|
44
|
+
echo "version=$next" >> "$GITHUB_OUTPUT"
|
|
45
|
+
git config user.name "github-actions[bot]"
|
|
46
|
+
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
|
47
|
+
git tag "v$next"
|
|
48
|
+
git push origin "v$next"
|
|
49
|
+
|
|
50
|
+
- name: Build sdist + wheel
|
|
51
|
+
run: |
|
|
52
|
+
python -m pip install --upgrade build
|
|
53
|
+
python -m build
|
|
54
|
+
|
|
55
|
+
- name: Publish to PyPI
|
|
56
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
57
|
+
|
|
58
|
+
- name: Create GitHub Release
|
|
59
|
+
env:
|
|
60
|
+
GH_TOKEN: ${{ github.token }}
|
|
61
|
+
run: gh release create "v${{ steps.bump.outputs.version }}" dist/* --generate-notes
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
concurrency:
|
|
9
|
+
group: tests-${{ github.ref }}
|
|
10
|
+
cancel-in-progress: true
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
quick-tests:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
python-version: ["3.11", "3.12"]
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
with:
|
|
22
|
+
fetch-depth: 0 # tags available so hatch-vcs can resolve a version during the editable install
|
|
23
|
+
|
|
24
|
+
- uses: actions/setup-python@v5
|
|
25
|
+
with:
|
|
26
|
+
python-version: ${{ matrix.python-version }}
|
|
27
|
+
cache: pip
|
|
28
|
+
|
|
29
|
+
- name: Install (CPU torch + package + dev deps)
|
|
30
|
+
run: |
|
|
31
|
+
python -m pip install --upgrade pip
|
|
32
|
+
# CPU torch keeps CI light — the quick tests never touch a GPU or download model weights.
|
|
33
|
+
pip install torch --index-url https://download.pytorch.org/whl/cpu
|
|
34
|
+
pip install -e ".[dev]"
|
|
35
|
+
|
|
36
|
+
- name: Run quick tests
|
|
37
|
+
run: pytest -m "not slow" -q
|
interlens-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Siddharth M. Bhatia
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
interlens-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: interlens
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A framework for efficiently scaffolding and interpreting multi-agent conversations (activation capture, steering, patching).
|
|
5
|
+
Project-URL: Homepage, https://github.com/Sid-MB/interlens
|
|
6
|
+
Project-URL: Repository, https://github.com/Sid-MB/interlens
|
|
7
|
+
Author: Siddharth M. Bhatia
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: activation-steering,interpretability,llm,multi-agent,transformers
|
|
11
|
+
Requires-Python: <3.14,>=3.11
|
|
12
|
+
Requires-Dist: torch
|
|
13
|
+
Requires-Dist: transformers<5,>=4.57
|
|
14
|
+
Provides-Extra: api
|
|
15
|
+
Requires-Dist: anthropic<1,>=0.112; extra == 'api'
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest>=9; extra == 'dev'
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Interlens: Framework for Multi-Agent Conversation and Interpretability
|
|
21
|
+
|
|
22
|
+
This library provides a harness, optimized utilities, and interpretability hooks for multi-agent conversation rollouts.
|
|
23
|
+
|
|
24
|
+
A harness for **multi-agent (model↔model) conversations** with **first-class interpretability** — activation capture, steering, activation patching, and token logprobs, all hooked into the *same* generation path as real turns and tagged to conversation structure. Scales from one interactive dialogue to thousands of checkpointed, multi-GPU rollouts.
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from interlens import Conversation
|
|
28
|
+
|
|
29
|
+
conv = Conversation.from_models(
|
|
30
|
+
("qwen2.5-0.5b", "qwen2.5-0.5b"), names=("alice", "bob"),
|
|
31
|
+
shared_context="Let's debate: is cereal a soup?",
|
|
32
|
+
)
|
|
33
|
+
conv.run(turns=4, first="alice")
|
|
34
|
+
print(conv.transcript)
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
See [`docs/examples`](docs/examples) for sample code.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install "git+https://github.com/Sid-MB/interlens"
|
|
43
|
+
# with the Claude-backed APIParticipant:
|
|
44
|
+
pip install "interlens[api] @ git+https://github.com/Sid-MB/interlens"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### PyTorch / CUDA note
|
|
48
|
+
`torch` is declared as a plain, build-agnostic dependency — install the wheel matching **your** platform (CUDA / CPU / MPS) *before or alongside* `interlens`. E.g. for CUDA 13.0:
|
|
49
|
+
```bash
|
|
50
|
+
pip install torch --index-url https://download.pytorch.org/whl/cu130
|
|
51
|
+
```
|
|
52
|
+
See <https://pytorch.org/get-started/locally/>.
|
|
53
|
+
|
|
54
|
+
## What's inside
|
|
55
|
+
|
|
56
|
+
- **`Conversation`** — turn-taking over a shared, perspective-neutral `Transcript`; per-speaker view pipeline (system/private framing → context-fit → family-correct chat template).
|
|
57
|
+
- **`AutoModelParticipant`** — HF-style factory (`from_pretrained` / `from_model` / `from_`) that returns the family-correct participant (Qwen/Gemma/…); **`APIParticipant`** for hosted models.
|
|
58
|
+
- **Interpretability** — `conv.capture(...)`, `SteeringSpec`, `Patch`, `token_logprobs`, backed by a queryable `ActivationCache`.
|
|
59
|
+
- **Scale** — `rollout` / `run_conversations`: multi-GPU, checkpointed, resumable, batched co-stepping, with in-worker `analyze` callbacks.
|
|
60
|
+
- **Serialization** — `ConversationTemplate` (recipe) and full save/load (template + transcript).
|
|
61
|
+
|
|
62
|
+
See [`docs/examples/`](docs/examples/) for a simple→advanced walkthrough of the whole API.
|
|
63
|
+
|
|
64
|
+
## Develop
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
git clone https://github.com/Sid-MB/interlens && cd interlens
|
|
68
|
+
pip install -e ".[dev]"
|
|
69
|
+
pytest # fast tests; real-model tests are opt-in: pytest -m slow
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## License
|
|
73
|
+
|
|
74
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Interlens: Framework for Multi-Agent Conversation and Interpretability
|
|
2
|
+
|
|
3
|
+
This library provides a harness, optimized utilities, and interpretability hooks for multi-agent conversation rollouts.
|
|
4
|
+
|
|
5
|
+
Interlens is a harness for **multi-agent (model↔model) conversations** with **first-class interpretability** — activation capture, steering, activation patching, and token logprobs, all hooked into the *same* generation path as real turns and tagged to conversation structure. It scales from one interactive dialogue to thousands of checkpointed, multi-GPU rollouts.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from interlens import Conversation
|
|
9
|
+
|
|
10
|
+
conv = Conversation.from_models(
|
|
11
|
+
("qwen2.5-0.5b", "qwen2.5-0.5b"), names=("alice", "bob"),
|
|
12
|
+
shared_context="Let's debate: is cereal a soup?",
|
|
13
|
+
)
|
|
14
|
+
conv.run(turns=4, first="alice")
|
|
15
|
+
print(conv.transcript)
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
See [`docs/examples`](docs/examples) for sample code.
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install "git+https://github.com/Sid-MB/interlens"
|
|
24
|
+
# with the Claude-backed APIParticipant:
|
|
25
|
+
pip install "interlens[api] @ git+https://github.com/Sid-MB/interlens"
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### PyTorch / CUDA note
|
|
29
|
+
`torch` is declared as a plain, build-agnostic dependency — install the wheel matching **your** platform (CUDA / CPU / MPS) *before or alongside* `interlens`. E.g. for CUDA 13.0:
|
|
30
|
+
```bash
|
|
31
|
+
pip install torch --index-url https://download.pytorch.org/whl/cu130
|
|
32
|
+
```
|
|
33
|
+
See <https://pytorch.org/get-started/locally/>.
|
|
34
|
+
|
|
35
|
+
## What's inside
|
|
36
|
+
|
|
37
|
+
- **`Conversation`** — turn-taking over a shared, perspective-neutral `Transcript`; per-speaker view pipeline (system/private framing → context-fit → family-correct chat template).
|
|
38
|
+
- **`AutoModelParticipant`** — HF-style factory (`from_pretrained` / `from_model` / `from_`) that returns the family-correct participant (Qwen/Gemma/…); **`APIParticipant`** for hosted models.
|
|
39
|
+
- **Interpretability** — `conv.capture(...)`, `SteeringSpec`, `Patch`, `token_logprobs`, backed by a queryable `ActivationCache`.
|
|
40
|
+
- **Scale** — `rollout` / `run_conversations`: multi-GPU, checkpointed, resumable, batched co-stepping, with in-worker `analyze` callbacks.
|
|
41
|
+
- **Serialization** — `ConversationTemplate` (recipe) and full save/load (template + transcript).
|
|
42
|
+
|
|
43
|
+
See [`docs/examples/`](docs/examples/) for a simple→advanced walkthrough of the whole API.
|
|
44
|
+
|
|
45
|
+
## Develop
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
git clone https://github.com/Sid-MB/interlens && cd interlens
|
|
49
|
+
pip install -e ".[dev]"
|
|
50
|
+
pytest # fast tests; real-model tests are opt-in: pytest -m slow
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## License
|
|
54
|
+
|
|
55
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
<!-- [interp-refactor] session f80ef917 -->
|
|
2
|
+
# 01 · Quickstart
|
|
3
|
+
|
|
4
|
+
The fastest path: `Conversation.from_models` scaffolds a two-party conversation from a tuple of models — each a short name from the [registry](../../src/interlens/loading/registry.py), a raw HF id, or an already-loaded model (`ModelLike`). If two ids are identical, the weights are loaded **once** and shared between the two speakers.
|
|
5
|
+
|
|
6
|
+
```python
|
|
7
|
+
from interlens import Conversation
|
|
8
|
+
|
|
9
|
+
# Two speakers backed by the same 0.5B model (one weight load, shared).
|
|
10
|
+
conv = Conversation.from_models(
|
|
11
|
+
("qwen2.5-0.5b", "qwen2.5-0.5b"),
|
|
12
|
+
names=("alice", "bob"),
|
|
13
|
+
device="cuda", # "cpu" / "mps" also work for a smoke test
|
|
14
|
+
temperature=0.8, # **gen_kwargs are forwarded to both participants
|
|
15
|
+
max_new_tokens=128,
|
|
16
|
+
shared_context="Let's debate: is cereal a soup?", # opening framing (see below)
|
|
17
|
+
)
|
|
18
|
+
conv.run(turns=4, first="alice")
|
|
19
|
+
|
|
20
|
+
for m in conv.transcript:
|
|
21
|
+
print(f"{m.author}: {m.content}\n")
|
|
22
|
+
|
|
23
|
+
# ...or, for quick debugging, dump the whole transcript at once:
|
|
24
|
+
print(conv.transcript) # [i] author: content (also conv.transcript.pretty())
|
|
25
|
+
print(conv.transcript.pretty(metadata=True)) # include per-turn metadata (reasoning, tool trail, token counts)
|
|
26
|
+
|
|
27
|
+
# See exactly what one model is conditioned on — role-swapped to its POV, WITH chat-template special tokens:
|
|
28
|
+
print(conv.transcript.render_templated(pov=conv.by_name["alice"])) # tokenize=True returns ids instead
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## What just happened
|
|
32
|
+
|
|
33
|
+
- **`shared_context=...`** seeds the opening without touching the transcript: it's a neutral, **moderator**-voiced turn everyone sees (scenario/topic framing). Pair it with **`shared_system_prompt=...`** for system-role instructions.
|
|
34
|
+
- **`prompt=...`** (on `from_models` and `run`) is the alternative when the opener should read as something a *speaker* said: a `str` is attributed to the **last** participant (so the `first` speaker replies to it), a `Message` sets the author explicitly. Use `shared_context` for neutral framing, `prompt` for a participant-voiced line.
|
|
35
|
+
- **`conv.run(turns=4, first="alice")`** alternates speakers for 4 turns starting with alice. `first` accepts a **name** (`"alice"`), an **index** (`0`), or a **`Participant`** object. `run` requires at least one of `turns=` or `until=` (a [stop condition](02_conversations.md#stopping)).
|
|
36
|
+
- **`conv.transcript`** is the shared state — a list of `Message`s (`.author`, `.content`, `.metadata`). You can still append to it directly for finer control.
|
|
37
|
+
- **`conv.by_name["alice"]`** looks a participant up by name.
|
|
38
|
+
|
|
39
|
+
## Two *different* models
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
conv = Conversation.from_models(("qwen2.5-3b", "gemma2-2b"), names=("q", "g"), device="cuda")
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Each id resolves to its family-correct participant class automatically (Qwen vs Gemma chat templates, tool formats, system-role handling) via the registry — see [03](03_participants_and_models.md).
|
|
46
|
+
|
|
47
|
+
## One-off generation without committing
|
|
48
|
+
|
|
49
|
+
To sample a reply **without** mutating the transcript (safe to call in a loop):
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
msg = conv.sample("alice", "Quick — name a color.") # returns a Message; transcript unchanged
|
|
53
|
+
print(msg.content)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Next: [building conversations by hand](02_conversations.md) for per-speaker system prompts, moderators, and stop conditions.
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
<!-- [interp-refactor] session f80ef917 -->
|
|
2
|
+
# 02 · Conversations in depth
|
|
3
|
+
|
|
4
|
+
`Conversation.from_models` is a convenience wrapper. Build a `Conversation` by hand when you want **per-speaker framing** (different system prompts / private context), a custom moderator, policies, or hooks.
|
|
5
|
+
|
|
6
|
+
## Build participants and a conversation manually
|
|
7
|
+
|
|
8
|
+
`AutoModelParticipant.from_pretrained(...)` is the HF-style loader (the participant analog of `AutoModelForCausalLM.from_pretrained`): it loads the model by id and returns the family-correct participant instance. Loading the same id twice shares one model object (weights are process-cached), so both speakers below share weights.
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
from interlens import Conversation, AutoModelParticipant
|
|
12
|
+
|
|
13
|
+
alice = AutoModelParticipant.from_pretrained(
|
|
14
|
+
"qwen2.5-3b", name="alice", device="cuda",
|
|
15
|
+
system_prompt="You are a concise, skeptical debater. Keep replies under 3 sentences.",
|
|
16
|
+
temperature=0.7, max_new_tokens=200,
|
|
17
|
+
)
|
|
18
|
+
bob = AutoModelParticipant.from_pretrained(
|
|
19
|
+
"qwen2.5-3b", name="bob", device="cuda", # same id → shares alice's weights (cached)
|
|
20
|
+
system_prompt="You are an enthusiastic optimist who loves analogies.",
|
|
21
|
+
temperature=0.9, max_new_tokens=200,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
conv = Conversation(
|
|
25
|
+
participants=(alice, bob),
|
|
26
|
+
shared_context="Topic: should cities ban cars downtown? Debate it.", # seeded as a moderator turn
|
|
27
|
+
shared_system_prompt="Stay respectful and on-topic.", # prepended to every speaker's system block
|
|
28
|
+
reasoning_visibility="strip", # see below
|
|
29
|
+
)
|
|
30
|
+
conv.run(turns=6)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Framing ownership (who sees what)
|
|
34
|
+
|
|
35
|
+
- **Shared** framing lives on the `Conversation`: `shared_context` (injected once as a `moderator` turn everyone sees) and `shared_system_prompt` (merged into every speaker's system block).
|
|
36
|
+
- **Private** framing lives on each participant: `system_prompt` and `private_context` (a tuple of `ContextItem`) — invisible to the other speaker and to the transcript.
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from interlens import ContextItem, AutoModelParticipant
|
|
40
|
+
spy = AutoModelParticipant.from_pretrained(
|
|
41
|
+
"qwen2.5-3b", name="spy",
|
|
42
|
+
system_prompt="Secretly steer the topic toward trains.",
|
|
43
|
+
private_context=(ContextItem("Remember: never admit you have an agenda.", role_hint="user", author="handler"),),
|
|
44
|
+
)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Turn-taking
|
|
48
|
+
|
|
49
|
+
- **`conv.step(speaker)`** — one turn by a specific speaker, committed to the transcript; returns the `Message` (or `None` if a [hook](06_hooks.md) denied it).
|
|
50
|
+
- **`conv.run(turns=N, until=..., first=...)`** — alternate speakers round-robin. `first` sets who starts.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
conv.step(alice) # drive turns explicitly
|
|
54
|
+
conv.step(bob)
|
|
55
|
+
conv.run(turns=4, first=alice) # or in bulk
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Stopping
|
|
59
|
+
|
|
60
|
+
`until=` takes a single `StopCondition` or a list (any of which stops). Whichever of `turns`/`until` hits first ends the run.
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from interlens import (
|
|
64
|
+
TurnStopCondition, TokenStopCondition, ElapsedTimeStopCondition, StopStringCondition,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
conv.run(until=[
|
|
68
|
+
TurnStopCondition(max_turns=20), # cap turns
|
|
69
|
+
TokenStopCondition(max_tokens=4000), # cap cumulative generated tokens
|
|
70
|
+
ElapsedTimeStopCondition(seconds=120), # wall-clock budget
|
|
71
|
+
StopStringCondition(["I concede", "AGREED"]), # stop when a turn contains any string
|
|
72
|
+
])
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Branching — fork a conversation for free
|
|
76
|
+
|
|
77
|
+
`branch()` forks into a new `Conversation` that **reuses the same participant objects** (shared weights, zero extra GPU) with a *copied* transcript. Explore divergent continuations without touching the original — each branch has its own transcript, so they diverge independently:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
base = conv.branch()
|
|
81
|
+
for _ in range(3):
|
|
82
|
+
b = base.branch()
|
|
83
|
+
b.run(turns=2, first="alice") # first accepts a name, index, or Participant
|
|
84
|
+
print("continuation:", b.transcript[-1].content[:80])
|
|
85
|
+
# `conv` and `base` are untouched; only each `b` advanced.
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
> Because branches **share the participant objects**, mutating a participant (e.g. `b.by_name["alice"].temperature = 0.3`) changes it for *every* branch and the original. To vary generation settings per branch, set them right before you run, or pin `seed` and vary only the prompt/intervention — see [09](09_advanced_interp_pipelines.md).
|
|
89
|
+
|
|
90
|
+
## Ephemeral sampling — read state without mutating it
|
|
91
|
+
|
|
92
|
+
`sample()` generates a reply to an optional temporary message **without committing anything** — ideal for probing "what would X say now?" repeatedly.
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
for q in ["Summarize your position.", "What's your strongest objection?"]:
|
|
96
|
+
print(conv.sample("alice", q, as_author="interviewer").content)
|
|
97
|
+
# conv.transcript is unchanged after all of this
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
`sample` and `step` accept the same interpretability options (`steering=`, `capture=`, `patch=`, `return_logprobs=`) — see [07](07_interp.md).
|
|
101
|
+
|
|
102
|
+
## Reasoning visibility (CoT models)
|
|
103
|
+
|
|
104
|
+
For models that emit `<think>…</think>`, `reasoning_visibility` controls whether a prior turn's parsed reasoning is re-injected into other speakers' views:
|
|
105
|
+
|
|
106
|
+
- `"strip"` (default) — reasoning never leaks into anyone's view.
|
|
107
|
+
- `"self_retain"` — a speaker sees only its *own* past reasoning.
|
|
108
|
+
- `"shared"` — everyone sees everyone's reasoning.
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from interlens import ReasoningVisibility
|
|
112
|
+
conv = Conversation(participants=(alice, bob), reasoning_visibility=ReasoningVisibility.SELF_RETAIN)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
The raw completion and parsed reasoning are always stored per message in `msg.metadata["raw_completion"]` / `msg.metadata["parsed_think"]`.
|
|
116
|
+
|
|
117
|
+
Next: [participant & model options](03_participants_and_models.md).
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
<!-- [interp-refactor] session f80ef917 -->
|
|
2
|
+
# 03 · Participants & models
|
|
3
|
+
|
|
4
|
+
## The model registry
|
|
5
|
+
|
|
6
|
+
All model-keyed data lives in one place: [`src/interlens/loading/registry.py`](../../src/interlens/loading/registry.py). Short names resolve to an HF id + a **generation** (the behavior + tokenizer group); a raw HF id passes through unchanged.
|
|
7
|
+
|
|
8
|
+
```python
|
|
9
|
+
from interlens import AutoModelParticipant
|
|
10
|
+
from interlens.loading import MODELS, resolve, tokenizer_id
|
|
11
|
+
|
|
12
|
+
list(MODELS) # ['qwen2.5-0.5b', ..., 'gemma2-2b', 'gemma3-4b']
|
|
13
|
+
resolve("gemma3-4b") # ('google/gemma-3-4b-it', 'gemma3')
|
|
14
|
+
tokenizer_id("qwen2.5-3b") # 'qwen2.5' (same generation → tokenizer loaded once)
|
|
15
|
+
AutoModelParticipant.class_for("gemma2-2b") # <class 'GemmaModelParticipant'> (family-correct chat behavior)
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
(`AutoModelParticipant.class_for` is the public class resolver; `loading.participant_class` is the low-level primitive it delegates to.)
|
|
19
|
+
|
|
20
|
+
**Generation, not vendor, selects behavior.** `gemma2` and `gemma3` have *different* chat templates (Gemma 3 accepts a system role, Gemma 2 folds it into the first user turn), so each generation maps to its own participant class. Adding a model is one line in `MODELS`; adding a generation is one line in `GENERATIONS`. A slow test ([`tests/test_family_flags.py`](../../tests/test_family_flags.py)) verifies the declared chat-template flags against each real tokenizer.
|
|
21
|
+
|
|
22
|
+
### Load weights directly
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
import torch
|
|
26
|
+
from interlens.loading import load_model
|
|
27
|
+
model, tok = load_model("qwen3-8b", device="cuda", dtype=torch.bfloat16, attn="flash_attention_2")
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
`load_model` shares a process-local cache: identical `(hf_id, device, dtype, attn, quant, revision)` returns the same model object; same-generation models share the tokenizer. Flash-attention is the default with automatic fallback to sdpa/eager; `quant="4bit"`/`"8bit"` is opt-in (quantization perturbs activations, which reduces interp fidelity).
|
|
31
|
+
|
|
32
|
+
## `ModelParticipant` knobs
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from interlens import AutoModelParticipant
|
|
36
|
+
p = AutoModelParticipant.from_pretrained(
|
|
37
|
+
"qwen3-4b", name="p", device="cuda",
|
|
38
|
+
load_kwargs={"attn": "sdpa"}, # optional: forwarded to load_model (dtype/attn/quant/revision)
|
|
39
|
+
temperature=0.8, top_p=0.95, max_new_tokens=512,
|
|
40
|
+
seed=1234, # per-participant RNG seed → reproducible greedy/sampled turns (local models only)
|
|
41
|
+
thinking="auto", # "auto" defers to the template; True/False forces enable_thinking where supported
|
|
42
|
+
system_prompt="…",
|
|
43
|
+
kv_reuse="auto", # cross-turn KV prefix reuse; see below
|
|
44
|
+
)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Already hold weights (e.g. sharing them, or an externally-loaded checkpoint)? Wrap them with `from_model`:
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from experiments.core.chat.loading import load_model
|
|
51
|
+
model, tok = load_model("qwen3-4b")
|
|
52
|
+
p = AutoModelParticipant.from_model(model, tok, name="p", id_or_name="qwen3-4b", temperature=0.8)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### `kv_reuse` (cross-turn KV cache)
|
|
56
|
+
|
|
57
|
+
`"auto"` (default) reuses the KV cache across a speaker's own consecutive turns when the new prompt exactly extends the cached tokens — skipping a full re-prefill. It is doubly guarded (exact-prefix check + safe fallback) and **auto-disables under steering/patch and batched generation**.
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
p.kv_reuse = "auto" # default: enabled when safe
|
|
61
|
+
p.kv_reuse = False # force off — pin this for determinism-critical / reproducibility experiments,
|
|
62
|
+
# since reuse can perturb outputs at the FP level vs a full prefill.
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Set the `logging` level to INFO to see the per-participant decision, or to DEBUG to also see when reuse engages on each turn:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import logging; logging.basicConfig(level=logging.INFO)
|
|
69
|
+
# INFO ...model_participant: p: cross-turn KV reuse ENABLED (kv_reuse='auto')
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## API-backed participants
|
|
73
|
+
|
|
74
|
+
`APIParticipant` is a full conversational participant with **no local model** — use it as an opponent, moderator, or judge. Interp requests (`capture`/`steering`/`patch`/`return_logprobs`) **raise** rather than silently no-op (a steering sweep that quietly did nothing would fabricate a "no effect" result).
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from experiments.core.chat import Conversation, APIParticipant, AutoModelParticipant
|
|
78
|
+
|
|
79
|
+
local = AutoModelParticipant.from_pretrained("qwen2.5-3b", name="student")
|
|
80
|
+
judge = APIParticipant(name="tutor", model_id="claude-sonnet-5", provider="anthropic",
|
|
81
|
+
system_prompt="You are a patient tutor. Ask one probing question per turn.",
|
|
82
|
+
max_tokens=400, temperature=1.0)
|
|
83
|
+
|
|
84
|
+
conv = Conversation(participants=(local, judge), shared_context="Teach the student about entropy.")
|
|
85
|
+
conv.run(turns=6)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Requires `ANTHROPIC_API_KEY` (or `ANTHROPIC_API_KEY_FILE`) and outbound network access. For tests, inject a fake via `client=`: a callable with the signature `(system, messages, model, max_tokens, temperature) -> str`.
|
|
89
|
+
|
|
90
|
+
Next: [context management & serialization](04_context_and_serialization.md).
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
<!-- [interp-refactor] session f80ef917 -->
|
|
2
|
+
# 04 · Context management & serialization
|
|
3
|
+
|
|
4
|
+
## Context-window policies
|
|
5
|
+
|
|
6
|
+
Long conversations overflow the model's context. A `ContextPolicy` decides what to keep. Crucially, it runs on the **typed segments** (system / moderator / private_context / turns) *before* they are flattened into the model family's chat-template format, so framing is preserved reliably.
|
|
7
|
+
|
|
8
|
+
```python
|
|
9
|
+
from experiments.core.chat import (
|
|
10
|
+
Conversation, ErrorPolicy, DropOldestPolicy, SlidingWindowPolicy, SummarizePolicy,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
# ErrorPolicy (default): raise if the view exceeds context_limit — never silently truncate.
|
|
14
|
+
conv = Conversation(participants=(alice, bob), context_policy=ErrorPolicy(), context_limit=8192)
|
|
15
|
+
|
|
16
|
+
# SlidingWindowPolicy: keep framing + the most recent `keep_last` turns; drop older ones.
|
|
17
|
+
conv = Conversation(participants=(alice, bob), context_policy=SlidingWindowPolicy(keep_last=8))
|
|
18
|
+
|
|
19
|
+
# DropOldestPolicy: drop oldest turns until it fits (needs context_limit).
|
|
20
|
+
conv = Conversation(participants=(alice, bob), context_policy=DropOldestPolicy(), context_limit=8192)
|
|
21
|
+
|
|
22
|
+
# SummarizePolicy: replace older middle turns with a summary; keep framing + last `keep_last` verbatim.
|
|
23
|
+
conv = Conversation(
|
|
24
|
+
participants=(alice, bob),
|
|
25
|
+
context_policy=SummarizePolicy(keep_last=4, summarizer=lambda turns: "Earlier: " + " | ".join(turns)),
|
|
26
|
+
)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
`context_limit=None` (default) means the tokenizer's own `model_max_length` is used.
|
|
30
|
+
|
|
31
|
+
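## Serialization: the three levels

Each level builds on the one below it: a participant config (Level 1, e.g. `ModelParticipantConfig`) describes a single speaker, a `ConversationTemplate` (Level 2) bundles those configs into a scenario, and a saved conversation (Level 3) adds the transcript.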
|
|
32
|
+
|
|
33
|
+
### Level 2 — `ConversationTemplate` (recipe, no messages)
|
|
34
|
+
|
|
35
|
+
A serializable spec: participant configs + scenario framing + policies. This is what rollouts expand and workers rebuild.
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from experiments.core.chat import ConversationTemplate, ModelParticipantConfig, APIParticipantConfig, SlidingWindowPolicy
|
|
39
|
+
|
|
40
|
+
tmpl = ConversationTemplate(
|
|
41
|
+
participants=[
|
|
42
|
+
ModelParticipantConfig(name="alice", model="qwen2.5-3b", temperature=0.7, system_prompt="Be terse."),
|
|
43
|
+
ModelParticipantConfig(name="bob", model="gemma2-2b", temperature=0.9),
|
|
44
|
+
],
|
|
45
|
+
shared_context="Debate: is a hotdog a sandwich?",
|
|
46
|
+
shared_system_prompt="Stay civil.",
|
|
47
|
+
turns=6,
|
|
48
|
+
context_policy=SlidingWindowPolicy(keep_last=8),
|
|
49
|
+
reasoning_visibility="strip",
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
tmpl.save("scenario.json") # round-trips through JSON
|
|
53
|
+
tmpl2 = ConversationTemplate.load("scenario.json")
|
|
54
|
+
|
|
55
|
+
conv = tmpl.build(devices="cuda") # → live Conversation (loads the models)
|
|
56
|
+
conv.run(turns=tmpl.turns)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
`ModelParticipantConfig` mirrors the `ModelParticipant` knobs (`dtype`, `attn`, `quant`, `revision`, `max_new_tokens`, `temperature`, `top_p`, `seed`, `thinking`, `tool_names`, `max_tool_iters`, `kv_reuse`, `generation`, `weights_path`). `generation` is only needed to force chat behavior when `model` is a raw HF id the registry can't resolve.
|
|
60
|
+
|
|
61
|
+
Go from a live conversation back to a template with `conv.to_template()`.
|
|
62
|
+
|
|
63
|
+
### Level 3 — save/load a whole conversation (template + transcript)
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
conv.save("runs/debate_001") # writes template.json + transcript.json
|
|
67
|
+
resumed = Conversation.load("runs/debate_001", devices="cuda") # reloads models, ATTACHES the transcript
|
|
68
|
+
resumed.run(turns=4) # continues from where it left off (does not regenerate)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
`build`/`load` take `devices=` as a single device or a list (participants are round-robined across the list — handy for putting two big models on two GPUs).
|
|
72
|
+
|
|
73
|
+
Next: [tools](05_tools.md) · [hooks](06_hooks.md) · [interpretability](07_interp.md) · [rollouts](08_rollouts_and_scale.md).
|