actopo 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ name: ci
2
+
3
+ on:
4
+ push:
5
+ paths: ["actopo/**"]
6
+ pull_request:
7
+ paths: ["actopo/**"]
8
+ workflow_dispatch: # manual trigger for the heavy integration job
9
+ schedule:
10
+ - cron: "0 6 * * 1" # weekly integration run (Mondays 06:00 UTC)
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ python-version: ["3.9", "3.11", "3.12"]
18
+ defaults:
19
+ run:
20
+ working-directory: actopo
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+ - uses: actions/setup-python@v5
24
+ with:
25
+ python-version: ${{ matrix.python-version }}
26
+ - name: Install (CPU deps + dev)
27
+ run: |
28
+ python -m pip install --upgrade pip
29
+ pip install -e ".[dev]"
30
+ - name: Run tests (CPU; skips GPU, real-model integration, data-dependent golden)
31
+ run: pytest -q -m "not gpu and not integration" --deselect tests/test_topology_golden.py
32
+ - name: Golden synthetic fixtures + bundled example data (locks paper baselines)
33
+ run: pytest -q tests/test_topology_fixtures.py tests/test_example_data.py
34
+
35
+ integration:
36
+ # Heavy: downloads real ~1GB Qwen checkpoints. Run on demand / weekly.
37
+ runs-on: ubuntu-latest
38
+ if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
39
+ defaults:
40
+ run:
41
+ working-directory: actopo
42
+ steps:
43
+ - uses: actions/checkout@v4
44
+ - uses: actions/setup-python@v5
45
+ with:
46
+ python-version: "3.11"
47
+ - uses: actions/cache@v4
48
+ with:
49
+ path: ~/.cache/huggingface
50
+ key: hf-${{ runner.os }}
51
+ - name: Install with extraction extra (CPU torch)
52
+ run: |
53
+ python -m pip install --upgrade pip
54
+ pip install -e ".[dev]"
55
+ pip install torch --index-url https://download.pytorch.org/whl/cpu
56
+ pip install "transformers>=4.40" "datasets>=2.14" tqdm
57
+ - name: Integration tests (real small models)
58
+ run: pytest -q -m integration
@@ -0,0 +1,10 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ build/
6
+ dist/
7
+ .pytest_cache/
8
+ .actopo_cache/
9
+ .coverage
10
+ htmlcov/
actopo-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Xuhao Lin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
actopo-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,126 @@
1
+ Metadata-Version: 2.4
2
+ Name: actopo
3
+ Version: 0.1.0
4
+ Summary: Activation Topology — reproducible persistent-homology measurement of LLM activation manifolds
5
+ Project-URL: Homepage, https://github.com/linxuhao/actopo
6
+ Project-URL: Repository, https://github.com/linxuhao/actopo
7
+ Author: Lin Xuhao
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: betti-numbers,interpretability,llm,persistent-homology,topological-data-analysis
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: >=3.9
16
+ Requires-Dist: numpy>=1.23
17
+ Requires-Dist: persim>=0.3.1
18
+ Requires-Dist: ripser>=0.6.4
19
+ Requires-Dist: scikit-learn>=1.1
20
+ Provides-Extra: all
21
+ Requires-Dist: datasets>=2.14; extra == 'all'
22
+ Requires-Dist: gudhi>=3.8; extra == 'all'
23
+ Requires-Dist: pytest-cov; extra == 'all'
24
+ Requires-Dist: pytest>=7; extra == 'all'
25
+ Requires-Dist: ruff; extra == 'all'
26
+ Requires-Dist: torch>=2.0; extra == 'all'
27
+ Requires-Dist: tqdm>=4.65; extra == 'all'
28
+ Requires-Dist: transformers>=4.40; extra == 'all'
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest-cov; extra == 'dev'
31
+ Requires-Dist: pytest>=7; extra == 'dev'
32
+ Requires-Dist: ruff; extra == 'dev'
33
+ Provides-Extra: extract
34
+ Requires-Dist: datasets>=2.14; extra == 'extract'
35
+ Requires-Dist: torch>=2.0; extra == 'extract'
36
+ Requires-Dist: tqdm>=4.65; extra == 'extract'
37
+ Requires-Dist: transformers>=4.40; extra == 'extract'
38
+ Provides-Extra: gpu-ripser
39
+ Requires-Dist: ripserplusplus>=1.1.2; extra == 'gpu-ripser'
40
+ Provides-Extra: gudhi
41
+ Requires-Dist: gudhi>=3.8; extra == 'gudhi'
42
+ Description-Content-Type: text/markdown
43
+
44
+ # actopo — Activation Topology
45
+
46
+ Reproducible persistent-homology measurement of LLM activation manifolds.
47
+
48
+ `actopo` provides **one** measurement protocol, **one** activation-extraction
49
+ function, and **one** topology implementation, so β₁ / survival / PHI numbers
50
+ stay consistent across every experiment. It is the tooling behind the
51
+ *Mathematical Life* paper, packaged so the experiments can be reproduced.
52
+
53
+ ## Why
54
+
55
+ Re-implementing "compute β₁" in each analysis script lets the measurement
56
+ silently diverge (different ε thresholds, wrong last-token extraction, etc.).
57
+ `actopo` centralises the protocol in a single `ProtocolConfig` and stamps every
58
+ result with the exact config + version + git revision that produced it.
59
+
60
+ ## Install
61
+
62
+ ```bash
63
+ pip install actopo # topology layer (CPU only)
64
+ pip install "actopo[extract]" # + activation extraction (torch/transformers)
65
+ pip install "actopo[all]" # extract + gudhi + dev/test (gpu-ripser is a separate extra)
66
+ ```
67
+
68
+ ## Quickstart
69
+
70
+ The package ships a tiny real example: reasoning activations from
71
+ **Qwen2.5-0.5B (base)** vs **Qwen2.5-0.5B-Instruct** — the paper's core
72
+ base-vs-instruct comparison.
73
+
74
+ ```python
75
+ from actopo import FROZEN_V5, measure
76
+ from actopo.metrics import union_test, center_distance
77
+ from actopo.data import load_example_cloud
78
+
79
+ base = load_example_cloud("base") # (600, 896) reasoning activations
80
+ inst = load_example_cloud("instruct")
81
+
82
+ print(measure(base, FROZEN_V5).beta1) # 12
83
+ print(measure(inst, FROZEN_V5).beta1) # 50 → alignment EXPANDS β₁ here
84
+
85
+ # base and instruct occupy the SAME topological region (β₁ changes within it):
86
+ u = union_test(base, inst, FROZEN_V5, independent_eps=True)
87
+ print(u["verdict"]) # SHARED
88
+ print(center_distance(base, inst)) # 0.62
89
+ ```
90
+
91
+ ### Extract your own activations (needs `actopo[extract]`)
92
+
93
+ ```python
94
+ from actopo import FROZEN_V5, measure
95
+ from actopo.models import load_model
96
+ from actopo.extract import extract_activations
97
+
98
+ model, tok = load_model("Qwen/Qwen2.5-0.5B", FROZEN_V5)
99
+ acts = extract_activations(model, tok, my_prompts, FROZEN_V5) # correct last-token
100
+ print(measure(acts, FROZEN_V5).beta1)
101
+ ```
102
+
103
+ Every result can be stamped + verified against the frozen protocol:
104
+
105
+ ```python
106
+ from actopo import save_result, verify, load_result, FROZEN_V5
107
+ save_result("out.json", {"beta1": 12}, FROZEN_V5)
108
+ assert verify(load_result("out.json"), FROZEN_V5) == [] # produced on-protocol
109
+ ```
110
+
111
+ ## The frozen protocol (`FROZEN_V5`)
112
+
113
+ | Parameter | Value |
114
+ |-----------|-------|
115
+ | β₁ threshold | lifetime > `0.03 × ε_max` |
116
+ | survival/PHI threshold | lifetime > `0.01 × ε_max` |
117
+ | layer | `L/2` |
118
+ | token | last (attention-mask based) |
119
+ | point cloud | 1319 prompts |
120
+ | PH backend | ripser, `maxdim=1`, euclidean |
121
+
122
+ See `actopo.protocol.ProtocolConfig` for the full, documented field set.
123
+
124
+ ## License
125
+
126
+ MIT.
actopo-0.1.0/README.md ADDED
@@ -0,0 +1,83 @@
1
+ # actopo — Activation Topology
2
+
3
+ Reproducible persistent-homology measurement of LLM activation manifolds.
4
+
5
+ `actopo` provides **one** measurement protocol, **one** activation-extraction
6
+ function, and **one** topology implementation, so β₁ / survival / PHI numbers
7
+ stay consistent across every experiment. It is the tooling behind the
8
+ *Mathematical Life* paper, packaged so the experiments can be reproduced.
9
+
10
+ ## Why
11
+
12
+ Re-implementing "compute β₁" in each analysis script lets the measurement
13
+ silently diverge (different ε thresholds, wrong last-token extraction, etc.).
14
+ `actopo` centralises the protocol in a single `ProtocolConfig` and stamps every
15
+ result with the exact config + version + git revision that produced it.
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install actopo # topology layer (CPU only)
21
+ pip install "actopo[extract]" # + activation extraction (torch/transformers)
22
+ pip install "actopo[all]" # extract + gudhi + dev/test (gpu-ripser is a separate extra)
23
+ ```
24
+
25
+ ## Quickstart
26
+
27
+ The package ships a tiny real example: reasoning activations from
28
+ **Qwen2.5-0.5B (base)** vs **Qwen2.5-0.5B-Instruct** — the paper's core
29
+ base-vs-instruct comparison.
30
+
31
+ ```python
32
+ from actopo import FROZEN_V5, measure
33
+ from actopo.metrics import union_test, center_distance
34
+ from actopo.data import load_example_cloud
35
+
36
+ base = load_example_cloud("base") # (600, 896) reasoning activations
37
+ inst = load_example_cloud("instruct")
38
+
39
+ print(measure(base, FROZEN_V5).beta1) # 12
40
+ print(measure(inst, FROZEN_V5).beta1) # 50 → alignment EXPANDS β₁ here
41
+
42
+ # base and instruct occupy the SAME topological region (β₁ changes within it):
43
+ u = union_test(base, inst, FROZEN_V5, independent_eps=True)
44
+ print(u["verdict"]) # SHARED
45
+ print(center_distance(base, inst)) # 0.62
46
+ ```
47
+
48
+ ### Extract your own activations (needs `actopo[extract]`)
49
+
50
+ ```python
51
+ from actopo import FROZEN_V5, measure
52
+ from actopo.models import load_model
53
+ from actopo.extract import extract_activations
54
+
55
+ model, tok = load_model("Qwen/Qwen2.5-0.5B", FROZEN_V5)
56
+ acts = extract_activations(model, tok, my_prompts, FROZEN_V5) # correct last-token
57
+ print(measure(acts, FROZEN_V5).beta1)
58
+ ```
59
+
60
+ Every result can be stamped + verified against the frozen protocol:
61
+
62
+ ```python
63
+ from actopo import save_result, verify, load_result, FROZEN_V5
64
+ save_result("out.json", {"beta1": 12}, FROZEN_V5)
65
+ assert verify(load_result("out.json"), FROZEN_V5) == [] # produced on-protocol
66
+ ```
67
+
68
+ ## The frozen protocol (`FROZEN_V5`)
69
+
70
+ | Parameter | Value |
71
+ |-----------|-------|
72
+ | β₁ threshold | lifetime > `0.03 × ε_max` |
73
+ | survival/PHI threshold | lifetime > `0.01 × ε_max` |
74
+ | layer | `L/2` |
75
+ | token | last (attention-mask based) |
76
+ | point cloud | 1319 prompts |
77
+ | PH backend | ripser, `maxdim=1`, euclidean |
78
+
79
+ See `actopo.protocol.ProtocolConfig` for the full, documented field set.
80
+
81
+ ## License
82
+
83
+ MIT.
@@ -0,0 +1,56 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "actopo"
7
+ version = "0.1.0"
8
+ description = "Activation Topology — reproducible persistent-homology measurement of LLM activation manifolds"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Lin Xuhao" }]
13
+ keywords = ["topological-data-analysis", "persistent-homology", "llm", "interpretability", "betti-numbers"]
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Intended Audience :: Science/Research",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ ]
20
+
21
+ # Core deps are CPU-only so the topology layer + golden tests run anywhere.
22
+ dependencies = [
23
+ "numpy>=1.23",
24
+ "scikit-learn>=1.1",
25
+ "ripser>=0.6.4",
26
+ "persim>=0.3.1",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ # Model loading / activation extraction (the GPU stage).
31
+ extract = ["torch>=2.0", "transformers>=4.40", "datasets>=2.14", "tqdm>=4.65"]
32
+ # Alternative persistent-homology backends.
33
+ gudhi = ["gudhi>=3.8"]
34
+ gpu-ripser = ["ripserplusplus>=1.1.2"]
35
+ dev = ["pytest>=7", "pytest-cov", "ruff"]
36
+ all = ["actopo[extract,gudhi,dev]"]
37
+
38
+ [project.scripts]
39
+ actopo = "actopo.cli:main"
40
+
41
+ [project.urls]
42
+ Homepage = "https://github.com/linxuhao/actopo"
43
+ Repository = "https://github.com/linxuhao/actopo"
44
+
45
+ [tool.hatch.build.targets.wheel]
46
+ packages = ["src/actopo"]
47
+ # Ship the bundled example fixtures (npy/json) inside the wheel.
48
+ artifacts = ["src/actopo/data/*.npy", "src/actopo/data/*.json"]
49
+
50
+ [tool.pytest.ini_options]
51
+ testpaths = ["tests"]
52
+ markers = [
53
+ "golden: regression tests that reproduce published paper numbers",
54
+ "gpu: tests that require a GPU + torch",
55
+ "integration: end-to-end tests that load real (tiny) models; need actopo[extract]",
56
+ ]
@@ -0,0 +1,79 @@
1
+ """actopo — Activation Topology.
2
+
3
+ Reproducible persistent-homology measurement of LLM activation manifolds.
4
+ One protocol object, one extraction function, one topology implementation —
5
+ so β₁ / survival / PHI numbers stay consistent across every experiment.
6
+
7
+ Quickstart (topology only, CPU)::
8
+
9
+ import numpy as np
10
+ from actopo import FROZEN_V5, measure
11
+
12
+ pts = np.load("reasoning.npy")
13
+ r = measure(pts, FROZEN_V5)
14
+ print(r.beta1, r.beta1_raw, r.eps_pred)
15
+ """
16
+
17
+ __version__ = "0.1.0"
18
+
19
+ from .protocol import ProtocolConfig, FROZEN_V5, PROTOCOL_SCHEMA_VERSION
20
+ from .topology import (
21
+ measure,
22
+ PersistenceResult,
23
+ eps_max,
24
+ persistence_diagram,
25
+ lifetimes,
26
+ survival_curve,
27
+ phi,
28
+ )
29
+ from .provenance import stamp, verify, Provenance
30
+ from .io import save_result, load_result
31
+ from .extract import extract_activations, select_tokens, last_token_indices
32
+ from .devices import (
33
+ detect_backend,
34
+ list_gpus,
35
+ balance,
36
+ GPUSpec,
37
+ WorkItem,
38
+ estimate_model_vram_gb,
39
+ )
40
+ from .metrics import center_distance, bottleneck_distance, union_test, union_verdict
41
+ from .cache import ActivationCache, cache_key
42
+
43
+ __all__ = [
44
+ "__version__",
45
+ "ProtocolConfig",
46
+ "FROZEN_V5",
47
+ "PROTOCOL_SCHEMA_VERSION",
48
+ "measure",
49
+ "PersistenceResult",
50
+ "eps_max",
51
+ "persistence_diagram",
52
+ "lifetimes",
53
+ "survival_curve",
54
+ "phi",
55
+ "stamp",
56
+ "verify",
57
+ "Provenance",
58
+ "save_result",
59
+ "load_result",
60
+ # extraction (GPU stage)
61
+ "extract_activations",
62
+ "select_tokens",
63
+ "last_token_indices",
64
+ # hardware / scheduling
65
+ "detect_backend",
66
+ "list_gpus",
67
+ "balance",
68
+ "GPUSpec",
69
+ "WorkItem",
70
+ "estimate_model_vram_gb",
71
+ # five-dimension metrics
72
+ "center_distance",
73
+ "bottleneck_distance",
74
+ "union_test",
75
+ "union_verdict",
76
+ # caching
77
+ "ActivationCache",
78
+ "cache_key",
79
+ ]
@@ -0,0 +1,66 @@
1
+ """Disk cache for extracted activations, keyed by (model, prompts, config).
2
+
3
+ Extraction is the expensive GPU stage; topology is cheap CPU. Caching activations
4
+ lets you re-measure (new ε, new metric) without re-running models, and makes the
5
+ two-stage pipeline skip work that's already done. Keys include the config
6
+ fingerprint, so changing the protocol correctly invalidates the cache.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+ from typing import Callable, Optional, Sequence
12
+ import hashlib
13
+ import json
14
+
15
+ import numpy as np
16
+
17
+ from .protocol import ProtocolConfig, FROZEN_V5
18
+
19
+
20
def _prompts_hash(prompts: Sequence[str]) -> str:
    """Return a 12-hex-character SHA-256 digest identifying a prompt list.

    The hashed byte stream is the decimal prompt count followed by every
    prompt, each preceded by a single NUL byte and encoded as UTF-8 with
    un-encodable characters replaced.

    NOTE: prompts are delimited only by the NUL byte, so two equally long
    lists whose prompts themselves contain NUL bytes can produce the same
    stream (and therefore the same digest).
    """
    count_prefix = str(len(prompts)).encode()
    # One NUL-prefixed chunk per prompt, concatenated in list order.
    body = b"".join(b"\x00" + text.encode("utf-8", "replace") for text in prompts)
    digest = hashlib.sha256(count_prefix + body).hexdigest()
    return digest[:12]
27
+
28
+
29
def cache_key(model_id: str, prompts: Sequence[str], cfg: ProtocolConfig) -> str:
    """Build the filename stem that identifies a (model, prompts, protocol) triple.

    The stem is three parts joined by a double underscore: the model id with
    every "/" turned into "__", the prompt-list digest, and the protocol
    config fingerprint.
    """
    parts = (
        model_id.replace("/", "__"),  # keep the stem free of path separators
        _prompts_hash(prompts),
        cfg.fingerprint(),
    )
    return "__".join(parts)
33
+
34
+
35
class ActivationCache:
    """Tiny on-disk cache of activation arrays, stored as float32 ``.npy`` files.

    Every entry consists of ``<key>.npy`` (the array) and ``<key>.json`` (a small
    metadata sidecar), both written directly inside the cache directory.
    """

    def __init__(self, cache_dir: str | Path = ".actopo_cache"):
        """Open the cache rooted at *cache_dir*, creating the directory if needed."""
        self.dir = Path(cache_dir)
        self.dir.mkdir(parents=True, exist_ok=True)

    def _paths(self, key: str) -> tuple[Path, Path]:
        """Return the ``(array_path, sidecar_path)`` pair for *key*."""
        return self.dir / f"{key}.npy", self.dir / f"{key}.json"

    def get(self, key: str) -> Optional[np.ndarray]:
        """Return the array stored under *key*, or ``None`` when there is no entry."""
        npy, _ = self._paths(key)
        return np.load(npy) if npy.exists() else None

    def put(self, key: str, acts: np.ndarray, meta: Optional[dict] = None) -> None:
        """Store *acts* under *key* as contiguous float32, plus a JSON sidecar.

        The array is written to a temporary file and then renamed into place, so
        an interrupted write never leaves a truncated ``<key>.npy`` behind for
        ``get`` to load. The sidecar records the key, the stored array's shape,
        and any extra *meta* entries (which override the two built-in fields on
        a name clash).
        """
        npy, side = self._paths(key)
        # Convert once: this is what gets saved, and its shape is what the
        # sidecar reports (also makes plain array-likes acceptable).
        arr = np.ascontiguousarray(acts, dtype=np.float32)

        tmp = npy.with_name(npy.name + ".tmp")
        try:
            # Pass a file object: np.save would otherwise append ".npy" to the
            # temporary name.
            with open(tmp, "wb") as f:
                np.save(f, arr)
            tmp.replace(npy)
        finally:
            # No-op after a successful rename; removes the partial file on error.
            tmp.unlink(missing_ok=True)

        with open(side, "w", encoding="utf-8") as f:
            json.dump({"key": key, "shape": list(arr.shape), **(meta or {})}, f, indent=2)

    def get_or_compute(self, model_id: str, prompts: Sequence[str],
                       cfg: ProtocolConfig,
                       compute: Callable[[], np.ndarray]) -> np.ndarray:
        """Return cached activations, or call *compute*, store its result, and return it.

        On a hit the array loaded from disk is returned; on a miss the object
        returned by *compute* is returned unchanged after being stored.
        """
        key = cache_key(model_id, prompts, cfg)
        hit = self.get(key)
        if hit is not None:
            return hit
        acts = compute()
        self.put(key, acts, meta={"model_id": model_id, "n_prompts": len(prompts),
                                  "config_fingerprint": cfg.fingerprint()})
        return acts
@@ -0,0 +1,127 @@
1
+ """Thin CLI over the library — replaces the dozens of ad-hoc analysis scripts.
2
+
3
+ actopo betti a.npy b.npy ... -> β₁ table
4
+ actopo curve a.npy ... -> persistence curves
5
+ actopo bottleneck a.npy b.npy -> shape distance
6
+ actopo union a.npy b.npy -> SEPARATE/SHARED/MIXED (Iron-Law guarded)
7
+ actopo verify result.json -> check a result's provenance vs FROZEN_V5
8
+
9
+ Every measurement command (all except ``verify``) takes the same protocol knobs
10
+ (--eps, --survival-eps, --backend), so there is exactly one place a constant can be set.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import json
16
+ import sys
17
+ from pathlib import Path
18
+
19
+ import numpy as np
20
+
21
+ from .protocol import FROZEN_V5, ProtocolConfig
22
+ from . import topology, metrics, io as aio, provenance
23
+
24
+
25
+ def _cfg_from_args(a) -> ProtocolConfig:
26
+ return FROZEN_V5.with_(
27
+ betti_eps_frac=a.eps,
28
+ survival_eps_frac=a.survival_eps,
29
+ ph_backend=a.backend,
30
+ )
31
+
32
+
33
+ def _add_protocol_args(p):
34
+ p.add_argument("--eps", type=float, default=FROZEN_V5.betti_eps_frac,
35
+ help="β₁ threshold as fraction of ε_max (default 0.03)")
36
+ p.add_argument("--survival-eps", type=float, default=FROZEN_V5.survival_eps_frac,
37
+ help="survival/PHI threshold fraction (default 0.01)")
38
+ p.add_argument("--backend", default=FROZEN_V5.ph_backend,
39
+ choices=["ripser", "gudhi", "ripserpp"])
40
+ p.add_argument("-o", "--output", help="write stamped JSON result here")
41
+
42
+
43
+ def _cmd_betti(a):
44
+ cfg = _cfg_from_args(a)
45
+ rows = []
46
+ for f in a.files:
47
+ r = topology.measure(np.load(f), cfg)
48
+ rows.append({"file": f, **r.to_dict()})
49
+ print(f"{Path(f).name:<48s} β₁={r.beta1:<4d} raw={r.beta1_raw:<5d} "
50
+ f"surv={r.survival_rate:>5.1f}% ε_pred={r.eps_pred}")
51
+ if a.output:
52
+ aio.save_result(a.output, {"betti": rows}, cfg)
53
+ return 0
54
+
55
+
56
+ def _cmd_curve(a):
57
+ cfg = _cfg_from_args(a)
58
+ out = [{"file": f, **topology.survival_curve(np.load(f), cfg)} for f in a.files]
59
+ if a.output:
60
+ aio.save_result(a.output, {"curves": out}, cfg)
61
+ for r in out:
62
+ print(f"{Path(r['file']).name:<48s} raw={r['beta1_raw']:<5d} ε_max={r['eps_max']}")
63
+ return 0
64
+
65
+
66
+ def _cmd_bottleneck(a):
67
+ cfg = _cfg_from_args(a)
68
+ res = metrics.bottleneck_distance(np.load(a.a), np.load(a.b), cfg)
69
+ print(json.dumps(res, indent=2))
70
+ if a.output:
71
+ aio.save_result(a.output, res, cfg)
72
+ return 0
73
+
74
+
75
+ def _cmd_union(a):
76
+ cfg = _cfg_from_args(a)
77
+ res = metrics.union_test(np.load(a.a), np.load(a.b), cfg,
78
+ independent_eps=a.independent_eps,
79
+ same_basis=not a.cross_basis)
80
+ print(json.dumps(res, indent=2))
81
+ if a.output:
82
+ aio.save_result(a.output, res, cfg)
83
+ return 0
84
+
85
+
86
+ def _cmd_verify(a):
87
+ doc = aio.load_result(a.result)
88
+ warns = provenance.verify(doc, FROZEN_V5)
89
+ if not warns:
90
+ print(f"OK — {a.result} produced with FROZEN_V5")
91
+ return 0
92
+ print(f"WARNINGS for {a.result}:")
93
+ for w in warns:
94
+ print(f" - {w}")
95
+ return 1
96
+
97
+
98
+ def main(argv=None) -> int:
99
+ p = argparse.ArgumentParser(prog="actopo", description="Activation Topology toolkit")
100
+ sub = p.add_subparsers(dest="cmd", required=True)
101
+
102
+ pb = sub.add_parser("betti", help="β₁ table for point clouds")
103
+ pb.add_argument("files", nargs="+"); _add_protocol_args(pb); pb.set_defaults(fn=_cmd_betti)
104
+
105
+ pc = sub.add_parser("curve", help="persistence/survival curves")
106
+ pc.add_argument("files", nargs="+"); _add_protocol_args(pc); pc.set_defaults(fn=_cmd_curve)
107
+
108
+ pbn = sub.add_parser("bottleneck", help="barcode shape distance")
109
+ pbn.add_argument("a"); pbn.add_argument("b"); _add_protocol_args(pbn)
110
+ pbn.set_defaults(fn=_cmd_bottleneck)
111
+
112
+ pu = sub.add_parser("union", help="region-overlap test (Iron-Law guarded)")
113
+ pu.add_argument("a"); pu.add_argument("b")
114
+ pu.add_argument("--independent-eps", action="store_true")
115
+ pu.add_argument("--cross-basis", action="store_true",
116
+ help="acknowledge clouds are not same-basis (warns)")
117
+ _add_protocol_args(pu); pu.set_defaults(fn=_cmd_union)
118
+
119
+ pv = sub.add_parser("verify", help="check a result's provenance vs FROZEN_V5")
120
+ pv.add_argument("result"); pv.set_defaults(fn=_cmd_verify)
121
+
122
+ args = p.parse_args(argv)
123
+ return args.fn(args)
124
+
125
+
126
+ if __name__ == "__main__":
127
+ sys.exit(main())
@@ -0,0 +1,41 @@
1
+ """Bundled mini example data — doubles as the documented example and as
2
+ deterministic integration/regression fixtures.
3
+
4
+ Contents (shipped in the wheel) — a base vs instruct duo of ONE model, the
5
+ paper's core comparison (does alignment reshape reasoning topology?):
6
+ - example_prompts.json : 40 reasoning + 40 control prompts (subset of paper set)
7
+ - example_base.npy : 600 reasoning activations, Qwen2.5-0.5B (base), L/2, fp16
8
+ - example_instruct.npy : 600 reasoning activations, Qwen2.5-0.5B-Instruct, L/2, fp16
9
+
10
+ These are small subsamples — illustrative and reproducible, NOT the full
11
+ 1319-point protocol clouds. Use them to learn the API and to smoke-test a build.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from importlib import resources
17
+ from typing import Dict, List
18
+
19
+ import numpy as np
20
+
21
+ _PKG = "actopo.data"
22
+
23
+
24
def load_example_prompts() -> Dict[str, List[str]]:
    """Return {"reasoning": [...], "control": [...]} example prompt lists.

    The bundled ``example_prompts.json`` resource is decoded explicitly as
    UTF-8 rather than with the platform's locale encoding, so non-ASCII prompt
    text loads identically on every system.
    """
    resource = resources.files(_PKG).joinpath("example_prompts.json")
    return json.loads(resource.read_text(encoding="utf-8"))
28
+
29
+
30
def load_example_cloud(kind: str = "base") -> np.ndarray:
    """Return one of the bundled example reasoning clouds, converted to float32.

    The package documentation describes each cloud as n=600 points of dim=896.

    kind: "base" (Qwen2.5-0.5B) | "instruct" (Qwen2.5-0.5B-Instruct).

    Raises ValueError for any other *kind*.
    """
    allowed = ("base", "instruct")
    if kind not in allowed:
        raise ValueError(f"kind must be 'base' or 'instruct', got {kind!r}")
    resource = resources.files(_PKG).joinpath(f"example_{kind}.npy")
    with resource.open("rb") as stream:
        cloud = np.load(stream)
        return cloud.astype(np.float32)
39
+
40
+
41
+ __all__ = ["load_example_prompts", "load_example_cloud"]