gutenberg-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ # Claude Code
2
+ .claude/
3
+ .claude-handoff.md
4
+ CLAUDE_TODO.md
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.pyc
9
+ *.pyo
10
+ *.egg-info/
11
+ .eggs/
12
+ dist/
13
+ build/
14
+ *.egg
15
+
16
+ # Virtual environments
17
+ .venv/
18
+ venv/
19
+
20
+ # Environment
21
+ .env
22
+
23
+ # IDE
24
+ .idea/
25
+ .vscode/
26
+ *.swp
27
+ *.swo
28
+
29
+ # Node
30
+ node_modules/
31
+ .next/
32
+
33
+ # OS
34
+ .DS_Store
35
+
36
+ # Testing
37
+ .pytest_cache/
38
+ .coverage
39
+ htmlcov/
40
+
41
+ # Dev database seed (contains prod data, do NOT commit)
42
+ scripts/dev-seed.sql.gz
43
+
44
+ # Re-allow committed test fixtures (root .gitignore blocks *.parquet globally)
45
+ !tests/fixtures/**/*.parquet
46
+ !gutenberg-sdk/examples/**/*.parquet
47
+
48
+ # Local dev env-state backups (worktree-dev.sh writes these — never commit)
49
+ .env.*-backup
50
+ .env.prePlanB
51
+ # Ad-hoc backups (cp .env .env.bak-<date>, .env.<thing>-snapshot-<date>, etc.)
52
+ .env.bak*
53
+ .env.*-snapshot*
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Gutenberg PBC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,116 @@
1
+ Metadata-Version: 2.4
2
+ Name: gutenberg-sdk
3
+ Version: 0.1.0
4
+ Summary: Python SDK for the Gutenberg SAE Activation API
5
+ Project-URL: Homepage, https://gutenberg.ai
6
+ Project-URL: Console, https://console.gutenberg.ai
7
+ Project-URL: Documentation, https://console.gutenberg.ai/d/docs
8
+ Project-URL: Repository, https://github.com/gutenbergpbc/code
9
+ Author: Gutenberg PBC
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: activations,interpretability,llm,mechanistic-interpretability,observability,sae
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Typing :: Typed
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: httpx>=0.28
27
+ Requires-Dist: pydantic>=2.0
28
+ Requires-Dist: tqdm>=4.66
29
+ Provides-Extra: pandas
30
+ Requires-Dist: pandas>=2.0; extra == 'pandas'
31
+ Requires-Dist: pyarrow>=14; extra == 'pandas'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # gutenberg-sdk
35
+
36
+ Python SDK for the **Gutenberg** SAE activation API — interpretability-based
37
+ observability for language models. Upload text, read it through a sparse
38
+ autoencoder (SAE) feature dictionary, and find the features that separate any
39
+ two classes of documents.
40
+
41
+ - **Docs:** https://console.gutenberg.ai/d/docs
42
+ - **Console:** https://console.gutenberg.ai
43
+ - **Get an API key:** https://console.gutenberg.ai/d/keys
44
+
45
+ ## Install
46
+
47
+ ```bash
48
+ uv add gutenberg-sdk # or: pip install gutenberg-sdk
49
+ ```
50
+
51
+ The distribution is `gutenberg-sdk`; the import is `gutenberg`:
52
+
53
+ ```python
54
+ from gutenberg import gutenberg
55
+ ```
56
+
57
+ Optional `pandas` extra (for `load_activations_df` and parquet helpers):
58
+
59
+ ```bash
60
+ uv add "gutenberg-sdk[pandas]"
61
+ ```
62
+
63
+ ## Quickstart
64
+
65
+ ```python
66
+ from gutenberg import gutenberg
67
+
68
+ client = gutenberg(api_key="gtn_...") # or set GUTENBERG_API_KEY
69
+
70
+ # 1. upload a parquet dataset (text + a binary target column)
71
+ dataset = client.datasets.upload("examples/simple_binary_features_extraction_100.parquet")
72
+
73
+ # 2. launch hosted SAE feature extraction
74
+ job = client.jobs.create(
75
+ dataset_id=dataset.dataset_id,
76
+ model_id="google/gemma-3-27b-it",
77
+ sae_id="layer_31_width_262k_l0_medium",
78
+ )
79
+ job = client.jobs.wait(job.job_id)
80
+
81
+ # 3. score every feature against the target with AUROC
82
+ exp = client.experiments.create(
83
+ job_id=job.job_id,
84
+ target_column="is_ai",
85
+ target_column_type="binary",
86
+ positive_value="1",
87
+ scoring_method="auroc",
88
+ )
89
+ exp = client.experiments.wait(exp.experiment_id)
90
+
91
+ # 4. read back ranked features and token-level examples
92
+ for feature in client.experiments.features(exp.experiment_id)[:10]:
93
+ print(feature.rank, feature.feature_id, feature.score)
94
+ ```
95
+
96
+ The full runnable script lives in
97
+ [`examples/simple_binary_features_extraction.py`](examples/simple_binary_features_extraction.py),
98
+ with a companion 100-row parquet. On production the whole flow runs in a couple
99
+ of minutes. See [`docs/getting-started.md`](docs/getting-started.md) for the
100
+ walkthrough, SAE selection guidance, and how token examples are served.
101
+
102
+ > The bundled example is a **curated showcase**, not a benchmark — its 50 AI
103
+ > passages were picked to exhibit a handful of recognizable AI-writing features,
104
+ > so those features separate the two classes near-perfectly there. Run it on
105
+ > your own data to see a realistic ranking.
106
+
107
+ ## API surface
108
+
109
+ A single `gutenberg(...)` client with namespaced resources: `datasets`,
110
+ `jobs`, `experiments`, `aggregations`, `autointerp`, `meta_autointerp`,
111
+ `subsets`, plus the sync helpers `activations()`, `interpret()`, `models()`,
112
+ and `saes()`.
113
+
114
+ ## License
115
+
116
+ MIT
@@ -0,0 +1,83 @@
1
+ # gutenberg-sdk
2
+
3
+ Python SDK for the **Gutenberg** SAE activation API — interpretability-based
4
+ observability for language models. Upload text, read it through a sparse
5
+ autoencoder (SAE) feature dictionary, and find the features that separate any
6
+ two classes of documents.
7
+
8
+ - **Docs:** https://console.gutenberg.ai/d/docs
9
+ - **Console:** https://console.gutenberg.ai
10
+ - **Get an API key:** https://console.gutenberg.ai/d/keys
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ uv add gutenberg-sdk # or: pip install gutenberg-sdk
16
+ ```
17
+
18
+ The distribution is `gutenberg-sdk`; the import is `gutenberg`:
19
+
20
+ ```python
21
+ from gutenberg import gutenberg
22
+ ```
23
+
24
+ Optional `pandas` extra (for `load_activations_df` and parquet helpers):
25
+
26
+ ```bash
27
+ uv add "gutenberg-sdk[pandas]"
28
+ ```
29
+
30
+ ## Quickstart
31
+
32
+ ```python
33
+ from gutenberg import gutenberg
34
+
35
+ client = gutenberg(api_key="gtn_...") # or set GUTENBERG_API_KEY
36
+
37
+ # 1. upload a parquet dataset (text + a binary target column)
38
+ dataset = client.datasets.upload("examples/simple_binary_features_extraction_100.parquet")
39
+
40
+ # 2. launch hosted SAE feature extraction
41
+ job = client.jobs.create(
42
+ dataset_id=dataset.dataset_id,
43
+ model_id="google/gemma-3-27b-it",
44
+ sae_id="layer_31_width_262k_l0_medium",
45
+ )
46
+ job = client.jobs.wait(job.job_id)
47
+
48
+ # 3. score every feature against the target with AUROC
49
+ exp = client.experiments.create(
50
+ job_id=job.job_id,
51
+ target_column="is_ai",
52
+ target_column_type="binary",
53
+ positive_value="1",
54
+ scoring_method="auroc",
55
+ )
56
+ exp = client.experiments.wait(exp.experiment_id)
57
+
58
+ # 4. read back ranked features and token-level examples
59
+ for feature in client.experiments.features(exp.experiment_id)[:10]:
60
+ print(feature.rank, feature.feature_id, feature.score)
61
+ ```
62
+
63
+ The full runnable script lives in
64
+ [`examples/simple_binary_features_extraction.py`](examples/simple_binary_features_extraction.py),
65
+ with a companion 100-row parquet. On production the whole flow runs in a couple
66
+ of minutes. See [`docs/getting-started.md`](docs/getting-started.md) for the
67
+ walkthrough, SAE selection guidance, and how token examples are served.
68
+
69
+ > The bundled example is a **curated showcase**, not a benchmark — its 50 AI
70
+ > passages were picked to exhibit a handful of recognizable AI-writing features,
71
+ > so those features separate the two classes near-perfectly there. Run it on
72
+ > your own data to see a realistic ranking.
73
+
74
+ ## API surface
75
+
76
+ A single `gutenberg(...)` client with namespaced resources: `datasets`,
77
+ `jobs`, `experiments`, `aggregations`, `autointerp`, `meta_autointerp`,
78
+ `subsets`, plus the sync helpers `activations()`, `interpret()`, `models()`,
79
+ and `saes()`.
80
+
81
+ ## License
82
+
83
+ MIT
@@ -0,0 +1,106 @@
1
+ # Getting Started
2
+
3
+ First, obtain an API key at https://console.gutenberg.ai/d/keys.
4
+
5
+ Once the package is published, install it with:
6
+
7
+ ```bash
8
+ uv add gutenberg-sdk # or: pip install gutenberg-sdk
9
+ ```
10
+
11
+ > Until the first PyPI release lands, install from source (from the repo root):
12
+ > `uv pip install ./gutenberg-sdk` (or `pip install -e gutenberg-sdk`). The
13
+ > distribution is `gutenberg-sdk`; the import is `from gutenberg import gutenberg`.
14
+
15
+ ## Running your first experiment
16
+
17
+ Open [simple_binary_features_extraction.py](../examples/simple_binary_features_extraction.py), which runs a complete binary feature-extraction experiment with the Gutenberg SDK.
18
+
19
+ This example mirrors the public Pangram EditLens "human vs AI" experiment, using
20
+ the companion 100-row [parquet](../examples/simple_binary_features_extraction_100.parquet): 50
21
+ human-written rows and 50 AI rows (`ai_generated` + `ai_edited`). The target column is `is_ai`.
22
+
23
+ > The same curated 100-row dataset is also published as a HuggingFace dataset,
24
+ > `gutenbergpbc/pangram-editlens-100` (private for now, pending the upstream
25
+ > Pangram EditLens public-use grant).
26
+
27
+ The script uses the public SDK surface to:
28
+ 1. upload a parquet dataset
29
+ 2. launch hosted SAE feature extraction
30
+ 3. create a binary AUROC experiment over the extracted features
31
+ 4. read back ranked features and token examples
32
+
33
+ This quickstart dataset is intentionally tiny (100 rows), so the whole flow runs
34
+ in a couple of minutes and feature browsing is live and fast the moment the
35
+ experiment completes.
36
+
37
+ > Note: this is a **curated showcase**, not a benchmark — the 50 AI passages were
38
+ > picked to exhibit a handful of recognizable AI-writing features, so those
39
+ > features separate the two classes near-perfectly here. On a representative
40
+ > human-vs-AI sample the same features score lower and other features lead; run it
41
+ > on your own data to see a realistic ranking.
42
+
43
+ ### Choosing an SAE
44
+
45
+ A `(model, SAE)` pair determines which feature dictionary your text is read
46
+ through. This quickstart pins the **layer-31** SAE
47
+ (`layer_31_width_262k_l0_medium`) — the one the Pangram EditLens detector was
48
+ validated on (AUROC ~0.853), so the human-vs-AI result reproduces here. We
49
+ recommend layer-31 when replicating Pangram-style detection.
50
+
51
+ For your own datasets the **platform default is layer-41**
52
+ (`layer_41_width_262k_l0_big`) — omit `sae_id` and the server resolves it. To
53
+ choose explicitly, pass `sae_id=` to `client.jobs.create(...)` (the example
54
+ exposes this as `--sae-id`), and list what's available for a model with:
55
+
56
+ ```python
57
+ client.saes("google/gemma-3-27b-it") # -> [{sae_id, is_default, ...}, ...]
58
+ ```
59
+
60
+ After running the script, you should see something like the following in the console:
61
+
62
+ ```
63
+ Console URL: https://console.gutenberg.ai/d/datasets/7d8a0ee4-d73b-4e2f-8ab8-bec3e1c5d7da/exp/24d3590c-e0b6-486e-b0cf-8050fd51f383
64
+ ```
65
+
66
+ (The URL above is the public example experiment — feel free to open it to follow
67
+ along. Its top features are recognizable AI tells: "delve", "it's not X, it's Y",
68
+ overformatting, formulaic conclusions, and dense formal description.)
69
+
70
+ ## Viewing features
71
+
72
+ The features view lists all features, ordered by the absolute value of `score` as defined in the script above (`auroc` here). Click the question mark to view keyboard shortcuts; you can also view experiment metadata or search for a specific feature or explanation.
73
+
74
+ ### How token examples are served
75
+
76
+ Examples are **cache-first with an automatic on-the-fly fallback**, so a feature's
77
+ activating windows always come back without you running any manual aggregation
78
+ step. The response's `pick_semantics` field tells you which path served it:
79
+
80
+ - `v2` — the calibrated pre-built cache (the `max` / `p95` / `stratified` / `random`
81
+ classes). This is the default for the features you'll browse first.
82
+ - `fallback_topn` — a top-by-activation rebuild, served live for features not yet
83
+ in the cache.
84
+
85
+ For a small dataset like this quickstart, every feature is served fast and
86
+ calibrated. For large datasets, the complete calibrated cache is built in the
87
+ background after extraction (so the experiment viewer and autointerp come up
88
+ immediately); until it finishes, less-common features are served via the fast
89
+ `fallback_topn` path.
90
+
91
+ The example-selection strategy is **`max`** by default — the feature's strongest
92
+ activating examples (exact top-K by activation value). The full vocabulary is
93
+ `[max, p95, stratified, random]`: `p95` is a random sample of firings at/above the
94
+ 95th-percentile activation, `stratified` anchors across the activation quartiles,
95
+ and `random` samples uniformly. (`top_k` / `strongest` are accepted as deprecated
96
+ aliases for `p95`.)
97
+
98
+ ![feature_view_1.png](./feature_view_1.png)
99
+
100
+ The detailed feature view shows the score on various subsets of activating examples and the auto-generated explanations ("autointerp") for the feature, and lets you add your own custom label. If you have any meta-autointerp runs, they will also be linked here.
101
+
102
+ ![feature_view_2.png](./feature_view_2.png)
103
+
104
+ ## Meta-autointerp runs
105
+
106
+ TODO: Add UI screenshots
@@ -0,0 +1,212 @@
1
+ """Run a complete binary feature-extraction experiment with the Gutenberg SDK.
2
+
3
+ This example mirrors the public Pangram EditLens "human vs AI" experiment, but
4
+ uses the companion 100-row parquet in this directory: 50 human-written rows and
5
+ 50 AI rows (ai_generated + ai_edited). The target column is is_ai.
6
+
7
+ The script uses only the public SDK surface:
8
+ 1. upload a parquet dataset
9
+ 2. launch hosted SAE feature extraction (layer-31 SAE — the one the Pangram
10
+ detector was validated on; the platform default is layer-41)
11
+ 3. create a binary AUROC experiment over the extracted features
12
+ 4. read back ranked features and token examples
13
+
14
+ Running it on production starts hosted compute and may also trigger any
15
+ server-side completion hooks configured for the environment.
16
+
17
+ Usage:
18
+ export GUTENBERG_API_KEY=gtn_...
19
+ python examples/simple_binary_features_extraction.py
20
+
21
+ For local repo use, the script also loads GUTENBERG_API_KEY from the nearest
22
+ .env file if it is not already set in the environment.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import os
29
+ import sys
30
+ from pathlib import Path
31
+
32
+ from gutenberg import gutenberg
33
+
34
+
35
# The companion 100-row parquet sits in the same directory as this script.
DEFAULT_DATASET_PATH = (
    Path(__file__).parent / "simple_binary_features_extraction_100.parquet"
)
DEFAULT_MODEL_ID = "google/gemma-3-27b-it"
# The quickstart pins the layer-31 SAE because it is the one the Pangram
# EditLens detector was validated on (AUROC ~0.853), which keeps the
# human-vs-AI result reproducible. New datasets default to the layer-41 SAE
# (layer_41_width_262k_l0_big) on the platform; use --sae-id to pick another,
# or list the options with client.saes("google/gemma-3-27b-it").
DEFAULT_SAE_ID = "layer_31_width_262k_l0_medium"
DEFAULT_BASE_URL = "https://api.gutenberg.ai"
46
+
47
+
48
def log(message: str = "") -> None:
    """Write ``message`` plus a newline to stdout and flush immediately.

    Flushing on every call keeps progress lines visible while the script is
    blocked waiting on long-running hosted jobs.
    """
    print(message, file=sys.stdout, flush=True)
50
+
51
+
52
def _strip_inline_comment(value: str) -> str:
    """Return a .env value with any inline comment and enclosing quotes removed.

    A ``#`` starts a comment only when it sits outside quotes and is either
    the first character or preceded by whitespace, so ``abc#def`` and
    ``"a # b"`` are kept intact. After the comment is cut off, surrounding
    whitespace is trimmed and exactly one matching pair of enclosing quotes
    is removed.

    Args:
        value: The raw text to the right of ``=`` on a .env line.

    Returns:
        The cleaned value. Quote characters that are part of the value itself
        (for example the apostrophe in ``"p@ss'"``) are preserved.
    """
    quote: str | None = None
    end = len(value)
    for i, ch in enumerate(value):
        if ch in ("'", '"'):
            if quote == ch:
                quote = None  # closing the quote that is currently open
            elif quote is None:
                quote = ch  # opening a new quoted section
            # A different quote char inside an open quote is ordinary text.
        elif ch == "#" and quote is None and (i == 0 or value[i - 1].isspace()):
            end = i
            break

    cleaned = value[:end].strip()
    # Unwrap a single matching pair only. Chained str.strip() calls would eat
    # every leading/trailing quote character, corrupting values that begin or
    # end with a literal quote.
    if len(cleaned) >= 2 and cleaned[0] == cleaned[-1] and cleaned[0] in ("'", '"'):
        cleaned = cleaned[1:-1]
    return cleaned
67
+
68
+
69
def _load_nearest_dotenv(start: Path) -> None:
    """Load missing env vars from the nearest .env without extra dependencies.

    Searches ``start`` and then each of its parents, and stops at the first
    directory that contains a regular ``.env`` file. Only that one file is
    read. Variables already present in ``os.environ`` are never overwritten.

    Blank lines, ``#`` comment lines, lines without ``=``, and lines with an
    empty key are skipped. A leading ``export `` on the key is ignored so
    shell-style files work too.

    Args:
        start: Directory where the upward search begins.
    """
    for directory in (start, *start.parents):
        dotenv = directory / ".env"
        # is_file() rather than exists(): a directory named ".env" must not
        # be opened as text.
        if not dotenv.is_file():
            continue
        # Explicit UTF-8 so parsing does not depend on the platform locale.
        for raw in dotenv.read_text(encoding="utf-8").splitlines():
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            key, sep, value = line.partition("=")
            key = key.strip()
            if key.startswith("export "):
                key = key[len("export "):].strip()
            # No "=" at all, or nothing usable before it: not an assignment.
            # An empty name would also make os.environ raise.
            if not sep or not key:
                continue
            os.environ.setdefault(key, _strip_inline_comment(value))
        return  # only the nearest .env is honoured
82
+
83
+
84
def _die_if_failed(kind: str, obj) -> None:
    """Raise ``RuntimeError`` when ``obj.status`` is ``"failed"``.

    Args:
        kind: Label used as the message prefix (e.g. "job").
        obj: Any object with a ``status`` attribute and, optionally, an
            ``error`` attribute describing the failure.

    Raises:
        RuntimeError: If the status is "failed"; a missing or empty ``error``
            is reported as "unknown error".
    """
    if obj.status != "failed":
        return
    error = getattr(obj, "error", None)
    if not error:
        error = "unknown error"
    raise RuntimeError(f"{kind} {obj.status}: {error}")
88
+
89
+
90
def parse_args() -> argparse.Namespace:
    """Build the example's command-line parser and parse ``sys.argv``.

    The ``--base-url`` default is taken from the ``GUTENBERG_API_URL``
    environment variable at call time, falling back to ``DEFAULT_BASE_URL``.

    Returns:
        The parsed namespace with ``dataset_path``, ``base_url``, ``model_id``,
        ``sae_id``, ``top_k``, ``num_workers``, ``timeout_seconds``,
        ``poll_interval`` and ``feature_limit``.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Upload the 100-row EditLens binary sample, run hosted SAE extraction, "
            "then score features against is_ai with AUROC."
        )
    )
    cli.add_argument(
        "--dataset-path",
        type=Path,
        default=DEFAULT_DATASET_PATH,
        help="Parquet file to upload.",
    )
    env_base_url = os.environ.get("GUTENBERG_API_URL", DEFAULT_BASE_URL)
    cli.add_argument(
        "--base-url",
        default=env_base_url,
        help="Gutenberg API base URL.",
    )

    # Plain string options, registered in the original order.
    for flag, fallback in (
        ("--model-id", DEFAULT_MODEL_ID),
        ("--sae-id", DEFAULT_SAE_ID),
    ):
        cli.add_argument(flag, default=fallback)

    # Numeric options: (flag, converter, default).
    for flag, convert, fallback in (
        ("--top-k", int, 100),
        ("--num-workers", int, 1),
        ("--timeout-seconds", int, 21_600),
        ("--poll-interval", float, 15.0),
        ("--feature-limit", int, 10),
    ):
        cli.add_argument(flag, type=convert, default=fallback)

    return cli.parse_args()
116
+
117
+
118
def main() -> int:
    """Run the end-to-end example: upload, extract, score, and print results.

    Steps, in order:
      1. Load ``GUTENBERG_API_KEY`` (and friends) from the nearest ``.env``.
      2. Upload the parquet dataset and set its name/description.
      3. Start an SAE extraction job and wait for it.
      4. Create a binary AUROC experiment on ``is_ai`` and wait for it.
      5. Print the top-ranked features and a few examples for the top one.

    Returns:
        0 on success.

    Raises:
        RuntimeError: If no API key is configured, if the job or experiment
            ends with status "failed", or if the experiment yields no features.
        FileNotFoundError: If ``--dataset-path`` does not exist.
    """
    # Load .env before parsing args: parse_args() reads GUTENBERG_API_URL from
    # the environment to build the --base-url default.
    _load_nearest_dotenv(Path(__file__).resolve().parent)
    args = parse_args()

    api_key = os.environ.get("GUTENBERG_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set GUTENBERG_API_KEY or put it in a parent .env file."
        )
    if not args.dataset_path.exists():
        raise FileNotFoundError(args.dataset_path)

    # NOTE(review): timeout=300 is presumably a per-request timeout in seconds;
    # confirm against the client constructor.
    with gutenberg(api_key=api_key, base_url=args.base_url, timeout=300) as client:
        log(f"Uploading dataset: {args.dataset_path}")
        dataset = client.datasets.upload(args.dataset_path)
        # Give the uploaded dataset a human-readable name and description.
        dataset = client.datasets.update(
            dataset.dataset_id,
            name="SDK example: EditLens human vs AI (100 rows)",
            description=(
                "100-row Pangram EditLens demo: 50 human-written and 50 AI rows "
                "(ai_generated + ai_edited). Target is is_ai."
            ),
        )
        log(f"Dataset: {dataset.dataset_id}")
        log("Columns: " + ", ".join(client.datasets.columns(dataset.dataset_id)))

        log(f"Starting extraction: {args.model_id} / {args.sae_id}")
        job = client.jobs.create(
            dataset_id=dataset.dataset_id,
            model_id=args.model_id,
            sae_id=args.sae_id,
            top_k=args.top_k,
            num_workers=args.num_workers,
            timeout_seconds=args.timeout_seconds,
        )
        log(f"Job: {job.job_id} ({job.status})")
        # Poll until the job stops; wait() returning does not by itself mean
        # success, so the status is checked explicitly right after.
        job = client.jobs.wait(job.job_id, poll_interval=args.poll_interval)
        _die_if_failed("job", job)
        log(f"Job completed: {job.job_id}")

        log("Creating binary AUROC experiment over target column is_ai")
        exp = client.experiments.create(
            job_id=job.job_id,
            name="SDK example: human vs AI",
            target_column="is_ai",
            target_column_type="binary",
            positive_value="1",
            scoring_method="auroc",
        )
        log(f"Experiment: {exp.experiment_id} ({exp.status})")
        exp = client.experiments.wait(
            exp.experiment_id,
            poll_interval=args.poll_interval,
        )
        _die_if_failed("experiment", exp)
        log(f"Experiment completed: {exp.experiment_id}")

        features = client.experiments.features(exp.experiment_id)
        if not features:
            raise RuntimeError("Experiment completed but returned no features.")

        log("\nTop features")
        # NOTE(review): the format specs assume score and p_value are numeric;
        # a None p_value would raise here. Confirm against the SDK model.
        for feature in features[: args.feature_limit]:
            log(
                f"#{feature.rank:>3} feature={feature.feature_id:<8} "
                f"score={feature.score:.4f} p={feature.p_value:.3g} "
                f"n={feature.n_samples}"
            )

        # Show a few examples for the first feature in the returned list.
        top = features[0]
        examples = client.experiments.examples(exp.experiment_id, top.feature_id)
        log(f"\nExamples for top feature {top.feature_id}: {len(examples)}")
        for example in examples[:3]:
            # Guard against an empty values list, where max() would raise.
            max_value = max(example.values) if example.values else 0.0
            log(
                f"- sample_id={example.sample_id} tokens={len(example.tokens)} "
                f"max_activation={max_value:.4f}"
            )

        # The console link is hard-coded to console.gutenberg.ai, so it is only
        # printed when talking to the default production API.
        if args.base_url.rstrip("/") == DEFAULT_BASE_URL:
            log(
                "\nConsole URL: "
                f"https://console.gutenberg.ai/d/datasets/"
                f"{dataset.dataset_id}/exp/{exp.experiment_id}"
            )

    return 0
205
+
206
+
207
if __name__ == "__main__":
    # Exit with main()'s return code; on Ctrl-C print a short notice and exit
    # with 130 instead of dumping a traceback.
    try:
        exit_code = main()
    except KeyboardInterrupt:
        print("\nInterrupted.", file=sys.stderr)
        exit_code = 130
    raise SystemExit(exit_code)
@@ -0,0 +1,10 @@
1
+ from importlib.metadata import PackageNotFoundError, version
2
+
3
+ from gutenberg.client import gutenberg
4
+
5
# Start from the placeholder used for editable / source checkouts that have no
# installed distribution metadata, then replace it with the real version when
# the "gutenberg-sdk" distribution can be found.
__version__ = "0.0.0+unknown"
try:
    __version__ = version("gutenberg-sdk")
except PackageNotFoundError:
    pass  # keep the placeholder

__all__ = ["gutenberg", "__version__"]