codebook-lab 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import shutil
5
+
6
+
7
+ def _examples_root() -> Path:
8
+ """Return the directory that stores bundled example tasks."""
9
+ return Path(__file__).resolve().parent / "tasks"
10
+
11
+
12
def list_example_tasks() -> list[str]:
    """Enumerate the example tasks bundled with the package.

    Returns:
        Sorted names of the sub-directories found in the bundled ``tasks``
        folder, or an empty list when that folder does not exist.
    """
    tasks_root = _examples_root()
    if tasks_root.exists():
        # Only directories count as tasks; loose files are skipped.
        names = [entry.name for entry in tasks_root.iterdir() if entry.is_dir()]
        names.sort()
        return names
    return []
23
+
24
+
25
def get_example_task_dir(task_name: str) -> Path:
    """Return the filesystem path to a bundled example task.

    Args:
        task_name: Bundled task directory name such as ``"policy-sentiment"``.
            It must be a relative name that stays inside the bundled
            ``tasks`` folder.

    Returns:
        Filesystem path to the bundled example task directory.

    Raises:
        FileNotFoundError: If ``task_name`` is empty, anchored (absolute or
            drive-qualified), contains ``..``, or does not name an existing
            directory under the bundled ``tasks`` folder.
    """
    root = _examples_root()
    relative = Path(task_name)
    # "" and "." collapse to the root itself, while anchored or ".." names
    # point outside of it; none of those identify a bundled task.
    stays_inside_root = (
        bool(relative.parts)
        and not relative.anchor
        and ".." not in relative.parts
    )
    task_dir = root / relative
    # is_dir() rather than exists(): a stray file is not a task directory.
    if not (stays_inside_root and task_dir.is_dir()):
        available = ", ".join(list_example_tasks())
        raise FileNotFoundError(
            f"Bundled example task '{task_name}' was not found. "
            f"Available example tasks: {available or 'none'}."
        )
    return task_dir
42
+
43
+
44
def get_example_task_files(task_name: str) -> dict[str, Path]:
    """Collect the conventional file locations of a bundled example task.

    Args:
        task_name: Bundled task directory name.

    Returns:
        Mapping with three entries: ``task_dir`` (the task folder),
        ``codebook_path`` (``codebook.json`` inside it) and
        ``ground_truth_csv`` (``ground-truth.csv`` inside it). The two file
        paths are built by name only and are not checked for existence.
    """
    base = get_example_task_dir(task_name)
    files: dict[str, Path] = {"task_dir": base}
    files["codebook_path"] = base / "codebook.json"
    files["ground_truth_csv"] = base / "ground-truth.csv"
    return files
60
+
61
+
62
def copy_example_task(task_name: str, destination: str | Path, overwrite: bool = False) -> Path:
    """Copy a bundled example task to a user-controlled directory.

    Args:
        task_name: Bundled task directory name.
        destination: Directory where the example task folder should be copied.
            It is created (including parents) when missing.
        overwrite: If ``True``, replace an existing destination task folder.

    Returns:
        Path to the copied task directory inside ``destination``.

    Raises:
        FileNotFoundError: Propagated from :func:`get_example_task_dir` when
            the bundled task cannot be found.
        ValueError: If ``task_name`` is empty, anchored, or contains ``..``,
            so that the copy target would not be strictly inside
            ``destination``.
        FileExistsError: If the target already exists and ``overwrite`` is
            ``False``.
    """
    source_dir = get_example_task_dir(task_name)

    # Guard the destructive branch below: with an empty, absolute or ".."
    # name, ``destination / task_name`` is ``destination`` itself or a path
    # outside of it, and overwrite=True would delete that location.
    relative = Path(task_name)
    if relative.anchor or not relative.parts or ".." in relative.parts:
        raise ValueError(
            f"Invalid example task name {task_name!r}: it must be a relative "
            "path inside the bundled tasks folder."
        )

    destination = Path(destination)
    destination.mkdir(parents=True, exist_ok=True)
    target_dir = destination / relative

    # is_symlink() also catches dangling links, which exists() reports as absent.
    if target_dir.exists() or target_dir.is_symlink():
        if not overwrite:
            raise FileExistsError(
                f"Destination task directory already exists: {target_dir}. "
                "Pass overwrite=True to replace it."
            )
        if target_dir.is_dir() and not target_dir.is_symlink():
            shutil.rmtree(target_dir)
        else:
            # rmtree refuses regular files and symlinks; remove them directly.
            target_dir.unlink()

    shutil.copytree(source_dir, target_dir)
    return target_dir
@@ -0,0 +1,319 @@
1
+ from __future__ import annotations
2
+
3
+ from itertools import product
4
+ from pathlib import Path
5
+ import time
6
+
7
+ from .examples import get_example_task_dir
8
+ from .ollama import ensure_ollama_model
9
+ from .types import ExperimentRunResult, ExperimentSpec
10
+
11
+
12
+ def _normalize_optional_float(value) -> float | None:
13
+ """Convert empty or ``"None"``-style sweep values to ``None``, others to ``float``."""
14
+ if value in (None, "", "None"):
15
+ return None
16
+ return float(value)
17
+
18
+
19
+ def _coerce_bool(value) -> bool:
20
+ """Convert common string and numeric truthy values to ``bool``."""
21
+ if isinstance(value, bool):
22
+ return value
23
+ return str(value).strip().lower() in {"true", "1", "yes", "y", "t"}
24
+
25
+
26
+ def _coerce_sweep(values, default):
27
+ """Return a list of sweep values, falling back to a single default value."""
28
+ if values in (None, []):
29
+ return [default]
30
+ return list(values)
31
+
32
+
33
def expand_param_grid(param_grid: dict) -> list[ExperimentSpec]:
    """Expand a parameter grid into concrete :class:`ExperimentSpec` runs.

    Args:
        param_grid: In-memory grid dictionary describing the Cartesian-product
            sweep. Expected keys include ``tasks``, ``models``, ``use_examples``,
            ``prompt_types``, ``temperatures``, ``top_ps``, ``process_textboxes``,
            and ``country_iso_code``. Each sweep key may hold a list of values
            or a single scalar; missing, blank, or empty entries fall back to
            the defaults used below.

    Returns:
        A list of experiment specifications, one per Cartesian-product combination.
    """

    def _sweep(key, default):
        """Read one sweep from the grid as a non-empty list of values."""
        raw = param_grid.get(key)
        if isinstance(raw, str):
            # A bare string is one value; list("abc") would split it into
            # characters and launch one run per letter.
            raw = [raw] if raw.strip() else None
        elif isinstance(raw, (bool, int, float)):
            raw = [raw]
        # An empty sweep would make product() yield nothing at all.
        return _coerce_sweep(raw, default) or [default]

    country_iso_code = str(param_grid.get("country_iso_code") or "USA")

    combinations = product(
        _sweep("tasks", "policy-sentiment"),
        _sweep("models", "gemma3:270m"),
        _sweep("use_examples", False),
        _sweep("prompt_types", "standard"),
        _sweep("temperatures", None),
        _sweep("top_ps", None),
        _sweep("process_textboxes", False),
    )
    return [
        ExperimentSpec(
            task=str(task),
            model=str(model),
            use_examples=_coerce_bool(use_examples),
            prompt_type=str(prompt_type),
            temperature=_normalize_optional_float(temperature),
            top_p=_normalize_optional_float(top_p),
            process_textbox=_coerce_bool(process_textbox),
            country_iso_code=country_iso_code,
        )
        for task, model, use_examples, prompt_type, temperature, top_p, process_textbox in combinations
    ]
77
+
78
+
79
def resolve_task_dir(task: str, task_root: str | Path | None = None) -> Path:
    """Resolve a task directory from either a user path or bundled examples.

    The external ``task_root`` is consulted first; bundled example tasks are
    the fallback.

    Args:
        task: Task directory name such as ``"policy-sentiment"``.
        task_root: Optional directory containing user-defined task folders.

    Returns:
        Filesystem path to the resolved task directory.

    Raises:
        FileNotFoundError: If ``task`` is neither a directory under
            ``task_root`` nor a bundled example task.
    """
    candidate = Path(task_root) / task if task_root is not None else None
    # is_dir() rather than exists(): a regular file that happens to share the
    # task's name is not a task directory.
    if candidate is not None and candidate.is_dir():
        return candidate

    try:
        return get_example_task_dir(task)
    except FileNotFoundError as exc:
        searched = str(candidate) if candidate is not None else "no external task_root provided"
        raise FileNotFoundError(
            f"Task '{task}' was not found. Searched external task path {searched} "
            "and bundled example tasks."
        ) from exc
102
+
103
+
104
def build_experiment_paths(
    *,
    task: str,
    model: str,
    use_examples: bool,
    prompt_type: str,
    temperature,
    top_p,
    process_textbox: bool,
    output_root: str | Path = "outputs",
    timestamp: str | None = None,
) -> dict[str, Path | str]:
    """Derive the output locations for a single experiment configuration.

    The run directory is ``<output_root>/<task>/<name>`` where ``<name>`` is
    assembled from the model (``:`` replaced by ``-``), the lower-cased
    ``use_examples`` flag, the prompt type, optional ``_temp``/``_topp``/
    ``_textbox`` suffixes, and finally the timestamp. ``model_id`` follows the
    same scheme but keeps the raw model name and omits the timestamp.

    Args:
        task: Task folder name used as the sub-directory of ``output_root``.
        model: Ollama model identifier.
        use_examples: Whether codebook examples are included in prompts.
        prompt_type: Prompt wrapper name used for the run.
        temperature: Optional temperature value; ``None`` adds no suffix.
        top_p: Optional top-p value; ``None`` adds no suffix.
        process_textbox: Whether textbox annotations are included.
        output_root: Root directory where experiment outputs should be written.
        timestamp: Optional timestamp string; when falsy, the current local
            time formatted as ``YYYY-MM-DD_HH-MM-SS`` is used.

    Returns:
        Dictionary with ``timestamp``, ``model_id``, ``experiment_directory``
        and the standard per-run file paths inside that directory.
    """
    stamp = timestamp or time.strftime("%Y-%m-%d_%H-%M-%S")
    examples_flag = str(use_examples).lower()

    # Optional suffixes, always in the order temperature -> top-p -> textbox.
    suffix_parts = []
    if temperature is not None:
        suffix_parts.append(f"_temp{temperature}")
    if top_p is not None:
        suffix_parts.append(f"_topp{top_p}")
    if process_textbox:
        suffix_parts.append("_textbox")
    suffix = "".join(suffix_parts)

    model_id = f"{model}_examples{examples_flag}_{prompt_type}{suffix}"

    # ":" is swapped out for the directory name only; model_id keeps it.
    safe_model = model.replace(":", "-")
    dir_head = Path(output_root) / task / f"{safe_model}_examples{examples_flag}_{prompt_type}"
    run_dir = Path(f"{dir_head}{suffix}_{stamp}")

    result: dict[str, Path | str] = {
        "timestamp": stamp,
        "model_id": model_id,
        "experiment_directory": run_dir,
    }
    run_files = (
        ("output_csv", "output.csv"),
        ("report_file", "classification_reports.txt"),
        ("emissions_file", "emissions.csv"),
        ("timing_file", "timing_data.json"),
        ("char_counts_file", "char_counts.json"),
    )
    for key, filename in run_files:
        result[key] = run_dir / filename
    return result
159
+
160
+
161
def run_experiment(
    spec: ExperimentSpec,
    *,
    task_root: str | Path | None = None,
    output_root: str | Path = "outputs",
    metrics_output_root: str | Path | None = None,
    timestamp: str | None = None,
    pull_model: bool = True,
    start_ollama_if_needed: bool = True,
    quantization_type: str | None = None,
):
    """Annotate one task with one configuration, then score it against ground truth.

    Steps, in order: resolve the task directory, verify that ``codebook.json``
    and ``ground-truth.csv`` exist in it, optionally ensure the model is
    available, derive the output paths, run the annotation, run the metrics,
    and print a short completion summary.

    Args:
        spec: Concrete experiment specification to execute.
        task_root: Optional directory containing user-defined task folders with
            ``codebook.json`` and ``ground-truth.csv``. Resolution is delegated
            to ``resolve_task_dir``.
        output_root: Root directory where per-run outputs should be created.
        metrics_output_root: Directory for aggregate metrics CSV files.
            Defaults to ``output_root / "metrics"``.
        timestamp: Optional timestamp string to control output folder naming.
        pull_model: If ``True``, call ``ensure_ollama_model`` for ``spec.model``
            before execution.
        start_ollama_if_needed: Forwarded unchanged to ``run_annotation``.
        quantization_type: Optional metadata value forwarded to ``run_metrics``.

    Returns:
        :class:`ExperimentRunResult` containing both annotation and metrics results.

    Raises:
        FileNotFoundError: If the task cannot be resolved, or its codebook or
            ground-truth file is missing.
    """
    from .annotate import run_annotation
    from .metrics import run_metrics

    out_root = Path(output_root)
    if metrics_output_root is None:
        metrics_dir = out_root / "metrics"
    else:
        metrics_dir = Path(metrics_output_root)

    task_dir = resolve_task_dir(spec.task, task_root)
    codebook_path = task_dir / "codebook.json"
    ground_truth_csv = task_dir / "ground-truth.csv"

    # Check the inputs up front, before any model pull or annotation starts.
    if not codebook_path.exists():
        raise FileNotFoundError(f"Codebook not found for task '{spec.task}': {codebook_path}")
    if not ground_truth_csv.exists():
        raise FileNotFoundError(f"Ground truth file not found for task '{spec.task}': {ground_truth_csv}")

    if pull_model:
        ensure_ollama_model(spec.model)

    # Settings shared verbatim by path building, annotation and metrics.
    run_settings = {
        "prompt_type": spec.prompt_type,
        "use_examples": spec.use_examples,
        "temperature": spec.temperature,
        "top_p": spec.top_p,
        "process_textbox": spec.process_textbox,
    }

    paths = build_experiment_paths(
        task=spec.task,
        model=spec.model,
        output_root=out_root,
        timestamp=timestamp,
        **run_settings,
    )
    run_dir = paths["experiment_directory"]
    model_id = paths["model_id"]

    label = spec.task
    metrics_output_csv = metrics_dir / f"{spec.task}_metrics_log.csv"

    annotation_result = run_annotation(
        model=spec.model,
        csv_path=ground_truth_csv,
        codebook_path=codebook_path,
        output_path=paths["output_csv"],
        experiment_directory=run_dir,
        country_iso_code=spec.country_iso_code,
        start_ollama_if_needed=start_ollama_if_needed,
        **run_settings,
    )

    metrics_result = run_metrics(
        ground_truth_csv=ground_truth_csv,
        llm_output_csv=paths["output_csv"],
        label=label,
        output_csv=metrics_output_csv,
        model_id=model_id,
        codebook_path=codebook_path,
        report_file=paths["report_file"],
        quantization_type=quantization_type,
        emissions_file=paths["emissions_file"],
        experiment_directory=run_dir,
        timestamp=paths["timestamp"],
        timing_file=paths["timing_file"],
        char_counts_file=paths["char_counts_file"],
        **run_settings,
    )

    print()
    print(f"Experiment complete: {run_dir}")
    print(metrics_result.summary_text)

    return ExperimentRunResult(
        spec=spec,
        experiment_directory=run_dir,
        model_id=model_id,
        label=label,
        annotation=annotation_result,
        metrics=metrics_result,
    )
274
+
275
+
276
def run_experiment_grid(
    *,
    specs: list[ExperimentSpec] | None = None,
    param_grid: dict | None = None,
    task_root: str | Path | None = None,
    output_root: str | Path = "outputs",
    metrics_output_root: str | Path | None = None,
    pull_models: bool = True,
    start_ollama_if_needed: bool = True,
) -> list[ExperimentRunResult]:
    """Run a whole parameter sweep from a grid or a prebuilt spec list.

    Args:
        specs: Optional pre-expanded list of :class:`ExperimentSpec` objects.
            Takes precedence over ``param_grid`` when both are given.
        param_grid: Optional in-memory parameter grid dictionary, expanded
            with ``expand_param_grid`` when ``specs`` is ``None``.
        task_root: Optional directory containing user-defined task folders.
        output_root: Root directory for per-run outputs.
        metrics_output_root: Directory for aggregate metrics CSV files.
        pull_models: If ``True``, pull each Ollama model before its first run
            in this sweep; later runs of the same model skip the pull.
        start_ollama_if_needed: Forwarded to every ``run_experiment`` call.

    Returns:
        List of :class:`ExperimentRunResult` objects, one per completed run,
        in the order of ``specs``.

    Raises:
        ValueError: If neither ``specs`` nor ``param_grid`` is provided.
    """
    if specs is None:
        if param_grid is None:
            raise ValueError("Provide either specs or an in-memory param_grid dictionary.")
        specs = expand_param_grid(param_grid=param_grid)

    results = []
    # Models already handled in this sweep; a grid usually repeats each model
    # across many parameter combinations, and one pull per model is enough.
    pulled_models = set()
    for spec in specs:
        needs_pull = pull_models and spec.model not in pulled_models
        results.append(
            run_experiment(
                spec,
                task_root=task_root,
                output_root=output_root,
                metrics_output_root=metrics_output_root,
                pull_model=needs_pull,
                start_ollama_if_needed=start_ollama_if_needed,
            )
        )
        pulled_models.add(spec.model)
    return results