promptuna-cli 1.24.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.3
2
+ Name: promptuna-cli
3
+ Version: 1.24.0
4
+ Summary: Typer CLI for on-disk promptuna run / evaluate / optimize jobs
5
+ Requires-Dist: promptuna==1.24.0
6
+ Requires-Dist: typer>=0.12
7
+ Requires-Python: >=3.13
@@ -0,0 +1,19 @@
1
+ [project]
2
+ name = "promptuna-cli"
3
+ version = "1.24.0"
4
+ description = "Typer CLI for on-disk promptuna run / evaluate / optimize jobs"
5
+ requires-python = ">=3.13"
6
+ dependencies = [
7
+ "promptuna==1.24.0",
8
+ "typer>=0.12",
9
+ ]
10
+
11
+ [project.scripts]
12
+ promptuna = "promptuna_cli.main:run_cli"
13
+
14
+ [tool.uv.sources]
15
+ promptuna = { workspace = true }
16
+
17
+ [build-system]
18
+ requires = ["uv_build>=0.11.1,<0.12.0"]
19
+ build-backend = "uv_build"
@@ -0,0 +1,5 @@
1
+ """Typer CLI for on-disk promptuna projects."""
2
+
3
+ from promptuna_cli.main import app, run_cli
4
+
5
# Names exported by ``from promptuna_cli import *``; both come from promptuna_cli.main.
__all__ = ["app", "run_cli"]
@@ -0,0 +1,151 @@
1
+ """Shared helpers for CLI job commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import uuid
7
+ from collections.abc import Callable, Iterator
8
+ from pathlib import Path
9
+ from typing import Any, Literal
10
+
11
+ import typer
12
+
13
+ from promptuna.evaluate import RunInfo, RunResults, Scoring
14
+ from promptuna.jobs import JobArchive, JobConfig, JobKind, get_jobs_root, load_job, stream_job
15
+ from promptuna.optimize import Step
16
+ from promptuna.program import Experiment
17
+ from promptuna.projects import ProjectValidationError, set_projects_root
18
+ from promptuna.report import render_history, render_run
19
+ from promptuna.run import FailedTrial, SuccessfulTrial
20
+
21
# Output modes accepted for a finished job: rendered text ("human") or the stored summary ("json").
OutputFormat = Literal["human", "json"]
22
+
23
+
24
+ def apply_projects_root(projects_root: Path | None) -> None:
25
+ """Apply a CLI override for the active projects root."""
26
+ if projects_root is not None:
27
+ set_projects_root(projects_root.expanduser().resolve())
28
+
29
+
30
def parse_metric_names(values: list[str]) -> list[str]:
    """Expand repeated ``--metric`` values into one flat list of names.

    Each value may itself hold several comma-separated names. Surrounding
    whitespace is stripped and empty fragments are dropped; order and
    duplicates are kept as given.

    Raises:
        typer.BadParameter: If no non-empty name remains.
    """
    names = [
        token
        for raw in values
        for token in (piece.strip() for piece in raw.split(","))
        if token
    ]
    if names:
        return names
    raise typer.BadParameter("at least one --metric is required")
41
+
42
+
43
+ def _collecting_source[T](
44
+ source: Callable[[], Iterator[T]],
45
+ ) -> tuple[Callable[[], Iterator[T]], list[T]]:
46
+ collected: list[T] = []
47
+
48
+ def wrapped() -> Iterator[T]:
49
+ for item in source():
50
+ collected.append(item)
51
+ yield item
52
+
53
+ return wrapped, collected
54
+
55
+
56
def execute_job(
    *,
    config: JobConfig,
    source: Callable[[], Iterator[Any]],
    experiment: Experiment,
    render_human: Callable[[list[Any]], str],
    output_format: OutputFormat,
) -> None:
    """Run one blocking job, persist it on disk, and print the result.

    The job id is written to stderr on success and on failure, so a failed
    job can still be located on disk.

    Args:
        config: Job configuration passed to ``JobArchive.open``.
        source: Zero-argument factory returning the iterator of job items.
        experiment: Accepted for call-site symmetry; not read by this function.
        render_human: Turns the streamed items into the text printed for
            non-JSON output.
        output_format: ``"json"`` prints the stored summary as JSON; any other
            value prints ``render_human`` applied to the streamed items.

    Raises:
        typer.Exit: Via ``_exit_on_failed_job`` when streaming raises or the
            stored manifest reports status ``"error"``.
    """
    job_id = str(uuid.uuid4())
    archive = JobArchive.open(get_jobs_root(), job_id, config)

    collected: list[Any] = []
    if output_format == "json":
        # JSON output prints only the persisted summary, so there is no need
        # to keep every streamed item alive in memory.
        source_fn = source
    else:
        source_fn, collected = _collecting_source(source)

    try:
        # Drain the stream; the items themselves are not needed here.
        for _ in stream_job(archive, source_fn()):
            pass
    except Exception:
        # Report the id before exiting so the failed job can be inspected.
        typer.echo(f"job_id: {job_id}", err=True)
        _exit_on_failed_job(job_id)
        # Safety net in case the helper ever returns instead of exiting.
        raise

    record = load_job(get_jobs_root(), job_id)
    # Printed before the status check so error exits also carry the id.
    typer.echo(f"job_id: {job_id}", err=True)
    if record.manifest["status"] == "error":
        _exit_on_failed_job(job_id)

    if output_format == "json":
        typer.echo(json.dumps(record.summary, indent=2, sort_keys=True))
        return

    typer.echo(render_human(collected))
86
+
87
+
88
def _exit_on_failed_job(job_id: str) -> None:
    """Print the stored failure reason for ``job_id`` to stderr and exit.

    The reason is the manifest's ``"error"`` entry, or ``"unknown error"``
    when that entry is missing or empty.

    Raises:
        typer.Exit: Always, with exit code 1.
    """
    manifest = load_job(get_jobs_root(), job_id).manifest
    error = manifest.get("error") or "unknown error"
    typer.echo(f"job failed: {error}", err=True)
    raise typer.Exit(code=1)
93
+
94
+
95
def render_run_human(experiment: Experiment, items: list[Any]) -> str:
    """Render the streamed items of a run or evaluate job via ``render_run``.

    Trials (successful or failed) and scorings are picked out of ``items`` in
    their original order; anything else is ignored. ``error_format`` is
    ``"inputs"`` when at least one scoring is present and ``None`` otherwise.
    """
    trial_types = (SuccessfulTrial, FailedTrial)
    trials = []
    scorings = []
    for entry in items:
        # Two independent checks: each list is filtered on its own criterion.
        if isinstance(entry, trial_types):
            trials.append(entry)
        if isinstance(entry, Scoring):
            scorings.append(entry)

    results = RunResults(
        experiment=experiment,
        run=RunInfo(),
        trials=trials,
        scorings=scorings,
    )
    error_format = "inputs" if scorings else None
    return render_run(results, error_format=error_format)
107
+
108
+
109
def render_optimize_human(items: list[Any]) -> str:
    """Render the ``Step`` items of an optimize job via ``render_history``.

    Items that are not ``Step`` instances are ignored; order is preserved.
    """
    return render_history([entry for entry in items if isinstance(entry, Step)])
113
+
114
+
115
def handle_project_error(exc: ProjectValidationError) -> None:
    """Print a project validation failure to stderr and exit.

    Raises:
        typer.Exit: Always, with exit code 2 and ``exc`` as its cause.
    """
    message = str(exc)
    typer.echo(message, err=True)
    raise typer.Exit(code=2) from exc
119
+
120
+
121
def build_job_config(
    *,
    kind: JobKind,
    project: str,
    program: str,
    prompt: str,
    examples: str,
    dataset_path: Path,
    model: str,
    workers: int,
    metrics: tuple[str, ...] | None = None,
    steps: int | None = None,
    proposer_model: str | None = None,
) -> JobConfig:
    """Assemble a :class:`JobConfig` from the given job parameters.

    All arguments are passed through unchanged. ``projects_root`` is filled
    in from ``get_projects_root()`` at call time.
    """
    from promptuna.projects import get_projects_root

    active_root = get_projects_root()
    return JobConfig(
        kind=kind,
        projects_root=active_root,
        project=project,
        program=program,
        prompt=prompt,
        examples=examples,
        dataset_path=dataset_path,
        model=model,
        workers=workers,
        metrics=metrics,
        steps=steps,
        proposer_model=proposer_model,
    )
@@ -0,0 +1,266 @@
1
+ """Typer entry point for the promptuna CLI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Annotated
8
+
9
+ import typer
10
+
11
+ from promptuna.evaluate import stream_evaluate
12
+ from promptuna.jobs import get_jobs_root, load_job
13
+ from promptuna.optimize import stream_optimize
14
+ from promptuna.projects import (
15
+ ProjectValidationError,
16
+ build_experiment,
17
+ resolve_dataset_path,
18
+ resolve_project_dir,
19
+ )
20
+ from promptuna.run import stream_run
21
+ from promptuna_cli._common import (
22
+ OutputFormat,
23
+ apply_projects_root,
24
+ build_job_config,
25
+ execute_job,
26
+ handle_project_error,
27
+ parse_metric_names,
28
+ render_optimize_human,
29
+ render_run_human,
30
+ )
31
+
32
# Root Typer application; the callback and commands defined below attach to it.
app = typer.Typer(
    help="Run, evaluate, and optimize on-disk promptuna projects.",
    name="promptuna",
    add_completion=False,
    no_args_is_help=True,
)
38
+
39
+
40
@app.callback()
def main(
    projects_root: Annotated[
        Path | None,
        typer.Option(
            "--projects-root",
            help="Directory containing project folders (overrides PROMPTUNA_PROJECTS_ROOT).",
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ] = None,
) -> None:
    """Configure workspace paths shared by every subcommand."""
    # Hand the optional --projects-root value to the shared helper, which
    # leaves the active root untouched when no override was given.
    apply_projects_root(projects_root)
55
+
56
+
57
@app.command()
def run(
    project: Annotated[str, typer.Option("--project", "-p", help="Project directory name.")],
    program: Annotated[str, typer.Option("--program", help="Program function in programs.py.")],
    prompt: Annotated[str, typer.Option("--prompt", help="Prompt template name.")],
    examples: Annotated[str, typer.Option("--examples", help="Dataset name under data/.")],
    model: Annotated[str, typer.Option("--model", "-m", help="LM id for program execution.")],
    workers: Annotated[
        int,
        typer.Option("--workers", "-w", min=1, help="Parallel trial workers."),
    ] = 1,
    output_format: Annotated[
        OutputFormat,
        typer.Option("--format", "-f", help="Output format for the finished job."),
    ] = "human",
) -> None:
    """Execute a program over a dataset."""
    try:
        # A plain run has no metrics, so the third element is discarded.
        experiment, example_rows, _ = build_experiment(
            project=project,
            program=program,
            prompt=prompt,
            model=model,
            examples=examples,
        )
        dataset_path = resolve_dataset_path(resolve_project_dir(project), examples)
        config = build_job_config(
            kind="run",
            project=project,
            program=program,
            prompt=prompt,
            examples=examples,
            dataset_path=dataset_path,
            model=model,
            workers=workers,
        )
    except ProjectValidationError as exc:
        # Validation problems become a CLI exit rather than a traceback.
        handle_project_error(exc)

    def source():
        return stream_run(experiment, example_rows, workers=workers)

    def render_human(items):
        return render_run_human(experiment, items)

    execute_job(
        config=config,
        source=source,
        experiment=experiment,
        render_human=render_human,
        output_format=output_format,
    )
104
+
105
+
106
@app.command()
def evaluate(
    project: Annotated[str, typer.Option("--project", "-p", help="Project directory name.")],
    program: Annotated[str, typer.Option("--program", help="Program function in programs.py.")],
    prompt: Annotated[str, typer.Option("--prompt", help="Prompt template name.")],
    examples: Annotated[str, typer.Option("--examples", help="Dataset name under data/.")],
    model: Annotated[str, typer.Option("--model", "-m", help="LM id for program execution.")],
    metric: Annotated[
        list[str],
        typer.Option("--metric", "-M", help="Metric name from metrics.py (repeatable)."),
    ],
    workers: Annotated[
        int,
        typer.Option("--workers", "-w", min=1, help="Parallel trial workers."),
    ] = 1,
    output_format: Annotated[
        OutputFormat,
        typer.Option("--format", "-f", help="Output format for the finished job."),
    ] = "human",
) -> None:
    """Execute a program and score it with one or more metrics."""
    # Flatten repeated / comma-separated --metric values before any project work.
    metric_names = parse_metric_names(metric)
    try:
        experiment, example_rows, metrics = build_experiment(
            project=project,
            program=program,
            prompt=prompt,
            model=model,
            examples=examples,
            metrics=metric_names,
        )
        dataset_path = resolve_dataset_path(resolve_project_dir(project), examples)
        config = build_job_config(
            kind="evaluate",
            project=project,
            program=program,
            prompt=prompt,
            examples=examples,
            dataset_path=dataset_path,
            model=model,
            workers=workers,
            metrics=tuple(metric_names),
        )
    except ProjectValidationError as exc:
        # Validation problems become a CLI exit rather than a traceback.
        handle_project_error(exc)

    assert metrics is not None

    def render_human(items):
        return render_run_human(experiment, items)

    execute_job(
        config=config,
        source=lambda: stream_evaluate(experiment, example_rows, metrics, workers=workers),
        experiment=experiment,
        render_human=render_human,
        output_format=output_format,
    )
165
+
166
+
167
@app.command()
def optimize(
    project: Annotated[str, typer.Option("--project", "-p", help="Project directory name.")],
    program: Annotated[str, typer.Option("--program", help="Program function in programs.py.")],
    prompt: Annotated[str, typer.Option("--prompt", help="Prompt template name.")],
    examples: Annotated[str, typer.Option("--examples", help="Dataset name under data/.")],
    model: Annotated[str, typer.Option("--model", "-m", help="LM id for program execution.")],
    metric: Annotated[
        list[str],
        typer.Option("--metric", "-M", help="Metric name from metrics.py (repeatable)."),
    ],
    steps: Annotated[int, typer.Option("--steps", min=0, help="Proposer steps after baseline.")],
    proposer_model: Annotated[
        str,
        typer.Option("--proposer-model", help="LM id for prompt-template proposals."),
    ],
    workers: Annotated[
        int,
        typer.Option("--workers", "-w", min=1, help="Parallel trial workers."),
    ] = 1,
    output_format: Annotated[
        OutputFormat,
        typer.Option("--format", "-f", help="Output format for the finished job."),
    ] = "human",
) -> None:
    """Search for a better prompt template."""
    # Flatten repeated / comma-separated --metric values before any project work.
    metric_names = parse_metric_names(metric)
    try:
        experiment, example_rows, metrics = build_experiment(
            project=project,
            program=program,
            prompt=prompt,
            model=model,
            examples=examples,
            metrics=metric_names,
        )
        dataset_path = resolve_dataset_path(resolve_project_dir(project), examples)
        config = build_job_config(
            kind="optimize",
            project=project,
            program=program,
            prompt=prompt,
            examples=examples,
            dataset_path=dataset_path,
            model=model,
            workers=workers,
            metrics=tuple(metric_names),
            steps=steps,
            proposer_model=proposer_model,
        )
    except ProjectValidationError as exc:
        # Validation problems become a CLI exit rather than a traceback.
        handle_project_error(exc)

    assert metrics is not None

    def optimization_stream():
        return stream_optimize(
            experiment,
            example_rows,
            metrics,
            proposer_model=proposer_model,
            steps=steps,
            workers=workers,
        )

    execute_job(
        config=config,
        source=optimization_stream,
        experiment=experiment,
        render_human=render_optimize_human,
        output_format=output_format,
    )
240
+
241
+
242
@app.command()
def report(
    job_id: Annotated[str, typer.Argument(help="Job id under <projects_root>/jobs/.")],
) -> None:
    """Print ``summary.json`` for a finished on-disk job."""
    try:
        record = load_job(get_jobs_root(), job_id)
    except FileNotFoundError:
        # Unknown job id: exit code 2, without chaining the lookup error.
        typer.echo(f"job {job_id!r} not found", err=True)
        raise typer.Exit(code=2) from None

    if record.summary is not None:
        typer.echo(json.dumps(record.summary, indent=2, sort_keys=True))
        return

    # The job exists but has no summary: exit code 1.
    typer.echo(f"job {job_id!r} has no summary yet", err=True)
    raise typer.Exit(code=1)
258
+
259
+
260
def run_cli() -> None:
    """Invoke the Typer application; this is the console-script entry point."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    run_cli()