model-gear 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
model_gear/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """model-gear — run, assess, and switch the local vLLM model."""
2
+
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version as _v
5
+
6
def _installed_version() -> str:
    """Return the installed ``model-gear`` version, or a local placeholder."""
    try:
        return _v("model-gear")
    except PackageNotFoundError:  # editable install without metadata
        return "0.0.0+local"


__version__ = _installed_version()

__all__ = ["__version__"]
model_gear/__main__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Allow running model-gear as ``python -m model_gear``."""
2
+
3
+ import sys
4
+
5
+ from model_gear.cli import main
6
+
7
if __name__ == "__main__":
    # Propagate the CLI's integer return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)
model_gear/assess.py ADDED
@@ -0,0 +1,268 @@
1
+ """API-side assessment and benchmark of a vLLM-served model (stdlib only).
2
+
3
+ Talks only to the OpenAI-compatible endpoint (``urllib``, no third-party deps).
4
+ Ported from the original ``_assess.py`` and split into two concerns:
5
+
6
+ * :func:`run_correctness` — fixed correctness probes + reasoning-trace detection
7
+ (drives ``model assess``);
8
+ * :func:`run_benchmark` — decode throughput + prefill latency (drives
9
+ ``model benchmark``).
10
+
11
+ Host-side facts (image tag, GPU memory) are gathered by the command handlers via
12
+ :mod:`model_gear.runtime._compose` and printed alongside this output.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import contextlib
18
+ import json
19
+ import time
20
+ import urllib.request
21
+
22
+ from model_gear.cli._errors import EXIT_ENV_ERROR, ModelGearError
23
+
24
+ # urllib.error.URLError is a subclass of OSError, so `except OSError` covers
25
+ # connection failures, timeouts, and HTTPError without listing it redundantly.
26
+
27
+
28
@contextlib.contextmanager
def _api_errors(what: str):
    """Turn raw HTTP / JSON / response-shape failures into a structured error.

    Wrap any code that calls the endpoint or digs into its reply. Without this,
    an ``HTTPError``/``URLError`` or an unexpected payload bubbles to the
    dispatcher's catch-all and appears as ``unexpected: ...`` with no
    remediation.

    Args:
        what: Short label for the operation; it prefixes the error message.

    Raises:
        ModelGearError: with ``EXIT_ENV_ERROR`` when the wrapped block raises
            ``OSError`` (which includes ``urllib.error.URLError`` and
            ``HTTPError``) or a response-shape error (``JSONDecodeError``,
            ``KeyError``, ``IndexError``, ``TypeError``, ``AttributeError``).
            A ``ModelGearError`` raised inside the block passes through as-is.
    """
    try:
        yield
    except ModelGearError:
        # Already structured by the wrapped code; do not re-wrap.
        raise
    except OSError as exc:
        raise ModelGearError(
            code=EXIT_ENV_ERROR,
            message=f"{what} failed: {exc}",
            remediation="check 'model status' / 'docker logs model-gear-vllm'",
        ) from exc
    except (
        json.JSONDecodeError,
        KeyError,
        IndexError,
        TypeError,
        # ``.get()`` on a payload node that is not a dict (e.g. a null
        # ``usage`` or a string ``message``) raises AttributeError; that is a
        # response-shape problem too, not an internal bug.
        AttributeError,
    ) as exc:
        raise ModelGearError(
            code=EXIT_ENV_ERROR,
            message=f"{what}: unexpected response shape ({exc.__class__.__name__}: {exc})",
            remediation="the served model returned an unexpected payload; check the vLLM logs",
        ) from exc
52
+
53
+
54
+ # (prompt, expected-substring, table-label) — the two fixed correctness probes.
55
_PROBES = [
    # Arithmetic probe.
    ("What is 17 * 23?", "391", "`17 * 23 = 391`"),
    # Clock-arithmetic probe.
    (
        "If a train leaves at 14:45 and arrives at 17:10, how long is the journey in minutes?",
        "145",
        "train 14:45→17:10 = 145 min",
    ),
]
63
+
64
+
65
def _post(url: str, payload: dict, timeout: int = 300) -> dict:
    """POST ``payload`` as JSON to ``<url>/v1/chat/completions``; return the decoded reply.

    Errors from ``urllib`` and ``json`` are not handled here; they propagate to
    the caller.
    """
    endpoint = url + "/v1/chat/completions"
    body = json.dumps(payload).encode()
    request = urllib.request.Request(
        endpoint,
        data=body,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:  # local endpoint only
        return json.load(response)
74
+
75
+
76
def _get(url: str, path: str, timeout: int = 10):
    """GET ``url + path`` and return ``(status, body)``.

    ``body`` is the parsed JSON when the ``content-type`` header starts with
    ``application/json``; otherwise it is the decoded response text.
    """
    with urllib.request.urlopen(url + path, timeout=timeout) as response:  # local endpoint only
        content_type = response.headers.get("content-type", "")
        if not content_type.startswith("application/json"):
            return response.status, response.read().decode()
        return response.status, json.load(response)
81
+
82
+
83
def _trace_field(msg: dict) -> tuple[str | None, int]:
    """Locate the reasoning trace in a chat message.

    Checks ``reasoning`` first, then ``reasoning_content``, and returns
    ``(key, len(value))`` for the first one holding a non-empty string.
    Returns ``(None, 0)`` when neither does.
    """
    candidates = ((name, msg.get(name)) for name in ("reasoning", "reasoning_content"))
    for name, text in candidates:
        if text and isinstance(text, str):
            return name, len(text)
    return None, 0
94
+
95
+
96
def health_status(url: str) -> int:
    """Return the HTTP status of ``GET <url>/health``.

    Raises:
        ModelGearError: with ``EXIT_ENV_ERROR`` when the request raises
            ``OSError`` (endpoint unreachable, timeout, HTTP error).
    """
    try:
        status, _body = _get(url, "/health")
    except OSError as exc:
        raise ModelGearError(
            code=EXIT_ENV_ERROR,
            message=f"/health unreachable at {url} ({exc})",
            remediation="start the server with 'model serve --apply'",
        ) from exc
    else:
        return status
107
+
108
+
109
def served_model(url: str, override: str | None = None) -> tuple[str, object]:
    """Return ``(model_id, max_model_len)`` for the first entry of ``/v1/models``.

    Args:
        url: Base URL of the endpoint.
        override: When truthy, returned in place of the served model's ``id``.

    Raises:
        ModelGearError: when the request fails, the payload has an unexpected
            shape, or the ``data`` list is missing or empty.
    """
    with _api_errors("/v1/models"):
        _status, listing = _get(url, "/v1/models")
        entries = listing.get("data") if isinstance(listing, dict) else None
        if not entries:
            raise ModelGearError(
                code=EXIT_ENV_ERROR,
                message=f"/v1/models returned no models at {url}",
                remediation="check 'model status' / 'docker logs model-gear-vllm'",
            )
        head = entries[0]
        model_id = override or head["id"]
        return model_id, head.get("max_model_len")
122
+
123
+
124
def _probe(url: str, model: str, prompt: str, expect: str) -> dict:
    """Send one correctness prompt and report whether the reply contains ``expect``.

    Args:
        url: Base URL of the endpoint.
        model: Model name sent in the request.
        prompt: User message to send.
        expect: Substring that must appear in the reply's ``content``.

    Returns:
        A dict with ``ok``, ``expect``, ``trace_field``, ``trace_len``,
        ``finish`` (the choice's ``finish_reason``) and ``completion_tokens``
        (``None`` when the reply carries no usage data).
    """
    reply = _post(
        url,
        {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 2048,
            "temperature": 0.3,
        },
    )
    choice = reply["choices"][0]
    message = choice["message"]
    # ``content`` may be null; treat that as an empty answer.
    answer = message.get("content") or ""
    field, trace_len = _trace_field(message)
    # ``usage`` may be missing *or* explicitly null; both mean "no token count".
    # A plain ``.get("usage", {})`` would return None for a null value and the
    # chained ``.get`` would raise AttributeError.
    usage = reply.get("usage") or {}
    return {
        "ok": expect in answer,
        "expect": expect,
        "trace_field": field,
        "trace_len": trace_len,
        "finish": choice.get("finish_reason"),
        "completion_tokens": usage.get("completion_tokens"),
    }
145
+
146
+
147
def _decode_throughput(url: str, model: str, n_tokens: int, runs: int = 2) -> list[float]:
    """Return tokens-per-second for ``runs`` forced-length greedy generations.

    Each run requests ``n_tokens`` completion tokens with ``ignore_eos`` set and
    divides the reported ``completion_tokens`` by the wall-clock request time,
    rounded to one decimal place.
    """
    request_body = {
        "model": model,
        "messages": [
            {"role": "user", "content": "Write a detailed essay about distributed systems."}
        ],
        "max_tokens": n_tokens,
        "temperature": 0,
        "ignore_eos": True,
    }
    rates: list[float] = []
    for _ in range(runs):
        started = time.monotonic()
        reply = _post(url, request_body)
        elapsed = time.monotonic() - started
        generated = reply["usage"]["completion_tokens"]
        rates.append(round(generated / elapsed, 1))
    return rates
167
+
168
+
169
def _prefill(url: str, model: str) -> dict:
    """Time one long-prompt request that generates 16 tokens.

    Returns ``{"prompt_tokens": <reported by the server>, "seconds": <elapsed>}``
    with the elapsed time rounded to two decimal places.
    """
    long_prompt = "Summarize this. " + "The system processes events. " * 400
    request_body = {
        "model": model,
        "messages": [{"role": "user", "content": long_prompt}],
        "max_tokens": 16,
        "temperature": 0,
    }
    started = time.monotonic()
    reply = _post(url, request_body)
    elapsed = time.monotonic() - started
    return {"prompt_tokens": reply["usage"]["prompt_tokens"], "seconds": round(elapsed, 2)}
183
+
184
+
185
def run_correctness(url: str, model: str | None = None) -> dict:
    """Run the fixed correctness probes; return a structured result.

    Checks ``/health``, resolves the served model, then runs every entry of
    ``_PROBES``. ``passed`` is true only when all probes matched.
    """
    base = url.rstrip("/")
    health = health_status(base)
    model, max_len = served_model(base, model)

    with _api_errors("correctness probe"):
        # Each probe result is tagged with its table label for rendering.
        probes = [
            {**_probe(base, model, prompt, expect), "label": label}
            for prompt, expect, label in _PROBES
        ]

    # First non-empty trace field seen, and the longest trace across probes.
    seen_fields = [p["trace_field"] for p in probes if p["trace_field"]]
    first_field = seen_fields[0] if seen_fields else None
    longest_trace = max((p["trace_len"] for p in probes), default=0)

    return {
        "model": model,
        "endpoint": base,
        "health": health,
        "max_model_len": max_len,
        "probes": probes,
        "trace_field": first_field or "(none)",
        "trace_len": longest_trace,
        "passed": all(p["ok"] for p in probes),
    }
208
+
209
+
210
def run_benchmark(
    url: str, model: str | None = None, decode_tokens: int = 512, runs: int = 2
) -> dict:
    """Measure decode throughput + prefill latency; return a structured result.

    Checks ``/health`` and resolves the served model first, then runs the
    decode measurement followed by the prefill measurement.
    """
    base = url.rstrip("/")
    health_status(base)  # called for its reachability check; status is unused
    model, max_len = served_model(base, model)

    with _api_errors("benchmark"):
        decode_rates = _decode_throughput(base, model, decode_tokens, runs)
        prefill = _prefill(base, model)

    return {
        "model": model,
        "endpoint": base,
        "max_model_len": max_len,
        "decode_tokens": decode_tokens,
        "decode_rates": decode_rates,
        "prefill": prefill,
    }
228
+
229
+
230
def render_correctness(result: dict) -> str:
    """Render :func:`run_correctness` output as a markdown block for a per-model doc.

    Produces a heading, an endpoint summary line, and a two-column table with
    one row per probe plus a final row for the reasoning-trace field.
    """
    header = [
        f"## Assessment — `{result['model']}`",
        "",
        (
            f"- Endpoint: `{result['endpoint']}` · `/health` {result['health']} · "
            f"`max_model_len` {result['max_model_len']}"
        ),
        "",
        "| Check | Result |",
        "|---|---|",
    ]

    probe_rows = []
    for probe in result["probes"]:
        verdict = "PASS" if probe["ok"] else "FAIL"
        detail = f"{verdict} (finish={probe['finish']}, {probe['completion_tokens']} tok)"
        probe_rows.append(f"| {probe['label']} | {detail} |")

    trace_row = (
        f"| reasoning trace field | `{result['trace_field']}` (len {result['trace_len']}) |"
    )
    return "\n".join([*header, *probe_rows, trace_row])
250
+
251
+
252
def render_benchmark(result: dict) -> str:
    """Render :func:`run_benchmark` output as a markdown block for a per-model doc.

    Produces a heading, an endpoint summary line, and a two-row table: decode
    throughput (per-run rates joined with ``/``) and prefill timing.
    """
    joined_rates = "/".join(map(str, result["decode_rates"]))
    prefill = result["prefill"]

    heading = f"## Benchmark — `{result['model']}`"
    endpoint_line = (
        f"- Endpoint: `{result['endpoint']}` · `max_model_len` {result['max_model_len']}"
    )
    decode_row = (
        f"| **decode throughput** | **{joined_rates} tok/s** (batch=1, greedy, "
        f"{result['decode_tokens']} tok forced) |"
    )
    prefill_row = (
        f"| prefill | {prefill['prompt_tokens']} prompt tokens + 16 gen "
        f"in {prefill['seconds']} s |"
    )
    lines = [
        heading,
        "",
        endpoint_line,
        "",
        "| Metric | Result |",
        "|---|---|",
        decode_row,
        prefill_row,
    ]
    return "\n".join(lines)
@@ -0,0 +1,149 @@
1
+ """Unified CLI entry point for model-gear (binary: ``model``).
2
+
3
+ The model-ops verbs (``switch``, ``serve``/``stop``, ``status``, ``assess``,
4
+ ``benchmark``, ``init``) are the heart of the tool; the agent-first verbs
5
+ (``whoami``, ``learn``, ``explain``, ``overview``, ``doctor``, ``cli``) keep the
6
+ sibling rubric satisfied. Each verb module exposes ``register(sub)`` following
7
+ the same pattern.
8
+
9
+ Error propagation contract
10
+ --------------------------
11
+ Every handler raises :class:`model_gear.cli._errors.ModelGearError` on failure;
12
+ ``main()`` catches it via :func:`_dispatch` and routes through
13
+ :mod:`model_gear.cli._output`. Unknown exceptions are wrapped into a
14
+ ``ModelGearError`` so no Python traceback leaks to stderr.
15
+
16
+ Argparse errors (unknown verb, missing arg) also route through the structured
17
+ format — ``_ModelGearArgumentParser`` overrides ``.error()`` and the subparsers
18
+ are built with ``parser_class=_ModelGearArgumentParser``. Whether errors render
19
+ as text or JSON depends on whether ``--json`` appears in the raw argv
20
+ (:func:`main` sets ``_json_hint`` before ``parse_args``).
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import sys
27
+
28
+ from model_gear import __version__
29
+ from model_gear.cli._errors import EXIT_USER_ERROR, ModelGearError
30
+ from model_gear.cli._output import emit_error
31
+
32
+
33
class _ModelGearArgumentParser(argparse.ArgumentParser):
    """ArgumentParser whose parse errors go through :func:`emit_error`.

    Stock argparse writes ``prog: error: <msg>`` to stderr and exits 2, which
    bypasses the ModelGearError plumbing (and the ``hint:`` line agents look
    for). This subclass emits the structured format instead and exits with
    :attr:`EXIT_USER_ERROR`.

    JSON mode: parse-time errors happen before ``args.json`` exists, so the
    output format is taken from the class-level ``_json_hint``, which
    :func:`main` sets by scanning raw argv for ``--json``. Being a class
    attribute, it is shared by every parser instance of this class.
    """

    # Whether parse errors should be rendered as JSON; set by main().
    _json_hint: bool = False

    def error(self, message: str) -> None:  # type: ignore[override]
        """Emit ``message`` as a structured user error and exit the process."""
        hint = f"run '{self.prog} --help' to see valid arguments"
        failure = ModelGearError(code=EXIT_USER_ERROR, message=message, remediation=hint)
        emit_error(failure, json_mode=type(self)._json_hint)
        raise SystemExit(failure.code)
56
+
57
+
58
def _argv_has_json(argv: list[str] | None) -> bool:
    """Return True when ``--json`` or ``--json=<value>`` appears among the arguments.

    ``argv=None`` means "use the process arguments" (``sys.argv[1:]``).
    """
    if argv is None:
        argv = sys.argv[1:]
    for token in argv:
        if token == "--json" or token.startswith("--json="):
            return True
    return False
61
+
62
+
63
def _build_parser() -> argparse.ArgumentParser:
    """Build the top-level ``model`` parser and register every verb on it."""
    # Imported here rather than at module level, as in the original layout.
    from model_gear.cli._commands import (
        assess as _assess_cmd,
        benchmark as _benchmark_cmd,
        cli as _cli_group,
        doctor as _doctor_cmd,
        explain as _explain_cmd,
        init as _init_cmd,
        learn as _learn_cmd,
        overview as _overview_cmd,
        serve as _serve_cmd,
        status as _status_cmd,
        stop as _stop_cmd,
        switch as _switch_cmd,
        whoami as _whoami_cmd,
    )

    parser = _ModelGearArgumentParser(
        prog="model",
        description="model-gear — run, assess, and switch the local vLLM model",
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    # parser_class propagates to every subparser so their .error() routes
    # through _ModelGearArgumentParser too.
    sub = parser.add_subparsers(dest="command", parser_class=_ModelGearArgumentParser)

    # Registration order is the order verbs appear in --help.
    verb_modules = (
        # Model-ops verbs (the heart of the tool).
        _switch_cmd,
        _serve_cmd,
        _stop_cmd,
        _status_cmd,
        _assess_cmd,
        _benchmark_cmd,
        _init_cmd,
        # Agent-first / introspection verbs (sibling rubric).
        _whoami_cmd,
        _learn_cmd,
        _explain_cmd,
        _overview_cmd,
        _doctor_cmd,
        _cli_group,
    )
    for module in verb_modules:
        module.register(sub)

    return parser
109
+
110
+
111
def _dispatch(args: argparse.Namespace) -> int:
    """Run the handler stored in ``args.func`` and map its outcome to an exit code.

    A handler may return ``None`` (success, exit 0) or an ``int`` exit code.
    A :class:`ModelGearError` is emitted and its ``code`` returned. Any other
    ``Exception`` is wrapped into a ``ModelGearError`` first, so no Python
    traceback leaks.
    """
    as_json = bool(getattr(args, "json", False))
    try:
        outcome = args.func(args)
    except ModelGearError as known:
        emit_error(known, json_mode=as_json)
        return known.code
    except Exception as surprise:  # noqa: BLE001 - last-resort; wrap and route cleanly
        fallback = ModelGearError(
            code=EXIT_USER_ERROR,
            message=f"unexpected: {surprise.__class__.__name__}: {surprise}",
            remediation="file a bug at https://github.com/agentculture/model-gear/issues",
        )
        emit_error(fallback, json_mode=as_json)
        return fallback.code
    else:
        return 0 if outcome is None else outcome
133
+
134
+
135
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected verb; return the process exit code.

    With no verb given, prints the top-level help and returns 0.
    """
    # Pre-parse peek so argparse-level errors honour --json.
    _ModelGearArgumentParser._json_hint = _argv_has_json(argv)

    parser = _build_parser()
    args = parser.parse_args(argv)
    if args.command is not None:
        return _dispatch(args)

    parser.print_help()
    return 0
146
+
147
+
148
if __name__ == "__main__":
    # Propagate the CLI's integer return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)
File without changes
@@ -0,0 +1,53 @@
1
+ """``model assess`` — correctness probes against the served model.
2
+
3
+ Read-only. Runs the two fixed correctness probes and detects the reasoning-trace
4
+ field, then emits a markdown block (plus host-side facts) ready to paste into a
5
+ per-model doc under ``docs/``. Throughput lives in ``model benchmark``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+
12
+ from model_gear import assess as _assess
13
+ from model_gear.cli import _runtime_ops
14
+ from model_gear.cli._output import emit_result
15
+ from model_gear.runtime import _compose, _env
16
+
17
+
18
def cmd_assess(args: argparse.Namespace) -> int:
    """Handle ``model assess``: run the correctness probes and emit the report.

    The model name comes from ``--model``; when that is absent and a deployment
    directory was resolved, it is read from ``VLLM_SERVED_NAME`` in the env
    file. Output is JSON with ``--json``, otherwise a markdown block preceded
    by a host-side header.
    """
    as_json = bool(getattr(args, "json", False))
    port, deploy_dir = _runtime_ops.resolve_port_soft(args)

    model = args.model
    if model is None and deploy_dir is not None:
        model = _env.read_env(deploy_dir / _compose.ENV_FILE, "VLLM_SERVED_NAME")

    result = _assess.run_correctness(f"http://localhost:{port}", model)
    host = {"image": _compose.container_image(), "gpu_memory": _compose.gpu_engine_mem()}

    if as_json:
        emit_result({**result, "host": host}, json_mode=True)
        return 0

    header = (
        "### Host-side\n"
        f"- Image: `{host['image']}` · GPU memory (EngineCore): {host['gpu_memory']}\n"
    )
    report = header + "\n" + _assess.render_correctness(result)
    emit_result(report, json_mode=False)
    return 0
38
+
39
+
40
def register(sub: argparse._SubParsersAction) -> None:
    """Register the ``assess`` verb and its options on ``sub``."""
    parser = sub.add_parser(
        "assess",
        help="Correctness probes against the served model (markdown for a per-model doc).",
    )
    # Declared in the order they should appear in --help.
    options = (
        ("--port", {"type": int, "help": "Host port (default: VLLM_PORT in .env, else 8000)."}),
        (
            "--model",
            {"help": "Served model name (default: VLLM_SERVED_NAME, else first /v1/models)."},
        ),
        (
            "--compose-dir",
            {"help": "Deployment dir (default: $MODEL_GEAR_DIR or ~/.model-gear)."},
        ),
        ("--json", {"action": "store_true", "help": "Emit structured JSON."}),
    )
    for flag, settings in options:
        parser.add_argument(flag, **settings)
    parser.set_defaults(func=cmd_assess)
@@ -0,0 +1,57 @@
1
+ """``model benchmark`` — decode throughput + prefill latency for the served model.
2
+
3
+ Read-only. Forces a fixed decode length over a couple of runs and measures a
4
+ large-prompt prefill, then emits a markdown block (plus host-side facts) for a
5
+ per-model doc under ``docs/``. Correctness lives in ``model assess``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+
12
+ from model_gear import assess as _assess
13
+ from model_gear.cli import _runtime_ops
14
+ from model_gear.cli._output import emit_result
15
+ from model_gear.runtime import _compose, _env
16
+
17
+
18
def cmd_benchmark(args: argparse.Namespace) -> int:
    """Handle ``model benchmark``: measure throughput/prefill and emit the report.

    The model name comes from ``--model``; when that is absent and a deployment
    directory was resolved, it is read from ``VLLM_SERVED_NAME`` in the env
    file. Output is JSON with ``--json``, otherwise a markdown block preceded
    by a host-side header.
    """
    as_json = bool(getattr(args, "json", False))
    port, deploy_dir = _runtime_ops.resolve_port_soft(args)

    model = args.model
    if model is None and deploy_dir is not None:
        model = _env.read_env(deploy_dir / _compose.ENV_FILE, "VLLM_SERVED_NAME")

    result = _assess.run_benchmark(
        f"http://localhost:{port}",
        model,
        decode_tokens=args.decode_tokens,
        runs=args.runs,
    )
    host = {"image": _compose.container_image(), "gpu_memory": _compose.gpu_engine_mem()}

    if as_json:
        emit_result({**result, "host": host}, json_mode=True)
        return 0

    header = (
        "### Host-side\n"
        f"- Image: `{host['image']}` · GPU memory (EngineCore): {host['gpu_memory']}\n"
    )
    report = header + "\n" + _assess.render_benchmark(result)
    emit_result(report, json_mode=False)
    return 0
38
+
39
+
40
def _positive_int(text: str) -> int:
    """argparse ``type=`` callable that accepts only integers >= 1.

    Raises:
        argparse.ArgumentTypeError: when ``text`` is not an integer or is < 1;
            argparse turns this into a normal parse error.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


def register(sub: argparse._SubParsersAction) -> None:
    """Register the ``benchmark`` verb and its options on ``sub``.

    ``--decode-tokens`` and ``--runs`` must be positive: zero runs would yield
    an empty throughput cell in the report, and a non-positive decode length is
    not a meaningful forced-decode request.
    """
    p = sub.add_parser(
        "benchmark",
        help="Decode throughput + prefill latency for the served model (markdown for a doc).",
    )
    p.add_argument("--port", type=int, help="Host port (default: VLLM_PORT in .env, else 8000).")
    p.add_argument(
        "--model", help="Served model name (default: VLLM_SERVED_NAME, else first /v1/models)."
    )
    p.add_argument(
        "--decode-tokens",
        type=_positive_int,
        default=512,
        help="Forced decode length (default 512).",
    )
    p.add_argument(
        "--runs",
        type=_positive_int,
        default=2,
        help="Decode-throughput repetitions (default 2).",
    )
    p.add_argument(
        "--compose-dir", help="Deployment dir (default: $MODEL_GEAR_DIR or ~/.model-gear)."
    )
    p.add_argument("--json", action="store_true", help="Emit structured JSON.")
    p.set_defaults(func=cmd_benchmark)
@@ -0,0 +1,38 @@
1
+ """``model cli`` — noun grouping CLI-surface introspection.
2
+
3
+ Exists to satisfy the agent-first rubric's ``overview_cli_noun_exists`` check.
4
+ ``model cli overview`` describes the CLI surface itself (distinct from the global
5
+ ``overview``, which describes the tool and the served model).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+
12
+ from model_gear.cli._commands.overview import cli_sections, emit_overview
13
+
14
+
15
def cmd_cli_overview(args: argparse.Namespace) -> int:
    """Handle ``model cli overview``: emit the CLI-surface overview; return 0."""
    emit_overview("model cli", cli_sections(), json_mode=bool(getattr(args, "json", False)))
    return 0


def _no_verb(args: argparse.Namespace) -> int:
    """Handle bare ``model cli`` (no sub-verb) by printing the noun's overview."""
    return cmd_cli_overview(args)


def register(sub: argparse._SubParsersAction) -> None:
    """Register the ``cli`` noun and its ``overview`` sub-verb on ``sub``.

    ``--json`` is accepted both before and after the sub-verb
    (``model cli --json overview`` and ``model cli overview --json``).
    """
    p = sub.add_parser(
        "cli",
        help="CLI-surface introspection (see 'model cli overview').",
    )
    p.add_argument("--json", action="store_true", help="Emit structured JSON.")
    p.set_defaults(func=_no_verb, json=False)
    # Reuse the parser class of `p` for the nested subparsers so that
    # `cli overview` parse errors are handled the same way as errors on `p`.
    noun_sub = p.add_subparsers(dest="cli_command", parser_class=type(p))
    ov = noun_sub.add_parser("overview", help="Describe the model-gear CLI surface.")
    # argparse parses a sub-command into a fresh namespace and copies every
    # attribute (defaults included) over the parent's. A plain store_true
    # default of False would therefore erase a `--json` given before the
    # sub-verb. SUPPRESS leaves `json` unset here unless the flag is present,
    # so the value parsed by `p` survives.
    ov.add_argument(
        "--json",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Emit structured JSON.",
    )
    ov.set_defaults(func=cmd_cli_overview)