otari-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- otari_cli/__init__.py +18 -0
- otari_cli/__main__.py +8 -0
- otari_cli/_client.py +43 -0
- otari_cli/_context.py +17 -0
- otari_cli/_errors.py +67 -0
- otari_cli/_output.py +91 -0
- otari_cli/_params.py +46 -0
- otari_cli/cli.py +103 -0
- otari_cli/commands/__init__.py +6 -0
- otari_cli/commands/batches.py +153 -0
- otari_cli/commands/budgets.py +96 -0
- otari_cli/commands/completion.py +83 -0
- otari_cli/commands/embedding.py +53 -0
- otari_cli/commands/health.py +50 -0
- otari_cli/commands/keys.py +120 -0
- otari_cli/commands/message.py +82 -0
- otari_cli/commands/models.py +28 -0
- otari_cli/commands/moderation.py +32 -0
- otari_cli/commands/pricing.py +108 -0
- otari_cli/commands/rerank.py +33 -0
- otari_cli/commands/response.py +74 -0
- otari_cli/commands/usage.py +53 -0
- otari_cli/commands/users.py +131 -0
- otari_cli/config.py +62 -0
- otari_cli/py.typed +0 -0
- otari_cli-0.1.0.dist-info/METADATA +167 -0
- otari_cli-0.1.0.dist-info/RECORD +30 -0
- otari_cli-0.1.0.dist-info/WHEEL +4 -0
- otari_cli-0.1.0.dist-info/entry_points.txt +2 -0
- otari_cli-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""``otari completion`` - create a chat completion."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from otari_cli import _client
|
|
10
|
+
from otari_cli._errors import handle_errors
|
|
11
|
+
from otari_cli._output import console, print_json
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Iterable
|
|
15
|
+
|
|
16
|
+
from otari_cli._context import AppContext
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def completion(
    ctx: typer.Context,
    prompt: str = typer.Argument(..., help="The user prompt to send to the model."),
    model: str = typer.Option(..., "--model", "-m", help="Model id, e.g. 'openai:gpt-4o-mini'."),
    system: str | None = typer.Option(None, "--system", "-s", help="Optional system prompt."),
    stream: bool = typer.Option(False, "--stream", help="Stream tokens as they are generated."),
    temperature: float | None = typer.Option(None, "--temperature", help="Sampling temperature."),
    max_tokens: int | None = typer.Option(None, "--max-tokens", help="Maximum tokens to generate."),
) -> None:
    """Create a chat completion and print the assistant's reply."""
    settings: AppContext = ctx.obj

    # The optional system turn goes first, followed by the user's prompt.
    system_turn = [] if system is None else [{"role": "system", "content": system}]
    conversation: list[dict[str, str]] = [*system_turn, {"role": "user", "content": prompt}]

    # Forward only the sampling options that were actually supplied.
    optional = {"temperature": temperature, "max_tokens": max_tokens}
    extra: dict[str, Any] = {name: value for name, value in optional.items() if value is not None}

    with handle_errors(), _client.session(settings.config) as client:
        if not stream:
            reply = client.completion(model=model, messages=conversation, **extra)
            _render_completion(reply, output_json=settings.output_json)
        else:
            # The stream is consumed while the session is still open.
            chunk_iter = client.completion(model=model, messages=conversation, stream=True, **extra)
            _render_stream(chunk_iter, output_json=settings.output_json)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _render_completion(result: Any, *, output_json: bool) -> None:
    """Print a non-streaming completion result.

    When ``output_json`` is true the whole ``result`` is handed to
    ``print_json``; otherwise only the text extracted by
    ``_completion_text`` is printed.
    """
    if output_json:
        print_json(result)
        return
    # Model output is arbitrary text. Print it verbatim so that bracketed
    # sequences such as "[/INST]" are not parsed as console markup and
    # ":name:" sequences are not rewritten as emoji.
    # NOTE(review): assumes console() returns a rich Console - confirm in _output.
    console().print(_completion_text(result), markup=False, emoji=False)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _render_stream(chunks: Iterable[Any], *, output_json: bool) -> None:
    """Render a streamed completion one chunk at a time.

    In JSON mode every chunk is passed to ``print_json``. Otherwise the text
    extracted by ``_chunk_text`` is printed without a trailing newline, and a
    single newline is printed once the stream is exhausted.
    """
    for chunk in chunks:
        if output_json:
            print_json(chunk)
            continue
        delta = _chunk_text(chunk)
        if delta:
            # Deltas are fragments of arbitrary model text; print them
            # verbatim so a fragment like "[/" is never parsed as markup
            # and ":name:" sequences are not rewritten as emoji.
            # NOTE(review): assumes console() returns a rich Console - confirm.
            console().print(delta, end="", markup=False, emoji=False)
    if not output_json:
        # Terminate the line that the deltas were written on.
        console().print()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _completion_text(result: Any) -> str:
    """Return the ``message.content`` of the first choice, or ``""``.

    Missing attributes, an empty/absent ``choices`` sequence and falsy
    content all yield the empty string.
    """
    options = getattr(result, "choices", None)
    if not options:
        return ""
    first_message = getattr(options[0], "message", None)
    content = getattr(first_message, "content", "")
    return content if content else ""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _chunk_text(chunk: Any) -> str:
    """Return the ``delta.content`` of a chunk's first choice, or ``""``.

    Missing attributes, an empty/absent ``choices`` sequence and falsy
    content all yield the empty string.
    """
    options = getattr(chunk, "choices", None)
    if not options:
        return ""
    first_delta = getattr(options[0], "delta", None)
    piece = getattr(first_delta, "content", "")
    return piece if piece else ""
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""``otari embedding`` - create embeddings for one or more inputs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from otari_cli import _client
|
|
10
|
+
from otari_cli._errors import handle_errors
|
|
11
|
+
from otari_cli._output import print_json, render_records
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from otari_cli._context import AppContext
|
|
15
|
+
|
|
16
|
+
# Number of leading vector components shown in the "preview" column of the
# human-readable embedding summary.
_PREVIEW = 4
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def embedding(
    ctx: typer.Context,
    inputs: list[str] = typer.Argument(..., help="One or more input strings to embed."),
    model: str = typer.Option(..., "--model", "-m", help="Model id, e.g. 'openai:text-embedding-3-small'."),
) -> None:
    """Create embeddings. Human output is a summary; use --json for the vectors."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        response = client.embedding(model=model, input=inputs)
        if not settings.output_json:
            # Human mode: one summary row per returned vector.
            render_records(
                _summary_rows(response),
                output_json=False,
                title="Embeddings",
                empty_message="No embeddings returned.",
                columns=["index", "dimensions", "preview"],
            )
            return
        # JSON mode: emit the full response, vectors included.
        print_json(response)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _summary_rows(result: Any) -> list[dict[str, Any]]:
    """Build one summary row per item found in ``result.data``.

    Each row carries the item's ``index``, the vector length as
    ``dimensions`` and a bracketed ``preview`` of the first ``_PREVIEW``
    components formatted to four decimals, followed by ``", ..."`` when the
    vector is longer than the preview.
    """

    def _row(entry: Any) -> dict[str, Any]:
        values = getattr(entry, "embedding", None) or []
        head = ", ".join(f"{number:.4f}" for number in values[:_PREVIEW])
        tail = ", ..." if len(values) > _PREVIEW else ""
        return {
            "index": getattr(entry, "index", None),
            "dimensions": len(values),
            "preview": f"[{head}{tail}]",
        }

    return [_row(entry) for entry in getattr(result, "data", None) or []]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""``otari health`` - check gateway connectivity via ``GET /health``.
|
|
2
|
+
|
|
3
|
+
The SDK does not expose a health method, so this command issues a direct
|
|
4
|
+
unauthenticated request to the gateway's ``/health`` endpoint using the
|
|
5
|
+
resolved base URL.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
import typer
|
|
14
|
+
|
|
15
|
+
from otari_cli._errors import EXIT_ERROR
|
|
16
|
+
from otari_cli._output import console, error_console, print_json
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from otari_cli._context import AppContext
|
|
20
|
+
|
|
21
|
+
# Timeout handed to ``httpx.get`` for the ``/health`` probe (httpx timeouts
# are expressed in seconds).
_HEALTH_TIMEOUT = 10.0
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def health(ctx: typer.Context) -> None:
    """Report whether the configured gateway is reachable and healthy."""
    app_ctx: AppContext = ctx.obj
    base = app_ctx.config.api_base
    if not base:
        error_console().print(
            "[bold red]Error:[/] no gateway URL configured; set --api-base or GATEWAY_API_BASE."
        )
        raise typer.Exit(EXIT_ERROR)

    # Tolerate a trailing slash on the configured base URL.
    url = base.rstrip("/") + "/health"
    try:
        response = httpx.get(url, timeout=_HEALTH_TIMEOUT)
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # httpx.InvalidURL does not derive from httpx.HTTPError, so a
        # malformed base URL must be caught explicitly to get the same
        # clean error and exit code as an unreachable gateway.
        error_console().print(f"[bold red]Error:[/] could not reach gateway at {url}: {exc}")
        raise typer.Exit(EXIT_ERROR) from exc

    # Only an exact 200 counts as healthy.
    ok = response.status_code == 200
    if app_ctx.output_json:
        print_json({"url": url, "status_code": response.status_code, "ok": ok})
    elif ok:
        console().print(f"[bold green]OK[/] {url} ({response.status_code})")
    else:
        error_console().print(f"[bold red]Unhealthy[/] {url} ({response.status_code})")

    # The exit status reflects health in both JSON and human modes.
    if not ok:
        raise typer.Exit(EXIT_ERROR)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""``otari keys`` - manage gateway virtual API keys (control plane).
|
|
2
|
+
|
|
3
|
+
These commands hit the gateway control plane and therefore require an admin
|
|
4
|
+
credential (``--admin-key`` / ``GATEWAY_ADMIN_KEY``, or the platform token in
|
|
5
|
+
platform mode). They are only available against a self-hosted/standalone
|
|
6
|
+
gateway.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
import typer
|
|
14
|
+
from otari._client import CreateKeyRequest, UpdateKeyRequest
|
|
15
|
+
|
|
16
|
+
from otari_cli import _client
|
|
17
|
+
from otari_cli._errors import handle_errors
|
|
18
|
+
from otari_cli._output import console, print_json, render_records
|
|
19
|
+
from otari_cli._params import drop_none, parse_datetime, parse_json_object
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from otari_cli._context import AppContext
|
|
23
|
+
|
|
24
|
+
# Typer sub-application that the ``keys`` commands below register on.
# Invoking it with no arguments shows the help text.
app = typer.Typer(
    no_args_is_help=True,
    name="keys",
    help="Manage gateway virtual API keys (admin / self-hosted only).",
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@app.command("list")
def list_keys(
    ctx: typer.Context,
    skip: int | None = typer.Option(None, "--skip", help="Number of keys to skip."),
    limit: int | None = typer.Option(None, "--limit", help="Maximum number of keys to return."),
) -> None:
    """List virtual API keys."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        # Materialise the listing before handing it to the renderer.
        keys = list(client.control_plane.keys.list(skip=skip, limit=limit))
        render_records(
            keys,
            output_json=settings.output_json,
            title="API keys",
            empty_message="No API keys found.",
        )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@app.command("get")
def get_key(
    ctx: typer.Context,
    key_id: str = typer.Argument(..., help="Identifier of the key to retrieve."),
) -> None:
    """Show details for a single virtual API key."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        # The key is always printed as JSON, regardless of the output mode.
        print_json(client.control_plane.keys.get(key_id))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@app.command("create")
def create_key(
    ctx: typer.Context,
    key_name: str | None = typer.Option(None, "--name", help="Optional name for the key."),
    user_id: str | None = typer.Option(None, "--user", help="User id to associate with the key."),
    expires_at: str | None = typer.Option(None, "--expires-at", help="Expiration (ISO-8601)."),
    metadata: str | None = typer.Option(None, "--metadata", help="Metadata as a JSON object."),
) -> None:
    """Create a virtual API key. The response includes the new key value."""
    settings: AppContext = ctx.obj

    # Parse the textual flags first so bad input fails before any request.
    expiry = parse_datetime(expires_at, flag="--expires-at")
    extra = parse_json_object(metadata, flag="--metadata")

    # Only the fields that were supplied are placed on the request.
    fields = drop_none(
        key_name=key_name,
        user_id=user_id,
        expires_at=expiry,
        metadata=extra,
    )
    payload = CreateKeyRequest(**fields)

    with handle_errors(), _client.session(settings.config) as client:
        print_json(client.control_plane.keys.create(payload))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@app.command("update")
def update_key(
    ctx: typer.Context,
    key_id: str = typer.Argument(..., help="Identifier of the key to update."),
    key_name: str | None = typer.Option(None, "--name", help="New name for the key."),
    active: bool | None = typer.Option(None, "--active/--inactive", help="Enable or disable the key."),
    expires_at: str | None = typer.Option(None, "--expires-at", help="New expiration (ISO-8601)."),
    metadata: str | None = typer.Option(None, "--metadata", help="Metadata as a JSON object."),
) -> None:
    """Update a virtual API key. Only the provided fields are changed."""
    settings: AppContext = ctx.obj

    # Parse the textual flags first so bad input fails before any request.
    expiry = parse_datetime(expires_at, flag="--expires-at")
    extra = parse_json_object(metadata, flag="--metadata")

    # Flags left unset (None) are dropped, so they are not sent as changes.
    changes = drop_none(
        key_name=key_name,
        is_active=active,
        expires_at=expiry,
        metadata=extra,
    )
    payload = UpdateKeyRequest(**changes)

    with handle_errors(), _client.session(settings.config) as client:
        print_json(client.control_plane.keys.update(key_id, payload))
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@app.command("delete")
def delete_key(
    ctx: typer.Context,
    key_id: str = typer.Argument(..., help="Identifier of the key to delete."),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip the confirmation prompt."),
) -> None:
    """Delete a virtual API key."""
    settings: AppContext = ctx.obj

    # Ask before deleting unless --yes was given; declining aborts.
    needs_confirmation = not yes
    if needs_confirmation:
        typer.confirm(f"Delete API key {key_id!r}?", abort=True)

    with handle_errors(), _client.session(settings.config) as client:
        client.control_plane.keys.delete(key_id)
        console().print(f"[bold green]Deleted[/] API key {key_id!r}.")
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""``otari message`` - create an Anthropic-style message via ``/messages``."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from otari_cli import _client
|
|
10
|
+
from otari_cli._errors import handle_errors
|
|
11
|
+
from otari_cli._output import console, print_json
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Iterable
|
|
15
|
+
|
|
16
|
+
from otari_cli._context import AppContext
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def message(
    ctx: typer.Context,
    prompt: str = typer.Argument(..., help="The user prompt to send to the model."),
    model: str = typer.Option(..., "--model", "-m", help="Model id, e.g. 'anthropic:claude-3-5-sonnet'."),
    max_tokens: int = typer.Option(1024, "--max-tokens", help="Maximum tokens to generate (required by /messages)."),
    system: str | None = typer.Option(None, "--system", "-s", help="Optional system prompt."),
    stream: bool = typer.Option(False, "--stream", help="Stream the response as it is generated."),
    temperature: float | None = typer.Option(None, "--temperature", help="Sampling temperature."),
) -> None:
    """Create an Anthropic-style message and print the reply."""
    settings: AppContext = ctx.obj

    conversation = [{"role": "user", "content": prompt}]

    # The system prompt travels as its own keyword, not as a message turn;
    # options left unset are not forwarded at all.
    optional = {"system": system, "temperature": temperature}
    extra: dict[str, Any] = {name: value for name, value in optional.items() if value is not None}

    with handle_errors(), _client.session(settings.config) as client:
        if not stream:
            reply = client.message(model=model, messages=conversation, max_tokens=max_tokens, **extra)
            _render_message(reply, output_json=settings.output_json)
        else:
            # The stream is consumed while the session is still open.
            event_iter = client.message(
                model=model, messages=conversation, max_tokens=max_tokens, stream=True, **extra
            )
            _render_stream(event_iter, output_json=settings.output_json)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _render_message(result: Any, *, output_json: bool) -> None:
    """Print a non-streaming ``/messages`` result.

    When ``output_json`` is true the whole ``result`` is handed to
    ``print_json``; otherwise only the text extracted by ``_message_text``
    is printed.
    """
    if output_json:
        print_json(result)
        return
    # Model output is arbitrary text. Print it verbatim so that bracketed
    # sequences are not parsed as console markup and ":name:" sequences are
    # not rewritten as emoji.
    # NOTE(review): assumes console() returns a rich Console - confirm in _output.
    console().print(_message_text(result), markup=False, emoji=False)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _render_stream(events: Iterable[Any], *, output_json: bool) -> None:
    """Render a streamed ``/messages`` response one event at a time.

    In JSON mode every event is passed to ``print_json``. Otherwise the text
    extracted by ``_event_text`` is printed without a trailing newline, and a
    single newline is printed once the stream is exhausted.
    """
    # The /messages stream yields raw event dicts with no single typed chunk
    # model, so non-JSON mode prints text deltas and JSON mode prints events.
    for event in events:
        if output_json:
            print_json(event)
            continue
        delta = _event_text(event)
        if delta:
            # Deltas are fragments of arbitrary model text; print them
            # verbatim so a fragment like "[/" is never parsed as markup
            # and ":name:" sequences are not rewritten as emoji.
            # NOTE(review): assumes console() returns a rich Console - confirm.
            console().print(delta, end="", markup=False, emoji=False)
    if not output_json:
        # Terminate the line that the deltas were written on.
        console().print()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _message_text(result: Any) -> str:
    """Concatenate the ``text`` of every ``type == "text"`` content block.

    Blocks of any other type are skipped; a missing or falsy ``content`` or
    ``text`` contributes nothing.
    """
    pieces: list[str] = []
    for block in getattr(result, "content", None) or []:
        if getattr(block, "type", None) != "text":
            continue
        pieces.append(getattr(block, "text", "") or "")
    return "".join(pieces)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _event_text(event: Any) -> str:
    """Return the text carried by a ``text_delta`` event, or ``""``.

    Only dict events whose ``"delta"`` is a dict with
    ``"type" == "text_delta"`` and a string ``"text"`` produce output.
    """
    # content_block_delta events carry {"delta": {"type": "text_delta", "text": ...}}.
    if isinstance(event, dict):
        payload = event.get("delta")
        if isinstance(payload, dict) and payload.get("type") == "text_delta":
            fragment = payload.get("text", "")
            if isinstance(fragment, str):
                return fragment
    return ""
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""``otari models`` - list models available through the gateway."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import typer # noqa: TC002 (Typer resolves the Context annotation at runtime)
|
|
8
|
+
|
|
9
|
+
from otari_cli import _client
|
|
10
|
+
from otari_cli._errors import handle_errors
|
|
11
|
+
from otari_cli._output import render_records
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from otari_cli._context import AppContext
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def models(ctx: typer.Context) -> None:
    """List the models the gateway can route to."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        # Materialise the listing before handing it to the renderer.
        catalogue = list(client.list_models())
        render_records(
            catalogue,
            output_json=settings.output_json,
            title="Models",
            empty_message="No models available.",
            columns=["id", "owned_by", "created"],
        )
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""``otari moderation`` - classify text against the moderation endpoint."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from otari_cli import _client
|
|
10
|
+
from otari_cli._errors import handle_errors
|
|
11
|
+
from otari_cli._output import render_records
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from otari_cli._context import AppContext
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def moderation(
    ctx: typer.Context,
    inputs: list[str] = typer.Argument(..., help="One or more input strings to classify."),
    model: str = typer.Option(..., "--model", "-m", help="Model id, e.g. 'openai:omni-moderation-latest'."),
) -> None:
    """Classify text and report the moderation results."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        outcome = client.moderation(model=model, input=inputs)
        # A missing or empty ``results`` attribute renders the empty message.
        verdicts = list(getattr(outcome, "results", None) or [])
        render_records(
            verdicts,
            output_json=settings.output_json,
            title="Moderation",
            empty_message="No moderation results returned.",
        )
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""``otari pricing`` - manage per-model pricing (control plane).
|
|
2
|
+
|
|
3
|
+
Requires an admin credential and a self-hosted/standalone gateway.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import typer
|
|
11
|
+
from otari._client import SetPricingRequest
|
|
12
|
+
|
|
13
|
+
from otari_cli import _client
|
|
14
|
+
from otari_cli._errors import handle_errors
|
|
15
|
+
from otari_cli._output import console, print_json, render_records
|
|
16
|
+
from otari_cli._params import drop_none, parse_datetime
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from otari_cli._context import AppContext
|
|
20
|
+
|
|
21
|
+
# Typer sub-application that the ``pricing`` commands below register on.
# Invoking it with no arguments shows the help text.
app = typer.Typer(
    no_args_is_help=True,
    name="pricing",
    help="Manage per-model pricing (admin / self-hosted only).",
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@app.command("list")
def list_pricing(
    ctx: typer.Context,
    skip: int | None = typer.Option(None, "--skip", help="Number of entries to skip."),
    limit: int | None = typer.Option(None, "--limit", help="Maximum number of entries to return."),
) -> None:
    """List model pricing entries."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        # Materialise the listing before handing it to the renderer.
        entries = list(client.control_plane.pricing.list(skip=skip, limit=limit))
        render_records(
            entries,
            output_json=settings.output_json,
            title="Pricing",
            empty_message="No pricing entries found.",
        )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@app.command("get")
def get_pricing(
    ctx: typer.Context,
    model_key: str = typer.Argument(..., help="Model identifier, e.g. 'openai:gpt-4o-mini'."),
) -> None:
    """Show the current pricing for a model."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        # The entry is always printed as JSON, regardless of the output mode.
        print_json(client.control_plane.pricing.get(model_key))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@app.command("set")
def set_pricing(
    ctx: typer.Context,
    model_key: str = typer.Argument(..., help="Model identifier, e.g. 'openai:gpt-4o-mini'."),
    input_price: float = typer.Option(..., "--input-price", help="Price per 1M input tokens."),
    output_price: float = typer.Option(..., "--output-price", help="Price per 1M output tokens."),
    effective_at: str | None = typer.Option(None, "--effective-at", help="When the price applies (ISO-8601)."),
) -> None:
    """Set the pricing for a model."""
    settings: AppContext = ctx.obj

    # ``effective_at`` is only placed on the request when the flag was given.
    when = parse_datetime(effective_at, flag="--effective-at")
    optional_fields = drop_none(effective_at=when)
    payload = SetPricingRequest(
        model_key=model_key,
        input_price_per_million=input_price,
        output_price_per_million=output_price,
        **optional_fields,
    )

    with handle_errors(), _client.session(settings.config) as client:
        print_json(client.control_plane.pricing.set(payload))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.command("history")
def pricing_history(
    ctx: typer.Context,
    model_key: str = typer.Argument(..., help="Model identifier, e.g. 'openai:gpt-4o-mini'."),
) -> None:
    """Show the pricing history for a model."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        # Materialise the history before handing it to the renderer.
        past_entries = list(client.control_plane.pricing.get_history(model_key))
        render_records(
            past_entries,
            output_json=settings.output_json,
            title=f"Pricing history for {model_key}",
            empty_message="No pricing history found.",
        )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@app.command("delete")
def delete_pricing(
    ctx: typer.Context,
    model_key: str = typer.Argument(..., help="Model identifier to delete pricing for."),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip the confirmation prompt."),
) -> None:
    """Delete the pricing for a model."""
    settings: AppContext = ctx.obj

    # Ask before deleting unless --yes was given; declining aborts.
    needs_confirmation = not yes
    if needs_confirmation:
        typer.confirm(f"Delete pricing for {model_key!r}?", abort=True)

    with handle_errors(), _client.session(settings.config) as client:
        client.control_plane.pricing.delete(model_key)
        console().print(f"[bold green]Deleted[/] pricing for {model_key!r}.")
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""``otari rerank`` - rerank documents by relevance to a query."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from otari_cli import _client
|
|
10
|
+
from otari_cli._errors import handle_errors
|
|
11
|
+
from otari_cli._output import render_records
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from otari_cli._context import AppContext
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def rerank(
    ctx: typer.Context,
    documents: list[str] = typer.Argument(..., help="Documents to rerank."),
    model: str = typer.Option(..., "--model", "-m", help="Model id, e.g. 'cohere:rerank-v3.5'."),
    query: str = typer.Option(..., "--query", "-q", help="The query to rank documents against."),
) -> None:
    """Rerank documents by relevance to the query."""
    settings: AppContext = ctx.obj
    with handle_errors(), _client.session(settings.config) as client:
        outcome = client.rerank(model=model, query=query, documents=documents)
        # A missing or empty ``results`` attribute renders the empty message.
        ranked = list(getattr(outcome, "results", None) or [])
        render_records(
            ranked,
            output_json=settings.output_json,
            title="Rerank",
            empty_message="No rerank results returned.",
        )
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""``otari response`` - create a response via the OpenAI-style Responses API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from otari_cli import _client
|
|
10
|
+
from otari_cli._errors import handle_errors
|
|
11
|
+
from otari_cli._output import console, print_json
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Iterable
|
|
15
|
+
|
|
16
|
+
from otari_cli._context import AppContext
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def response(
    ctx: typer.Context,
    text: str = typer.Argument(..., help="The input text to send to the model."),
    model: str = typer.Option(..., "--model", "-m", help="Model id, e.g. 'openai:gpt-4o-mini'."),
    stream: bool = typer.Option(False, "--stream", help="Stream the response as it is generated."),
    temperature: float | None = typer.Option(None, "--temperature", help="Sampling temperature."),
) -> None:
    """Create a response and print the model output."""
    settings: AppContext = ctx.obj

    # The temperature is forwarded only when it was explicitly supplied.
    extra: dict[str, Any] = {} if temperature is None else {"temperature": temperature}

    with handle_errors(), _client.session(settings.config) as client:
        if not stream:
            reply = client.response(model=model, input=text, **extra)
            _render_response(reply, output_json=settings.output_json)
        else:
            # The stream is consumed while the session is still open.
            event_iter = client.response(model=model, input=text, stream=True, **extra)
            _render_stream(event_iter, output_json=settings.output_json)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _render_response(result: Any, *, output_json: bool) -> None:
    """Print a non-streaming Responses API result.

    When ``output_json`` is true the whole ``result`` is handed to
    ``print_json``. Otherwise a non-empty string ``output_text`` attribute is
    printed as plain text, and anything else falls back to ``print_json``.
    """
    if output_json:
        print_json(result)
        return
    # The Responses API exposes an `output_text` convenience when available;
    # otherwise fall back to the full JSON so nothing is silently dropped.
    output_text = getattr(result, "output_text", None)
    if isinstance(output_text, str) and output_text:
        # Model output is arbitrary text. Print it verbatim so that bracketed
        # sequences are not parsed as console markup and ":name:" sequences
        # are not rewritten as emoji.
        # NOTE(review): assumes console() returns a rich Console - confirm.
        console().print(output_text, markup=False, emoji=False)
    else:
        print_json(result)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _render_stream(events: Iterable[Any], *, output_json: bool) -> None:
    """Render a streamed Responses API result one event at a time.

    In JSON mode every event is passed to ``print_json``. Otherwise the text
    extracted by ``_event_text`` is printed without a trailing newline, and a
    single newline is printed once the stream is exhausted.
    """
    for event in events:
        if output_json:
            print_json(event)
            continue
        delta = _event_text(event)
        if delta:
            # Deltas are fragments of arbitrary model text; print them
            # verbatim so a fragment like "[/" is never parsed as markup
            # and ":name:" sequences are not rewritten as emoji.
            # NOTE(review): assumes console() returns a rich Console - confirm.
            console().print(delta, end="", markup=False, emoji=False)
    if not output_json:
        # Terminate the line that the deltas were written on.
        console().print()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _event_text(event: Any) -> str:
    """Return the string delta of an ``output_text.delta`` event, or ``""``.

    Only dict events whose ``"type"`` (coerced to ``str``) ends with
    ``"output_text.delta"`` and whose ``"delta"`` is a string produce output.
    """
    # response.output_text.delta events carry {"delta": "..."}.
    if isinstance(event, dict):
        kind = str(event.get("type", ""))
        if kind.endswith("output_text.delta"):
            fragment = event.get("delta", "")
            if isinstance(fragment, str):
                return fragment
    return ""
|