lyceum-cli 1.0.21__py3-none-any.whl → 1.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyceum/__init__.py +1 -0
- lyceum/external/__init__.py +1 -0
- lyceum/external/auth/__init__.py +1 -0
- lyceum/external/auth/login.py +68 -60
- lyceum/external/compute/__init__.py +1 -0
- lyceum/external/compute/execution/__init__.py +1 -0
- lyceum/external/compute/execution/python.py +20 -16
- lyceum/external/compute/inference/batch.py +73 -64
- lyceum/external/compute/inference/chat.py +36 -41
- lyceum/external/compute/inference/models.py +20 -14
- lyceum/external/general/__init__.py +1 -0
- lyceum/external/vms/__init__.py +0 -0
- lyceum/external/vms/instances.py +303 -0
- lyceum/external/vms/management.py +253 -0
- lyceum/main.py +3 -1
- lyceum/shared/__init__.py +1 -0
- lyceum/shared/config.py +37 -32
- lyceum/shared/display.py +3 -5
- lyceum/shared/streaming.py +60 -44
- {lyceum_cli-1.0.21.dist-info → lyceum_cli-1.0.23.dist-info}/METADATA +1 -1
- lyceum_cli-1.0.23.dist-info/RECORD +28 -0
- lyceum_cloud_execution_api_client/__init__.py +1 -0
- lyceum_cloud_execution_api_client/api/__init__.py +1 -0
- lyceum_cli-1.0.21.dist-info/RECORD +0 -25
- {lyceum_cli-1.0.21.dist-info → lyceum_cli-1.0.23.dist-info}/WHEEL +0 -0
- {lyceum_cli-1.0.21.dist-info → lyceum_cli-1.0.23.dist-info}/entry_points.txt +0 -0
- {lyceum_cli-1.0.21.dist-info → lyceum_cli-1.0.23.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,10 @@
|
|
|
1
|
-
"""
|
|
2
|
-
OpenAI-compatible batch processing commands
|
|
3
|
-
"""
|
|
1
|
+
"""OpenAI-compatible batch processing commands"""
|
|
4
2
|
|
|
5
3
|
import os
|
|
6
|
-
import time
|
|
7
4
|
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
import typer
|
|
5
|
+
|
|
10
6
|
import httpx
|
|
7
|
+
import typer
|
|
11
8
|
from rich.console import Console
|
|
12
9
|
|
|
13
10
|
from ....shared.config import config
|
|
@@ -28,19 +25,19 @@ def upload_file(
|
|
|
28
25
|
if not Path(file_path).exists():
|
|
29
26
|
console.print(f"[red]Error: File '{file_path}' not found[/red]")
|
|
30
27
|
raise typer.Exit(1)
|
|
31
|
-
|
|
28
|
+
|
|
32
29
|
# Validate file extension
|
|
33
30
|
if not file_path.endswith('.jsonl'):
|
|
34
31
|
console.print("[yellow]Warning: File doesn't have .jsonl extension[/yellow]")
|
|
35
|
-
|
|
32
|
+
|
|
36
33
|
try:
|
|
37
34
|
console.print(f"[dim]📤 Uploading {os.path.basename(file_path)} for {purpose}...[/dim]")
|
|
38
|
-
|
|
35
|
+
|
|
39
36
|
# Upload file using multipart form data
|
|
40
37
|
with open(file_path, 'rb') as f:
|
|
41
38
|
files = {'file': (os.path.basename(file_path), f, 'application/jsonl')}
|
|
42
39
|
data = {'purpose': purpose}
|
|
43
|
-
|
|
40
|
+
|
|
44
41
|
response = httpx.post(
|
|
45
42
|
f"{config.base_url}/api/v2/external/files",
|
|
46
43
|
headers={"Authorization": f"Bearer {config.api_key}"},
|
|
@@ -48,20 +45,20 @@ def upload_file(
|
|
|
48
45
|
data=data,
|
|
49
46
|
timeout=60.0
|
|
50
47
|
)
|
|
51
|
-
|
|
48
|
+
|
|
52
49
|
if response.status_code != 200:
|
|
53
50
|
console.print(f"[red]Error: HTTP {response.status_code}[/red]")
|
|
54
51
|
console.print(f"[red]{response.text}[/red]")
|
|
55
52
|
raise typer.Exit(1)
|
|
56
|
-
|
|
53
|
+
|
|
57
54
|
data = response.json()
|
|
58
|
-
|
|
59
|
-
console.print(
|
|
55
|
+
|
|
56
|
+
console.print("[green]✅ File uploaded successfully![/green]")
|
|
60
57
|
console.print(f"[cyan]File ID: {data['id']}[/cyan]")
|
|
61
58
|
console.print(f"[dim]Size: {data['bytes']} bytes[/dim]")
|
|
62
59
|
console.print(f"[dim]Purpose: {data['purpose']}[/dim]")
|
|
63
60
|
console.print(f"[dim]Created: {data['created_at']}[/dim]")
|
|
64
|
-
|
|
61
|
+
|
|
65
62
|
except Exception as e:
|
|
66
63
|
console.print(f"[red]Error: {e}[/red]")
|
|
67
64
|
raise typer.Exit(1)
|
|
@@ -70,51 +67,57 @@ def upload_file(
|
|
|
70
67
|
@batch_app.command("create")
|
|
71
68
|
def create_batch(
|
|
72
69
|
input_file_id: str = typer.Argument(..., help="File ID of uploaded JSONL file"),
|
|
73
|
-
endpoint:
|
|
74
|
-
|
|
75
|
-
|
|
70
|
+
endpoint: str | None = typer.Option(
|
|
71
|
+
None, "--endpoint", "-e", help="API endpoint (optional, uses URLs from JSONL if not specified)"
|
|
72
|
+
),
|
|
73
|
+
model: str | None = typer.Option(
|
|
74
|
+
None, "--model", "-m", help="Model to use for all requests (overrides model in JSONL)"
|
|
75
|
+
),
|
|
76
|
+
completion_window: str = typer.Option(
|
|
77
|
+
"24h", "--completion-window", "-w", help="Completion window (24h only)"
|
|
78
|
+
),
|
|
76
79
|
):
|
|
77
80
|
"""Create a batch processing job"""
|
|
78
81
|
try:
|
|
79
82
|
console.print(f"[dim]🚀 Creating batch job for file {input_file_id}...[/dim]")
|
|
80
|
-
|
|
83
|
+
|
|
81
84
|
request_data = {
|
|
82
85
|
"input_file_id": input_file_id,
|
|
83
86
|
"completion_window": completion_window
|
|
84
87
|
}
|
|
85
|
-
|
|
88
|
+
|
|
86
89
|
# Only include endpoint if explicitly provided as override
|
|
87
90
|
if endpoint:
|
|
88
91
|
request_data["endpoint"] = endpoint
|
|
89
92
|
console.print(f"[dim]Using endpoint override: {endpoint}[/dim]")
|
|
90
93
|
# Note: endpoint is NOT required - the API should use URLs from JSONL file
|
|
91
|
-
|
|
94
|
+
|
|
92
95
|
# Include model override if specified
|
|
93
96
|
if model:
|
|
94
97
|
request_data["model"] = model
|
|
95
98
|
console.print(f"[dim]Using model override: {model}[/dim]")
|
|
96
|
-
|
|
99
|
+
|
|
97
100
|
response = httpx.post(
|
|
98
101
|
f"{config.base_url}/api/v2/external/batches",
|
|
99
102
|
headers={"Authorization": f"Bearer {config.api_key}"},
|
|
100
103
|
json=request_data,
|
|
101
104
|
timeout=30.0
|
|
102
105
|
)
|
|
103
|
-
|
|
106
|
+
|
|
104
107
|
if response.status_code != 200:
|
|
105
108
|
console.print(f"[red]Error: HTTP {response.status_code}[/red]")
|
|
106
109
|
console.print(f"[red]{response.text}[/red]")
|
|
107
110
|
raise typer.Exit(1)
|
|
108
|
-
|
|
111
|
+
|
|
109
112
|
data = response.json()
|
|
110
|
-
|
|
111
|
-
console.print(
|
|
113
|
+
|
|
114
|
+
console.print("[green]✅ Batch job created successfully![/green]")
|
|
112
115
|
console.print(f"[cyan]Batch ID: {data['id']}[/cyan]")
|
|
113
116
|
console.print(f"[yellow]Status: {data['status']}[/yellow]")
|
|
114
117
|
console.print(f"[dim]Endpoint: {data['endpoint']}[/dim]")
|
|
115
118
|
console.print(f"[dim]Input File ID: {data['input_file_id']}[/dim]")
|
|
116
119
|
console.print(f"[dim]Expires: {data['expires_at']}[/dim]")
|
|
117
|
-
|
|
120
|
+
|
|
118
121
|
except Exception as e:
|
|
119
122
|
console.print(f"[red]Error: {e}[/red]")
|
|
120
123
|
raise typer.Exit(1)
|
|
@@ -127,20 +130,20 @@ def get_batch(
|
|
|
127
130
|
"""Get batch job status and details"""
|
|
128
131
|
try:
|
|
129
132
|
console.print(f"[dim]🔍 Retrieving batch {batch_id}...[/dim]")
|
|
130
|
-
|
|
133
|
+
|
|
131
134
|
response = httpx.get(
|
|
132
135
|
f"{config.base_url}/api/v2/external/batches/{batch_id}",
|
|
133
136
|
headers={"Authorization": f"Bearer {config.api_key}"},
|
|
134
137
|
timeout=30.0
|
|
135
138
|
)
|
|
136
|
-
|
|
139
|
+
|
|
137
140
|
if response.status_code != 200:
|
|
138
141
|
console.print(f"[red]Error: HTTP {response.status_code}[/red]")
|
|
139
142
|
console.print(f"[red]{response.text}[/red]")
|
|
140
143
|
raise typer.Exit(1)
|
|
141
|
-
|
|
144
|
+
|
|
142
145
|
data = response.json()
|
|
143
|
-
|
|
146
|
+
|
|
144
147
|
# Status color coding
|
|
145
148
|
status = data['status']
|
|
146
149
|
if status == "completed":
|
|
@@ -151,29 +154,33 @@ def get_batch(
|
|
|
151
154
|
status_color = "yellow"
|
|
152
155
|
else:
|
|
153
156
|
status_color = "dim"
|
|
154
|
-
|
|
157
|
+
|
|
155
158
|
console.print(f"[cyan]Batch ID: {data['id']}[/cyan]")
|
|
156
159
|
console.print(f"[{status_color}]Status: {status}[/{status_color}]")
|
|
157
160
|
console.print(f"[dim]Endpoint: {data['endpoint']}[/dim]")
|
|
158
161
|
console.print(f"[dim]Input File: {data['input_file_id']}[/dim]")
|
|
159
|
-
|
|
162
|
+
|
|
160
163
|
if data.get('output_file_id'):
|
|
161
164
|
console.print(f"[green]Output File: {data['output_file_id']}[/green]")
|
|
162
|
-
|
|
165
|
+
|
|
163
166
|
if data.get('error_file_id'):
|
|
164
167
|
console.print(f"[red]Error File: {data['error_file_id']}[/red]")
|
|
165
|
-
|
|
168
|
+
|
|
166
169
|
# Request statistics
|
|
167
170
|
counts = data.get('request_counts', {})
|
|
168
|
-
console.print(
|
|
169
|
-
|
|
171
|
+
console.print(
|
|
172
|
+
f"[dim]Requests - Total: {counts.get('total', 0)}, "
|
|
173
|
+
f"Completed: {counts.get('completed', 0)}, "
|
|
174
|
+
f"Failed: {counts.get('failed', 0)}[/dim]"
|
|
175
|
+
)
|
|
176
|
+
|
|
170
177
|
# Timestamps
|
|
171
178
|
console.print(f"[dim]Created: {data.get('created_at', 'N/A')}[/dim]")
|
|
172
179
|
if data.get('completed_at'):
|
|
173
180
|
console.print(f"[dim]Completed: {data['completed_at']}[/dim]")
|
|
174
181
|
if data.get('expires_at'):
|
|
175
182
|
console.print(f"[dim]Expires: {data['expires_at']}[/dim]")
|
|
176
|
-
|
|
183
|
+
|
|
177
184
|
except Exception as e:
|
|
178
185
|
console.print(f"[red]Error: {e}[/red]")
|
|
179
186
|
raise typer.Exit(1)
|
|
@@ -181,36 +188,36 @@ def get_batch(
|
|
|
181
188
|
|
|
182
189
|
@batch_app.command("list")
|
|
183
190
|
def list_batches(
|
|
184
|
-
after:
|
|
191
|
+
after: str | None = typer.Option(None, "--after", help="List batches after this batch ID"),
|
|
185
192
|
limit: int = typer.Option(20, "--limit", "-l", help="Number of batches to return"),
|
|
186
193
|
):
|
|
187
194
|
"""List batch jobs"""
|
|
188
195
|
try:
|
|
189
196
|
console.print("[dim]📋 Listing batch jobs...[/dim]")
|
|
190
|
-
|
|
197
|
+
|
|
191
198
|
params = {"limit": limit}
|
|
192
199
|
if after:
|
|
193
200
|
params["after"] = after
|
|
194
|
-
|
|
201
|
+
|
|
195
202
|
response = httpx.get(
|
|
196
203
|
f"{config.base_url}/api/v2/external/batches",
|
|
197
204
|
headers={"Authorization": f"Bearer {config.api_key}"},
|
|
198
205
|
params=params,
|
|
199
206
|
timeout=30.0
|
|
200
207
|
)
|
|
201
|
-
|
|
208
|
+
|
|
202
209
|
if response.status_code != 200:
|
|
203
210
|
console.print(f"[red]Error: HTTP {response.status_code}[/red]")
|
|
204
211
|
console.print(f"[red]{response.text}[/red]")
|
|
205
212
|
raise typer.Exit(1)
|
|
206
|
-
|
|
213
|
+
|
|
207
214
|
data = response.json()
|
|
208
215
|
batches = data.get('data', [])
|
|
209
|
-
|
|
216
|
+
|
|
210
217
|
if not batches:
|
|
211
218
|
console.print("[dim]No batch jobs found[/dim]")
|
|
212
219
|
return
|
|
213
|
-
|
|
220
|
+
|
|
214
221
|
columns = [
|
|
215
222
|
{"header": "Batch ID", "style": "cyan", "no_wrap": True, "max_width": 16},
|
|
216
223
|
{"header": "Status", "style": "yellow"},
|
|
@@ -218,16 +225,16 @@ def list_batches(
|
|
|
218
225
|
{"header": "Requests", "style": "magenta", "justify": "center"},
|
|
219
226
|
{"header": "Created", "style": "dim"}
|
|
220
227
|
]
|
|
221
|
-
|
|
228
|
+
|
|
222
229
|
table = create_table("Batch Jobs", columns)
|
|
223
|
-
|
|
230
|
+
|
|
224
231
|
for batch in batches:
|
|
225
232
|
batch_id = batch['id']
|
|
226
233
|
short_id = truncate_id(batch_id, 12)
|
|
227
|
-
|
|
234
|
+
|
|
228
235
|
counts = batch.get('request_counts', {})
|
|
229
236
|
request_stats = f"{counts.get('completed', 0)}/{counts.get('total', 0)}"
|
|
230
|
-
|
|
237
|
+
|
|
231
238
|
table.add_row(
|
|
232
239
|
short_id,
|
|
233
240
|
batch['status'],
|
|
@@ -235,10 +242,10 @@ def list_batches(
|
|
|
235
242
|
request_stats,
|
|
236
243
|
format_timestamp(batch.get('created_at'))
|
|
237
244
|
)
|
|
238
|
-
|
|
245
|
+
|
|
239
246
|
console.print(table)
|
|
240
247
|
console.print(f"\n[dim]Found {len(batches)} batch jobs[/dim]")
|
|
241
|
-
|
|
248
|
+
|
|
242
249
|
except Exception as e:
|
|
243
250
|
console.print(f"[red]Error: {e}[/red]")
|
|
244
251
|
raise typer.Exit(1)
|
|
@@ -251,24 +258,24 @@ def cancel_batch(
|
|
|
251
258
|
"""Cancel a batch job"""
|
|
252
259
|
try:
|
|
253
260
|
console.print(f"[dim]🛑 Cancelling batch {batch_id}...[/dim]")
|
|
254
|
-
|
|
261
|
+
|
|
255
262
|
response = httpx.post(
|
|
256
263
|
f"{config.base_url}/api/v2/external/batches/{batch_id}/cancel",
|
|
257
264
|
headers={"Authorization": f"Bearer {config.api_key}"},
|
|
258
265
|
timeout=30.0
|
|
259
266
|
)
|
|
260
|
-
|
|
267
|
+
|
|
261
268
|
if response.status_code != 200:
|
|
262
269
|
console.print(f"[red]Error: HTTP {response.status_code}[/red]")
|
|
263
270
|
console.print(f"[red]{response.text}[/red]")
|
|
264
271
|
raise typer.Exit(1)
|
|
265
|
-
|
|
272
|
+
|
|
266
273
|
data = response.json()
|
|
267
|
-
|
|
268
|
-
console.print(
|
|
274
|
+
|
|
275
|
+
console.print("[green]✅ Batch cancelled successfully![/green]")
|
|
269
276
|
console.print(f"[cyan]Batch ID: {data['id']}[/cyan]")
|
|
270
277
|
console.print(f"[yellow]Status: {data['status']}[/yellow]")
|
|
271
|
-
|
|
278
|
+
|
|
272
279
|
except Exception as e:
|
|
273
280
|
console.print(f"[red]Error: {e}[/red]")
|
|
274
281
|
raise typer.Exit(1)
|
|
@@ -277,25 +284,27 @@ def cancel_batch(
|
|
|
277
284
|
@batch_app.command("download")
|
|
278
285
|
def download_file(
|
|
279
286
|
file_id: str = typer.Argument(..., help="File ID to download"),
|
|
280
|
-
output_file:
|
|
287
|
+
output_file: str | None = typer.Option(
|
|
288
|
+
None, "--output", "-o", help="Output file path (prints to console if not specified)"
|
|
289
|
+
),
|
|
281
290
|
):
|
|
282
291
|
"""Download batch file content (input, output, or error files)"""
|
|
283
292
|
try:
|
|
284
293
|
console.print(f"[dim]⬇️ Downloading file {file_id}...[/dim]")
|
|
285
|
-
|
|
294
|
+
|
|
286
295
|
response = httpx.get(
|
|
287
296
|
f"{config.base_url}/api/v2/external/files/{file_id}/content",
|
|
288
297
|
headers={"Authorization": f"Bearer {config.api_key}"},
|
|
289
298
|
timeout=60.0
|
|
290
299
|
)
|
|
291
|
-
|
|
300
|
+
|
|
292
301
|
if response.status_code != 200:
|
|
293
302
|
console.print(f"[red]Error: HTTP {response.status_code}[/red]")
|
|
294
303
|
console.print(f"[red]{response.text}[/red]")
|
|
295
304
|
raise typer.Exit(1)
|
|
296
|
-
|
|
305
|
+
|
|
297
306
|
content = response.text
|
|
298
|
-
|
|
307
|
+
|
|
299
308
|
if output_file:
|
|
300
309
|
# Save to file
|
|
301
310
|
with open(output_file, 'w') as f:
|
|
@@ -309,7 +318,7 @@ def download_file(
|
|
|
309
318
|
console.print(content)
|
|
310
319
|
console.print("-" * 50)
|
|
311
320
|
console.print(f"[dim]Size: {len(content)} characters[/dim]")
|
|
312
|
-
|
|
321
|
+
|
|
313
322
|
except Exception as e:
|
|
314
323
|
console.print(f"[red]Error: {e}[/red]")
|
|
315
|
-
raise typer.Exit(1)
|
|
324
|
+
raise typer.Exit(1)
|
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
"""Synchronous inference commands for chat/completion"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
4
|
|
|
5
|
-
from typing import Optional
|
|
6
5
|
import typer
|
|
7
6
|
from rich.console import Console
|
|
8
7
|
from rich.table import Table
|
|
9
|
-
import json
|
|
10
8
|
|
|
11
9
|
from ....shared.config import config
|
|
12
10
|
|
|
@@ -24,12 +22,12 @@ def send_message(
|
|
|
24
22
|
model: str = typer.Option("gpt-4", "--model", "-m", help="Model to use for inference"),
|
|
25
23
|
max_tokens: int = typer.Option(1000, "--max-tokens", help="Maximum tokens in response"),
|
|
26
24
|
temperature: float = typer.Option(0.7, "--temperature", "-t", help="Temperature (0.0-2.0)"),
|
|
27
|
-
system_prompt:
|
|
25
|
+
system_prompt: str | None = typer.Option(None, "--system", "-s", help="System prompt"),
|
|
28
26
|
):
|
|
29
27
|
"""Send a message to an AI model and get a response"""
|
|
30
28
|
try:
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
config.get_client()
|
|
30
|
+
|
|
33
31
|
# Create the sync request payload directly
|
|
34
32
|
sync_request = {
|
|
35
33
|
"model_id": model,
|
|
@@ -42,15 +40,15 @@ def send_message(
|
|
|
42
40
|
"top_p": 1.0,
|
|
43
41
|
"stream": False
|
|
44
42
|
}
|
|
45
|
-
|
|
43
|
+
|
|
46
44
|
console.print(f"[dim]🤖 Sending message to {model}...[/dim]")
|
|
47
|
-
|
|
45
|
+
|
|
48
46
|
# Make the API call using httpx directly (since we don't have generated client for sync inference yet)
|
|
49
47
|
import httpx
|
|
50
|
-
|
|
48
|
+
|
|
51
49
|
url = f"{config.base_url}/api/v2/external/sync/"
|
|
52
50
|
headers = {"Authorization": f"Bearer {config.api_key}", "Content-Type": "application/json"}
|
|
53
|
-
|
|
51
|
+
|
|
54
52
|
with httpx.Client() as http_client:
|
|
55
53
|
response = http_client.post(
|
|
56
54
|
url,
|
|
@@ -58,20 +56,20 @@ def send_message(
|
|
|
58
56
|
headers=headers,
|
|
59
57
|
timeout=60.0
|
|
60
58
|
)
|
|
61
|
-
|
|
59
|
+
|
|
62
60
|
if response.status_code == 200:
|
|
63
61
|
result = response.json()
|
|
64
|
-
|
|
62
|
+
|
|
65
63
|
console.print(f"[green]✅ Response from {model}:[/green]")
|
|
66
64
|
console.print(f"[cyan]{result['output']}[/cyan]")
|
|
67
|
-
|
|
65
|
+
|
|
68
66
|
# Show usage stats
|
|
69
67
|
if 'usage' in result:
|
|
70
68
|
usage = result['usage']
|
|
71
69
|
console.print(f"[dim]📊 Tokens: {usage.get('total_tokens', 0)} | "
|
|
72
70
|
f"Latency: {result.get('latency_ms', 0)}ms | "
|
|
73
71
|
f"Cost: ${result.get('cost', 0):.4f}[/dim]")
|
|
74
|
-
|
|
72
|
+
|
|
75
73
|
elif response.status_code == 503:
|
|
76
74
|
console.print(f"[red]❌ Model {model} is not running. Please contact support to start the model.[/red]")
|
|
77
75
|
raise typer.Exit(1)
|
|
@@ -79,30 +77,30 @@ def send_message(
|
|
|
79
77
|
console.print(f"[red]❌ Error: HTTP {response.status_code}[/red]")
|
|
80
78
|
console.print(f"[red]{response.text}[/red]")
|
|
81
79
|
raise typer.Exit(1)
|
|
82
|
-
|
|
80
|
+
|
|
83
81
|
except Exception as e:
|
|
84
82
|
console.print(f"[red]❌ Error: {e}[/red]")
|
|
85
83
|
raise typer.Exit(1)
|
|
86
84
|
|
|
87
85
|
|
|
88
|
-
@chat_app.command("models")
|
|
86
|
+
@chat_app.command("models")
|
|
89
87
|
def list_models():
|
|
90
88
|
"""List available models for inference"""
|
|
91
89
|
try:
|
|
92
|
-
|
|
93
|
-
|
|
90
|
+
config.get_client()
|
|
91
|
+
|
|
94
92
|
# Make API call to get available models
|
|
95
93
|
import httpx
|
|
96
|
-
|
|
94
|
+
|
|
97
95
|
url = f"{config.base_url}/api/v2/external/models/"
|
|
98
96
|
headers = {"Authorization": f"Bearer {config.api_key}"}
|
|
99
|
-
|
|
97
|
+
|
|
100
98
|
with httpx.Client() as http_client:
|
|
101
99
|
response = http_client.get(url, headers=headers, timeout=10.0)
|
|
102
|
-
|
|
100
|
+
|
|
103
101
|
if response.status_code == 200:
|
|
104
102
|
models = response.json()
|
|
105
|
-
|
|
103
|
+
|
|
106
104
|
if models:
|
|
107
105
|
# Create a table
|
|
108
106
|
table = Table(title="Available AI Models")
|
|
@@ -110,56 +108,53 @@ def list_models():
|
|
|
110
108
|
table.add_column("Type", style="magenta")
|
|
111
109
|
table.add_column("Status", justify="center")
|
|
112
110
|
table.add_column("Price/1K tokens", justify="right", style="green")
|
|
113
|
-
|
|
111
|
+
|
|
114
112
|
# Sort models: running first, then by type, then by name
|
|
115
113
|
sorted_models = sorted(models, key=lambda m: (
|
|
116
114
|
not m.get('available', False), # Running models first
|
|
117
115
|
m.get('type', 'text'), # Then by type
|
|
118
116
|
m.get('model_id', '') # Then by name
|
|
119
117
|
))
|
|
120
|
-
|
|
118
|
+
|
|
121
119
|
for model in sorted_models:
|
|
122
120
|
# Status with emoji
|
|
123
121
|
status = "🟢 Running" if model.get('available') else "🔴 Stopped"
|
|
124
|
-
|
|
122
|
+
|
|
125
123
|
# Model type with emoji
|
|
126
124
|
model_type = model.get('type', 'text')
|
|
127
125
|
type_emoji = {
|
|
128
126
|
'text': 'Text',
|
|
129
|
-
'image': 'Image',
|
|
127
|
+
'image': 'Image',
|
|
130
128
|
'audio': 'Audio',
|
|
131
129
|
'multimodal': 'Multi',
|
|
132
130
|
'embedding': 'Embed'
|
|
133
131
|
}.get(model_type, f'❓ {model_type.title()}')
|
|
134
|
-
|
|
135
|
-
# Provider
|
|
136
|
-
provider = model.get('provider', 'unknown').title()
|
|
137
|
-
|
|
132
|
+
|
|
138
133
|
# Price
|
|
139
134
|
price = model.get('price_per_1k_tokens', 0)
|
|
140
135
|
price_str = f"${price:.4f}" if price > 0 else "Free"
|
|
141
|
-
|
|
136
|
+
|
|
142
137
|
table.add_row(
|
|
143
138
|
model.get('model_id', 'Unknown'),
|
|
144
139
|
type_emoji,
|
|
145
140
|
status,
|
|
146
141
|
price_str
|
|
147
142
|
)
|
|
148
|
-
|
|
143
|
+
|
|
149
144
|
console.print(table)
|
|
150
|
-
|
|
145
|
+
|
|
151
146
|
# Show summary
|
|
152
147
|
running_count = sum(1 for m in models if m.get('available'))
|
|
153
148
|
total_count = len(models)
|
|
154
149
|
console.print(f"\n[dim]📊 {running_count}/{total_count} models running[/dim]")
|
|
155
|
-
|
|
150
|
+
|
|
156
151
|
else:
|
|
157
152
|
console.print("[yellow]No models are currently available[/yellow]")
|
|
158
153
|
else:
|
|
159
154
|
console.print(f"[red]❌ Error: HTTP {response.status_code}[/red]")
|
|
160
155
|
console.print(f"[red]{response.text}[/red]")
|
|
161
156
|
raise typer.Exit(1)
|
|
162
|
-
|
|
157
|
+
|
|
163
158
|
except Exception as e:
|
|
164
159
|
console.print(f"[red]❌ Error: {e}[/red]")
|
|
165
160
|
raise typer.Exit(1)
|
|
@@ -174,7 +169,7 @@ def analyze_image(
|
|
|
174
169
|
):
|
|
175
170
|
"""Analyze an image with AI vision models"""
|
|
176
171
|
try:
|
|
177
|
-
|
|
172
|
+
config.get_client()
|
|
178
173
|
|
|
179
174
|
# Create request payload for image analysis
|
|
180
175
|
sync_request = {
|
|
@@ -207,10 +202,10 @@ def analyze_image(
|
|
|
207
202
|
result = response.json()
|
|
208
203
|
|
|
209
204
|
if raw_output:
|
|
210
|
-
console.print(
|
|
205
|
+
console.print("[green]✅ Raw Response:[/green]")
|
|
211
206
|
console.print(json.dumps(result.get('raw_response', result['output']), indent=2))
|
|
212
207
|
else:
|
|
213
|
-
console.print(
|
|
208
|
+
console.print("[green]✅ Image Analysis:[/green]")
|
|
214
209
|
console.print(f"[cyan]{result['output']}[/cyan]")
|
|
215
210
|
|
|
216
211
|
elif response.status_code == 503:
|
|
@@ -223,4 +218,4 @@ def analyze_image(
|
|
|
223
218
|
|
|
224
219
|
except Exception as e:
|
|
225
220
|
console.print(f"[red]❌ Error: {e}[/red]")
|
|
226
|
-
raise typer.Exit(1)
|
|
221
|
+
raise typer.Exit(1)
|
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
"""
|
|
1
|
+
"""Model discovery and information commands for AI inference"""
|
|
2
|
+
|
|
4
3
|
|
|
5
|
-
from typing import Optional
|
|
6
|
-
import typer
|
|
7
4
|
import httpx
|
|
5
|
+
import typer
|
|
8
6
|
from rich.console import Console
|
|
9
|
-
from rich.table import Table
|
|
10
7
|
from rich.panel import Panel
|
|
8
|
+
from rich.table import Table
|
|
11
9
|
|
|
12
10
|
from ....shared.config import config
|
|
13
11
|
|
|
@@ -18,10 +16,18 @@ models_app = typer.Typer(name="models", help="Model discovery and information")
|
|
|
18
16
|
|
|
19
17
|
@models_app.command("list")
|
|
20
18
|
def list_models(
|
|
21
|
-
model_type:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
19
|
+
model_type: str | None = typer.Option(
|
|
20
|
+
None, "--type", "-t", help="Filter by model type (text, image, multimodal, etc.)"
|
|
21
|
+
),
|
|
22
|
+
available_only: bool = typer.Option(
|
|
23
|
+
False, "--available", "-a", help="Show only available models"
|
|
24
|
+
),
|
|
25
|
+
sync_only: bool = typer.Option(
|
|
26
|
+
False, "--sync", help="Show only models that support synchronous inference"
|
|
27
|
+
),
|
|
28
|
+
async_only: bool = typer.Option(
|
|
29
|
+
False, "--async", help="Show only models that support async/batch inference"
|
|
30
|
+
),
|
|
25
31
|
):
|
|
26
32
|
"""List all available AI models"""
|
|
27
33
|
try:
|
|
@@ -149,22 +155,22 @@ def get_model_info(
|
|
|
149
155
|
f"[bold]Version:[/bold] {model.get('version', 'N/A')}",
|
|
150
156
|
f"[bold]Status:[/bold] [{status_color}]{status_text}[/{status_color}]",
|
|
151
157
|
"",
|
|
152
|
-
|
|
158
|
+
"[bold yellow]Capabilities:[/bold yellow]",
|
|
153
159
|
f" • Synchronous inference: {'Yes ✓' if model.get('supports_sync', True) else 'No ✗'}",
|
|
154
160
|
f" • Asynchronous/Batch: {'Yes ✓' if model.get('supports_async', True) else 'No ✗'}",
|
|
155
161
|
f" • GPU Required: {'Yes' if model.get('gpu_required', False) else 'No'}",
|
|
156
162
|
"",
|
|
157
|
-
|
|
163
|
+
"[bold green]Input/Output:[/bold green]",
|
|
158
164
|
f" • Input types: {', '.join(model.get('input_types', []))}",
|
|
159
165
|
f" • Output types: {', '.join(model.get('output_types', []))}",
|
|
160
166
|
f" • Max input tokens: {model.get('max_input_tokens', 'N/A'):,}",
|
|
161
167
|
f" • Max output tokens: {model.get('max_output_tokens', 'N/A'):,}",
|
|
162
168
|
"",
|
|
163
|
-
|
|
169
|
+
"[bold green]Pricing:[/bold green]",
|
|
164
170
|
f" • Base price: ${model.get('price_per_1k_tokens', 0):.4f} per 1K tokens",
|
|
165
171
|
f" • Batch discount: {model.get('batch_pricing_discount', 0.5) * 100:.0f}% off",
|
|
166
172
|
"",
|
|
167
|
-
|
|
173
|
+
"[bold blue]Performance:[/bold blue]",
|
|
168
174
|
f" • Estimated latency: {model.get('estimated_latency_ms', 0):,} ms",
|
|
169
175
|
]
|
|
170
176
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
File without changes
|