lyceum-cli 1.0.28-py3-none-any.whl → 1.0.30-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
@@ -1,221 +1,136 @@
-"""Synchronous inference commands for chat/completion"""
+"""Inference chat command"""
 
 import json
+import os
+from pathlib import Path
 
 import typer
 from rich.console import Console
-from rich.table import Table
 
 from ....shared.config import config
 
 console = Console()
 
-# Import generated client modules
-# We'll use direct HTTP calls since the sync inference models aren't generated yet
-
-chat_app = typer.Typer(name="chat", help="Synchronous inference commands")
-
-
-@chat_app.command("send")
-def send_message(
-    message: str = typer.Argument(..., help="Message to send to the model"),
-    model: str = typer.Option("gpt-4", "--model", "-m", help="Model to use for inference"),
-    max_tokens: int = typer.Option(1000, "--max-tokens", help="Maximum tokens in response"),
-    temperature: float = typer.Option(0.7, "--temperature", "-t", help="Temperature (0.0-2.0)"),
-    system_prompt: str | None = typer.Option(None, "--system", "-s", help="System prompt"),
+def chat_cmd(
+    prompt: str = typer.Option(None, "--prompt", "-p", help="The message or path to file (.txt/.yaml/.xml)"),
+    no_stream: bool = typer.Option(False, "--no-stream", "-n", help="Disable streaming response"),
+    image: str = typer.Option(None, "--image", "-i", help="Image path or base64"),
+    image_url: str = typer.Option(None, "--url", help="Image URL"),
+    image_dir: str = typer.Option(None, "--dir", help="Directory of images"),
+    base64: bool = typer.Option(False, "--base64", help="Treat image input as base64"),
+    model: str = typer.Option("gpt-4", "--model", "-m", help="Model to use"),
+    max_tokens: int = typer.Option(1000, "--tokens", "-t", help="Max output tokens"),
+    output_type: str = typer.Option("text", "--type", help="Output type (e.g. json, markdown)"),
+    batch_file: str = typer.Option(None, "--batch", "-b", help="JSONL file for batch processing"),
 ):
-    """Send a message to an AI model and get a response"""
+    """
+    Perform inference (Chat, Image, or Batch).
+    """
     try:
         config.get_client()
-
-        # Create the sync request payload directly
-        sync_request = {
-            "model_id": model,
-            "input": {
-                "text": message,
-                "parameters": {"system_prompt": system_prompt} if system_prompt else {}
-            },
-            "max_tokens": max_tokens,
-            "temperature": temperature,
-            "top_p": 1.0,
-            "stream": False
-        }
-
-        console.print(f"[dim]🤖 Sending message to {model}...[/dim]")
-
-        # Make the API call using httpx directly (since we don't have generated client for sync inference yet)
         import httpx
 
-        url = f"{config.base_url}/api/v2/external/sync/"
-        headers = {"Authorization": f"Bearer {config.api_key}", "Content-Type": "application/json"}
-
-        with httpx.Client() as http_client:
-            response = http_client.post(
-                url,
-                json=sync_request,
-                headers=headers,
-                timeout=60.0
-            )
-
-            if response.status_code == 200:
-                result = response.json()
-
-                console.print(f"[green]✅ Response from {model}:[/green]")
-                console.print(f"[cyan]{result['output']}[/cyan]")
-
-                # Show usage stats
-                if 'usage' in result:
-                    usage = result['usage']
-                    console.print(f"[dim]📊 Tokens: {usage.get('total_tokens', 0)} | "
-                                  f"Latency: {result.get('latency_ms', 0)}ms | "
-                                  f"Cost: ${result.get('cost', 0):.4f}[/dim]")
-
-            elif response.status_code == 503:
-                console.print(f"[red]❌ Model {model} is not running. Please contact support to start the model.[/red]")
+        # 1. Batch Processing
+        if batch_file:
+            console.print(f"[dim]Initiating batch processing from {batch_file}...[/dim]")
+            if not os.path.exists(batch_file):
+                console.print(f"[red]File not found: {batch_file}[/red]")
+                raise typer.Exit(1)
+
+            # Upload
+            with open(batch_file, 'rb') as f:
+                files = {'file': (os.path.basename(batch_file), f, 'application/jsonl')}
+                response = httpx.post(
+                    f"{config.base_url}/api/v2/external/files",
+                    headers={"Authorization": f"Bearer {config.api_key}"},
+                    files=files,
+                    data={'purpose': 'batch'},
+                    timeout=60.0
+                )
+            if response.status_code != 200:
+                console.print(f"[red]Upload failed: {response.text}[/red]")
                 raise typer.Exit(1)
-            else:
-                console.print(f"[red]❌ Error: HTTP {response.status_code}[/red]")
-                console.print(f"[red]{response.text}[/red]")
-                raise typer.Exit(1)
-
-    except Exception as e:
-        console.print(f"[red]❌ Error: {e}[/red]")
-        raise typer.Exit(1)
 
+            file_id = response.json()['id']
 
-@chat_app.command("models")
-def list_models():
-    """List available models for inference"""
-    try:
-        config.get_client()
-
-        # Make API call to get available models
-        import httpx
-
-        url = f"{config.base_url}/api/v2/external/models/"
-        headers = {"Authorization": f"Bearer {config.api_key}"}
-
-        with httpx.Client() as http_client:
-            response = http_client.get(url, headers=headers, timeout=10.0)
-
-            if response.status_code == 200:
-                models = response.json()
-
-                if models:
-                    # Create a table
-                    table = Table(title="Available AI Models")
-                    table.add_column("Model", style="cyan", no_wrap=True)
-                    table.add_column("Type", style="magenta")
-                    table.add_column("Status", justify="center")
-                    table.add_column("Price/1K tokens", justify="right", style="green")
-
-                    # Sort models: running first, then by type, then by name
-                    sorted_models = sorted(models, key=lambda m: (
-                        not m.get('available', False),  # Running models first
-                        m.get('type', 'text'),  # Then by type
-                        m.get('model_id', '')  # Then by name
-                    ))
-
-                    for model in sorted_models:
-                        # Status with emoji
-                        status = "🟢 Running" if model.get('available') else "🔴 Stopped"
-
-                        # Model type with emoji
-                        model_type = model.get('type', 'text')
-                        type_emoji = {
-                            'text': 'Text',
-                            'image': 'Image',
-                            'audio': 'Audio',
-                            'multimodal': 'Multi',
-                            'embedding': 'Embed'
-                        }.get(model_type, f'❓ {model_type.title()}')
-
-                        # Price
-                        price = model.get('price_per_1k_tokens', 0)
-                        price_str = f"${price:.4f}" if price > 0 else "Free"
-
-                        table.add_row(
-                            model.get('model_id', 'Unknown'),
-                            type_emoji,
-                            status,
-                            price_str
-                        )
-
-                    console.print(table)
-
-                    # Show summary
-                    running_count = sum(1 for m in models if m.get('available'))
-                    total_count = len(models)
-                    console.print(f"\n[dim]📊 {running_count}/{total_count} models running[/dim]")
-
-                else:
-                    console.print("[yellow]No models are currently available[/yellow]")
-            else:
-                console.print(f"[red]❌ Error: HTTP {response.status_code}[/red]")
-                console.print(f"[red]{response.text}[/red]")
+            # Create Batch
+            response = httpx.post(
+                f"{config.base_url}/api/v2/external/batches",
+                headers={"Authorization": f"Bearer {config.api_key}"},
+                json={"input_file_id": file_id, "model": model},
+                timeout=30.0
+            )
+            if response.status_code != 200:
+                console.print(f"[red]Batch creation failed: {response.text}[/red]")
                 raise typer.Exit(1)
 
-    except Exception as e:
-        console.print(f"[red] Error: {e}[/red]")
-        raise typer.Exit(1)
+            data = response.json()
+            console.print(f"[green]Batch Job Created: {data['id']}[/green]")
+            return
+
+        # 2. Image Analysis
+        if image or image_url or image_dir:
+            if image_dir:
+                console.print("[yellow]Directory processing not yet implemented[/yellow]")
+                return
+
+            console.print(f"[dim]Analyzing image with {model}...[/dim]")
+
+            img_input = image_url if image_url else image
+            payload = {
+                "model_id": model,
+                "input": {
+                    "text": prompt or "Describe this image",
+                    # Simple heuristic: if it looks like a URL, treat as URL, else file/base64 logic
+                    "image_url": img_input
+                },
+                "max_tokens": max_tokens,
+                "stream": not no_stream
+            }
+
+            url = f"{config.base_url}/api/v2/external/sync/"
+            headers = {"Authorization": f"Bearer {config.api_key}"}
+
+            with httpx.Client() as client:
+                response = client.post(url, json=payload, headers=headers, timeout=60.0)
+                if response.status_code != 200:
+                    console.print(f"[red]Error: {response.text}[/red]")
+                    raise typer.Exit(1)
 
+                result = response.json()
+                console.print(f"[cyan]{result.get('output', '')}[/cyan]")
+            return
 
-@chat_app.command("image")
-def analyze_image(
-    image_url: str = typer.Argument(..., help="URL of image to analyze"),
-    prompt: str = typer.Option("What do you see in this image?", "--prompt", "-p", help="Question about the image"),
-    model: str = typer.Option("gpt-4-vision", "--model", "-m", help="Vision model to use"),
-    raw_output: bool = typer.Option(False, "--raw", help="Return full model response instead of just content"),
-):
-    """Analyze an image with AI vision models"""
-    try:
-        config.get_client()
-
-        # Create request payload for image analysis
-        sync_request = {
-            "model_id": model,
-            "input": {
-                "text": prompt,
-                "image_url": image_url
-            },
-            "max_tokens": 1000,
-            "temperature": 0.7,
-            "raw_output": raw_output
-        }
+        # 3. Text Chat (Prompt)
+        if prompt:
+            # Check if prompt is a file
+            if os.path.exists(prompt):
+                prompt = Path(prompt).read_text()
 
-        console.print(f"[dim]👁️ Analyzing image with {model}...[/dim]")
+            console.print(f"[dim]Sending message to {model}...[/dim]")
 
-        import httpx
+            payload = {
+                "model_id": model,
+                "input": {"text": prompt},
+                "max_tokens": max_tokens,
+                "stream": not no_stream
+            }
 
-        url = f"{config.base_url}/api/v2/external/sync/"
-        headers = {"Authorization": f"Bearer {config.api_key}", "Content-Type": "application/json"}
+            url = f"{config.base_url}/api/v2/external/sync/"
+            headers = {"Authorization": f"Bearer {config.api_key}"}
 
-        with httpx.Client() as http_client:
-            response = http_client.post(
-                url,
-                json=sync_request,
-                headers=headers,
-                timeout=60.0
-            )
+            with httpx.Client() as client:
+                response = client.post(url, json=payload, headers=headers, timeout=60.0)
+                if response.status_code != 200:
+                    console.print(f"[red]Error: {response.text}[/red]")
+                    raise typer.Exit(1)
 
-            if response.status_code == 200:
                 result = response.json()
+                console.print(f"[cyan]{result.get('output', '')}[/cyan]")
+            return
 
-                if raw_output:
-                    console.print("[green]✅ Raw Response:[/green]")
-                    console.print(json.dumps(result.get('raw_response', result['output']), indent=2))
-                else:
-                    console.print("[green]✅ Image Analysis:[/green]")
-                    console.print(f"[cyan]{result['output']}[/cyan]")
-
-            elif response.status_code == 503:
-                console.print(f"[red]❌ Vision model {model} is not running.[/red]")
-                raise typer.Exit(1)
-            else:
-                console.print(f"[red]❌ Error: HTTP {response.status_code}[/red]")
-                console.print(f"[red]{response.text}[/red]")
-                raise typer.Exit(1)
+        console.print("[yellow]Please provide input via --prompt, --image, or --batch[/yellow]")
 
     except Exception as e:
-        console.print(f"[red]Error: {e}[/red]")
+        console.print(f"[red]Error: {e}[/red]")
         raise typer.Exit(1)
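The rewrite above collapses the old `chat send` / `chat image` subcommands into a single options-driven `chat` command, mounted as `infer chat` by the new command group in the next file (the code's own hint, 'lyceum infer models', confirms the `lyceum infer` entry point). As a rough sketch of the resulting usage, with flag names taken from the new signature and all values purely illustrative:

# Text chat; --prompt also accepts a path to a .txt/.yaml/.xml file
lyceum infer chat --prompt "Summarize the release notes" --model gpt-4 --tokens 500

# Image analysis from a local path or a URL
lyceum infer chat --image ./photo.png --prompt "What is in this photo?"
lyceum infer chat --url https://example.com/photo.png

# Batch processing from a JSONL file (the expected per-line schema is not shown in this diff)
lyceum infer chat --batch requests.jsonl --model gpt-4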
@@ -0,0 +1,101 @@
+"""Inference command group"""
+import typer
+import os
+from rich.console import Console
+import httpx
+from ....shared.config import config
+from .chat import chat_cmd
+from .models import models_cmd
+from .batch import jobs_cmd
+
+console = Console()
+
+infer_app = typer.Typer(name="infer", help="Inference commands")
+
+# Mount sub-apps / commands
+infer_app.command("chat")(chat_cmd)
+infer_app.command("models")(models_cmd)
+infer_app.command("jobs")(jobs_cmd)
+
+@infer_app.command("deploy")
+def deploy_model(
+    hf_model_id: str = typer.Argument(..., help="HuggingFace model ID to deploy"),
+    vllm_config: str = typer.Option(None, "--config", "-c", help="Name of server-side vLLM config to use"),
+    hf_token: str = typer.Option(None, "--hf-token", "-t", help="HuggingFace token to use for deployment"),
+):
+    """Deploy a model for inference"""
+    # Interactive prompt for token if not provided and not in env
+    if hf_token is None and not (os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")):
+        if typer.confirm("No HF token detected. Do you want to provide one (required for private models)?", default=False):
+            hf_token = typer.prompt("HuggingFace Token", hide_input=True)
+
+    try:
+        config.get_client()
+
+        url = f"{config.base_url}/api/v2/external/inference/deploy"
+        headers = {"Authorization": f"Bearer {config.api_key}"}
+
+        payload = {
+            "hf_model_id": hf_model_id
+        }
+        if vllm_config:
+            payload["vllm_config_name"] = vllm_config
+        if hf_token:
+            payload["hf_token"] = hf_token
+
+        console.print(f"[dim]Deploying model {hf_model_id}...[/dim]")
+
+        with httpx.Client() as client:
+            response = client.post(url, json=payload, headers=headers, timeout=30.0)
+
+        if response.status_code != 200:
+            console.print(f"[red]Error: HTTP {response.status_code}[/red]")
+            console.print(f"[red]{response.text}[/red]")
+            raise typer.Exit(1)
+
+        data = response.json()
+
+        console.print(f"[green]Deployment initiated![/green]")
+        console.print(f"Model ID: [cyan]{data.get('model_id')}[/cyan]")
+        console.print(f"Deployment ID: [cyan]{data.get('deployment_id')}[/cyan]")
+        console.print(f"Status: [yellow]{data.get('status')}[/yellow]")
+        console.print(f"Instance URL: [blue]{data.get('instance_url')}[/blue]")
+        console.print("\n[dim]Use 'lyceum infer models' to check status[/dim]")
+
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/red]")
+        raise typer.Exit(1)
+
+@infer_app.command("spindown")
+def spindown_model(
+    model_id: str = typer.Argument(..., help="Model ID to spin down"),
+):
+    """Spin down a deployed model"""
+    try:
+        config.get_client()
+
+        url = f"{config.base_url}/api/v2/external/inference/spindown"
+        headers = {"Authorization": f"Bearer {config.api_key}"}
+
+        payload = {"model_id": model_id}
+
+        console.print(f"[dim]Spinning down model {model_id}...[/dim]")
+
+        with httpx.Client() as client:
+            response = client.post(url, json=payload, headers=headers, timeout=30.0)
+
+        if response.status_code != 200:
+            console.print(f"[red]Error: HTTP {response.status_code}[/red]")
+            console.print(f"[red]{response.text}[/red]")
+            raise typer.Exit(1)
+
+        data = response.json()
+
+        console.print(f"[green]Spindown initiated![/green]")
+        console.print(f"Model ID: [cyan]{data.get('model_id')}[/cyan]")
+        console.print(f"Status: [yellow]{data.get('status')}[/yellow]")
+        console.print(f"Message: {data.get('message')}")
+
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/red]")
+        raise typer.Exit(1)
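Assuming the same `lyceum` entry point, the new deploy/spindown lifecycle would be driven roughly as follows; the model ID and config name are placeholders rather than values taken from the diff:

# Deploy a HuggingFace model; --config names a server-side vLLM config, --hf-token covers private repos
lyceum infer deploy meta-llama/Llama-3.1-8B-Instruct --config my-vllm-config --hf-token hf_xxx

# Check deployment status, then spin the model back down
lyceum infer models
lyceum infer spindown meta-llama/Llama-3.1-8B-Instruct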