gemi-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gemi/__init__.py ADDED
@@ -0,0 +1 @@
1
# Version string of the gemi package.
__version__ = "0.1.0"
gemi/agent/__init__.py ADDED
File without changes
gemi/agent/loop.py ADDED
@@ -0,0 +1,594 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
+ from rich.console import Console
6
+ from rich.live import Live
7
+ from rich.markdown import Markdown
8
+ from rich.panel import Panel
9
+
10
+ from gemi.agent.tools import TOOL_DEFINITIONS, execute_tool
11
+ from gemi.compaction import compact_messages, estimate_tokens, needs_compaction
12
+ from gemi.config import load_config
13
+ from gemi.keys.manager import KeyManager
14
+ from gemi.providers.base import Chunk, Message
15
+ from gemi.providers.gemini import GeminiProvider
16
+ from gemi.providers.ollama import OllamaProvider
17
+ from gemi.providers.openai_compat import OpenAICompatProvider
18
+ from gemi.registry import PROVIDERS, get_base_url, get_context_window, get_default_model, get_provider_info, get_provider_type
19
+ from gemi.sessions import save_session
20
+ from gemi.ui import print_plan, print_plan_approval, print_tool_call, print_tool_result
21
+
22
# Module-wide Rich console; every message printed by this module goes through it.
console = Console()

# Rough characters-per-token ratio used by this module for size estimates
# (context trimming budget and per-response token counts).
CHARS_PER_TOKEN = 4

# System prompt template. The {providers_summary}, {current_provider},
# {current_model}, {active_keys_info}, {cwd}, {project_structure} and
# {project_context} placeholders are filled with str.format() in
# AgentLoop._init_provider, so any literal brace added here must be doubled.
SYSTEM_PROMPT = """You are gemi, an expert AI coding agent running in the user's terminal. You are a world-class software engineer. You can read, write, and edit files, run commands, search code, and manage git — all through your tools.

CRITICAL RULES — FOLLOW THESE EXACTLY:
1. NEVER ASK QUESTIONS. When the user asks you to do something, DO IT IMMEDIATELY. Do not ask "what command?", "what framework?", "which directory?", "what language?" — use your tools to find out. The ONLY exception: truly destructive operations (deleting production data, force pushing).
2. You have FULL ACCESS to the file system. NEVER say "I can't access files" or "please provide the path". You CAN read and write any file. All paths are relative to the current working directory.
3. ALWAYS use tools FIRST. When asked about code, files, or the project — read them with tools before responding. Never guess file contents.
4. When asked to "build", "complete", "fix", or "run" something — take action immediately. List directories, read package.json/Makefile/setup.py, figure out the right command, and run it. Show results.
5. When a project is in a subdirectory, run commands from that subdirectory. Use `cd subdirectory && command` or `--prefix subdirectory` for npm commands. NEVER run npm/pip/make in the wrong directory.
6. If a command fails, READ the error, understand it, and fix it yourself. Do not ask the user to fix it.

About gemi:
- gemi is a free open-source AI coding CLI with multi-provider support
- Providers: {providers_summary}
- Currently using: {current_provider}/{current_model}
- Features: multi-account key rotation, auto provider failover, encrypted key storage, session persistence
- Manage keys: `gemi key add <provider>` | View providers: `gemi providers` | Switch model: `/model <name>`
{active_keys_info}

Environment:
- Working directory: {cwd}
- Project structure:
{project_structure}
{project_context}

Tools available:
- list_directory(path) — list files. "." for current dir, "subfolder" for subfolder
- read_file(path) — read file contents with line numbers
- write_file(path, content) — create or overwrite a file
- edit_file(path, old_text, new_text) — find-and-replace in a file (exact match)
- run_command(command) — run shell command. Fast commands return immediately, long-running ones return after 30s with partial output while continuing in background
- search_files(pattern, path) — grep for text in files
- find_files(pattern) — find files by glob ("**/*.py", "*.json")
- git_status(), git_diff(), git_log(), git_commit(message, files), git_branch(action)
- create_plan(title, steps) — create a step-by-step plan for the user to review before execution

Planning vs Direct Execution:
- For COMPLEX tasks (building new features, creating projects, multi-file refactors, setting up infrastructure): FIRST call create_plan() with clear steps, then wait for user approval before executing.
- For SIMPLE tasks (fix a bug, read a file, run a command, small edits, answer a question): execute directly without a plan.
- A task is complex if it involves 3+ files or 3+ distinct actions.

Workflow:
1. User gives a task → read the codebase with tools to understand it
2. If complex → call create_plan() with steps, then execute each step after approval
3. If simple → execute directly
4. Verify: run tests, start dev server, check output
5. Report what you did in 1-2 sentences

Guidelines:
- Read files before editing them
- Make minimal, targeted changes
- Keep responses short — 1-2 sentences, not paragraphs
- When a project is in a subdirectory, always cd into it or use the right prefix
- When users ask about gemi itself (providers, features, keys), answer from your knowledge above
"""
80
+
81
+
82
+ def _get_project_structure() -> str:
83
+ cwd = Path.cwd()
84
+ lines = []
85
+ for entry in sorted(cwd.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower())):
86
+ if entry.name.startswith(".") and entry.name not in (".env", ".gitignore"):
87
+ continue
88
+ prefix = "📁" if entry.is_dir() else " "
89
+ lines.append(f"{prefix} {entry.name}")
90
+ return "\n".join(lines[:30]) or "(empty directory)"
91
+
92
+
93
+ def _get_project_context() -> str:
94
+ gemi_md = Path.cwd() / ".gemi.md"
95
+ if gemi_md.exists():
96
+ content = gemi_md.read_text(errors="replace")
97
+ if len(content) > 3000:
98
+ content = content[:3000] + "\n... (truncated)"
99
+ return f"\nProject context (.gemi.md):\n{content}\n"
100
+ return ""
101
+
102
+
103
def _create_provider(provider_name: str, api_key: str, config: dict):
    """Build the provider client that matches ``provider_name``.

    The provider type is looked up with ``get_provider_type``. ``config`` is
    accepted for interface compatibility but is not read here.

    Returns:
        A ``GeminiProvider`` for type ``"gemini"``, an ``OllamaProvider`` for
        ``"ollama"`` (the API key is not passed to it), and an
        ``OpenAICompatProvider`` for ``"openai_compat"`` and every other type.
    """
    kind = get_provider_type(provider_name)

    if kind == "gemini":
        return GeminiProvider(api_key=api_key)

    if kind == "ollama":
        local_url = get_base_url(provider_name) or "http://localhost:11434"
        return OllamaProvider(base_url=local_url)

    # "openai_compat" and any unrecognised type are constructed the same way.
    endpoint = get_base_url(provider_name) or "https://api.openai.com/v1"
    return OpenAICompatProvider(api_key=api_key, base_url=endpoint)
117
+
118
+
119
def _get_model(provider_name: str, config: dict) -> str:
    """Pick the model to use for ``provider_name``.

    The configured ``default_model`` applies only when ``provider_name`` is
    the configured ``default_provider`` (``"gemini"`` when unset); otherwise
    the provider's own default from ``get_default_model`` is returned.
    """
    provider_default = get_default_model(provider_name)
    is_default_provider = provider_name == config.get("default_provider", "gemini")
    if not is_default_provider:
        return provider_default
    return config.get("default_model", provider_default)
123
+
124
+
125
+ def _parse_retry_after(error: Exception) -> float | None:
126
+ retry_after = None
127
+
128
+ if hasattr(error, "response") and error.response is not None:
129
+ headers = getattr(error.response, "headers", {})
130
+ if "retry-after" in headers:
131
+ try:
132
+ retry_after = float(headers["retry-after"])
133
+ except (ValueError, TypeError):
134
+ pass
135
+ if not retry_after and "x-ratelimit-reset" in headers:
136
+ try:
137
+ import time
138
+ reset_ts = float(headers["x-ratelimit-reset"])
139
+ retry_after = max(1, reset_ts - time.time())
140
+ except (ValueError, TypeError):
141
+ pass
142
+
143
+ if not retry_after:
144
+ import re
145
+ match = re.search(r"retry.{0,5}?(\d+)\s*s", str(error).lower())
146
+ if match:
147
+ retry_after = float(match.group(1))
148
+ else:
149
+ match = re.search(r"try again in (\d+)", str(error).lower())
150
+ if match:
151
+ retry_after = float(match.group(1))
152
+
153
+ return retry_after
154
+
155
+
156
+ class AgentLoop:
157
def __init__(self, session_id: str | None = None):
    """Set up an agent: configuration, key manager, counters and provider.

    Args:
        session_id: Identifier of the session; a falsy value is stored as
            ``""``.
    """
    # Plain per-session state.
    self.session_id = session_id if session_id else ""
    self.messages: list[Message] = []
    self.current_plan: dict | None = None

    # Usage accounting, overall and per provider.
    self.total_requests = 0
    self.total_tokens_used = 0
    self.provider_usage: dict[str, dict] = {}

    # Provider and model stay None until _init_provider finds a usable key.
    self.model = None
    self.provider = None

    self.config = load_config()
    self.key_manager = KeyManager(config=self.config)
    self._init_provider()
169
+
170
def _init_provider(self):
    """Create the initial provider/model and seed the system message.

    When the key manager has no current key, a hint is printed and nothing
    else is changed. Otherwise ``self.provider`` and ``self.model`` are set
    and ``self.messages`` is replaced by a single system message rendered
    from ``SYSTEM_PROMPT``.
    """
    current_key = self.key_manager.get_current_key()
    if not current_key:
        console.print("[bold red]No API keys configured. Run: gemi key add gemini[/bold red]")
        return

    provider_name = self.key_manager.get_current_provider()
    self.provider = _create_provider(provider_name, current_key.api_key, self.config)
    self.model = _get_model(provider_name, self.config)

    summary_parts = [f"{name} ({info['name']})" for name, info in PROVIDERS.items()]
    providers_summary = ", ".join(summary_parts)

    # Describe which providers have keys and which key is currently active.
    with_keys = [
        entry for entry in self.key_manager.get_status() if entry["state"] != "no key"
    ]
    if not with_keys:
        active_keys_info = "- No API keys configured yet"
    else:
        # dict.fromkeys de-duplicates provider names while keeping their order.
        unique_providers = dict.fromkeys(entry["provider"] for entry in with_keys)
        info_lines = [f"- Configured providers with keys: {', '.join(unique_providers)}"]
        first_active = next(
            (entry for entry in with_keys if entry["state"] == "active"), None
        )
        if first_active is not None:
            info_lines.append(
                f"- Active key: {first_active['provider']}/{first_active['name']}"
            )
        active_keys_info = "\n".join(info_lines)

    rendered_prompt = SYSTEM_PROMPT.format(
        cwd=os.getcwd(),
        project_structure=_get_project_structure(),
        project_context=_get_project_context(),
        providers_summary=providers_summary,
        current_provider=provider_name,
        current_model=self.model,
        active_keys_info=active_keys_info,
    )
    self.messages = [Message(role="system", content=rendered_prompt)]
204
+
205
def load_session(self, messages: list[Message]):
    """Replace the conversation history with ``messages``.

    A shallow copy is stored so that later appends and trimming done by the
    agent do not modify the list object owned by the caller.

    Args:
        messages: Previously saved conversation messages, oldest first.
    """
    self.messages = list(messages)
207
+
208
def _switch_provider(self):
    """Rebuild provider and model from the key manager's current key.

    Returns:
        ``False`` when the key manager has no current key (nothing is
        changed), ``True`` after the provider and model were replaced and the
        history was fitted to the new model's context window.
    """
    current_key = self.key_manager.get_current_key()
    if not current_key:
        return False

    name = self.key_manager.get_current_provider()
    self.provider = _create_provider(name, current_key.api_key, self.config)

    # A model chosen by the key manager wins over the configured default.
    managed_model = self.key_manager.get_current_model()
    if managed_model:
        self.model = managed_model
    else:
        self.model = _get_model(name, self.config)

    self._fit_context_to_model()
    return True
218
+
219
def _fit_context_to_model(self):
    """Trim old messages so the history fits the current model's window.

    The budget is 80% of the model's context window, converted to characters
    with ``CHARS_PER_TOKEN``; only message ``content`` is counted. A leading
    system message is always kept. The newest messages are kept as long as
    they fit, and a short notice message replaces everything that was
    dropped.

    Tool-result messages at the start of the kept window are dropped as well:
    the assistant message that requested them has been trimmed, and chat
    APIs that follow the OpenAI tool-calling format reject a ``tool`` message
    that does not answer a preceding ``tool_calls`` entry.
    """
    provider_name = self.key_manager.get_current_provider()
    max_tokens = get_context_window(provider_name, self.model)
    max_chars = int(max_tokens * CHARS_PER_TOKEN * 0.8)

    def _size(message) -> int:
        """Character count of a message; missing content counts as zero."""
        return len(message.content or "")

    if sum(_size(m) for m in self.messages) <= max_chars:
        return

    has_system = bool(self.messages) and self.messages[0].role == "system"
    system_msg = self.messages[0] if has_system else None
    history = self.messages[1:] if has_system else self.messages[:]

    used_chars = _size(system_msg) if system_msg else 0

    # Walk from newest to oldest; collect in reverse and flip once at the end
    # instead of inserting at the front of the list on every step.
    newest_first = []
    for msg in reversed(history):
        msg_chars = _size(msg)
        if used_chars + msg_chars > max_chars:
            break
        newest_first.append(msg)
        used_chars += msg_chars
    kept = newest_first[::-1]

    # Skip tool results whose originating assistant message was trimmed.
    first_valid = next(
        (index for index, msg in enumerate(kept) if msg.role != "tool"), len(kept)
    )
    kept = kept[first_valid:]

    dropped = len(history) - len(kept)
    if dropped > 0:
        summary = Message(
            role="user",
            content=f"[{dropped} earlier messages were trimmed to fit the current model's context window. The conversation continues below.]",
        )
        self.messages = ([system_msg] if system_msg else []) + [summary] + kept
        console.print(
            f" [dim]Trimmed {dropped} old messages to fit {self.model} context window[/dim]"
        )
251
+
252
def _maybe_compact(self):
    """Compact the history when ``needs_compaction`` says it is too large.

    The limit is the context window of the current provider/model. A notice
    is printed only when compaction actually reduced the message count.
    """
    window = get_context_window(self.key_manager.get_current_provider(), self.model)
    if not needs_compaction(self.messages, window):
        return

    before = len(self.messages)
    self.messages = compact_messages(self.messages, window)
    after = len(self.messages)
    if after < before:
        console.print(
            f" [dim]Compacted context: {before} → {after} messages[/dim]"
        )
263
+
264
def _auto_save(self):
    """Persist the conversation with ``save_session`` if a session id is set.

    Stored metadata: working directory, current provider, model and the
    total token count of this session.
    """
    if not self.session_id:
        return

    session_meta = {
        "cwd": os.getcwd(),
        "provider": self.key_manager.get_current_provider(),
        "model": self.model,
        "tokens_used": self.total_tokens_used,
    }
    save_session(self.session_id, self.messages, metadata=session_meta)
276
+
277
def get_status_line(self) -> str:
    """Return a one-line summary of provider, model, tokens and context use.

    The context percentage is the estimated token count of the history
    relative to the model's context window, capped at 100.
    """
    provider = self.key_manager.get_current_provider()
    context_tokens = estimate_tokens(self.messages)
    window = get_context_window(provider, self.model)
    percent = min(100, int(context_tokens / window * 100))

    segments = (
        f"{provider}/{self.model}",
        f"tokens: ~{self.total_tokens_used:,} / {window:,}",
        f"reqs: {self.total_requests}",
        f"context: {percent}%",
    )
    return " | ".join(segments)
285
+
286
def get_detailed_status(self) -> str:
    """Return a multi-line, Rich-markup status report for this session.

    The report lists the current provider/model, context usage, key counts
    for the current provider, per-provider usage recorded in
    ``self.provider_usage`` and the session totals.
    """
    provider = self.key_manager.get_current_provider()
    window = get_context_window(provider, self.model)
    context_tokens = estimate_tokens(self.messages)
    percent = min(100, int(context_tokens / window * 100))

    own_keys = [
        key for key in self.key_manager.get_status() if key["provider"] == provider
    ]
    # Every key that is not marked exhausted counts as active here.
    usable = sum(1 for key in own_keys if key["state"] != "exhausted")

    report = [
        " [bold]Current[/bold]",
        f" Provider: [green]{provider}[/green]",
        f" Model: [green]{self.model}[/green]",
        f" Context: [cyan]{context_tokens:,}[/cyan] / {window:,} ({percent}%)",
        f" Keys: [cyan]{usable}[/cyan] active / {len(own_keys)} total",
        f" Session: [dim]{self.session_id}[/dim]",
        "",
        " [bold]Usage This Session[/bold]",
    ]

    if not self.provider_usage:
        report.append(" [dim]No requests made yet[/dim]")
    for name, stats in self.provider_usage.items():
        name_window = get_context_window(name, stats["model"])
        active_tag = " [green]◀ active[/green]" if name == provider else ""
        report.append(f" [cyan]{name}[/cyan] ({stats['model']}){active_tag}")
        report.append(
            f" Tokens: ~{stats['tokens']:,} / {name_window:,} | Requests: {stats['requests']}"
        )

    report.append("")
    report.append(
        f" [bold]Total:[/bold] ~{self.total_tokens_used:,} tokens | {self.total_requests} requests"
    )
    return "\n".join(report)
320
+
321
+ def _update_plan_progress(self, tool_name: str):
322
+ if not self.current_plan:
323
+ return
324
+ steps = self.current_plan["steps"]
325
+ for step in steps:
326
+ if step["status"] == "pending":
327
+ step["status"] = "in_progress"
328
+ break
329
+ for i, step in enumerate(steps):
330
+ if step["status"] == "in_progress" and i > 0:
331
+ prev = steps[i - 1]
332
+ if prev["status"] == "in_progress":
333
+ prev["status"] = "done"
334
+ in_progress = [s for s in steps if s["status"] == "in_progress"]
335
+ if not in_progress:
336
+ pending = [s for s in steps if s["status"] == "pending"]
337
+ if not pending:
338
+ for s in steps:
339
+ if s["status"] != "done":
340
+ s["status"] = "done"
341
+ self.current_plan = None
342
+ return
343
+ print_plan(self.current_plan["title"], steps)
344
+
345
+ def get_plan(self) -> dict | None:
346
+ return self.current_plan
347
+
348
async def chat(self, user_input: str):
    """Run one user turn: query the model and execute the tools it requests.

    The user message is appended to the history, then the provider is called
    repeatedly (at most ``agent.max_iterations`` times, default 50). Each
    round either ends the turn with a plain assistant answer or yields tool
    calls, whose results are appended as ``tool`` messages before the next
    round. ``create_plan`` calls are shown to the user for approval instead
    of being executed as a tool.

    If the user rejects a plan, the remaining tool calls of that same
    assistant message are not executed; each still receives a ``tool`` reply
    saying it was skipped, so every tool call keeps a matching result.

    Args:
        user_input: The text typed by the user.
    """
    if not self.provider:
        console.print("[bold red]No provider available. Add API keys first.[/bold red]")
        return

    self.messages.append(Message(role="user", content=user_input))
    self._maybe_compact()

    agent_cfg = self.config.get("agent", {})
    max_iterations = agent_cfg.get("max_iterations", 50)
    auto_approve_reads = agent_cfg.get("auto_approve_reads", True)
    auto_approve_writes = agent_cfg.get("auto_approve_writes", False)

    # Only results of tools in this set are echoed with print_tool_result.
    write_tools = {"write_file", "edit_file", "run_command", "git_commit"}

    for _iteration in range(max_iterations):
        text_response, tool_calls, tokens_used = await self._call_provider()

        self.total_tokens_used += tokens_used
        self.total_requests += 1

        current = self.key_manager.get_current_provider()
        usage = self.provider_usage.setdefault(
            current, {"tokens": 0, "requests": 0, "model": self.model}
        )
        usage["tokens"] += tokens_used
        usage["requests"] += 1
        usage["model"] = self.model
        self.key_manager.record_usage(tokens_used)

        if not tool_calls:
            if not text_response.strip() and tokens_used == 0:
                console.print("\n [bold red]Could not get a response. All providers failed or returned empty.[/bold red]")
                console.print(" [yellow]Try again, or check provider status with /status[/yellow]")
                return
            self.messages.append(Message(role="assistant", content=text_response))
            break

        self.messages.append(Message(
            role="assistant",
            content=text_response,
            tool_calls=tool_calls,
        ))

        plan_rejected = False
        for tc in tool_calls:
            func_name = tc["function"]["name"]
            func_args = tc["function"]["arguments"]
            if isinstance(func_args, str):
                try:
                    func_args = json.loads(func_args)
                except json.JSONDecodeError:
                    func_args = {}
            if not isinstance(func_args, dict):
                # Valid JSON that is not an object (null, list, ...) cannot
                # be used as keyword-style arguments.
                func_args = {}

            if plan_rejected:
                # The user declined the plan these calls belong to: answer
                # the call without running anything.
                self.messages.append(Message(
                    role="tool",
                    content="Skipped: the user rejected the plan, so this tool call was not executed.",
                    tool_call_id=tc["id"],
                    name=func_name,
                ))
                continue

            if func_name == "create_plan":
                plan_title = func_args.get("title", "Plan")
                # Plan tracking stores a "status" key on each step, so only
                # dict steps can be tracked; anything else is left out.
                plan_steps = [
                    step for step in (func_args.get("steps") or [])
                    if isinstance(step, dict)
                ]
                for step in plan_steps:
                    step["status"] = "pending"
                self.current_plan = {"title": plan_title, "steps": plan_steps}
                print_plan(plan_title, plan_steps)
                if print_plan_approval():
                    result = "Plan approved by user. Execute each step now. After completing each step, briefly state what you did."
                else:
                    result = "Plan rejected by user. Ask what they'd like to change."
                    self.current_plan = None
                    plan_rejected = True
                self.messages.append(Message(
                    role="tool",
                    content=result,
                    tool_call_id=tc["id"],
                    name=func_name,
                ))
                continue

            print_tool_call(func_name, func_args)

            result = execute_tool(
                func_name,
                func_args,
                auto_approve_reads=auto_approve_reads,
                auto_approve_writes=auto_approve_writes,
            )

            if func_name in write_tools:
                print_tool_result(func_name, result)

            if self.current_plan:
                self._update_plan_progress(func_name)

            self.messages.append(Message(
                role="tool",
                content=result,
                tool_call_id=tc["id"],
                name=func_name,
            ))
    else:
        # The loop ran out of iterations while the model was still asking
        # for tools; say so instead of stopping silently.
        console.print(f"\n [yellow]Stopped after {max_iterations} iterations without a final answer.[/yellow]")

    self._auto_save()
    provider = self.key_manager.get_current_provider()
    max_ctx = get_context_window(provider, self.model)
    ctx = estimate_tokens(self.messages)
    # Guard the division in case no window size is known for the model.
    ctx_pct = min(100, int(ctx / max_ctx * 100)) if max_ctx else 0
    bar_width = 20
    filled = int(bar_width * ctx_pct / 100)
    bar = "[green]" + "━" * filled + "[/green][dim]" + "━" * (bar_width - filled) + "[/dim]"
    console.print(f"\n [dim]{provider}/{self.model}[/dim] ~{self.total_tokens_used:,} tokens {bar} {ctx_pct}%")
454
+
455
async def _call_provider(self, max_cycles: int = 5) -> tuple[str, list[dict] | None, int]:
    """Stream one model response, failing over across models and providers.

    Each attempt streams a reply from the current provider and renders the
    text live. A reply that is empty or looks like a provider error, or an
    exception during the call, triggers failover: first the next model from
    the key manager, then (after reporting the key as rate-limited or
    exhausted) the next provider. When nothing is left, the method waits for
    the nearest cooldown and starts another cycle.

    Args:
        max_cycles: Maximum number of exhaust-and-wait cycles. At most 50
            attempts are made in total across all cycles.

    Returns:
        ``(text, tool_calls, tokens)`` where ``tool_calls`` is ``None`` when
        the model requested none and ``tokens`` is estimated from the text
        length. ``("", None, 0)`` is returned when every option failed.
    """
    import asyncio

    # Provider error text is arbitrary; escape it so square brackets in it
    # are not interpreted as Rich markup tags.
    from rich.markup import escape

    max_attempts = 50
    attempt = 0
    error_phrases = ["provider returned error", "model is overloaded", "no endpoints found", "service unavailable"]

    def _next_model_or_provider(retry_after) -> bool:
        """Move to the next model, else cool the key down and switch provider.

        Returns True when there is something left to try.
        """
        next_model = self.key_manager.try_next_model()
        if next_model:
            self.model = next_model
            return True
        self.key_manager.report_rate_limit(retry_after=retry_after)
        return bool(self._switch_provider())

    def _exhaust_key_and_switch() -> bool:
        """Mark the current key exhausted and switch provider if possible."""
        self.key_manager.report_exhausted()
        return bool(self._switch_provider())

    for cycle in range(max_cycles):
        while attempt < max_attempts:
            attempt += 1
            live = None
            try:
                text_parts = []
                all_tool_calls = []

                provider_label = f"[dim]{self.key_manager.get_current_provider()}/{self.model}[/dim]"

                def _render(content_text):
                    md = Markdown(content_text or "")
                    return Panel(md, border_style="blue", padding=(0, 1), subtitle=provider_label, subtitle_align="right")

                try:
                    async for chunk in self.provider.chat(
                        messages=self.messages,
                        tools=TOOL_DEFINITIONS,
                        model=self.model,
                        stream=True,
                    ):
                        if chunk.text:
                            text_parts.append(chunk.text)
                            if not live:
                                live = Live(_render("".join(text_parts)), console=console, refresh_per_second=12, vertical_overflow="visible")
                                live.start()
                            else:
                                live.update(_render("".join(text_parts)))
                        if chunk.tool_calls:
                            all_tool_calls.extend(chunk.tool_calls)
                finally:
                    # Always release the live display, including on task
                    # cancellation, before anything else is printed.
                    if live is not None:
                        live.stop()

                text = "".join(text_parts)
                text_lower = text.strip().lower()
                is_error_response = any(phrase in text_lower for phrase in error_phrases)

                if is_error_response or (not text.strip() and not all_tool_calls):
                    provider = self.key_manager.get_current_provider()
                    reason = f"error response: {escape(text.strip()[:80])}" if is_error_response else "empty response"
                    console.print(f"\n [red]Error on {provider}/{self.model}: {reason}[/red]")
                    if _next_model_or_provider(30):
                        continue
                    break

                tokens = len(text) // CHARS_PER_TOKEN
                return text, all_tool_calls if all_tool_calls else None, tokens

            except Exception as e:
                # The error kind is guessed from the message text because
                # the providers raise different exception types.
                error_str = str(e).lower()
                provider = self.key_manager.get_current_provider()
                retry_after = _parse_retry_after(e)

                if "401" in error_str or "403" in error_str or "unauthorized" in error_str or "user not found" in error_str or ("invalid" in error_str and "key" in error_str):
                    console.print(f"\n [red]Auth error on {provider}: invalid API key[/red]")
                    console.print(f" [yellow]Check with: gemi key list {provider}[/yellow]")
                    if _exhaust_key_and_switch():
                        continue
                    break

                elif "429" in error_str or "rate" in error_str or "quota" in error_str or "resource" in error_str:
                    if _next_model_or_provider(retry_after):
                        continue
                    break

                elif "connect" in error_str or "connection" in error_str or "timeout" in error_str or "unreachable" in error_str:
                    console.print(f"\n [red]Can't reach {provider}[/red]")
                    if _exhaust_key_and_switch():
                        continue
                    break

                elif "provider returned error" in error_str or "no endpoints" in error_str or "overloaded" in error_str or "service unavailable" in error_str:
                    console.print(f"\n [red]Error on {provider}/{self.model}:[/red] [dim]{escape(str(e)[:100])}[/dim]")
                    if _next_model_or_provider(retry_after):
                        continue
                    break

                else:
                    console.print(f"\n [red]Error on {provider}/{self.model}: {escape(str(e))}[/red]")
                    if _next_model_or_provider(retry_after):
                        continue
                    break

        # Inner loop exhausted all models/providers — wait for cooldown before next cycle
        available = self.key_manager.get_any_available_key()
        if available:
            self._switch_provider()
            continue

        wait_time = self.key_manager.get_nearest_cooldown()
        if wait_time and cycle < max_cycles - 1:
            # Never sleep longer than two minutes in one go.
            wait_secs = min(wait_time + 2, 120)
            mins, secs = divmod(int(wait_secs), 60)
            wait_str = f"{mins}m {secs}s" if mins > 0 else f"{secs}s"
            console.print(f"\n [yellow]All providers on cooldown. Waiting {wait_str} before retry (cycle {cycle + 1}/{max_cycles})...[/yellow]")
            await asyncio.sleep(wait_secs)
            self.key_manager.reset_failed_models()
            available = self.key_manager.get_any_available_key()
            if available:
                info = get_provider_info(available.provider)
                display = info["name"] if info else available.provider
                console.print(f" [green]Retrying with {display} ({available.name})...[/green]")
                self._switch_provider()
                continue

        console.print(f"\n [bold red]All providers and models exhausted after {cycle + 1} cycles. Add more keys or wait.[/bold red]")
        return "", None, 0

    return "", None, 0