gemi-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gemi/__init__.py +1 -0
- gemi/agent/__init__.py +0 -0
- gemi/agent/loop.py +594 -0
- gemi/agent/tools.py +571 -0
- gemi/compaction.py +67 -0
- gemi/config.py +53 -0
- gemi/keys/__init__.py +0 -0
- gemi/keys/manager.py +265 -0
- gemi/keys/store.py +92 -0
- gemi/main.py +426 -0
- gemi/providers/__init__.py +0 -0
- gemi/providers/base.py +35 -0
- gemi/providers/gemini.py +126 -0
- gemi/providers/ollama.py +72 -0
- gemi/providers/openai_compat.py +140 -0
- gemi/registry.py +201 -0
- gemi/sessions.py +84 -0
- gemi/ui.py +387 -0
- gemi_cli-0.1.0.dist-info/METADATA +462 -0
- gemi_cli-0.1.0.dist-info/RECORD +22 -0
- gemi_cli-0.1.0.dist-info/WHEEL +4 -0
- gemi_cli-0.1.0.dist-info/entry_points.txt +2 -0
gemi/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
gemi/agent/__init__.py
ADDED
|
File without changes
|
gemi/agent/loop.py
ADDED
|
@@ -0,0 +1,594 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
from rich.live import Live
|
|
7
|
+
from rich.markdown import Markdown
|
|
8
|
+
from rich.panel import Panel
|
|
9
|
+
|
|
10
|
+
from gemi.agent.tools import TOOL_DEFINITIONS, execute_tool
|
|
11
|
+
from gemi.compaction import compact_messages, estimate_tokens, needs_compaction
|
|
12
|
+
from gemi.config import load_config
|
|
13
|
+
from gemi.keys.manager import KeyManager
|
|
14
|
+
from gemi.providers.base import Chunk, Message
|
|
15
|
+
from gemi.providers.gemini import GeminiProvider
|
|
16
|
+
from gemi.providers.ollama import OllamaProvider
|
|
17
|
+
from gemi.providers.openai_compat import OpenAICompatProvider
|
|
18
|
+
from gemi.registry import PROVIDERS, get_base_url, get_context_window, get_default_model, get_provider_info, get_provider_type
|
|
19
|
+
from gemi.sessions import save_session
|
|
20
|
+
from gemi.ui import print_plan, print_plan_approval, print_tool_call, print_tool_result
|
|
21
|
+
|
|
22
|
+
console = Console()
|
|
23
|
+
|
|
24
|
+
CHARS_PER_TOKEN = 4
|
|
25
|
+
|
|
26
|
+
SYSTEM_PROMPT = """You are gemi, an expert AI coding agent running in the user's terminal. You are a world-class software engineer. You can read, write, and edit files, run commands, search code, and manage git — all through your tools.
|
|
27
|
+
|
|
28
|
+
CRITICAL RULES — FOLLOW THESE EXACTLY:
|
|
29
|
+
1. NEVER ASK QUESTIONS. When the user asks you to do something, DO IT IMMEDIATELY. Do not ask "what command?", "what framework?", "which directory?", "what language?" — use your tools to find out. The ONLY exception: truly destructive operations (deleting production data, force pushing).
|
|
30
|
+
2. You have FULL ACCESS to the file system. NEVER say "I can't access files" or "please provide the path". You CAN read and write any file. All paths are relative to the current working directory.
|
|
31
|
+
3. ALWAYS use tools FIRST. When asked about code, files, or the project — read them with tools before responding. Never guess file contents.
|
|
32
|
+
4. When asked to "build", "complete", "fix", or "run" something — take action immediately. List directories, read package.json/Makefile/setup.py, figure out the right command, and run it. Show results.
|
|
33
|
+
5. When a project is in a subdirectory, run commands from that subdirectory. Use `cd subdirectory && command` or `--prefix subdirectory` for npm commands. NEVER run npm/pip/make in the wrong directory.
|
|
34
|
+
6. If a command fails, READ the error, understand it, and fix it yourself. Do not ask the user to fix it.
|
|
35
|
+
|
|
36
|
+
About gemi:
|
|
37
|
+
- gemi is a free open-source AI coding CLI with multi-provider support
|
|
38
|
+
- Providers: {providers_summary}
|
|
39
|
+
- Currently using: {current_provider}/{current_model}
|
|
40
|
+
- Features: multi-account key rotation, auto provider failover, encrypted key storage, session persistence
|
|
41
|
+
- Manage keys: `gemi key add <provider>` | View providers: `gemi providers` | Switch model: `/model <name>`
|
|
42
|
+
{active_keys_info}
|
|
43
|
+
|
|
44
|
+
Environment:
|
|
45
|
+
- Working directory: {cwd}
|
|
46
|
+
- Project structure:
|
|
47
|
+
{project_structure}
|
|
48
|
+
{project_context}
|
|
49
|
+
|
|
50
|
+
Tools available:
|
|
51
|
+
- list_directory(path) — list files. "." for current dir, "subfolder" for subfolder
|
|
52
|
+
- read_file(path) — read file contents with line numbers
|
|
53
|
+
- write_file(path, content) — create or overwrite a file
|
|
54
|
+
- edit_file(path, old_text, new_text) — find-and-replace in a file (exact match)
|
|
55
|
+
- run_command(command) — run shell command. Fast commands return immediately, long-running ones return after 30s with partial output while continuing in background
|
|
56
|
+
- search_files(pattern, path) — grep for text in files
|
|
57
|
+
- find_files(pattern) — find files by glob ("**/*.py", "*.json")
|
|
58
|
+
- git_status(), git_diff(), git_log(), git_commit(message, files), git_branch(action)
|
|
59
|
+
- create_plan(title, steps) — create a step-by-step plan for the user to review before execution
|
|
60
|
+
|
|
61
|
+
Planning vs Direct Execution:
|
|
62
|
+
- For COMPLEX tasks (building new features, creating projects, multi-file refactors, setting up infrastructure): FIRST call create_plan() with clear steps, then wait for user approval before executing.
|
|
63
|
+
- For SIMPLE tasks (fix a bug, read a file, run a command, small edits, answer a question): execute directly without a plan.
|
|
64
|
+
- A task is complex if it involves 3+ files or 3+ distinct actions.
|
|
65
|
+
|
|
66
|
+
Workflow:
|
|
67
|
+
1. User gives a task → read the codebase with tools to understand it
|
|
68
|
+
2. If complex → call create_plan() with steps, then execute each step after approval
|
|
69
|
+
3. If simple → execute directly
|
|
70
|
+
4. Verify: run tests, start dev server, check output
|
|
71
|
+
5. Report what you did in 1-2 sentences
|
|
72
|
+
|
|
73
|
+
Guidelines:
|
|
74
|
+
- Read files before editing them
|
|
75
|
+
- Make minimal, targeted changes
|
|
76
|
+
- Keep responses short — 1-2 sentences, not paragraphs
|
|
77
|
+
- When a project is in a subdirectory, always cd into it or use the right prefix
|
|
78
|
+
- When users ask about gemi itself (providers, features, keys), answer from your knowledge above
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _get_project_structure() -> str:
    """Return a short listing of the current working directory.

    Directories are listed first, then files, each group sorted
    case-insensitively by name. Hidden entries (names starting with ".")
    are skipped, except ".env" and ".gitignore". At most 30 lines are
    returned.

    Returns:
        The newline-joined listing, "(empty directory)" when nothing is
        listed, or "(unable to list directory)" when the directory cannot
        be read.
    """
    try:
        # Stat each entry once; the flag is reused for sorting and the prefix.
        entries = [(entry, entry.is_dir()) for entry in Path.cwd().iterdir()]
    except OSError:
        # A deleted or unreadable cwd must not crash agent start-up.
        return "(unable to list directory)"

    entries.sort(key=lambda item: (not item[1], item[0].name.lower()))

    lines = []
    for entry, is_dir in entries:
        if entry.name.startswith(".") and entry.name not in (".env", ".gitignore"):
            continue
        prefix = "📁" if is_dir else " "
        lines.append(f"{prefix} {entry.name}")
    return "\n".join(lines[:30]) or "(empty directory)"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _get_project_context() -> str:
    """Return the contents of ".gemi.md" in the working directory, if any.

    The file is decoded as UTF-8 (undecodable bytes are replaced) so the
    result does not depend on the platform's default encoding. Content
    longer than 3000 characters is cut and marked as truncated.

    Returns:
        A block starting with "Project context (.gemi.md):", or an empty
        string when the file is missing or cannot be read.
    """
    gemi_md = Path.cwd() / ".gemi.md"
    try:
        content = gemi_md.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Missing file, a directory with that name, or no read permission:
        # project context is optional, so fall back to none.
        return ""

    if len(content) > 3000:
        content = content[:3000] + "\n... (truncated)"
    return f"\nProject context (.gemi.md):\n{content}\n"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _create_provider(provider_name: str, api_key: str, config: dict):
    """Instantiate the provider client registered under *provider_name*.

    The registry's provider type selects the client class: "gemini" and
    "ollama" have dedicated clients; every other type (including
    "openai_compat") uses the OpenAI-compatible client. *config* is
    accepted for interface compatibility and is not read here.
    """
    kind = get_provider_type(provider_name)

    if kind == "gemini":
        return GeminiProvider(api_key=api_key)

    registered_url = get_base_url(provider_name)
    if kind == "ollama":
        return OllamaProvider(base_url=registered_url or "http://localhost:11434")

    # "openai_compat" and any unrecognised type share the same client.
    return OpenAICompatProvider(
        api_key=api_key,
        base_url=registered_url or "https://api.openai.com/v1",
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _get_model(provider_name: str, config: dict) -> str:
    """Pick the model to use for *provider_name*.

    The configured "default_model" applies only when *provider_name* is the
    configured "default_provider" (which defaults to "gemini"); otherwise
    the registry's default model for the provider is returned.
    """
    registry_default = get_default_model(provider_name)
    if provider_name != config.get("default_provider", "gemini"):
        return registry_default
    return config.get("default_model", registry_default)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _parse_retry_after(error: Exception) -> float | None:
|
|
126
|
+
retry_after = None
|
|
127
|
+
|
|
128
|
+
if hasattr(error, "response") and error.response is not None:
|
|
129
|
+
headers = getattr(error.response, "headers", {})
|
|
130
|
+
if "retry-after" in headers:
|
|
131
|
+
try:
|
|
132
|
+
retry_after = float(headers["retry-after"])
|
|
133
|
+
except (ValueError, TypeError):
|
|
134
|
+
pass
|
|
135
|
+
if not retry_after and "x-ratelimit-reset" in headers:
|
|
136
|
+
try:
|
|
137
|
+
import time
|
|
138
|
+
reset_ts = float(headers["x-ratelimit-reset"])
|
|
139
|
+
retry_after = max(1, reset_ts - time.time())
|
|
140
|
+
except (ValueError, TypeError):
|
|
141
|
+
pass
|
|
142
|
+
|
|
143
|
+
if not retry_after:
|
|
144
|
+
import re
|
|
145
|
+
match = re.search(r"retry.{0,5}?(\d+)\s*s", str(error).lower())
|
|
146
|
+
if match:
|
|
147
|
+
retry_after = float(match.group(1))
|
|
148
|
+
else:
|
|
149
|
+
match = re.search(r"try again in (\d+)", str(error).lower())
|
|
150
|
+
if match:
|
|
151
|
+
retry_after = float(match.group(1))
|
|
152
|
+
|
|
153
|
+
return retry_after
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class AgentLoop:
    """Run the agent conversation against a rotating set of providers.

    The loop keeps the message history, sends it to the current provider,
    executes the tool calls the model returns, and feeds the results back
    until the model answers without tool calls. On provider errors it
    rotates through models, keys and providers via the ``KeyManager``.
    """

    # Tools whose result is echoed to the terminal after execution.
    _WRITE_TOOLS = frozenset({"write_file", "edit_file", "run_command", "git_commit"})

    def __init__(self, session_id: str | None = None):
        """Load configuration and keys, then set up the initial provider.

        Args:
            session_id: Identifier used for auto-saving; when empty or
                ``None`` the session is never saved.
        """
        self.config = load_config()
        self.key_manager = KeyManager(config=self.config)
        self.messages: list[Message] = []
        self.provider = None
        self.model = None
        self.session_id = session_id or ""
        self.total_tokens_used = 0
        self.total_requests = 0
        # Per-provider counters: {"tokens": int, "requests": int, "model": str}
        self.provider_usage: dict[str, dict] = {}
        # Active plan as {"title": str, "steps": list[dict]}, or None.
        self.current_plan: dict | None = None
        self._init_provider()

    def _init_provider(self):
        """Create the provider for the current key and seed the system prompt.

        When no key is configured a hint is printed and ``self.provider``
        stays ``None``.
        """
        key_state = self.key_manager.get_current_key()
        if not key_state:
            console.print("[bold red]No API keys configured. Run: gemi key add gemini[/bold red]")
            return
        provider_name = self.key_manager.get_current_provider()
        self.provider = _create_provider(provider_name, key_state.api_key, self.config)
        self.model = _get_model(provider_name, self.config)

        providers_summary = ", ".join(
            f"{name} ({info['name']})" for name, info in PROVIDERS.items()
        )

        key_status = self.key_manager.get_status()
        configured = [s for s in key_status if s["state"] != "no key"]
        if configured:
            # dict.fromkeys de-duplicates provider names while keeping order.
            lines = [f"- Configured providers with keys: {', '.join(dict.fromkeys(s['provider'] for s in configured))}"]
            active = [s for s in configured if s["state"] == "active"]
            if active:
                lines.append(f"- Active key: {active[0]['provider']}/{active[0]['name']}")
            active_keys_info = "\n".join(lines)
        else:
            active_keys_info = "- No API keys configured yet"

        system_prompt = SYSTEM_PROMPT.format(
            cwd=os.getcwd(),
            project_structure=_get_project_structure(),
            project_context=_get_project_context(),
            providers_summary=providers_summary,
            current_provider=provider_name,
            current_model=self.model,
            active_keys_info=active_keys_info,
        )
        self.messages = [Message(role="system", content=system_prompt)]

    def load_session(self, messages: list[Message]):
        """Replace the current history with *messages* from a saved session."""
        self.messages = messages

    def _switch_provider(self):
        """Rebuild the provider client from the key manager's current key.

        Returns:
            ``True`` when a key was available and the provider was
            switched, ``False`` otherwise.
        """
        key_state = self.key_manager.get_current_key()
        if not key_state:
            return False
        provider_name = self.key_manager.get_current_provider()
        self.provider = _create_provider(provider_name, key_state.api_key, self.config)
        model_from_manager = self.key_manager.get_current_model()
        self.model = model_from_manager or _get_model(provider_name, self.config)
        # The new model may have a smaller context window than the old one.
        self._fit_context_to_model()
        return True

    def _fit_context_to_model(self):
        """Trim the oldest messages so the history fits the current model.

        The budget is 80% of the model's context window, estimated at
        ``CHARS_PER_TOKEN`` characters per token. The system message is
        always kept; the newest messages are kept while they fit, and a
        notice replaces whatever was dropped.
        """
        provider_name = self.key_manager.get_current_provider()
        max_tokens = get_context_window(provider_name, self.model)
        max_chars = int(max_tokens * CHARS_PER_TOKEN * 0.8)

        total_chars = sum(len(m.content or "") for m in self.messages)
        if total_chars <= max_chars:
            return

        system_msg = self.messages[0] if self.messages and self.messages[0].role == "system" else None
        recent = self.messages[1:] if system_msg else self.messages[:]

        # Content may be None (handled the same way as in the sum above).
        used_chars = len(system_msg.content or "") if system_msg else 0

        # Walk newest-to-oldest, collecting in reverse to avoid O(n^2) inserts.
        kept_newest_first = []
        for msg in reversed(recent):
            msg_chars = len(msg.content or "")
            if used_chars + msg_chars > max_chars:
                break
            kept_newest_first.append(msg)
            used_chars += msg_chars
        kept = kept_newest_first[::-1]

        if len(kept) < len(recent):
            # A tool result whose assistant tool-call message was trimmed is
            # orphaned; drop such leading results rather than send them.
            first_non_tool = next(
                (i for i, m in enumerate(kept) if m.role != "tool"), len(kept)
            )
            kept = kept[first_non_tool:]

        dropped = len(recent) - len(kept)
        if dropped > 0:
            summary = Message(
                role="user",
                content=f"[{dropped} earlier messages were trimmed to fit the current model's context window. The conversation continues below.]",
            )
            self.messages = ([system_msg] if system_msg else []) + [summary] + kept
            console.print(
                f" [dim]Trimmed {dropped} old messages to fit {self.model} context window[/dim]"
            )

    def _maybe_compact(self):
        """Compact the history when ``needs_compaction`` says it is due."""
        provider_name = self.key_manager.get_current_provider()
        max_tokens = get_context_window(provider_name, self.model)
        if needs_compaction(self.messages, max_tokens):
            old_count = len(self.messages)
            self.messages = compact_messages(self.messages, max_tokens)
            new_count = len(self.messages)
            if new_count < old_count:
                console.print(
                    f" [dim]Compacted context: {old_count} → {new_count} messages[/dim]"
                )

    def _auto_save(self):
        """Persist the session when a session id is set."""
        if self.session_id:
            save_session(
                self.session_id,
                self.messages,
                metadata={
                    "cwd": os.getcwd(),
                    "provider": self.key_manager.get_current_provider(),
                    "model": self.model,
                    "tokens_used": self.total_tokens_used,
                },
            )

    def _context_usage(self) -> tuple[str, int, int, int]:
        """Return (provider, estimated context tokens, window size, percent used)."""
        provider = self.key_manager.get_current_provider()
        max_ctx = get_context_window(provider, self.model)
        ctx = estimate_tokens(self.messages)
        # Guard against a zero window so status output can never crash.
        ctx_pct = min(100, int(ctx / max_ctx * 100)) if max_ctx else 0
        return provider, ctx, max_ctx, ctx_pct

    def get_status_line(self) -> str:
        """Return a one-line summary of provider, token use and context fill."""
        provider, _ctx, max_ctx, ctx_pct = self._context_usage()
        tokens = self.total_tokens_used
        reqs = self.total_requests
        return f"{provider}/{self.model} | tokens: ~{tokens:,} / {max_ctx:,} | reqs: {reqs} | context: {ctx_pct}%"

    def get_detailed_status(self) -> str:
        """Return a multi-line, Rich-markup status report for this session."""
        provider, ctx, max_ctx, ctx_pct = self._context_usage()
        keys = self.key_manager.get_status()
        provider_keys = [k for k in keys if k["provider"] == provider]
        total_keys = len(provider_keys)
        active_keys = len([k for k in provider_keys if k["state"] not in ("exhausted",)])

        lines = [
            " [bold]Current[/bold]",
            f" Provider: [green]{provider}[/green]",
            f" Model: [green]{self.model}[/green]",
            f" Context: [cyan]{ctx:,}[/cyan] / {max_ctx:,} ({ctx_pct}%)",
            f" Keys: [cyan]{active_keys}[/cyan] active / {total_keys} total",
            f" Session: [dim]{self.session_id}[/dim]",
            "",
            " [bold]Usage This Session[/bold]",
        ]

        if self.provider_usage:
            for prov, usage in self.provider_usage.items():
                prov_max = get_context_window(prov, usage["model"])
                marker = " [green]◀ active[/green]" if prov == provider else ""
                lines.append(f" [cyan]{prov}[/cyan] ({usage['model']}){marker}")
                lines.append(f" Tokens: ~{usage['tokens']:,} / {prov_max:,} | Requests: {usage['requests']}")
        else:
            lines.append(" [dim]No requests made yet[/dim]")

        lines.append("")
        lines.append(f" [bold]Total:[/bold] ~{self.total_tokens_used:,} tokens | {self.total_requests} requests")

        return "\n".join(lines)

    def _update_plan_progress(self, tool_name: str):
        """Advance the active plan by one step after a tool call.

        Each call starts the next pending step and marks the step before it
        as done. When no pending step is left, the step in progress is the
        last one: it is marked done and the plan is cleared. *tool_name* is
        accepted for interface compatibility and is not used.
        """
        if not self.current_plan:
            return
        steps = self.current_plan["steps"]

        next_pending = next((s for s in steps if s["status"] == "pending"), None)
        if next_pending is None:
            # Nothing left to start. Previously this state was unreachable
            # while the last step was "in_progress", so the plan never
            # finished and kept being re-printed on every later tool call.
            for step in steps:
                step["status"] = "done"
            self.current_plan = None
            return

        next_pending["status"] = "in_progress"
        # A step is finished once the step after it has started.
        for previous, current in zip(steps, steps[1:]):
            if current["status"] == "in_progress" and previous["status"] == "in_progress":
                previous["status"] = "done"

        print_plan(self.current_plan["title"], steps)

    def get_plan(self) -> dict | None:
        """Return the active plan, or ``None`` when there is none."""
        return self.current_plan

    async def chat(self, user_input: str):
        """Handle one user turn: call the model and run its tool calls.

        The model is called repeatedly (up to ``agent.max_iterations``,
        default 50) until it replies without tool calls. ``create_plan``
        calls are shown to the user for approval instead of being executed
        as a tool. Afterwards the session is auto-saved and a status line
        is printed.

        Args:
            user_input: The user's message for this turn.
        """
        if not self.provider:
            console.print("[bold red]No provider available. Add API keys first.[/bold red]")
            return

        self.messages.append(Message(role="user", content=user_input))
        self._maybe_compact()

        agent_config = self.config.get("agent", {})
        max_iterations = agent_config.get("max_iterations", 50)
        auto_approve_reads = agent_config.get("auto_approve_reads", True)
        auto_approve_writes = agent_config.get("auto_approve_writes", False)

        for _ in range(max_iterations):
            text_response, tool_calls, tokens_used = await self._call_provider()

            self.total_tokens_used += tokens_used
            self.total_requests += 1

            current = self.key_manager.get_current_provider()
            if current not in self.provider_usage:
                self.provider_usage[current] = {"tokens": 0, "requests": 0, "model": self.model}
            self.provider_usage[current]["tokens"] += tokens_used
            self.provider_usage[current]["requests"] += 1
            self.provider_usage[current]["model"] = self.model
            self.key_manager.record_usage(tokens_used)

            if not tool_calls:
                if not text_response.strip() and tokens_used == 0:
                    console.print("\n [bold red]Could not get a response. All providers failed or returned empty.[/bold red]")
                    console.print(" [yellow]Try again, or check provider status with /status[/yellow]")
                    return
                self.messages.append(Message(role="assistant", content=text_response))
                break

            self.messages.append(Message(
                role="assistant",
                content=text_response,
                tool_calls=tool_calls,
            ))

            for tc in tool_calls:
                func_name = tc["function"]["name"]
                func_args = tc["function"]["arguments"]
                if isinstance(func_args, str):
                    try:
                        func_args = json.loads(func_args)
                    except json.JSONDecodeError:
                        func_args = {}
                if not isinstance(func_args, dict):
                    # Valid JSON that is not an object (null, list, ...) would
                    # otherwise crash on .get below.
                    func_args = {}

                if func_name == "create_plan":
                    plan_title = func_args.get("title", "Plan")
                    plan_steps = func_args.get("steps", [])
                    for step in plan_steps:
                        step["status"] = "pending"
                    self.current_plan = {"title": plan_title, "steps": plan_steps}
                    print_plan(plan_title, plan_steps)
                    approved = print_plan_approval()
                    if approved:
                        result = "Plan approved by user. Execute each step now. After completing each step, briefly state what you did."
                    else:
                        result = "Plan rejected by user. Ask what they'd like to change."
                        self.current_plan = None
                    self.messages.append(Message(
                        role="tool",
                        content=result,
                        tool_call_id=tc["id"],
                        name=func_name,
                    ))
                    continue

                print_tool_call(func_name, func_args)

                result = execute_tool(
                    func_name,
                    func_args,
                    auto_approve_reads=auto_approve_reads,
                    auto_approve_writes=auto_approve_writes,
                )

                if func_name in self._WRITE_TOOLS:
                    print_tool_result(func_name, result)

                if self.current_plan:
                    self._update_plan_progress(func_name)

                self.messages.append(Message(
                    role="tool",
                    content=result,
                    tool_call_id=tc["id"],
                    name=func_name,
                ))

        self._auto_save()
        provider, _ctx, _max_ctx, ctx_pct = self._context_usage()
        bar_width = 20
        filled = int(bar_width * ctx_pct / 100)
        bar = "[green]" + "━" * filled + "[/green][dim]" + "━" * (bar_width - filled) + "[/dim]"
        console.print(f"\n [dim]{provider}/{self.model}[/dim] ~{self.total_tokens_used:,} tokens {bar} {ctx_pct}%")

    def _failover(self, retry_after) -> bool:
        """Move to the next model, or else cool the key down and switch.

        Args:
            retry_after: Cooldown hint in seconds passed to the key manager
                (may be ``None``).

        Returns:
            ``True`` when there is something new to retry with.
        """
        next_model = self.key_manager.try_next_model()
        if next_model:
            self.model = next_model
            return True
        self.key_manager.report_rate_limit(retry_after=retry_after)
        return self._switch_provider()

    def _drop_key_and_switch(self) -> bool:
        """Mark the current key exhausted and switch to another one.

        Returns:
            ``True`` when another key was available.
        """
        self.key_manager.report_exhausted()
        return self._switch_provider()

    async def _call_provider(self, max_cycles: int = 5) -> tuple[str, list[dict] | None, int]:
        """Stream one model response, failing over between models and keys.

        Text chunks are rendered live in a panel while streaming. Empty
        replies, replies that look like provider error text, and raised
        exceptions all trigger failover. When every key is cooling down the
        method sleeps (at most 120 s) and starts another cycle.

        Args:
            max_cycles: Maximum number of failover cycles.

        Returns:
            ``(text, tool_calls or None, estimated_tokens)``; ``("", None,
            0)`` when no provider produced a response.
        """
        import asyncio

        # The attempt budget is shared across all cycles.
        max_attempts = 50
        attempt = 0

        for cycle in range(max_cycles):
            while attempt < max_attempts:
                attempt += 1
                live = None
                try:
                    text_parts = []
                    all_tool_calls = []

                    provider_label = f"[dim]{self.key_manager.get_current_provider()}/{self.model}[/dim]"

                    def _render(content_text):
                        md = Markdown(content_text) if content_text else Markdown("")
                        return Panel(md, border_style="blue", padding=(0, 1), subtitle=provider_label, subtitle_align="right")

                    async for chunk in self.provider.chat(
                        messages=self.messages,
                        tools=TOOL_DEFINITIONS,
                        model=self.model,
                        stream=True,
                    ):
                        if chunk.text:
                            text_parts.append(chunk.text)
                            if not live:
                                live = Live(_render("".join(text_parts)), console=console, refresh_per_second=12, vertical_overflow="visible")
                                live.start()
                            else:
                                live.update(_render("".join(text_parts)))
                        if chunk.tool_calls:
                            all_tool_calls.extend(chunk.tool_calls)

                    if live:
                        live.stop()
                    text = "".join(text_parts)

                    # NOTE(review): these phrases are matched anywhere in the
                    # reply, so a genuine answer that quotes one of them is
                    # treated as a provider failure -- confirm this is intended.
                    error_phrases = ["provider returned error", "model is overloaded", "no endpoints found", "service unavailable"]
                    text_lower = text.strip().lower()
                    is_error_response = any(phrase in text_lower for phrase in error_phrases)

                    if is_error_response or (not text.strip() and not all_tool_calls):
                        provider = self.key_manager.get_current_provider()
                        reason = f"error response: {text.strip()[:80]}" if is_error_response else "empty response"
                        console.print(f"\n [red]Error on {provider}/{self.model}: {reason}[/red]")
                        if self._failover(retry_after=30):
                            continue
                        break

                    tokens = len(text) // CHARS_PER_TOKEN
                    return text, all_tool_calls if all_tool_calls else None, tokens

                except Exception as e:
                    # Stop the live panel first so error output is not drawn
                    # underneath it.
                    if live and live.is_started:
                        live.stop()
                    error_str = str(e).lower()
                    provider = self.key_manager.get_current_provider()
                    retry_after = _parse_retry_after(e)

                    if "401" in error_str or "403" in error_str or "unauthorized" in error_str or "user not found" in error_str or ("invalid" in error_str and "key" in error_str):
                        console.print(f"\n [red]Auth error on {provider}: invalid API key[/red]")
                        console.print(f" [yellow]Check with: gemi key list {provider}[/yellow]")
                        recovered = self._drop_key_and_switch()

                    elif "429" in error_str or "rate" in error_str or "quota" in error_str or "resource" in error_str:
                        recovered = self._failover(retry_after)

                    elif "connect" in error_str or "connection" in error_str or "timeout" in error_str or "unreachable" in error_str:
                        console.print(f"\n [red]Can't reach {provider}[/red]")
                        recovered = self._drop_key_and_switch()

                    elif "provider returned error" in error_str or "no endpoints" in error_str or "overloaded" in error_str or "service unavailable" in error_str:
                        console.print(f"\n [red]Error on {provider}/{self.model}:[/red] [dim]{str(e)[:100]}[/dim]")
                        recovered = self._failover(retry_after)

                    else:
                        console.print(f"\n [red]Error on {provider}/{self.model}: {e}[/red]")
                        recovered = self._failover(retry_after)

                    if recovered:
                        continue
                    break

                finally:
                    # Cancellation and Ctrl-C are BaseExceptions and bypass the
                    # handler above; never leave the terminal in live mode.
                    if live is not None and live.is_started:
                        live.stop()

            # Inner loop exhausted all models/providers — wait for cooldown before next cycle
            available = self.key_manager.get_any_available_key()
            if available:
                self._switch_provider()
                continue

            wait_time = self.key_manager.get_nearest_cooldown()
            if wait_time and cycle < max_cycles - 1:
                wait_secs = min(wait_time + 2, 120)
                mins, secs = divmod(int(wait_secs), 60)
                wait_str = f"{mins}m {secs}s" if mins > 0 else f"{secs}s"
                console.print(f"\n [yellow]All providers on cooldown. Waiting {wait_str} before retry (cycle {cycle + 1}/{max_cycles})...[/yellow]")
                await asyncio.sleep(wait_secs)
                self.key_manager.reset_failed_models()
                available = self.key_manager.get_any_available_key()
                if available:
                    info = get_provider_info(available.provider)
                    display = info["name"] if info else available.provider
                    console.print(f" [green]Retrying with {display} ({available.name})...[/green]")
                    self._switch_provider()
                    continue

            console.print(f"\n [bold red]All providers and models exhausted after {cycle + 1} cycles. Add more keys or wait.[/bold red]")
            return "", None, 0

        return "", None, 0
|