grucli-3.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
grucli/api.py ADDED
@@ -0,0 +1,725 @@
import requests
import json
import sys
import time
import os
import uuid
import platform
from . import tools, auth, config, interrupt
from .theme import Colors, Icons
from .stats import STATS

# Constants
BASE_URL = "http://localhost:1234/v1"
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
CODE_ASSIST_ENDPOINT = 'https://cloudcode-pa.googleapis.com'
CODE_ASSIST_API_VERSION = 'v1internal'
CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
ANTHROPIC_BASE_URL = "https://api.anthropic.com/v1"
OPENAI_BASE_URL = "https://api.openai.com/v1"
OLLAMA_BASE_URL = "http://localhost:11434"
OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"

# State
GEMINI_API_KEY = None
CEREBRAS_API_KEY = None
ANTHROPIC_API_KEY = None
OPENAI_API_KEY = None
OLLAMA_API_KEY = None
CURRENT_API = "lm_studio"
SHOW_REASONING = False

def get_user_agent(model_id="unknown"):
    os_name = platform.system().lower()
    arch = platform.machine().lower()
    return f"GeminiCLI/0.21.0/{model_id} ({os_name}; {arch})"

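# Example (illustrative): on 64-bit Linux with model_id="gemini-2.5-pro" this
# yields "GeminiCLI/0.21.0/gemini-2.5-pro (linux; x86_64)".
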
def _google_code_assist_request(endpoint, token, project_id=None, extra_payload=None):
    url = f"{CODE_ASSIST_ENDPOINT}/{CODE_ASSIST_API_VERSION}:{endpoint}"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "User-Agent": get_user_agent()
    }

    payload = {
        "metadata": {
            "ideType": "IDE_UNSPECIFIED",
            "platform": "PLATFORM_UNSPECIFIED",
            "pluginType": "GEMINI"
        }
    }
    if project_id:
        payload["cloudaicompanionProject"] = project_id
        payload["metadata"]["duetProject"] = project_id

    if extra_payload:
        payload.update(extra_payload)

    try:
        resp = requests.post(url, json=payload, headers=headers)
        if resp.status_code == 200:
            return resp.json()
        else:
            print(f"{endpoint} Error: {resp.status_code} - {resp.text}")
            return None
    except Exception as e:
        print(f"{endpoint} Exception: {e}")
        return None

def load_code_assist(token, project_id=None):
    return _google_code_assist_request("loadCodeAssist", token, project_id)

def onboard_user(token, tier_id, project_id=None):
    return _google_code_assist_request("onboardUser", token, project_id, {"tierId": tier_id})

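# Sketch of the expected onboarding flow (illustrative only; the response field
# names "currentTier" and the "free-tier" id are assumptions, not confirmed by
# this module):
#
#   cfg = load_code_assist(token, project_id)
#   if cfg and not cfg.get("currentTier"):
#       onboard_user(token, "free-tier", project_id)
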
def get_system_prompt():
    dir_path = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(dir_path, "sysprompts", "main_sysprompt.txt")
    default = "You are a helpful assistant."

    content = default
    if os.path.exists(path):
        try:
            with open(path, "r") as f:
                content = f.read().strip()
        except Exception as e:
            print(f"\nError reading system prompt: {e}")

    if "<auto_inject_file_tree>" in content:
        tree = tools.get_file_tree()
        content = content.replace("<auto_inject_file_tree>", tree)

    return content

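# The prompt file may embed the literal placeholder <auto_inject_file_tree>,
# which is swapped for the project tree at load time. A minimal
# main_sysprompt.txt using it might read (illustrative):
#
#   You are a coding assistant working in this project:
#   <auto_inject_file_tree>
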
def set_api_config(api_type, api_key=None):
    global CURRENT_API, GEMINI_API_KEY, CEREBRAS_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_API_KEY
    CURRENT_API = api_type
    if api_type == "gemini": GEMINI_API_KEY = api_key
    elif api_type == "cerebras": CEREBRAS_API_KEY = api_key
    elif api_type == "anthropic": ANTHROPIC_API_KEY = api_key
    elif api_type == "openai": OPENAI_API_KEY = api_key
    elif api_type == "ollama": OLLAMA_API_KEY = api_key

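# Example usage (illustrative; assumes the key is supplied via the environment):
#
#   set_api_config("openai", os.environ.get("OPENAI_API_KEY"))
#   set_api_config("lm_studio")   # local backends need no key
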
def get_gemini_models():
    return ["gemini-2.5-flash", "gemini-2.5-pro", "gemini-3-flash-preview", "gemini-3-pro-preview", "custom"]

def get_cerebras_models():
    return ["llama3.1-8b", "llama-3.3-70b", "gpt-oss-120b", "qwen-3-32b", "custom"]

def get_anthropic_models():
    return [
        "claude-opus-4-5-20251101", "claude-sonnet-4-5-20250929", "claude-haiku-4-5-20251001",
        "claude-4-5-opus-latest", "claude-4-5-sonnet-latest", "claude-4-5-haiku-latest",
        "claude-4-opus-latest", "claude-4-sonnet-latest", "claude-3-7-sonnet-latest",
        "claude-3-5-sonnet-latest", "claude-3-5-haiku-latest", "claude-3-opus-latest", "custom"
    ]

def get_openai_models():
    return [
        "gpt-5.2-pro", "gpt-5.2-thinking", "gpt-5.2-instant", "gpt-5", "gpt-5-chat",
        "gpt-5-mini", "gpt-5-nano", "o4-mini", "o4-mini-high", "o3", "o3-mini",
        "o3-pro", "o1", "o1-mini", "o1-pro", "o1-preview", "gpt-4.5", "gpt-4o",
        "gpt-4o-mini", "custom"
    ]

def get_ollama_models():
    cloud_models = ["gpt-oss:120b-cloud", "gpt-oss:20b-cloud", "qwen3-coder:480b-cloud", "qwen3-vl:235b-cloud"]
    local_models = []
    try:
        resp = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=2)
        if resp.status_code == 200:
            local_models = [m.get('name') for m in resp.json().get('models', [])]
    except Exception:
        pass  # Local Ollama daemon not running; offer cloud models only
    return cloud_models + local_models + ["custom"]

def get_models():
    model_funcs = {
        "gemini": get_gemini_models,
        "cerebras": get_cerebras_models,
        "anthropic": get_anthropic_models,
        "openai": get_openai_models,
        "ollama": get_ollama_models
    }

    if CURRENT_API in model_funcs:
        return [{"id": model, "context_length": "N/A", "size": 0} for model in model_funcs[CURRENT_API]()]
    else:
        try:
            resp = requests.get(f"{BASE_URL}/models")
            resp.raise_for_status()
            return resp.json()['data']
        except Exception:
            print("LM Studio server is not responding; please start it and try again.")
            sys.exit(1)

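# Whatever the backend, get_models() returns a list of dicts in the LM Studio
# /models shape, e.g. (illustrative):
#
#   [{"id": "gemini-2.5-pro", "context_length": "N/A", "size": 0}, ...]
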
def load_model_and_verify(model_id):
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": "hello"}],
        "stream": True,
        "max_tokens": 1
    }
    try:
        with requests.post(f"{BASE_URL}/chat/completions", json=payload, stream=True) as r:
            for line in r.iter_lines():
                if line and line.decode('utf-8').startswith("data: "):
                    return True
        return False
    except Exception:
        return False

# --- Parsers ---

def _parse_openai_chunk(line_str):
    if not line_str.startswith("data: "):
        return "", "", False
    content_json = line_str[6:]
    if content_json.strip() == "[DONE]":
        return "", "", True
    try:
        chunk = json.loads(content_json)
        if 'choices' in chunk and len(chunk['choices']) > 0:
            delta_obj = chunk['choices'][0]['delta']
            # Support multiple reasoning field names (reasoning_content, reasoning, or new structured fields)
            reasoning = delta_obj.get('reasoning_content') or delta_obj.get('reasoning')
            if not reasoning and 'reasoning_summary_text' in delta_obj:
                reasoning = delta_obj['reasoning_summary_text'].get('delta', '')

            return delta_obj.get('content', '') or '', reasoning or '', False
    except json.JSONDecodeError:
        pass
    return "", "", False

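# Each parser returns a (text_delta, reasoning_delta, is_done) triple. For a
# typical OpenAI-style SSE line (illustrative):
#
#   >>> _parse_openai_chunk('data: {"choices":[{"delta":{"content":"Hi"}}]}')
#   ('Hi', '', False)
#   >>> _parse_openai_chunk('data: [DONE]')
#   ('', '', True)
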
def _parse_anthropic_chunk(line_str):
    if not line_str.startswith("data: "):
        return "", "", False
    content = line_str[6:]
    try:
        chunk = json.loads(content)
        if chunk['type'] == 'content_block_delta':
            delta = chunk['delta']
            d_type = delta.get('type')
            if d_type == 'text_delta':
                return delta.get('text', ''), "", False
            elif d_type == 'thinking_delta':
                return "", delta.get('thinking', ''), False
            elif 'text' in delta:  # Fallback for older formats
                return delta.get('text', ''), "", False
        elif chunk['type'] == 'message_stop':
            return "", "", True
    except (json.JSONDecodeError, KeyError):
        pass
    return "", "", False

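# Anthropic streams thinking separately from text (illustrative):
#
#   >>> _parse_anthropic_chunk('data: {"type":"content_block_delta","delta":{"type":"thinking_delta","thinking":"hmm"}}')
#   ('', 'hmm', False)
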
def _parse_gemini_chunk(line_str):
    if not line_str.startswith("data: "):
        return "", "", False
    content = line_str[6:]
    if content.strip() == "[DONE]":
        return "", "", True
    try:
        chunk = json.loads(content)
        if 'candidates' in chunk:
            text = ""
            for candidate in chunk['candidates']:
                if 'content' in candidate and 'parts' in candidate['content']:
                    for part in candidate['content']['parts']:
                        text += part.get('text', '')
            return text, "", False
    except json.JSONDecodeError:
        pass
    return "", "", False

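# Gemini chunks carry text under candidates -> content -> parts (illustrative):
#
#   >>> _parse_gemini_chunk('data: {"candidates":[{"content":{"parts":[{"text":"Hi"}]}}]}')
#   ('Hi', '', False)
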
def _process_stream_ui(response, parser_func, line_generator=None):
    """Process streaming response and display to terminal with themed colors/status."""
    import re

    VALID_TOOLS = ['read_file', 'create_file', 'edit_file', 'delete_file', 'get_current_directory_structure']

    # State tracking
    buffer = ""
    in_tool_call = False
    current_tool = None

    # Styling
    TOOL_COLOR = "\033[38;5;214m"  # Orange-ish for tools
    THINK_COLOR = "\033[38;5;213m"
    DIM = "\033[90m"
    RESET = "\033[0m"

    ai_prefix = f"{Colors.SECONDARY}{Icons.DIAMOND}{Colors.RESET} "
    # print(ai_prefix, end="", flush=True)  # Now printed by stream_chat for immediate feedback

    iterator = line_generator(response) if line_generator else response.iter_lines()

    full_response = ""
    full_reasoning = ""
    thinking_duration = 0
    is_thinking = False
    thinking_via_tag = False
    thinking_start = 0
    check_for_think = True

    def check_tool_start(text):
        """Check if text ends with a potential tool start."""
        for tool in VALID_TOOLS:
            # Check full match with optional whitespace
            match = re.search(rf"{tool}\s*\(", text)
            if match:
                return tool, match.start()

            # Check partial match at end
            # Account for potential space: tool + "(" or tool + " ("
            targets = [tool + "(", tool + " ("]
            for target in targets:
                for i in range(len(target), 0, -1):
                    prefix = target[:i]
                    if text.endswith(prefix):
                        return None, -1  # Potential match, keep buffering
        return False, 0  # No match

    def get_tool_path(text):
        """Extract path argument from tool call string."""
        match = re.search(r'path=["\']([^"\']+)["\']', text)
        if match:
            return match.group(1)
        return None

    def is_tool_complete(text, start_idx):
        """Check if the tool call starting at start_idx is complete using balanced parens."""
        depth = 0
        in_str = None
        escape = False

        i = start_idx + len(current_tool)
        while i < len(text):
            c = text[i]

            if escape:
                escape = False
                i += 1
                continue
            if c == '\\':
                escape = True
                i += 1
                continue

            if in_str is None:
                if text[i:i+3] in ('"""', "'''"):
                    in_str = text[i:i+3]
                    i += 3
                    continue
                elif c in ("'", '"'):
                    in_str = c
                    i += 1
                    continue
            elif in_str in ('"""', "'''"):
                if text[i:i+3] == in_str:
                    in_str = None
                    i += 3
                    continue
            elif in_str in ("'", '"'):
                if c == in_str:
                    in_str = None

            if in_str is None:
                if c == "(":
                    depth += 1
                elif c == ")":
                    depth -= 1
                    if depth == 0:
                        return i + 1
            i += 1
        return -1

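    # Illustrative behaviour of the helpers above (values hand-checked, not
    # executed here): while the buffer ends in a fragment such as "read_fi",
    # check_tool_start() returns (None, -1) so the UI keeps buffering; once the
    # parentheses balance, is_tool_complete() returns the index just past the
    # closing one, e.g. with current_tool = 'read_file':
    #
    #   is_tool_complete('read_file(path="a) b.py")', 0)  ->  25
    #
    # Note the ")" inside the quoted path does not terminate the call.
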
    last_status_len = 0

    for line in iterator:
        if not line: continue

        decoded = line.decode('utf-8') if isinstance(line, bytes) else line
        text_delta, reasoning_delta, is_done = parser_func(decoded)

        # Handle Reasoning/Thinking
        if reasoning_delta:
            full_reasoning += reasoning_delta
            if not is_thinking:
                is_thinking = True
                thinking_via_tag = False
                thinking_start = time.time()
                # Clear any partial tool text if we jumped to thinking
                if buffer:
                    print(buffer, end="", flush=True)
                    buffer = ""
                if SHOW_REASONING:
                    print()

            if SHOW_REASONING:
                print(f"{DIM}{reasoning_delta}{RESET}", end="", flush=True)
            else:
                elapsed = time.time() - thinking_start
                print(f"\r{ai_prefix}{THINK_COLOR}[Thinking... {elapsed:.0f}s]{RESET}\033[K", end="", flush=True)
            continue

        if not text_delta: continue

        # Handle end of thinking (for reasoning field)
        if is_thinking and not thinking_via_tag and not reasoning_delta:
            is_thinking = False
            duration = time.time() - thinking_start
            thinking_duration = duration
            if SHOW_REASONING:
                print(f"\n{ai_prefix}", end="", flush=True)
            else:
                print(f"\r{ai_prefix}{DIM}[Thought for {duration:.0f}s]{RESET}\033[K")
            # Resume AI prefix

        full_response += text_delta
        buffer += text_delta

        # <think> tag handling (for models that output it in text)
        if is_thinking and thinking_via_tag:
            full_reasoning += text_delta
            if SHOW_REASONING:
                print(f"{DIM}{text_delta}{RESET}", end="", flush=True)
            else:
                elapsed = time.time() - thinking_start
                print(f"\r{ai_prefix}{THINK_COLOR}[Thinking... {elapsed:.0f}s]{RESET}\033[K", end="", flush=True)

            if "</think>" in buffer:
                is_thinking = False
                thinking_via_tag = False
                duration = time.time() - thinking_start
                thinking_duration = duration
                if SHOW_REASONING:
                    print(f"\n{ai_prefix}", end="", flush=True)
                else:
                    # Print thought summary
                    print(f"\r{ai_prefix}{DIM}[Thought for {duration:.0f}s]{RESET}\033[K")
                # Remaining content
                _, post = buffer.split("</think>", 1)
                buffer = post
                # Don't continue, so post-think content can be processed for tools
            else:
                continue

        elif not is_thinking and (check_for_think or CURRENT_API == "lm_studio"):
            if "<think>" in buffer:
                is_thinking = True
                thinking_via_tag = True
                thinking_start = time.time()
                check_for_think = False
                # Remove <think> and print pre-content
                pre, post = buffer.split("<think>", 1)
                if pre: print(pre, end="", flush=True)
                buffer = post
                continue

            # Check for partial <think> to avoid leaking tags like <thi
            is_partial = False
            for i in range(1, 7):
                if buffer.endswith("<think>"[:i]):
                    is_partial = True
                    break

            if is_partial:
                continue  # Keep buffering

            if CURRENT_API != "lm_studio":
                check_for_think = False

        # --- Tool Call Detection & Streaming ---

        if not is_thinking:
            # We already added text_delta to buffer at the start of loop

            if not in_tool_call:
                # Check if we found a tool start
                tool, start_idx = check_tool_start(buffer)

                if tool:
                    # Found a valid tool!
                    in_tool_call = True
                    current_tool = tool

                    # Print everything before the tool
                    pre_tool = buffer[:start_idx]
                    if pre_tool:
                        print(pre_tool, end="", flush=True)

                    # Keep ONLY the tool call in buffer
                    buffer = buffer[start_idx:]

                    # Determine friendly action name immediately
                    if tool == 'create_file':
                        action = "Creating"
                    elif tool == 'edit_file':
                        action = "Editing"
                    elif tool == 'delete_file':
                        action = "Deleting"
                    elif tool == 'read_file':
                        action = "Reading"
                    elif tool == 'get_current_directory_structure':
                        action = "Scanning directory"
                    else:
                        action = "Running"

                    # Initial status with friendly name
                    status = f" {TOOL_COLOR}> {action}...{RESET}"
                    print(f"\n{status}", end="", flush=True)
                    last_status_len = len(status) - len(TOOL_COLOR) - len(RESET)  # Approx visible len
                elif start_idx == -1:
                    pass
                else:
                    safe_len = len(buffer)
                    for t in VALID_TOOLS:
                        for i in range(len(t) + 1, 0, -1):
                            prefix = (t + "(")[:i]
                            if buffer.endswith(prefix):
                                safe_len = len(buffer) - i
                                break
                        if safe_len < len(buffer): break

                    if safe_len > 0:
                        to_print = buffer[:safe_len]
                        print(to_print, end="", flush=True)
                        buffer = buffer[safe_len:]

            else:
                # We ARE in a tool call
                end_idx = is_tool_complete(buffer, 0)

                # Update status
                path = get_tool_path(buffer)
                path_str = f" {path}" if path else ""

                if current_tool == 'create_file':
                    action = "Creating"
                elif current_tool == 'edit_file':
                    action = "Editing"
                elif current_tool == 'delete_file':
                    action = "Deleting"
                elif current_tool == 'read_file':
                    action = "Reading"
                elif current_tool == 'get_current_directory_structure':
                    action = "Scanning directory"
                else:
                    action = "Running"

                status_line = f"\r{TOOL_COLOR}> {action}{path_str}...{RESET}"
                # Pad with spaces to clear previous longer status
                padding = " " * max(0, last_status_len - len(status_line) + 10)
                print(f"{status_line}{padding}", end="", flush=True)
                last_status_len = len(status_line)

                if end_idx != -1:
                    # Found the end of the tool call!
                    in_tool_call = False
                    current_tool = None
                    print("\r\033[K", end="")  # Clear the status line

                    # Truncate full_response to end exactly at the closing parenthesis
                    # This prevents leaking any text that might have followed in the same chunk
                    extra_chars = len(buffer) - end_idx
                    if extra_chars > 0:
                        full_response = full_response[:-extra_chars]
                    return full_response, full_reasoning, thinking_duration

        if is_done:
            break

    if buffer:
        if in_tool_call:
            # Clear the status line before printing the partial tool call
            print("\r\033[K", end="")
        print(buffer, end="", flush=True)

    print("\n")
    return full_response, full_reasoning, thinking_duration

def _generic_chat_stream(url, headers, payload, parser_func, line_generator=None):
    """Make streaming API request and handle response."""
    try:
        with requests.post(url, json=payload, stream=True, headers=headers) as r:
            if r.status_code != 200:
                print(f"\n\033[91mAPI Error: {r.status_code} - {r.text}\033[0m")
                return None
            return _process_stream_ui(r, parser_func, line_generator)
    except KeyboardInterrupt:
        if interrupt.should_quit(): sys.exit(0)
        print("\033[38;5;213m\nGeneration cancelled\033[0m")
        return None
    except Exception as e:
        print(f"\n\033[91mAPI Error: {e}\033[0m")
        return None

def stream_chat_anthropic(model_id, messages):
    system_prompt = ""
    anthropic_messages = []
    for msg in messages:
        if msg['role'] == 'system': system_prompt = msg['content']
        else: anthropic_messages.append({"role": msg['role'], "content": msg['content']})

    payload = {
        "model": model_id, "max_tokens": 4096, "messages": anthropic_messages, "stream": True
    }
    if system_prompt: payload["system"] = system_prompt

    headers = {
        "x-api-key": ANTHROPIC_API_KEY, "anthropic-version": "2023-06-01", "content-type": "application/json"
    }
    return _generic_chat_stream(f"{ANTHROPIC_BASE_URL}/messages", headers, payload, _parse_anthropic_chunk)

def stream_chat_openai(model_id, messages):
    payload = {"model": model_id, "messages": messages, "stream": True}
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
    return _generic_chat_stream(f"{OPENAI_BASE_URL}/chat/completions", headers, payload, _parse_openai_chunk)

def stream_chat_ollama(model_id, messages):
    payload = {"model": model_id, "messages": messages, "stream": True}
    is_cloud = "cloud" in model_id.lower()
    url = f"{OLLAMA_CLOUD_BASE_URL}/chat/completions" if is_cloud else f"{OLLAMA_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    if OLLAMA_API_KEY: headers["Authorization"] = f"Bearer {OLLAMA_API_KEY}"
    return _generic_chat_stream(url, headers, payload, _parse_openai_chunk)

def stream_chat_cerebras(model_id, messages):
    payload = {"model": model_id, "messages": messages, "stream": True}
    headers = {"Authorization": f"Bearer {CEREBRAS_API_KEY}", "Content-Type": "application/json"}
    return _generic_chat_stream(f"{CEREBRAS_BASE_URL}/chat/completions", headers, payload, _parse_openai_chunk)

def stream_chat_gemini(model_id, messages):
    if config.is_using_google_auth(): return stream_chat_gemini_oauth(model_id, messages)

    contents = []
    system_instruction = None
    for msg in messages:
        if msg['role'] == 'system': system_instruction = msg['content']
        elif msg['role'] == 'user': contents.append({"role": "user", "parts": [{"text": msg['content']}]})
        elif msg['role'] == 'assistant': contents.append({"role": "model", "parts": [{"text": msg['content']}]})

    payload = {"contents": contents}
    if system_instruction: payload["system_instruction"] = {"parts": [{"text": system_instruction}]}

    url = f"{GEMINI_BASE_URL}/models/{model_id}:streamGenerateContent?key={GEMINI_API_KEY}&alt=sse"
    return _generic_chat_stream(url, {}, payload, _parse_gemini_chunk)

def stream_chat_gemini_oauth(model_id, messages):
    token = auth.get_auth_token()
    if not token:
        print("\033[91mNo Google Auth token found. Please login first using '/login'.\033[0m")
        return None

    project_id = os.environ.get('GOOGLE_CLOUD_PROJECT') or os.environ.get('GOOGLE_CLOUD_PROJECT_ID') or config.get_google_cloud_project()
    if not project_id:
        print("\033[91mGoogle Cloud Project ID is required. Set it in /gemini-auth-mode.\033[0m")
        return None

    contents = []
    system_instruction = None
    for msg in messages:
        if msg['role'] == 'system': system_instruction = {"parts": [{"text": msg['content']}]}
        elif msg['role'] == 'user': contents.append({"role": "user", "parts": [{"text": msg['content']}]})
        elif msg['role'] == 'assistant': contents.append({"role": "model", "parts": [{"text": msg['content']}]})

    clean_model_id = model_id[7:] if model_id.startswith('models/') else model_id
    payload = {
        "model": clean_model_id,
        "user_prompt_id": str(uuid.uuid4()),
        "request": {"contents": contents}
    }
    if project_id: payload["project"] = project_id
    if system_instruction: payload["request"]["systemInstruction"] = system_instruction

    url = f"{CODE_ASSIST_ENDPOINT}/{CODE_ASSIST_API_VERSION}:streamGenerateContent?alt=sse"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "User-Agent": get_user_agent(clean_model_id)
    }

    def smart_oauth_generator(response):
        # The Code Assist endpoint can split one JSON object across several
        # "data:" lines; buffer until a blank line, then yield the joined payload.
        buffered_lines = []
        for line in response.iter_lines():
            if not line:
                if buffered_lines:
                    yield "\n".join(buffered_lines)
                    buffered_lines = []
                continue
            decoded = line.decode('utf-8')
            if decoded.startswith("data: "):
                buffered_lines.append(decoded[6:].strip())
            elif decoded.strip() == "[DONE]":
                break

    def smart_oauth_parser(json_str):
        # Code Assist wraps the usual Gemini body under a top-level "response" key.
        try:
            chunk = json.loads(json_str)
            candidate_source = chunk.get('response', chunk)
            text = ""
            if 'candidates' in candidate_source:
                for candidate in candidate_source['candidates']:
                    if 'content' in candidate and 'parts' in candidate['content']:
                        for part in candidate['content']['parts']:
                            text += part.get('text', '')
                return text, "", False
        except json.JSONDecodeError:
            pass
        return "", "", False

    return _generic_chat_stream(url, headers, payload, smart_oauth_parser, smart_oauth_generator)

def stream_chat(model_id, messages):
    start_time = time.time()
    response = None
    reasoning = ""
    thinking_duration = 0

    # Immediate feedback
    ai_prefix = f"{Colors.SECONDARY}{Icons.DIAMOND}{Colors.RESET} "
    print(ai_prefix, end="", flush=True)

    # Disable typing during generation
    interrupt.set_echo(False)

    try:
        if CURRENT_API == "gemini":
            res = stream_chat_gemini(model_id, messages)
        elif CURRENT_API == "cerebras":
            res = stream_chat_cerebras(model_id, messages)
        elif CURRENT_API == "anthropic":
            res = stream_chat_anthropic(model_id, messages)
        elif CURRENT_API == "openai":
            res = stream_chat_openai(model_id, messages)
        elif CURRENT_API == "ollama":
            res = stream_chat_ollama(model_id, messages)
        else:
            payload = {"model": model_id, "messages": messages, "stream": True}
            res = _generic_chat_stream(f"{BASE_URL}/chat/completions", {}, payload, _parse_openai_chunk)

        if isinstance(res, tuple):
            if len(res) == 3:
                response, reasoning, thinking_duration = res
            else:
                response, reasoning = res
        else:
            response = res

    finally:
        # Re-enable typing and clear any buffered input
        interrupt.flush_input()
        interrupt.set_echo(True)

    if response:
        duration = time.time() - start_time
        STATS.record_request(model_id, duration)

    return response, reasoning, thinking_duration
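
# End-to-end usage (illustrative sketch; the model id and key are assumptions):
#
#   from grucli import api
#   api.set_api_config("anthropic", "sk-ant-...")
#   messages = [{"role": "system", "content": api.get_system_prompt()},
#               {"role": "user", "content": "hello"}]
#   text, reasoning, thought_secs = api.stream_chat("claude-3-5-haiku-latest", messages)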