gemcode 0.3.59__tar.gz → 0.3.64__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gemcode-0.3.59/src/gemcode.egg-info → gemcode-0.3.64}/PKG-INFO +1 -1
- {gemcode-0.3.59 → gemcode-0.3.64}/pyproject.toml +1 -1
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/agent.py +115 -25
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/callbacks.py +24 -9
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/invoke.py +41 -3
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/model_errors.py +32 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/plugins/terminal_hooks_plugin.py +13 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/session_runtime.py +111 -19
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/__init__.py +44 -1
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/bash.py +30 -4
- gemcode-0.3.64/src/gemcode/tools/notebook.py +242 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/subtask.py +82 -17
- gemcode-0.3.64/src/gemcode/tools/tasks.py +211 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/todo.py +33 -0
- gemcode-0.3.64/src/gemcode/tools/web_search.py +247 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tui/scrollback.py +12 -0
- {gemcode-0.3.59 → gemcode-0.3.64/src/gemcode.egg-info}/PKG-INFO +1 -1
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode.egg-info/SOURCES.txt +3 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/LICENSE +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/MANIFEST.in +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/README.md +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/setup.cfg +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/__init__.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/__main__.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/audit.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/autocompact.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/capability_routing.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/cli.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/compaction.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/computer_use/__init__.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/computer_use/browser_computer.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/config.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/context_budget.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/context_warning.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/credentials.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/hitl_session.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/hooks.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/intent_classifier.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/interactions.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/kairos_daemon.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/limits.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/live_audio_engine.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/logging_config.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/mcp_loader.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/memory/__init__.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/memory/embedding_memory_service.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/memory/file_memory_service.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/modality_tools.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/model_routing.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/openapi_loader.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/paths.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/permissions.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/plugins/__init__.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/plugins/tool_recovery_plugin.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/pricing.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/prompt_suggestions.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/query/__init__.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/query/config.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/query/deps.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/query/engine.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/query/stop_hooks.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/query/token_budget.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/query/transitions.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/refine.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/repl_commands.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/repl_slash.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/review_agent.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/session_store.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/slash_commands.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/thinking.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tool_prompt_manifest.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tool_registry.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/browser.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/edit.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/filesystem.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/notes.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/search.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/shell.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/shell_gate.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/think.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools/web.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tools_inspector.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/trust.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tui/input_handler.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tui/spinner.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tui/welcome_banner.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/tui/welcome_rich.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/version.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/vertex.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/web/__init__.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/web/claude_sse_adapter.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/web/terminal_repl.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode/workspace_hints.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode.egg-info/dependency_links.txt +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode.egg-info/entry_points.txt +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode.egg-info/requires.txt +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/src/gemcode.egg-info/top_level.txt +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_agent_instruction.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_autocompact.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_capability_routing.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_claude_web_adapter_sse.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_cli_init.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_computer_use_permissions.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_context_budget.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_context_warning.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_credentials.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_interactive_permission_ask.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_kairos_scheduler.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_modality_tools.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_model_error_retry.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_model_errors.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_model_routing.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_paths.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_permissions.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_prompt_suggestions.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_repl_commands.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_repl_slash.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_slash_commands.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_thinking_config.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_token_budget.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_tool_context_circulation.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_tools.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_tools_inspector.py +0 -0
- {gemcode-0.3.59 → gemcode-0.3.64}/tests/test_workspace_hints.py +0 -0
|
@@ -29,6 +29,17 @@ from gemcode.tools import build_function_tools
|
|
|
29
29
|
from gemcode.tool_prompt_manifest import build_tool_manifest
|
|
30
30
|
|
|
31
31
|
|
|
32
|
+
def build_global_instruction() -> str:
|
|
33
|
+
"""Global instruction applied to the entire agent tree (via ADK plugin)."""
|
|
34
|
+
return (
|
|
35
|
+
"You are GemCode, an expert software engineering agent powered by Google Gemini. "
|
|
36
|
+
"Think deeply about what the person actually wants before you do anything. "
|
|
37
|
+
"Use exactly as many tools as the task genuinely requires — no more. "
|
|
38
|
+
"Act fully and autonomously when action is needed. "
|
|
39
|
+
"Always use read-only tools before shell or write tools."
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
32
43
|
def _chain_before_model_callbacks(*callbacks):
|
|
33
44
|
cbs = [c for c in callbacks if c is not None]
|
|
34
45
|
if not cbs:
|
|
@@ -197,7 +208,7 @@ def _build_runtime_facts(cfg: GemCodeConfig) -> str:
|
|
|
197
208
|
if getattr(cfg, "enable_memory", False):
|
|
198
209
|
mem_path = root / ".gemcode" / "memories.jsonl"
|
|
199
210
|
mem_kind = "embedding-backed" if getattr(cfg, "enable_embeddings", False) else "keyword-backed"
|
|
200
|
-
caps.append(f"memory ON ({mem_kind}, stored at {mem_path}; ADK preload_memory injects relevant memories before each turn)")
|
|
211
|
+
caps.append(f"memory ON ({mem_kind}, stored at {mem_path}; ADK preload_memory auto-injects relevant memories before each turn; use load_memory(query) for explicit on-demand retrieval)")
|
|
201
212
|
if getattr(cfg, "enable_computer_use", False):
|
|
202
213
|
caps.append("computer_use ON (tools: navigate, click_at, type_text_at, browser_screenshot, browser_find_element, etc.)")
|
|
203
214
|
if getattr(cfg, "enable_code_executor", False):
|
|
@@ -515,11 +526,12 @@ You have native deep thinking capability — use it actively:
|
|
|
515
526
|
- For **subfolders**: `bash("cargo build --release", cwd_subdir="backend")`
|
|
516
527
|
|
|
517
528
|
- **Long-running servers / watchers** — use `bash` with `background=True`:
|
|
518
|
-
- `bash("npm run dev", background=True)` — start the dev server in background
|
|
529
|
+
- `bash("npm run dev", background=True)` — start the dev server in background → returns PID
|
|
519
530
|
- `bash("python manage.py runserver", background=True)` — Django server
|
|
520
|
-
- `bash("tail -f logs/app.log", background=True)` — background log watcher
|
|
521
531
|
- NEVER call `bash("npm run dev")` without `background=True` — it blocks forever and crashes the turn
|
|
522
|
-
- After starting
|
|
532
|
+
- After starting: use `task_output(pid)` to read startup logs, then check if port is ready
|
|
533
|
+
- Use `list_tasks()` to see all running background processes
|
|
534
|
+
- Use `kill_task(pid)` to stop a background server when done
|
|
523
535
|
|
|
524
536
|
- **`run_command`** — simple single-executable calls without shell features:
|
|
525
537
|
- `run_command("npm", args=["install", "--legacy-peer-deps"])` — clean npm install
|
|
@@ -549,7 +561,21 @@ You have native deep thinking capability — use it actively:
|
|
|
549
561
|
- **`move_file`** — rename or reorganize files/directories within the project.
|
|
550
562
|
- **`delete_file`** — remove a single file.
|
|
551
563
|
|
|
564
|
+
### Memory (when memory is enabled)
|
|
565
|
+
- **`preload_memory`** — automatically injects relevant past memories before each turn (runs in background).
|
|
566
|
+
- **`load_memory`** — explicit on-demand memory search:
|
|
567
|
+
- `load_memory("authentication patterns used in this project")` — recall specific knowledge
|
|
568
|
+
- `load_memory("previous bugs fixed in the auth module")` — targeted retrieval
|
|
569
|
+
- Use when the preloaded context is missing something specific you know you've seen before.
|
|
570
|
+
|
|
552
571
|
### Research and documentation
|
|
572
|
+
- **`web_search`** — search the web without any API key or research mode:
|
|
573
|
+
- `web_search("python asyncio tutorial 2025")` — general search
|
|
574
|
+
- `web_search("fastapi jwt authentication example")` — find code examples
|
|
575
|
+
- `web_search("react 19 breaking changes")` — check recent releases
|
|
576
|
+
- Returns titles, URLs, and snippets. Follow with `web_fetch(url)` to read full content.
|
|
577
|
+
- Use this for quick lookups; use `/research on` for deep multi-page research.
|
|
578
|
+
|
|
553
579
|
- **`web_fetch`** — fetch docs, APIs, changelogs, READMEs from the web:
|
|
554
580
|
- `web_fetch("https://docs.python.org/3/library/asyncio.html")` — official docs
|
|
555
581
|
- `web_fetch("https://api.github.com/repos/owner/repo/releases/latest")` — API data
|
|
@@ -566,6 +592,23 @@ You have native deep thinking capability — use it actively:
|
|
|
566
592
|
- **`todo_write`** — track work items. Use for any task with 3+ steps.
|
|
567
593
|
- Create at task start, mark completed as you finish, merge updates.
|
|
568
594
|
|
|
595
|
+
- **`todo_read`** — read the current session todo list.
|
|
596
|
+
- Call this to check progress, find task ids for a merge update, or verify what's pending.
|
|
597
|
+
|
|
598
|
+
- **`notebook_read`** — read a Jupyter notebook (.ipynb) as structured cells.
|
|
599
|
+
- Always prefer this over `read_file` for `.ipynb` files — gives clean cell-by-cell output.
|
|
600
|
+
- `notebook_read("analysis.ipynb")` — shows all cells with source and outputs.
|
|
601
|
+
|
|
602
|
+
- **`notebook_edit`** — edit a cell in a Jupyter notebook:
|
|
603
|
+
- `notebook_edit("nb.ipynb", cell_index=2, new_source="import pandas as pd")` — replace cell
|
|
604
|
+
- `notebook_edit("nb.ipynb", cell_index=0, new_source="# Title", cell_type="markdown", edit_mode="insert")` — insert
|
|
605
|
+
- `notebook_edit("nb.ipynb", cell_index=3, new_source="", edit_mode="delete")` — delete cell
|
|
606
|
+
|
|
607
|
+
- **Background task management** — for processes started with `bash(..., background=True)`:
|
|
608
|
+
- `list_tasks()` — see all background tasks (PID, command, status: running/finished)
|
|
609
|
+
- `task_output(pid)` — read stdout/stderr captured from a background task
|
|
610
|
+
- `kill_task(pid)` — gracefully stop a background task (use `force=True` for SIGKILL)
|
|
611
|
+
|
|
569
612
|
- **`run_subtask`** — spawn an isolated sub-agent with its own fresh context window.
|
|
570
613
|
- The sub-agent has the same tools (bash, read_file, grep, etc.) but starts from scratch.
|
|
571
614
|
- Use when a task would bloat your context too much: e.g. "read all 40 test files and find patterns"
|
|
@@ -587,14 +630,34 @@ One user message = many model↔tool rounds (up to 256 LLM calls by default). Th
|
|
|
587
630
|
|
|
588
631
|
**Do not stop after step 2 or 3** — complete the full task.
|
|
589
632
|
|
|
590
|
-
## Parallelism — batch independent work
|
|
633
|
+
## Parallelism — batch independent work aggressively
|
|
591
634
|
Issue independent tool calls **in the same turn** when outputs don't depend on each other.
|
|
592
|
-
This is faster
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
-
|
|
635
|
+
This is always faster. **Default to parallel; only serialize when you must.**
|
|
636
|
+
|
|
637
|
+
Concrete patterns:
|
|
638
|
+
|
|
639
|
+
**Parallel file exploration (always do this):**
|
|
640
|
+
- Reading multiple files → emit all `read_file` calls in one turn, not one by one
|
|
641
|
+
- Grepping different patterns → multiple `grep_content` in one response
|
|
642
|
+
- `list_directory` + `glob_files` → both at once
|
|
643
|
+
|
|
644
|
+
**Parallel sub-agent exploration (OpenClaude pattern):**
|
|
645
|
+
When a task requires understanding several subsystems before acting:
|
|
646
|
+
1. Spawn parallel `run_subtask` workers, one per subsystem
|
|
647
|
+
2. Wait for all results to return in the same turn
|
|
648
|
+
3. Synthesise findings and execute the change
|
|
649
|
+
|
|
650
|
+
Example — understanding a codebase before a big refactor:
|
|
651
|
+
```
|
|
652
|
+
run_subtask("Analyse src/auth/ — how does authentication flow work? List all key files and patterns.")
|
|
653
|
+
run_subtask("Analyse src/api/ — what endpoints exist? How are they protected?")
|
|
654
|
+
run_subtask("Analyse tests/auth* — what is the test coverage for auth?")
|
|
655
|
+
```
|
|
656
|
+
All three run concurrently. Then synthesise and act.
|
|
657
|
+
|
|
658
|
+
**Parallel git + build:**
|
|
659
|
+
- `git status && git diff --stat` → one bash call
|
|
660
|
+
- Running lint + type-check → `npm run lint && npm run typecheck` in one call
|
|
598
661
|
|
|
599
662
|
Sequential only when step B genuinely needs step A's output.
|
|
600
663
|
|
|
@@ -642,10 +705,36 @@ When `code_executor ON` (see Runtime facts above):
|
|
|
642
705
|
- The sandbox does NOT have internet access or filesystem access — use for pure computation
|
|
643
706
|
- For file I/O or shell commands, use the standard tools (`bash`, `write_file`, etc.)
|
|
644
707
|
|
|
708
|
+
## Verification contract (mandatory for non-trivial tasks)
|
|
709
|
+
|
|
710
|
+
After completing any implementation that touches **3 or more files**, introduces a new feature, or fixes a bug, you **MUST** run a verification pass before calling the task done.
|
|
711
|
+
|
|
712
|
+
**How to verify:**
|
|
713
|
+
|
|
714
|
+
Option A — Run tests/build (preferred when tests exist):
|
|
715
|
+
```
|
|
716
|
+
bash("npm run build 2>&1 | tail -30")
|
|
717
|
+
bash("pytest tests/ -x -q --tb=short 2>&1 | head -80")
|
|
718
|
+
```
|
|
719
|
+
|
|
720
|
+
Option B — Spawn a verification sub-agent (for complex multi-file changes):
|
|
721
|
+
```
|
|
722
|
+
run_subtask(
|
|
723
|
+
task="You are a strict code reviewer. Verify the following changes are correct, complete, and consistent. Check: (1) syntax errors, (2) logic bugs, (3) broken imports, (4) missing edge cases, (5) consistency across all modified files. Report PASS or FAIL with specific findings.",
|
|
724
|
+
context="Files changed: [list them]. Change summary: [what you did]."
|
|
725
|
+
)
|
|
726
|
+
```
|
|
727
|
+
|
|
728
|
+
**Rules:**
|
|
729
|
+
- If verification finds issues → fix them → verify again. Never stop at a failed verification.
|
|
730
|
+
- Only report "done" after a clean verification pass.
|
|
731
|
+
- For destructive changes (delete, refactor) always run both Option A and Option B.
|
|
732
|
+
- For simple single-file edits, a quick `bash("python3 -c 'import <module>'")` or syntax check is sufficient.
|
|
733
|
+
|
|
645
734
|
## Evaluator-optimizer loop
|
|
646
735
|
For tasks where quality matters:
|
|
647
736
|
1. Complete the task (execute tools, write code, run commands)
|
|
648
|
-
2.
|
|
737
|
+
2. Verify — run tests, build, or spawn a verification sub-agent (see Verification contract above)
|
|
649
738
|
3. If verification fails, read the error, fix, re-verify
|
|
650
739
|
4. Report done only when verified
|
|
651
740
|
|
|
@@ -704,7 +793,9 @@ Use `gh pr create` via `bash`. When asked to create a PR:
|
|
|
704
793
|
- Do NOT retry failing commands in a sleep loop — diagnose the root cause first
|
|
705
794
|
|
|
706
795
|
## Communication
|
|
707
|
-
-
|
|
796
|
+
- **ACT FIRST, narrate after.** Do NOT write out a multi-step numbered plan as prose and then stop. Execute immediately — use tools right away.
|
|
797
|
+
- One short line before the first tool call is fine (e.g. "Reading the auth module..."). That's it. No verbose announcements.
|
|
798
|
+
- If you want to plan, use the **`think` tool privately** — never dump a plan into your text response before acting. The user cannot run plan text; they need results.
|
|
708
799
|
- Summarize tool results in plain language — the user doesn't see raw tool internals.
|
|
709
800
|
- After completing a task: clear summary of what changed, where, and why.
|
|
710
801
|
- If the user pastes UI copy / noise / error output, extract the real intent and act on source files.
|
|
@@ -851,22 +942,10 @@ def build_root_agent(
|
|
|
851
942
|
tool_config=tool_cfg,
|
|
852
943
|
)
|
|
853
944
|
|
|
854
|
-
# global_instruction applies to the entire agent tree (including sub-agents
|
|
855
|
-
# spawned via run_subtask or multi-agent delegation). Keep it short — it's
|
|
856
|
-
# prepended to every agent's effective instruction.
|
|
857
|
-
global_instr = (
|
|
858
|
-
"You are GemCode, an expert software engineering agent powered by Google Gemini. "
|
|
859
|
-
"Think deeply about what the person actually wants before you do anything. "
|
|
860
|
-
"Use exactly as many tools as the task genuinely requires — no more. "
|
|
861
|
-
"Act fully and autonomously when action is needed. "
|
|
862
|
-
"Always use read-only tools before shell or write tools."
|
|
863
|
-
)
|
|
864
|
-
|
|
865
945
|
agent_kwargs: dict = dict(
|
|
866
946
|
model=cfg.model,
|
|
867
947
|
name="gemcode",
|
|
868
948
|
instruction=build_instruction(cfg),
|
|
869
|
-
global_instruction=global_instr,
|
|
870
949
|
tools=tools,
|
|
871
950
|
generate_content_config=gen_cfg,
|
|
872
951
|
**cb_kwargs,
|
|
@@ -876,6 +955,17 @@ def build_root_agent(
|
|
|
876
955
|
if code_executor is not None:
|
|
877
956
|
agent_kwargs["code_executor"] = code_executor
|
|
878
957
|
|
|
958
|
+
# Optional: ADK PlanReActPlanner — injects a structured "plan then act" pass
|
|
959
|
+
# into every turn at the framework level (not just via prompting).
|
|
960
|
+
# Enable with: GEMCODE_PLANREACT=1
|
|
961
|
+
import os as _os
|
|
962
|
+
if _os.environ.get("GEMCODE_PLANREACT", "").lower() in ("1", "true", "yes", "on"):
|
|
963
|
+
try:
|
|
964
|
+
from google.adk.planners import PlanReActPlanner
|
|
965
|
+
agent_kwargs["planner"] = PlanReActPlanner()
|
|
966
|
+
except Exception:
|
|
967
|
+
pass
|
|
968
|
+
|
|
879
969
|
return LlmAgent(**agent_kwargs)
|
|
880
970
|
|
|
881
971
|
|
|
@@ -626,29 +626,44 @@ def make_on_tool_error_callback(cfg: GemCodeConfig):
|
|
|
626
626
|
|
|
627
627
|
|
|
628
628
|
def make_on_model_error_callback(cfg: GemCodeConfig):
|
|
629
|
-
"""Structured model errors to the user + audit trail.
|
|
629
|
+
"""Structured model errors to the user + audit trail.
|
|
630
|
+
|
|
631
|
+
For transient errors (HTTP 503, 429, server-overloaded) we return None so the
|
|
632
|
+
exception propagates to invoke.py, which retries with exponential backoff.
|
|
633
|
+
For permanent errors we absorb and return a user-friendly LlmResponse.
|
|
634
|
+
"""
|
|
630
635
|
|
|
631
636
|
async def on_model_error(*, callback_context, llm_request, error: Exception):
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
if st is not None and not st.get(TERMINAL_REASON_KEY):
|
|
635
|
-
st[TERMINAL_REASON_KEY] = "model_error"
|
|
636
|
-
except Exception:
|
|
637
|
-
pass
|
|
637
|
+
from gemcode.model_errors import is_transient_error
|
|
638
|
+
|
|
638
639
|
append_audit(
|
|
639
640
|
cfg.project_root,
|
|
640
641
|
{
|
|
641
642
|
"phase": "model_exception",
|
|
642
643
|
"error": f"{type(error).__name__}: {error}",
|
|
644
|
+
"transient": is_transient_error(error),
|
|
643
645
|
},
|
|
644
646
|
)
|
|
647
|
+
|
|
648
|
+
# Transient errors (503, 429, server-overloaded): let the exception propagate
|
|
649
|
+
# so invoke.py can retry with backoff. Do NOT set terminal state here — the
|
|
650
|
+
# turn is not over yet.
|
|
651
|
+
if is_transient_error(error):
|
|
652
|
+
return None
|
|
653
|
+
|
|
654
|
+
# Permanent errors: mark session terminal and return a user-friendly message.
|
|
655
|
+
try:
|
|
656
|
+
st = callback_context.state
|
|
657
|
+
if st is not None and not st.get(TERMINAL_REASON_KEY):
|
|
658
|
+
st[TERMINAL_REASON_KEY] = "model_error"
|
|
659
|
+
except Exception:
|
|
660
|
+
pass
|
|
661
|
+
|
|
645
662
|
if _truthy_env("GEMCODE_VERBOSE_MODEL_ERRORS", default=False):
|
|
646
663
|
import traceback
|
|
647
|
-
|
|
648
664
|
traceback.print_exception(type(error), error, error.__traceback__, file=sys.stderr)
|
|
649
665
|
|
|
650
666
|
user_text = format_model_error_for_user(error)
|
|
651
|
-
# Scrollback/TUI already prints "GemCode:" before assistant text — avoid "GemCode: GemCode:".
|
|
652
667
|
from google.adk.models.llm_response import LlmResponse
|
|
653
668
|
from google.genai import types
|
|
654
669
|
|
|
@@ -6,6 +6,7 @@ CLI and tests call `run_turn` with a Runner already bound to app + session servi
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
import asyncio
|
|
9
10
|
import os
|
|
10
11
|
import sys
|
|
11
12
|
from typing import Any
|
|
@@ -16,6 +17,11 @@ from google.adk.runners import Runner
|
|
|
16
17
|
from google.genai import types
|
|
17
18
|
|
|
18
19
|
|
|
20
|
+
# Delays (seconds) between successive transient-error retries: 2s, 5s, 12s.
|
|
21
|
+
# Three retries = up to ~19 seconds of total wait before giving up.
|
|
22
|
+
_TRANSIENT_RETRY_DELAYS = [2.0, 5.0, 12.0]
|
|
23
|
+
|
|
24
|
+
|
|
19
25
|
_HITL_PROMPT_LOCK = Lock()
|
|
20
26
|
|
|
21
27
|
|
|
@@ -160,11 +166,43 @@ async def run_turn(
|
|
|
160
166
|
# Runner handoff loop: if tools request confirmations, we pause here to
|
|
161
167
|
# ask HITL, then send back function responses so ADK can re-execute the
|
|
162
168
|
# tools.
|
|
169
|
+
#
|
|
170
|
+
# Transient API errors (HTTP 503, 429) are retried here with exponential
|
|
171
|
+
# backoff. on_model_error returns None for these, so the exception
|
|
172
|
+
# propagates from runner.run_async and we catch it below.
|
|
163
173
|
do_reset = True
|
|
174
|
+
transient_attempts = 0
|
|
164
175
|
while True:
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
176
|
+
try:
|
|
177
|
+
events = await _await_runner_events(
|
|
178
|
+
next_message=current_message, do_reset=do_reset
|
|
179
|
+
)
|
|
180
|
+
except Exception as _exc:
|
|
181
|
+
from gemcode.model_errors import is_transient_error
|
|
182
|
+
if is_transient_error(_exc) and transient_attempts < len(_TRANSIENT_RETRY_DELAYS):
|
|
183
|
+
delay = _TRANSIENT_RETRY_DELAYS[transient_attempts]
|
|
184
|
+
transient_attempts += 1
|
|
185
|
+
_tui_active = os.environ.get("GEMCODE_TUI_ACTIVE", "0").lower() in ("1", "true", "yes", "on")
|
|
186
|
+
_msg = (
|
|
187
|
+
f"\n[gemcode] Transient API error ({type(_exc).__name__}). "
|
|
188
|
+
f"Retrying in {delay:.0f}s (attempt {transient_attempts}/{len(_TRANSIENT_RETRY_DELAYS)})...\n"
|
|
189
|
+
)
|
|
190
|
+
print(_msg, file=sys.stderr)
|
|
191
|
+
# Surface retry notice in TUI if available.
|
|
192
|
+
if _tui_active:
|
|
193
|
+
try:
|
|
194
|
+
from gemcode.tui import scrollback as _sb
|
|
195
|
+
_sb._transient_retry_notice = _msg # type: ignore[attr-defined]
|
|
196
|
+
except Exception:
|
|
197
|
+
pass
|
|
198
|
+
await asyncio.sleep(delay)
|
|
199
|
+
# Retry the same message from scratch (session history is intact in SQLite).
|
|
200
|
+
continue
|
|
201
|
+
# Non-transient or out of retries: re-raise so the TUI surfaces it.
|
|
202
|
+
raise
|
|
203
|
+
|
|
204
|
+
# Reset transient counter after a successful model call.
|
|
205
|
+
transient_attempts = 0
|
|
168
206
|
collected.extend(events)
|
|
169
207
|
|
|
170
208
|
confirmation_fcs = _get_confirmation_requests(events)
|
|
@@ -5,6 +5,38 @@ from __future__ import annotations
|
|
|
5
5
|
import re
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
def is_transient_error(error: Exception) -> bool:
    """Return True when *error* looks like a transient API failure that is safe to retry.

    Transient means the request itself was fine but the server was temporarily
    unavailable or rate-limited, so retrying the same request (with backoff)
    will likely succeed.

    Checks, in order:

    1. ``google.genai.errors.APIError`` whose ``code`` is 429 or any 5xx value.
    2. Exception class names containing ``ResourceExhausted``,
       ``ServiceUnavailable`` or ``DeadlineExceeded`` (gRPC / google-api-core
       style errors).
    3. Message text containing "503" or "429" together with a matching phrase.
       The comparison is case-insensitive, so "503 Service Unavailable" and
       "429 RESOURCE EXHAUSTED" are recognised as well as lowercase variants.

    Problems importing or inspecting the google-genai SDK are ignored; the
    name and message heuristics still run in that case.
    """
    # The SDK is imported lazily and defensively: classification must keep
    # working (via the heuristics below) when it is missing or broken.
    try:
        from google.genai import errors as genai_errors

        api_error_cls = getattr(genai_errors, "APIError", None)
    except Exception:
        api_error_cls = None

    if api_error_cls is not None and isinstance(error, api_error_cls):
        try:
            code = int(getattr(error, "code", None) or 0)
        except (TypeError, ValueError):
            code = 0  # non-numeric code: fall through to the heuristics
        # 429 = rate limited; every 5xx (502 Bad Gateway, 503, ...) is treated
        # as a server-side failure worth retrying.
        if code == 429 or 500 <= code < 600:
            return True

    # gRPC / google-api-core equivalents, matched by class name so those
    # packages do not have to be importable here.
    type_name = type(error).__name__
    if any(
        marker in type_name
        for marker in ("ResourceExhausted", "ServiceUnavailable", "DeadlineExceeded")
    ):
        return True

    # Fall back to the phrases used in 503 / 429 responses. Lowercase once so
    # the match does not depend on how the server capitalises the text.
    msg = str(error).lower()
    if "503" in msg and any(
        phrase in msg for phrase in ("high demand", "service unavailable", "overloaded")
    ):
        return True
    if "429" in msg and any(
        phrase in msg for phrase in ("rate limit", "quota", "resource exhausted")
    ):
        return True
    return False
|
|
38
|
+
|
|
39
|
+
|
|
8
40
|
def _sanitize_api_text(s: str) -> str:
|
|
9
41
|
"""Strip likely API key material from strings shown to the user."""
|
|
10
42
|
if not s:
|
|
@@ -141,6 +141,19 @@ class GemCodeTerminalHooksPlugin(BasePlugin):
|
|
|
141
141
|
},
|
|
142
142
|
)
|
|
143
143
|
|
|
144
|
+
# Surface suggestion to the TUI by storing it on cfg.
|
|
145
|
+
# The TUI reads cfg._last_prompt_suggestion after each turn and displays it.
|
|
146
|
+
try:
|
|
147
|
+
object.__setattr__(self.cfg, "_last_prompt_suggestion", suggestion)
|
|
148
|
+
except Exception:
|
|
149
|
+
pass
|
|
150
|
+
else:
|
|
151
|
+
# Clear any stale suggestion from the previous turn.
|
|
152
|
+
try:
|
|
153
|
+
object.__setattr__(self.cfg, "_last_prompt_suggestion", None)
|
|
154
|
+
except Exception:
|
|
155
|
+
pass
|
|
156
|
+
|
|
144
157
|
if getattr(self.cfg, "enable_memory", False):
|
|
145
158
|
try:
|
|
146
159
|
await callback_context.add_session_to_memory()
|
|
@@ -22,7 +22,7 @@ warnings.filterwarnings("ignore", category=UserWarning, message=".*EXPERIMENTAL.
|
|
|
22
22
|
from google.adk.runners import Runner
|
|
23
23
|
from google.adk.sessions.sqlite_session_service import SqliteSessionService
|
|
24
24
|
|
|
25
|
-
from gemcode.agent import build_root_agent
|
|
25
|
+
from gemcode.agent import build_global_instruction, build_root_agent
|
|
26
26
|
from gemcode.config import GemCodeConfig
|
|
27
27
|
from gemcode.modality_tools import build_extra_tools as build_modality_extra_tools
|
|
28
28
|
from gemcode.memory.embedding_memory_service import EmbeddingFileMemoryService
|
|
@@ -31,6 +31,52 @@ from gemcode.plugins.terminal_hooks_plugin import GemCodeTerminalHooksPlugin
|
|
|
31
31
|
from gemcode.plugins.tool_recovery_plugin import GemCodeReflectAndRetryToolPlugin
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# ADK App-level feature helpers
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
def _build_context_cache_config():
|
|
39
|
+
"""Return ContextCacheConfig if context caching is enabled, else None.
|
|
40
|
+
|
|
41
|
+
Context caching lets Gemini reuse the compiled representation of a stable
|
|
42
|
+
prefix (system prompt + tools) across multiple turns, cutting ~75% of input
|
|
43
|
+
token costs on long sessions.
|
|
44
|
+
|
|
45
|
+
Opt-out: set ``GEMCODE_CONTEXT_CACHE=0`` in the environment.
|
|
46
|
+
"""
|
|
47
|
+
if os.environ.get("GEMCODE_CONTEXT_CACHE", "1").lower() in ("0", "false", "no", "off"):
|
|
48
|
+
return None
|
|
49
|
+
try:
|
|
50
|
+
from google.adk.agents.context_cache_config import ContextCacheConfig
|
|
51
|
+
return ContextCacheConfig(
|
|
52
|
+
cache_intervals=10, # refresh the cache every 10 invocations
|
|
53
|
+
ttl_seconds=1800, # cache lives 30 minutes
|
|
54
|
+
min_tokens=1024, # skip caching tiny sessions (< ~1 K tokens)
|
|
55
|
+
)
|
|
56
|
+
except Exception:
|
|
57
|
+
return None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _build_app(agent, plugins, cfg: GemCodeConfig):
|
|
61
|
+
"""Wrap the root agent in an ADK App for modern plugin + context-cache support.
|
|
62
|
+
|
|
63
|
+
Using ``App`` instead of passing ``agent`` + ``plugins`` directly to ``Runner``
|
|
64
|
+
is the recommended ADK pattern as of ADK 1.x (``plugins=`` on ``Runner`` is
|
|
65
|
+
officially deprecated).
|
|
66
|
+
"""
|
|
67
|
+
try:
|
|
68
|
+
from google.adk.apps.app import App
|
|
69
|
+
return App(
|
|
70
|
+
name="gemcode",
|
|
71
|
+
root_agent=agent,
|
|
72
|
+
plugins=plugins,
|
|
73
|
+
context_cache_config=_build_context_cache_config(),
|
|
74
|
+
)
|
|
75
|
+
except Exception:
|
|
76
|
+
# Fall back silently — Runner still accepts the legacy kwargs.
|
|
77
|
+
return None
|
|
78
|
+
|
|
79
|
+
|
|
34
80
|
def session_db_path(cfg: GemCodeConfig) -> Path:
|
|
35
81
|
return cfg.project_root / ".gemcode" / "sessions.sqlite"
|
|
36
82
|
|
|
@@ -242,15 +288,23 @@ def _make_safe_computer_toolset(computer):
|
|
|
242
288
|
|
|
243
289
|
|
|
244
290
|
def _build_artifact_service(cfg: GemCodeConfig):
|
|
245
|
-
"""
|
|
246
|
-
Return an ADK ArtifactService for this session, or None if disabled.
|
|
291
|
+
"""Return an ADK ArtifactService for this session, or None if disabled.
|
|
247
292
|
|
|
248
|
-
Uses
|
|
249
|
-
|
|
250
|
-
|
|
293
|
+
Uses ``FileArtifactService`` backed by ``.gemcode/artifacts/`` so that
|
|
294
|
+
artifacts (screenshots, generated reports, diffs, etc.) survive session
|
|
295
|
+
restarts. Falls back to ``InMemoryArtifactService`` if the file-based
|
|
296
|
+
service is unavailable (older ADK).
|
|
251
297
|
"""
|
|
252
298
|
if not getattr(cfg, "enable_artifacts", True):
|
|
253
299
|
return None
|
|
300
|
+
try:
|
|
301
|
+
from google.adk.artifacts import FileArtifactService
|
|
302
|
+
artifacts_dir = cfg.project_root / ".gemcode" / "artifacts"
|
|
303
|
+
artifacts_dir.mkdir(parents=True, exist_ok=True)
|
|
304
|
+
return FileArtifactService(root_dir=artifacts_dir)
|
|
305
|
+
except Exception:
|
|
306
|
+
pass
|
|
307
|
+
# Fallback for older ADK versions that don't have FileArtifactService.
|
|
254
308
|
try:
|
|
255
309
|
from google.adk.artifacts import InMemoryArtifactService
|
|
256
310
|
return InMemoryArtifactService()
|
|
@@ -344,11 +398,32 @@ def create_runner(cfg: GemCodeConfig, extra_tools: list | None = None) -> Runner
|
|
|
344
398
|
db.parent.mkdir(parents=True, exist_ok=True)
|
|
345
399
|
session_service = SqliteSessionService(str(db))
|
|
346
400
|
|
|
347
|
-
|
|
348
|
-
#
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
401
|
+
# ── Plugins ──────────────────────────────────────────────────────────────
|
|
402
|
+
# Recovery plugin first so it can intercept tool errors before terminal hooks.
|
|
403
|
+
plugins = [GemCodeReflectAndRetryToolPlugin(cfg), GemCodeTerminalHooksPlugin(cfg)]
|
|
404
|
+
|
|
405
|
+
# Global instruction is now applied via ADK's GlobalInstructionPlugin (the
|
|
406
|
+
# modern replacement for the deprecated LlmAgent.global_instruction field).
|
|
407
|
+
try:
|
|
408
|
+
from google.adk.plugins.global_instruction_plugin import GlobalInstructionPlugin
|
|
409
|
+
plugins.insert(0, GlobalInstructionPlugin(build_global_instruction()))
|
|
410
|
+
except Exception:
|
|
411
|
+
pass
|
|
412
|
+
|
|
413
|
+
# Optional: rich YAML debug log (every LLM request/response + tool calls).
|
|
414
|
+
# Enable with: GEMCODE_DEBUG_LOG=1
|
|
415
|
+
if os.environ.get("GEMCODE_DEBUG_LOG", "").lower() in ("1", "true", "yes", "on"):
|
|
416
|
+
try:
|
|
417
|
+
from google.adk.plugins.debug_logging_plugin import DebugLoggingPlugin
|
|
418
|
+
debug_log_path = cfg.project_root / ".gemcode" / "debug.yaml"
|
|
419
|
+
plugins.append(DebugLoggingPlugin(
|
|
420
|
+
output_path=str(debug_log_path),
|
|
421
|
+
include_session_state=True,
|
|
422
|
+
))
|
|
423
|
+
except Exception:
|
|
424
|
+
pass
|
|
425
|
+
|
|
426
|
+
# ── Memory service ────────────────────────────────────────────────────────
|
|
352
427
|
memory_service = None
|
|
353
428
|
if getattr(cfg, "enable_memory", False):
|
|
354
429
|
mem_path = cfg.project_root / ".gemcode" / "memories.jsonl"
|
|
@@ -361,14 +436,31 @@ def create_runner(cfg: GemCodeConfig, extra_tools: list | None = None) -> Runner
|
|
|
361
436
|
|
|
362
437
|
artifact_service = _build_artifact_service(cfg)
|
|
363
438
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
439
|
+
# ── Runner via ADK App (modern pattern) ──────────────────────────────────
|
|
440
|
+
# App is the recommended top-level container as of ADK 1.x. It owns the
|
|
441
|
+
# plugin list and context-cache config so Runner stays clean.
|
|
442
|
+
# ``plugins=`` on Runner is officially deprecated; using App avoids the
|
|
443
|
+
# DeprecationWarning and enables context caching + future App-level features.
|
|
444
|
+
app = _build_app(agent, plugins, cfg)
|
|
445
|
+
|
|
446
|
+
if app is not None:
|
|
447
|
+
runner_kwargs: dict = dict(
|
|
448
|
+
app=app,
|
|
449
|
+
session_service=session_service,
|
|
450
|
+
memory_service=memory_service,
|
|
451
|
+
auto_create_session=True,
|
|
452
|
+
)
|
|
453
|
+
else:
|
|
454
|
+
# Legacy fallback if App is unavailable (very old ADK installs).
|
|
455
|
+
runner_kwargs = dict(
|
|
456
|
+
app_name="gemcode",
|
|
457
|
+
agent=agent,
|
|
458
|
+
session_service=session_service,
|
|
459
|
+
plugins=plugins,
|
|
460
|
+
memory_service=memory_service,
|
|
461
|
+
auto_create_session=True,
|
|
462
|
+
)
|
|
463
|
+
|
|
372
464
|
if artifact_service is not None:
|
|
373
465
|
runner_kwargs["artifact_service"] = artifact_service
|
|
374
466
|
|