gemcode 0.3.59__tar.gz → 0.3.65__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. {gemcode-0.3.59/src/gemcode.egg-info → gemcode-0.3.65}/PKG-INFO +1 -1
  2. {gemcode-0.3.59 → gemcode-0.3.65}/pyproject.toml +1 -1
  3. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/agent.py +115 -25
  4. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/callbacks.py +24 -9
  5. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/config.py +2 -2
  6. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/invoke.py +41 -3
  7. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/model_errors.py +32 -0
  8. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/plugins/terminal_hooks_plugin.py +13 -0
  9. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/repl_commands.py +1 -1
  10. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/repl_slash.py +2 -2
  11. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/session_runtime.py +111 -19
  12. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/__init__.py +44 -1
  13. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/bash.py +30 -4
  14. gemcode-0.3.65/src/gemcode/tools/notebook.py +242 -0
  15. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/subtask.py +82 -17
  16. gemcode-0.3.65/src/gemcode/tools/tasks.py +211 -0
  17. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/todo.py +33 -0
  18. gemcode-0.3.65/src/gemcode/tools/web_search.py +247 -0
  19. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tui/scrollback.py +12 -0
  20. {gemcode-0.3.59 → gemcode-0.3.65/src/gemcode.egg-info}/PKG-INFO +1 -1
  21. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode.egg-info/SOURCES.txt +3 -0
  22. {gemcode-0.3.59 → gemcode-0.3.65}/LICENSE +0 -0
  23. {gemcode-0.3.59 → gemcode-0.3.65}/MANIFEST.in +0 -0
  24. {gemcode-0.3.59 → gemcode-0.3.65}/README.md +0 -0
  25. {gemcode-0.3.59 → gemcode-0.3.65}/setup.cfg +0 -0
  26. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/__init__.py +0 -0
  27. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/__main__.py +0 -0
  28. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/audit.py +0 -0
  29. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/autocompact.py +0 -0
  30. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/capability_routing.py +0 -0
  31. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/cli.py +0 -0
  32. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/compaction.py +0 -0
  33. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/computer_use/__init__.py +0 -0
  34. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/computer_use/browser_computer.py +0 -0
  35. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/context_budget.py +0 -0
  36. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/context_warning.py +0 -0
  37. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/credentials.py +0 -0
  38. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/hitl_session.py +0 -0
  39. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/hooks.py +0 -0
  40. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/intent_classifier.py +0 -0
  41. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/interactions.py +0 -0
  42. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/kairos_daemon.py +0 -0
  43. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/limits.py +0 -0
  44. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/live_audio_engine.py +0 -0
  45. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/logging_config.py +0 -0
  46. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/mcp_loader.py +0 -0
  47. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/memory/__init__.py +0 -0
  48. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/memory/embedding_memory_service.py +0 -0
  49. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/memory/file_memory_service.py +0 -0
  50. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/modality_tools.py +0 -0
  51. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/model_routing.py +0 -0
  52. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/openapi_loader.py +0 -0
  53. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/paths.py +0 -0
  54. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/permissions.py +0 -0
  55. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/plugins/__init__.py +0 -0
  56. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/plugins/tool_recovery_plugin.py +0 -0
  57. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/pricing.py +0 -0
  58. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/prompt_suggestions.py +0 -0
  59. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/query/__init__.py +0 -0
  60. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/query/config.py +0 -0
  61. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/query/deps.py +0 -0
  62. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/query/engine.py +0 -0
  63. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/query/stop_hooks.py +0 -0
  64. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/query/token_budget.py +0 -0
  65. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/query/transitions.py +0 -0
  66. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/refine.py +0 -0
  67. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/review_agent.py +0 -0
  68. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/session_store.py +0 -0
  69. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/slash_commands.py +0 -0
  70. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/thinking.py +0 -0
  71. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tool_prompt_manifest.py +0 -0
  72. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tool_registry.py +0 -0
  73. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/browser.py +0 -0
  74. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/edit.py +0 -0
  75. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/filesystem.py +0 -0
  76. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/notes.py +0 -0
  77. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/search.py +0 -0
  78. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/shell.py +0 -0
  79. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/shell_gate.py +0 -0
  80. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/think.py +0 -0
  81. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools/web.py +0 -0
  82. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tools_inspector.py +0 -0
  83. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/trust.py +0 -0
  84. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tui/input_handler.py +0 -0
  85. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tui/spinner.py +0 -0
  86. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tui/welcome_banner.py +0 -0
  87. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/tui/welcome_rich.py +0 -0
  88. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/version.py +0 -0
  89. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/vertex.py +0 -0
  90. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/web/__init__.py +0 -0
  91. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/web/claude_sse_adapter.py +0 -0
  92. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/web/terminal_repl.py +0 -0
  93. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/workspace_hints.py +0 -0
  94. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode.egg-info/dependency_links.txt +0 -0
  95. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode.egg-info/entry_points.txt +0 -0
  96. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode.egg-info/requires.txt +0 -0
  97. {gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode.egg-info/top_level.txt +0 -0
  98. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_agent_instruction.py +0 -0
  99. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_autocompact.py +0 -0
  100. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_capability_routing.py +0 -0
  101. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_claude_web_adapter_sse.py +0 -0
  102. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_cli_init.py +0 -0
  103. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_computer_use_permissions.py +0 -0
  104. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_context_budget.py +0 -0
  105. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_context_warning.py +0 -0
  106. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_credentials.py +0 -0
  107. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_interactive_permission_ask.py +0 -0
  108. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_kairos_scheduler.py +0 -0
  109. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_modality_tools.py +0 -0
  110. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_model_error_retry.py +0 -0
  111. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_model_errors.py +0 -0
  112. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_model_routing.py +0 -0
  113. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_paths.py +0 -0
  114. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_permissions.py +0 -0
  115. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_prompt_suggestions.py +0 -0
  116. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_repl_commands.py +0 -0
  117. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_repl_slash.py +0 -0
  118. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_slash_commands.py +0 -0
  119. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_thinking_config.py +0 -0
  120. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_token_budget.py +0 -0
  121. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_tool_context_circulation.py +0 -0
  122. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_tools.py +0 -0
  123. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_tools_inspector.py +0 -0
  124. {gemcode-0.3.59 → gemcode-0.3.65}/tests/test_workspace_hints.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemcode
3
- Version: 0.3.59
3
+ Version: 0.3.65
4
4
  Summary: Local-first coding agent on Google Gemini + ADK
5
5
  Author: GemCode Contributors
6
6
  License: Apache License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "gemcode"
7
- version = "0.3.59"
7
+ version = "0.3.65"
8
8
  description = "Local-first coding agent on Google Gemini + ADK"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -29,6 +29,17 @@ from gemcode.tools import build_function_tools
29
29
  from gemcode.tool_prompt_manifest import build_tool_manifest
30
30
 
31
31
 
32
+ def build_global_instruction() -> str:
33
+ """Global instruction applied to the entire agent tree (via ADK plugin)."""
34
+ return (
35
+ "You are GemCode, an expert software engineering agent powered by Google Gemini. "
36
+ "Think deeply about what the person actually wants before you do anything. "
37
+ "Use exactly as many tools as the task genuinely requires — no more. "
38
+ "Act fully and autonomously when action is needed. "
39
+ "Always use read-only tools before shell or write tools."
40
+ )
41
+
42
+
32
43
  def _chain_before_model_callbacks(*callbacks):
33
44
  cbs = [c for c in callbacks if c is not None]
34
45
  if not cbs:
@@ -197,7 +208,7 @@ def _build_runtime_facts(cfg: GemCodeConfig) -> str:
197
208
  if getattr(cfg, "enable_memory", False):
198
209
  mem_path = root / ".gemcode" / "memories.jsonl"
199
210
  mem_kind = "embedding-backed" if getattr(cfg, "enable_embeddings", False) else "keyword-backed"
200
- caps.append(f"memory ON ({mem_kind}, stored at {mem_path}; ADK preload_memory injects relevant memories before each turn)")
211
+ caps.append(f"memory ON ({mem_kind}, stored at {mem_path}; ADK preload_memory auto-injects relevant memories before each turn; use load_memory(query) for explicit on-demand retrieval)")
201
212
  if getattr(cfg, "enable_computer_use", False):
202
213
  caps.append("computer_use ON (tools: navigate, click_at, type_text_at, browser_screenshot, browser_find_element, etc.)")
203
214
  if getattr(cfg, "enable_code_executor", False):
@@ -515,11 +526,12 @@ You have native deep thinking capability — use it actively:
515
526
  - For **subfolders**: `bash("cargo build --release", cwd_subdir="backend")`
516
527
 
517
528
  - **Long-running servers / watchers** — use `bash` with `background=True`:
518
- - `bash("npm run dev", background=True)` — start the dev server in background
529
+ - `bash("npm run dev", background=True)` — start the dev server in background → returns PID
519
530
  - `bash("python manage.py runserver", background=True)` — Django server
520
- - `bash("tail -f logs/app.log", background=True)` — background log watcher
521
531
  - NEVER call `bash("npm run dev")` without `background=True` — it blocks forever and crashes the turn
522
- - After starting a background process, confirm the port is ready with `bash("sleep 2 && curl -s http://localhost:3000 -o /dev/null && echo ready")`
532
+ - After starting: use `task_output(pid)` to read startup logs, then check if port is ready
533
+ - Use `list_tasks()` to see all running background processes
534
+ - Use `kill_task(pid)` to stop a background server when done
523
535
 
524
536
  - **`run_command`** — simple single-executable calls without shell features:
525
537
  - `run_command("npm", args=["install", "--legacy-peer-deps"])` — clean npm install
@@ -549,7 +561,21 @@ You have native deep thinking capability — use it actively:
549
561
  - **`move_file`** — rename or reorganize files/directories within the project.
550
562
  - **`delete_file`** — remove a single file.
551
563
 
564
+ ### Memory (when memory is enabled)
565
+ - **`preload_memory`** — automatically injects relevant past memories before each turn (runs in background).
566
+ - **`load_memory`** — explicit on-demand memory search:
567
+ - `load_memory("authentication patterns used in this project")` — recall specific knowledge
568
+ - `load_memory("previous bugs fixed in the auth module")` — targeted retrieval
569
+ - Use when the preloaded context is missing something specific you know you've seen before.
570
+
552
571
  ### Research and documentation
572
+ - **`web_search`** — search the web without any API key or research mode:
573
+ - `web_search("python asyncio tutorial 2025")` — general search
574
+ - `web_search("fastapi jwt authentication example")` — find code examples
575
+ - `web_search("react 19 breaking changes")` — check recent releases
576
+ - Returns titles, URLs, and snippets. Follow with `web_fetch(url)` to read full content.
577
+ - Use this for quick lookups; use `/research on` for deep multi-page research.
578
+
553
579
  - **`web_fetch`** — fetch docs, APIs, changelogs, READMEs from the web:
554
580
  - `web_fetch("https://docs.python.org/3/library/asyncio.html")` — official docs
555
581
  - `web_fetch("https://api.github.com/repos/owner/repo/releases/latest")` — API data
@@ -566,6 +592,23 @@ You have native deep thinking capability — use it actively:
566
592
  - **`todo_write`** — track work items. Use for any task with 3+ steps.
567
593
  - Create at task start, mark completed as you finish, merge updates.
568
594
 
595
+ - **`todo_read`** — read the current session todo list.
596
+ - Call this to check progress, find task ids for a merge update, or verify what's pending.
597
+
598
+ - **`notebook_read`** — read a Jupyter notebook (.ipynb) as structured cells.
599
+ - Always prefer this over `read_file` for `.ipynb` files — gives clean cell-by-cell output.
600
+ - `notebook_read("analysis.ipynb")` — shows all cells with source and outputs.
601
+
602
+ - **`notebook_edit`** — edit a cell in a Jupyter notebook:
603
+ - `notebook_edit("nb.ipynb", cell_index=2, new_source="import pandas as pd")` — replace cell
604
+ - `notebook_edit("nb.ipynb", cell_index=0, new_source="# Title", cell_type="markdown", edit_mode="insert")` — insert
605
+ - `notebook_edit("nb.ipynb", cell_index=3, new_source="", edit_mode="delete")` — delete cell
606
+
607
+ - **Background task management** — for processes started with `bash(..., background=True)`:
608
+ - `list_tasks()` — see all background tasks (PID, command, status: running/finished)
609
+ - `task_output(pid)` — read stdout/stderr captured from a background task
610
+ - `kill_task(pid)` — gracefully stop a background task (use `force=True` for SIGKILL)
611
+
569
612
  - **`run_subtask`** — spawn an isolated sub-agent with its own fresh context window.
570
613
  - The sub-agent has the same tools (bash, read_file, grep, etc.) but starts from scratch.
571
614
  - Use when a task would bloat your context too much: e.g. "read all 40 test files and find patterns"
@@ -587,14 +630,34 @@ One user message = many model↔tool rounds (up to 256 LLM calls by default). Th
587
630
 
588
631
  **Do not stop after step 2 or 3** — complete the full task.
589
632
 
590
- ## Parallelism — batch independent work
633
+ ## Parallelism — batch independent work aggressively
591
634
  Issue independent tool calls **in the same turn** when outputs don't depend on each other.
592
- This is faster and costs fewer turns. Concrete examples:
593
- - Reading multiple files → send all `read_file` calls together
594
- - Grepping different patterns → one message, multiple `grep_content` calls
595
- - `list_directory` + `glob_files` → issue both at once
596
- - Exploring multiple subsystems → one `run_subtask` per subsystem in one turn
597
- - `git status` and `git log` → chain with `&&` or issue in parallel
635
+ This is always faster. **Default to parallel; only serialize when you must.**
636
+
637
+ Concrete patterns:
638
+
639
+ **Parallel file exploration (always do this):**
640
+ - Reading multiple files → emit all `read_file` calls in one turn, not one by one
641
+ - Grepping different patterns → multiple `grep_content` in one response
642
+ - `list_directory` + `glob_files` → both at once
643
+
644
+ **Parallel sub-agent exploration (OpenClaude pattern):**
645
+ When a task requires understanding several subsystems before acting:
646
+ 1. Spawn parallel `run_subtask` workers, one per subsystem
647
+ 2. Wait for all results to return in the same turn
648
+ 3. Synthesise findings and execute the change
649
+
650
+ Example — understanding a codebase before a big refactor:
651
+ ```
652
+ run_subtask("Analyse src/auth/ — how does authentication flow work? List all key files and patterns.")
653
+ run_subtask("Analyse src/api/ — what endpoints exist? How are they protected?")
654
+ run_subtask("Analyse tests/auth* — what is the test coverage for auth?")
655
+ ```
656
+ All three run concurrently. Then synthesise and act.
657
+
658
+ **Parallel git + build:**
659
+ - `git status && git diff --stat` → one bash call
660
+ - Running lint + type-check → `npm run lint && npm run typecheck` in one call
598
661
 
599
662
  Sequential only when step B genuinely needs step A's output.
600
663
 
@@ -642,10 +705,36 @@ When `code_executor ON` (see Runtime facts above):
642
705
  - The sandbox does NOT have internet access or filesystem access — use for pure computation
643
706
  - For file I/O or shell commands, use the standard tools (`bash`, `write_file`, etc.)
644
707
 
708
+ ## Verification contract (mandatory for non-trivial tasks)
709
+
710
+ After completing any implementation that touches **3 or more files**, introduces a new feature, or fixes a bug, you **MUST** run a verification pass before calling the task done.
711
+
712
+ **How to verify:**
713
+
714
+ Option A — Run tests/build (preferred when tests exist):
715
+ ```
716
+ bash("npm run build 2>&1 | tail -30")
717
+ bash("pytest tests/ -x -q --tb=short 2>&1 | head -80")
718
+ ```
719
+
720
+ Option B — Spawn a verification sub-agent (for complex multi-file changes):
721
+ ```
722
+ run_subtask(
723
+ task="You are a strict code reviewer. Verify the following changes are correct, complete, and consistent. Check: (1) syntax errors, (2) logic bugs, (3) broken imports, (4) missing edge cases, (5) consistency across all modified files. Report PASS or FAIL with specific findings.",
724
+ context="Files changed: [list them]. Change summary: [what you did]."
725
+ )
726
+ ```
727
+
728
+ **Rules:**
729
+ - If verification finds issues → fix them → verify again. Never stop at a failed verification.
730
+ - Only report "done" after a clean verification pass.
731
+ - For destructive changes (delete, refactor) always run both Option A and Option B.
732
+ - For simple single-file edits, a quick `bash("python3 -c 'import <module>'")` or syntax check is sufficient.
733
+
645
734
  ## Evaluator-optimizer loop
646
735
  For tasks where quality matters:
647
736
  1. Complete the task (execute tools, write code, run commands)
648
- 2. Spawn a verification `run_subtask` or use `bash` to run tests/lint
737
+ 2. Verify — run tests, build, or spawn a verification sub-agent (see Verification contract above)
649
738
  3. If verification fails, read the error, fix, re-verify
650
739
  4. Report done only when verified
651
740
 
@@ -704,7 +793,9 @@ Use `gh pr create` via `bash`. When asked to create a PR:
704
793
  - Do NOT retry failing commands in a sleep loop — diagnose the root cause first
705
794
 
706
795
  ## Communication
707
- - One short line before the first tool call in a turn (e.g. "Reading the auth module and checking the test suite...").
796
+ - **ACT FIRST, narrate after.** Do NOT write out a multi-step numbered plan as prose and then stop. Execute immediately — use tools right away.
797
+ - One short line before the first tool call is fine (e.g. "Reading the auth module..."). That's it. No verbose announcements.
798
+ - If you want to plan, use the **`think` tool privately** — never dump a plan into your text response before acting. The user cannot run plan text; they need results.
708
799
  - Summarize tool results in plain language — the user doesn't see raw tool internals.
709
800
  - After completing a task: clear summary of what changed, where, and why.
710
801
  - If the user pastes UI copy / noise / error output, extract the real intent and act on source files.
@@ -851,22 +942,10 @@ def build_root_agent(
851
942
  tool_config=tool_cfg,
852
943
  )
853
944
 
854
- # global_instruction applies to the entire agent tree (including sub-agents
855
- # spawned via run_subtask or multi-agent delegation). Keep it short — it's
856
- # prepended to every agent's effective instruction.
857
- global_instr = (
858
- "You are GemCode, an expert software engineering agent powered by Google Gemini. "
859
- "Think deeply about what the person actually wants before you do anything. "
860
- "Use exactly as many tools as the task genuinely requires — no more. "
861
- "Act fully and autonomously when action is needed. "
862
- "Always use read-only tools before shell or write tools."
863
- )
864
-
865
945
  agent_kwargs: dict = dict(
866
946
  model=cfg.model,
867
947
  name="gemcode",
868
948
  instruction=build_instruction(cfg),
869
- global_instruction=global_instr,
870
949
  tools=tools,
871
950
  generate_content_config=gen_cfg,
872
951
  **cb_kwargs,
@@ -876,6 +955,17 @@ def build_root_agent(
876
955
  if code_executor is not None:
877
956
  agent_kwargs["code_executor"] = code_executor
878
957
 
958
+ # Optional: ADK PlanReActPlanner — injects a structured "plan then act" pass
959
+ # into every turn at the framework level (not just via prompting).
960
+ # Enable with: GEMCODE_PLANREACT=1
961
+ import os as _os
962
+ if _os.environ.get("GEMCODE_PLANREACT", "").lower() in ("1", "true", "yes", "on"):
963
+ try:
964
+ from google.adk.planners import PlanReActPlanner
965
+ agent_kwargs["planner"] = PlanReActPlanner()
966
+ except Exception:
967
+ pass
968
+
879
969
  return LlmAgent(**agent_kwargs)
880
970
 
881
971
 
@@ -626,29 +626,44 @@ def make_on_tool_error_callback(cfg: GemCodeConfig):
626
626
 
627
627
 
628
628
  def make_on_model_error_callback(cfg: GemCodeConfig):
629
- """Structured model errors to the user + audit trail."""
629
+ """Structured model errors to the user + audit trail.
630
+
631
+ For transient errors (HTTP 503, 429, server-overloaded) we return None so the
632
+ exception propagates to invoke.py, which retries with exponential backoff.
633
+ For permanent errors we absorb and return a user-friendly LlmResponse.
634
+ """
630
635
 
631
636
  async def on_model_error(*, callback_context, llm_request, error: Exception):
632
- try:
633
- st = callback_context.state
634
- if st is not None and not st.get(TERMINAL_REASON_KEY):
635
- st[TERMINAL_REASON_KEY] = "model_error"
636
- except Exception:
637
- pass
637
+ from gemcode.model_errors import is_transient_error
638
+
638
639
  append_audit(
639
640
  cfg.project_root,
640
641
  {
641
642
  "phase": "model_exception",
642
643
  "error": f"{type(error).__name__}: {error}",
644
+ "transient": is_transient_error(error),
643
645
  },
644
646
  )
647
+
648
+ # Transient errors (503, 429, server-overloaded): let the exception propagate
649
+ # so invoke.py can retry with backoff. Do NOT set terminal state here — the
650
+ # turn is not over yet.
651
+ if is_transient_error(error):
652
+ return None
653
+
654
+ # Permanent errors: mark session terminal and return a user-friendly message.
655
+ try:
656
+ st = callback_context.state
657
+ if st is not None and not st.get(TERMINAL_REASON_KEY):
658
+ st[TERMINAL_REASON_KEY] = "model_error"
659
+ except Exception:
660
+ pass
661
+
645
662
  if _truthy_env("GEMCODE_VERBOSE_MODEL_ERRORS", default=False):
646
663
  import traceback
647
-
648
664
  traceback.print_exception(type(error), error, error.__traceback__, file=sys.stderr)
649
665
 
650
666
  user_text = format_model_error_for_user(error)
651
- # Scrollback/TUI already prints "GemCode:" before assistant text — avoid "GemCode: GemCode:".
652
667
  from google.adk.models.llm_response import LlmResponse
653
668
  from google.genai import types
654
669
 
@@ -245,10 +245,10 @@ class GemCodeConfig:
245
245
  )
246
246
 
247
247
  # Controls how the TUI renders model thinking: True = full Rich Markdown,
248
- # False = collapsed one-line excerpt (default, like OpenClaude).
248
+ # False = collapsed one-line excerpt.
249
249
  # Toggled at runtime via /thinking verbose|brief.
250
250
  show_full_thinking: bool = field(
251
- default_factory=lambda: _truthy_env("GEMCODE_SHOW_FULL_THINKING", default=False)
251
+ default_factory=lambda: _truthy_env("GEMCODE_SHOW_FULL_THINKING", default=True)
252
252
  )
253
253
 
254
254
  # Enable ADK BuiltInCodeExecutor for safe sandboxed Python execution via
@@ -6,6 +6,7 @@ CLI and tests call `run_turn` with a Runner already bound to app + session servi
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
+ import asyncio
9
10
  import os
10
11
  import sys
11
12
  from typing import Any
@@ -16,6 +17,11 @@ from google.adk.runners import Runner
16
17
  from google.genai import types
17
18
 
18
19
 
20
+ # Delays (seconds) between successive transient-error retries: 2s, 5s, 12s.
21
+ # Three retries = up to ~19 seconds of total wait before giving up.
22
+ _TRANSIENT_RETRY_DELAYS = [2.0, 5.0, 12.0]
23
+
24
+
19
25
  _HITL_PROMPT_LOCK = Lock()
20
26
 
21
27
 
@@ -160,11 +166,43 @@ async def run_turn(
160
166
  # Runner handoff loop: if tools request confirmations, we pause here to
161
167
  # ask HITL, then send back function responses so ADK can re-execute the
162
168
  # tools.
169
+ #
170
+ # Transient API errors (HTTP 503, 429) are retried here with exponential
171
+ # backoff. on_model_error returns None for these, so the exception
172
+ # propagates from runner.run_async and we catch it below.
163
173
  do_reset = True
174
+ transient_attempts = 0
164
175
  while True:
165
- events = await _await_runner_events(
166
- next_message=current_message, do_reset=do_reset
167
- )
176
+ try:
177
+ events = await _await_runner_events(
178
+ next_message=current_message, do_reset=do_reset
179
+ )
180
+ except Exception as _exc:
181
+ from gemcode.model_errors import is_transient_error
182
+ if is_transient_error(_exc) and transient_attempts < len(_TRANSIENT_RETRY_DELAYS):
183
+ delay = _TRANSIENT_RETRY_DELAYS[transient_attempts]
184
+ transient_attempts += 1
185
+ _tui_active = os.environ.get("GEMCODE_TUI_ACTIVE", "0").lower() in ("1", "true", "yes", "on")
186
+ _msg = (
187
+ f"\n[gemcode] Transient API error ({type(_exc).__name__}). "
188
+ f"Retrying in {delay:.0f}s (attempt {transient_attempts}/{len(_TRANSIENT_RETRY_DELAYS)})...\n"
189
+ )
190
+ print(_msg, file=sys.stderr)
191
+ # Surface retry notice in TUI if available.
192
+ if _tui_active:
193
+ try:
194
+ from gemcode.tui import scrollback as _sb
195
+ _sb._transient_retry_notice = _msg # type: ignore[attr-defined]
196
+ except Exception:
197
+ pass
198
+ await asyncio.sleep(delay)
199
+ # Retry the same message from scratch (session history is intact in SQLite).
200
+ continue
201
+ # Non-transient or out of retries: re-raise so the TUI surfaces it.
202
+ raise
203
+
204
+ # Reset transient counter after a successful model call.
205
+ transient_attempts = 0
168
206
  collected.extend(events)
169
207
 
170
208
  confirmation_fcs = _get_confirmation_requests(events)
@@ -5,6 +5,38 @@ from __future__ import annotations
5
5
  import re
6
6
 
7
7
 
8
+ def is_transient_error(error: Exception) -> bool:
9
+ """Return True for HTTP 503 / 429 and similar transient API errors that are safe to retry.
10
+
11
+ Transient means: the request was fine, the server was temporarily unavailable or
12
+ rate-limited. Retrying the same request (with backoff) will likely succeed.
13
+ """
14
+ try:
15
+ from google.genai import errors as genai_errors
16
+ if isinstance(error, genai_errors.APIError):
17
+ code = int(getattr(error, "code", None) or 0) or None
18
+ if code in (429, 503):
19
+ return True
20
+ # Every other 5xx server error (500, 502 Bad Gateway, 504, etc.) is also treated as transient.
21
+ if code is not None and 500 <= code < 600 and code not in (400, 401, 403, 404):
22
+ return True
23
+ except Exception:
24
+ pass
25
+
26
+ # gRPC / google-api-core equivalents
27
+ et = type(error).__name__
28
+ if "ResourceExhausted" in et or "ServiceUnavailable" in et or "DeadlineExceeded" in et:
29
+ return True
30
+
31
+ msg = str(error)
32
+ # Match the specific phrases Gemini uses in 503 responses
33
+ if "503" in msg and any(p in msg for p in ("high demand", "service unavailable", "overloaded")):
34
+ return True
35
+ if "429" in msg and any(p in msg for p in ("rate limit", "quota", "resource exhausted")):
36
+ return True
37
+ return False
38
+
39
+
8
40
  def _sanitize_api_text(s: str) -> str:
9
41
  """Strip likely API key material from strings shown to the user."""
10
42
  if not s:
@@ -141,6 +141,19 @@ class GemCodeTerminalHooksPlugin(BasePlugin):
141
141
  },
142
142
  )
143
143
 
144
+ # Surface suggestion to the TUI by storing it on cfg.
145
+ # The TUI reads cfg._last_prompt_suggestion after each turn and displays it.
146
+ try:
147
+ object.__setattr__(self.cfg, "_last_prompt_suggestion", suggestion)
148
+ except Exception:
149
+ pass
150
+ else:
151
+ # Clear any stale suggestion from the previous turn.
152
+ try:
153
+ object.__setattr__(self.cfg, "_last_prompt_suggestion", None)
154
+ except Exception:
155
+ pass
156
+
144
157
  if getattr(self.cfg, "enable_memory", False):
145
158
  try:
146
159
  await callback_context.add_session_to_memory()
@@ -253,7 +253,7 @@ def slash_help_lines() -> list[str]:
253
253
  " Thinking:",
254
254
  " /thinking Show current thinking config",
255
255
  " /thinking verbose Show full thinking text each turn",
256
- " /thinking brief Show collapsed one-line excerpt (default)",
256
+ " /thinking brief Show collapsed one-line excerpt",
257
257
  " /thinking off Disable model thinking",
258
258
  " /thinking on Re-enable thinking (auto budget/level)",
259
259
  " /thinking budget <N> Set thinking token budget (Gemini 2.5, 0=off, -1=dynamic)",
@@ -1001,7 +1001,7 @@ async def process_repl_slash(
1001
1001
  out(" /thinking level <minimal|low|medium|high>")
1002
1002
  out("Display commands (all models):")
1003
1003
  out(" /thinking verbose — show full thinking text each turn")
1004
- out(" /thinking brief — show collapsed one-line excerpt (default)")
1004
+ out(" /thinking brief — show collapsed one-line excerpt")
1005
1005
  out()
1006
1006
  return ReplSlashResult(skip_model_turn=True)
1007
1007
 
@@ -1016,7 +1016,7 @@ async def process_repl_slash(
1016
1016
 
1017
1017
  if sub in ("brief", "short", "collapsed"):
1018
1018
  cfg.show_full_thinking = False
1019
- out("thinking display: brief — collapsed one-line excerpt (default)")
1019
+ out("thinking display: brief — collapsed one-line excerpt")
1020
1020
  out()
1021
1021
  return ReplSlashResult(skip_model_turn=True)
1022
1022
 
@@ -22,7 +22,7 @@ warnings.filterwarnings("ignore", category=UserWarning, message=".*EXPERIMENTAL.
22
22
  from google.adk.runners import Runner
23
23
  from google.adk.sessions.sqlite_session_service import SqliteSessionService
24
24
 
25
- from gemcode.agent import build_root_agent
25
+ from gemcode.agent import build_global_instruction, build_root_agent
26
26
  from gemcode.config import GemCodeConfig
27
27
  from gemcode.modality_tools import build_extra_tools as build_modality_extra_tools
28
28
  from gemcode.memory.embedding_memory_service import EmbeddingFileMemoryService
@@ -31,6 +31,52 @@ from gemcode.plugins.terminal_hooks_plugin import GemCodeTerminalHooksPlugin
31
31
  from gemcode.plugins.tool_recovery_plugin import GemCodeReflectAndRetryToolPlugin
32
32
 
33
33
 
34
+ # ---------------------------------------------------------------------------
35
+ # ADK App-level feature helpers
36
+ # ---------------------------------------------------------------------------
37
+
38
def _build_context_cache_config():
    """Return a ``ContextCacheConfig`` if context caching is enabled, else ``None``.

    Context caching lets Gemini reuse a stable prompt prefix (system prompt +
    tools) across turns instead of re-sending it on every request.

    Caching is enabled by default. Opt out by setting ``GEMCODE_CONTEXT_CACHE``
    to ``0``, ``false``, ``no`` or ``off`` (case-insensitive; surrounding
    whitespace is ignored).

    Returns:
        A ``ContextCacheConfig`` instance, or ``None`` when caching is disabled
        or the installed ADK cannot import/construct the config. This function
        never raises: failures are logged at debug level and treated as
        "caching unavailable".
    """
    import logging

    # Normalise the flag so values such as " OFF " still disable caching.
    flag = os.environ.get("GEMCODE_CONTEXT_CACHE", "1").strip().lower()
    if flag in ("0", "false", "no", "off"):
        return None

    try:
        from google.adk.agents.context_cache_config import ContextCacheConfig

        return ContextCacheConfig(
            cache_intervals=10,  # refresh the cache every 10 invocations
            ttl_seconds=1800,  # cache lives 30 minutes
            min_tokens=1024,  # skip caching tiny sessions (< ~1 K tokens)
        )
    except Exception:
        # Best effort: an ADK without context caching (or with a different
        # config schema) must not stop the runner from starting. Record the
        # cause so the silent fallback can still be diagnosed.
        logging.getLogger(__name__).debug(
            "context caching unavailable; continuing without it",
            exc_info=True,
        )
        return None
58
+
59
+
60
def _build_app(agent, plugins, cfg: GemCodeConfig):
    """Wrap the root agent in an ADK ``App`` for plugin + context-cache support.

    Using ``App`` instead of passing ``agent`` + ``plugins`` directly to
    ``Runner`` is the recommended ADK pattern as of ADK 1.x (``plugins=`` on
    ``Runner`` is officially deprecated).

    Args:
        agent: Root agent, passed to ``App`` as ``root_agent``.
        plugins: Plugin list handed to the ``App``.
        cfg: GemCode configuration. Currently unused here; kept so the
            signature stays stable for callers.

    Returns:
        The constructed ``App``, or ``None`` when ``App`` cannot be imported or
        constructed. The caller is expected to fall back to the legacy
        ``Runner`` keyword arguments in that case.
    """
    import logging

    try:
        from google.adk.apps.app import App

        return App(
            name="gemcode",
            root_agent=agent,
            plugins=plugins,
            context_cache_config=_build_context_cache_config(),
        )
    except Exception:
        # Fall back to the legacy Runner kwargs, but leave a trace: without it
        # a genuine App construction error is indistinguishable from running
        # on an old ADK install.
        logging.getLogger(__name__).debug(
            "ADK App unavailable; falling back to legacy Runner kwargs",
            exc_info=True,
        )
        return None
78
+
79
+
34
80
  def session_db_path(cfg: GemCodeConfig) -> Path:
35
81
  return cfg.project_root / ".gemcode" / "sessions.sqlite"
36
82
 
@@ -242,15 +288,23 @@ def _make_safe_computer_toolset(computer):
242
288
 
243
289
 
244
290
  def _build_artifact_service(cfg: GemCodeConfig):
245
- """
246
- Return an ADK ArtifactService for this session, or None if disabled.
291
+ """Return an ADK ArtifactService for this session, or None if disabled.
247
292
 
248
- Uses InMemoryArtifactService so artifacts are available within the session
249
- without requiring GCS credentials. The agent can save screenshots, generated
250
- files, large reports, etc. as artifacts to avoid bloating session history.
293
+ Uses ``FileArtifactService`` backed by ``.gemcode/artifacts/`` so that
294
+ artifacts (screenshots, generated reports, diffs, etc.) survive session
295
+ restarts. Falls back to ``InMemoryArtifactService`` if the file-based
296
+ service is unavailable (older ADK).
251
297
  """
252
298
  if not getattr(cfg, "enable_artifacts", True):
253
299
  return None
300
+ try:
301
+ from google.adk.artifacts import FileArtifactService
302
+ artifacts_dir = cfg.project_root / ".gemcode" / "artifacts"
303
+ artifacts_dir.mkdir(parents=True, exist_ok=True)
304
+ return FileArtifactService(root_dir=artifacts_dir)
305
+ except Exception:
306
+ pass
307
+ # Fallback for older ADK versions that don't have FileArtifactService.
254
308
  try:
255
309
  from google.adk.artifacts import InMemoryArtifactService
256
310
  return InMemoryArtifactService()
@@ -344,11 +398,32 @@ def create_runner(cfg: GemCodeConfig, extra_tools: list | None = None) -> Runner
344
398
  db.parent.mkdir(parents=True, exist_ok=True)
345
399
  session_service = SqliteSessionService(str(db))
346
400
 
347
- plugins = [GemCodeTerminalHooksPlugin(cfg)]
348
- # Place recovery plugin before terminal hooks so it can influence tool results
349
- # during the invocation.
350
- if True:
351
- plugins.insert(0, GemCodeReflectAndRetryToolPlugin(cfg))
401
+ # ── Plugins ──────────────────────────────────────────────────────────────
402
+ # Recovery plugin first so it can intercept tool errors before terminal hooks.
403
+ plugins = [GemCodeReflectAndRetryToolPlugin(cfg), GemCodeTerminalHooksPlugin(cfg)]
404
+
405
+ # Global instruction is now applied via ADK's GlobalInstructionPlugin (the
406
+ # modern replacement for the deprecated LlmAgent.global_instruction field).
407
+ try:
408
+ from google.adk.plugins.global_instruction_plugin import GlobalInstructionPlugin
409
+ plugins.insert(0, GlobalInstructionPlugin(build_global_instruction()))
410
+ except Exception:
411
+ pass
412
+
413
+ # Optional: rich YAML debug log (every LLM request/response + tool calls).
414
+ # Enable with: GEMCODE_DEBUG_LOG=1
415
+ if os.environ.get("GEMCODE_DEBUG_LOG", "").lower() in ("1", "true", "yes", "on"):
416
+ try:
417
+ from google.adk.plugins.debug_logging_plugin import DebugLoggingPlugin
418
+ debug_log_path = cfg.project_root / ".gemcode" / "debug.yaml"
419
+ plugins.append(DebugLoggingPlugin(
420
+ output_path=str(debug_log_path),
421
+ include_session_state=True,
422
+ ))
423
+ except Exception:
424
+ pass
425
+
426
+ # ── Memory service ────────────────────────────────────────────────────────
352
427
  memory_service = None
353
428
  if getattr(cfg, "enable_memory", False):
354
429
  mem_path = cfg.project_root / ".gemcode" / "memories.jsonl"
@@ -361,14 +436,31 @@ def create_runner(cfg: GemCodeConfig, extra_tools: list | None = None) -> Runner
361
436
 
362
437
  artifact_service = _build_artifact_service(cfg)
363
438
 
364
- runner_kwargs: dict = dict(
365
- app_name="gemcode",
366
- agent=agent,
367
- session_service=session_service,
368
- plugins=plugins,
369
- memory_service=memory_service,
370
- auto_create_session=True,
371
- )
439
+ # ── Runner via ADK App (modern pattern) ──────────────────────────────────
440
+ # App is the recommended top-level container as of ADK 1.x. It owns the
441
+ # plugin list and context-cache config so Runner stays clean.
442
+ # ``plugins=`` on Runner is officially deprecated; using App avoids the
443
+ # DeprecationWarning and enables context caching + future App-level features.
444
+ app = _build_app(agent, plugins, cfg)
445
+
446
+ if app is not None:
447
+ runner_kwargs: dict = dict(
448
+ app=app,
449
+ session_service=session_service,
450
+ memory_service=memory_service,
451
+ auto_create_session=True,
452
+ )
453
+ else:
454
+ # Legacy fallback if App is unavailable (very old ADK installs).
455
+ runner_kwargs = dict(
456
+ app_name="gemcode",
457
+ agent=agent,
458
+ session_service=session_service,
459
+ plugins=plugins,
460
+ memory_service=memory_service,
461
+ auto_create_session=True,
462
+ )
463
+
372
464
  if artifact_service is not None:
373
465
  runner_kwargs["artifact_service"] = artifact_service
374
466