PyPI - gemcode - Versions diffs - 0.3.59__tar.gz → 0.3.65__tar.gz - Mend

gemcode 0.3.59__tar.gz → 0.3.65__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

{gemcode-0.3.59/src/gemcode.egg-info → gemcode-0.3.65}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gemcode
-Version: 0.3.59
+Version: 0.3.65
 Summary: Local-first coding agent on Google Gemini + ADK
 Author: GemCode Contributors
 License:                                  Apache License

{gemcode-0.3.59 → gemcode-0.3.65}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "gemcode"
-version = "0.3.59"
+version = "0.3.65"
 description = "Local-first coding agent on Google Gemini + ADK"
 readme = "README.md"
 requires-python = ">=3.11"

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/agent.py RENAMED Viewed

@@ -29,6 +29,17 @@ from gemcode.tools import build_function_tools
 from gemcode.tool_prompt_manifest import build_tool_manifest
+def build_global_instruction() -> str:
+  """Return the global instruction applied to the entire agent tree (via ADK plugin).
+
+  The text is a fixed sequence of sentences joined by single spaces.
+  """
+  sentences = (
+    "You are GemCode, an expert software engineering agent powered by Google Gemini.",
+    "Think deeply about what the person actually wants before you do anything.",
+    "Use exactly as many tools as the task genuinely requires — no more.",
+    "Act fully and autonomously when action is needed.",
+    "Always use read-only tools before shell or write tools.",
+  )
+  return " ".join(sentences)
 def _chain_before_model_callbacks(*callbacks):
   cbs = [c for c in callbacks if c is not None]
   if not cbs:
@@ -197,7 +208,7 @@ def _build_runtime_facts(cfg: GemCodeConfig) -> str:
   if getattr(cfg, "enable_memory", False):
     mem_path = root / ".gemcode" / "memories.jsonl"
     mem_kind = "embedding-backed" if getattr(cfg, "enable_embeddings", False) else "keyword-backed"
-    caps.append(f"memory ON ({mem_kind}, stored at {mem_path}; ADK preload_memory injects relevant memories before each turn)")
+    caps.append(f"memory ON ({mem_kind}, stored at {mem_path}; ADK preload_memory auto-injects relevant memories before each turn; use load_memory(query) for explicit on-demand retrieval)")
   if getattr(cfg, "enable_computer_use", False):
     caps.append("computer_use ON (tools: navigate, click_at, type_text_at, browser_screenshot, browser_find_element, etc.)")
   if getattr(cfg, "enable_code_executor", False):
@@ -515,11 +526,12 @@ You have native deep thinking capability — use it actively:
   - For **subfolders**: `bash("cargo build --release", cwd_subdir="backend")`
 - **Long-running servers / watchers** — use `bash` with `background=True`:
-  - `bash("npm run dev", background=True)` — start the dev server in background
+  - `bash("npm run dev", background=True)` — start the dev server in background → returns PID
   - `bash("python manage.py runserver", background=True)` — Django server
-  - `bash("tail -f logs/app.log", background=True)` — background log watcher
   - NEVER call `bash("npm run dev")` without `background=True` — it blocks forever and crashes the turn
-  - After starting a background process, confirm the port is ready with `bash("sleep 2 && curl -s http://localhost:3000 -o /dev/null && echo ready")`
+  - After starting: use `task_output(pid)` to read startup logs, then check if port is ready
+  - Use `list_tasks()` to see all running background processes
+  - Use `kill_task(pid)` to stop a background server when done
 - **`run_command`** — simple single-executable calls without shell features:
   - `run_command("npm", args=["install", "--legacy-peer-deps"])` — clean npm install
@@ -549,7 +561,21 @@ You have native deep thinking capability — use it actively:
 - **`move_file`** — rename or reorganize files/directories within the project.
 - **`delete_file`** — remove a single file.
+### Memory (when memory is enabled)
+- **`preload_memory`** — automatically injects relevant past memories before each turn (runs in background).
+- **`load_memory`** — explicit on-demand memory search:
+  - `load_memory("authentication patterns used in this project")` — recall specific knowledge
+  - `load_memory("previous bugs fixed in the auth module")` — targeted retrieval
+  - Use when the preloaded context is missing something specific you know you've seen before.
 ### Research and documentation
+- **`web_search`** — search the web without any API key or research mode:
+  - `web_search("python asyncio tutorial 2025")` — general search
+  - `web_search("fastapi jwt authentication example")` — find code examples
+  - `web_search("react 19 breaking changes")` — check recent releases
+  - Returns titles, URLs, and snippets. Follow with `web_fetch(url)` to read full content.
+  - Use this for quick lookups; use `/research on` for deep multi-page research.
 - **`web_fetch`** — fetch docs, APIs, changelogs, READMEs from the web:
   - `web_fetch("https://docs.python.org/3/library/asyncio.html")` — official docs
   - `web_fetch("https://api.github.com/repos/owner/repo/releases/latest")` — API data
@@ -566,6 +592,23 @@ You have native deep thinking capability — use it actively:
 - **`todo_write`** — track work items. Use for any task with 3+ steps.
   - Create at task start, mark completed as you finish, merge updates.
+- **`todo_read`** — read the current session todo list.
+  - Call this to check progress, find task ids for a merge update, or verify what's pending.
+- **`notebook_read`** — read a Jupyter notebook (.ipynb) as structured cells.
+  - Always prefer this over `read_file` for `.ipynb` files — gives clean cell-by-cell output.
+  - `notebook_read("analysis.ipynb")` — shows all cells with source and outputs.
+- **`notebook_edit`** — edit a cell in a Jupyter notebook:
+  - `notebook_edit("nb.ipynb", cell_index=2, new_source="import pandas as pd")` — replace cell
+  - `notebook_edit("nb.ipynb", cell_index=0, new_source="# Title", cell_type="markdown", edit_mode="insert")` — insert
+  - `notebook_edit("nb.ipynb", cell_index=3, new_source="", edit_mode="delete")` — delete cell
+- **Background task management** — for processes started with `bash(..., background=True)`:
+  - `list_tasks()` — see all background tasks (PID, command, status: running/finished)
+  - `task_output(pid)` — read stdout/stderr captured from a background task
+  - `kill_task(pid)` — gracefully stop a background task (use `force=True` for SIGKILL)
 - **`run_subtask`** — spawn an isolated sub-agent with its own fresh context window.
   - The sub-agent has the same tools (bash, read_file, grep, etc.) but starts from scratch.
   - Use when a task would bloat your context too much: e.g. "read all 40 test files and find patterns"
@@ -587,14 +630,34 @@ One user message = many model↔tool rounds (up to 256 LLM calls by default). Th
 **Do not stop after step 2 or 3** — complete the full task.
-## Parallelism — batch independent work
+## Parallelism — batch independent work aggressively
 Issue independent tool calls **in the same turn** when outputs don't depend on each other.
-This is faster and costs fewer turns. Concrete examples:
-- Reading multiple files → send all `read_file` calls together
-- Grepping different patterns → one message, multiple `grep_content` calls
-- `list_directory` + `glob_files` → issue both at once
-- Exploring multiple subsystems → one `run_subtask` per subsystem in one turn
-- `git status` and `git log` → chain with `&&` or issue in parallel
+This is always faster. **Default to parallel; only serialize when you must.**
+Concrete patterns:
+**Parallel file exploration (always do this):**
+- Reading multiple files → emit all `read_file` calls in one turn, not one by one
+- Grepping different patterns → multiple `grep_content` in one response
+- `list_directory` + `glob_files` → both at once
+**Parallel sub-agent exploration (OpenClaude pattern):**
+When a task requires understanding several subsystems before acting:
+1. Spawn parallel `run_subtask` workers, one per subsystem
+2. Wait for all results to return in the same turn
+3. Synthesise findings and execute the change
+Example — understanding a codebase before a big refactor:
+```
+run_subtask("Analyse src/auth/ — how does authentication flow work? List all key files and patterns.")
+run_subtask("Analyse src/api/ — what endpoints exist? How are they protected?")
+run_subtask("Analyse tests/auth* — what is the test coverage for auth?")
+```
+All three run concurrently. Then synthesise and act.
+**Parallel git + build:**
+- `git status && git diff --stat` → one bash call
+- Running lint + type-check → `npm run lint && npm run typecheck` in one call
 Sequential only when step B genuinely needs step A's output.
@@ -642,10 +705,36 @@ When `code_executor ON` (see Runtime facts above):
 - The sandbox does NOT have internet access or filesystem access — use for pure computation
 - For file I/O or shell commands, use the standard tools (`bash`, `write_file`, etc.)
+## Verification contract (mandatory for non-trivial tasks)
+After completing any implementation that touches **3 or more files**, introduces a new feature, or fixes a bug, you **MUST** run a verification pass before calling the task done.
+**How to verify:**
+Option A — Run tests/build (preferred when tests exist):
+```
+bash("npm run build 2>&1 | tail -30")
+bash("pytest tests/ -x -q --tb=short 2>&1 | head -80")
+```
+Option B — Spawn a verification sub-agent (for complex multi-file changes):
+```
+run_subtask(
+  task="You are a strict code reviewer. Verify the following changes are correct, complete, and consistent. Check: (1) syntax errors, (2) logic bugs, (3) broken imports, (4) missing edge cases, (5) consistency across all modified files. Report PASS or FAIL with specific findings.",
+  context="Files changed: [list them]. Change summary: [what you did]."
+)
+```
+**Rules:**
+- If verification finds issues → fix them → verify again. Never stop at a failed verification.
+- Only report "done" after a clean verification pass.
+- For destructive changes (delete, refactor) always run both Option A and Option B.
+- For simple single-file edits, a quick `bash("python3 -c 'import <module>'")` or syntax check is sufficient.
 ## Evaluator-optimizer loop
 For tasks where quality matters:
 1. Complete the task (execute tools, write code, run commands)
-2. Spawn a verification `run_subtask` or use `bash` to run tests/lint
+2. Verify — run tests, build, or spawn a verification sub-agent (see Verification contract above)
 3. If verification fails, read the error, fix, re-verify
 4. Report done only when verified
@@ -704,7 +793,9 @@ Use `gh pr create` via `bash`. When asked to create a PR:
 - Do NOT retry failing commands in a sleep loop — diagnose the root cause first
 ## Communication
-- One short line before the first tool call in a turn (e.g. "Reading the auth module and checking the test suite...").
+- **ACT FIRST, narrate after.** Do NOT write out a multi-step numbered plan as prose and then stop. Execute immediately — use tools right away.
+- One short line before the first tool call is fine (e.g. "Reading the auth module..."). That's it. No verbose announcements.
+- If you want to plan, use the **`think` tool privately** — never dump a plan into your text response before acting. The user cannot run plan text; they need results.
 - Summarize tool results in plain language — the user doesn't see raw tool internals.
 - After completing a task: clear summary of what changed, where, and why.
 - If the user pastes UI copy / noise / error output, extract the real intent and act on source files.
@@ -851,22 +942,10 @@ def build_root_agent(
       tool_config=tool_cfg,
     )
-  # global_instruction applies to the entire agent tree (including sub-agents
-  # spawned via run_subtask or multi-agent delegation).  Keep it short — it's
-  # prepended to every agent's effective instruction.
-  global_instr = (
-    "You are GemCode, an expert software engineering agent powered by Google Gemini. "
-    "Think deeply about what the person actually wants before you do anything. "
-    "Use exactly as many tools as the task genuinely requires — no more. "
-    "Act fully and autonomously when action is needed. "
-    "Always use read-only tools before shell or write tools."
-  )
   agent_kwargs: dict = dict(
       model=cfg.model,
       name="gemcode",
       instruction=build_instruction(cfg),
-      global_instruction=global_instr,
       tools=tools,
       generate_content_config=gen_cfg,
       **cb_kwargs,
@@ -876,6 +955,17 @@ def build_root_agent(
   if code_executor is not None:
     agent_kwargs["code_executor"] = code_executor
+  # Optional: ADK PlanReActPlanner — injects a structured "plan then act" pass
+  # into every turn at the framework level (not just via prompting).
+  # Enable with: GEMCODE_PLANREACT=1
+  import os as _os
+  if _os.environ.get("GEMCODE_PLANREACT", "").lower() in ("1", "true", "yes", "on"):
+    try:
+      from google.adk.planners import PlanReActPlanner
+      agent_kwargs["planner"] = PlanReActPlanner()
+    except Exception:
+      pass
   return LlmAgent(**agent_kwargs)

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/callbacks.py RENAMED Viewed

@@ -626,29 +626,44 @@ def make_on_tool_error_callback(cfg: GemCodeConfig):
 def make_on_model_error_callback(cfg: GemCodeConfig):
-  """Structured model errors to the user + audit trail."""
+  """Structured model errors to the user + audit trail.
+  For transient errors (HTTP 503, 429, server-overloaded) we return None so the
+  exception propagates to invoke.py, which retries with exponential backoff.
+  For permanent errors we absorb and return a user-friendly LlmResponse.
+  """
   async def on_model_error(*, callback_context, llm_request, error: Exception):
-    try:
-      st = callback_context.state
-      if st is not None and not st.get(TERMINAL_REASON_KEY):
-        st[TERMINAL_REASON_KEY] = "model_error"
-    except Exception:
-      pass
+    from gemcode.model_errors import is_transient_error
     append_audit(
         cfg.project_root,
         {
             "phase": "model_exception",
             "error": f"{type(error).__name__}: {error}",
+            "transient": is_transient_error(error),
         },
     )
+    # Transient errors (503, 429, server-overloaded): let the exception propagate
+    # so invoke.py can retry with backoff. Do NOT set terminal state here — the
+    # turn is not over yet.
+    if is_transient_error(error):
+      return None
+    # Permanent errors: mark session terminal and return a user-friendly message.
+    try:
+      st = callback_context.state
+      if st is not None and not st.get(TERMINAL_REASON_KEY):
+        st[TERMINAL_REASON_KEY] = "model_error"
+    except Exception:
+      pass
     if _truthy_env("GEMCODE_VERBOSE_MODEL_ERRORS", default=False):
       import traceback
       traceback.print_exception(type(error), error, error.__traceback__, file=sys.stderr)
     user_text = format_model_error_for_user(error)
-    # Scrollback/TUI already prints "GemCode:" before assistant text — avoid "GemCode: GemCode:".
     from google.adk.models.llm_response import LlmResponse
     from google.genai import types

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/config.py RENAMED Viewed

@@ -245,10 +245,10 @@ class GemCodeConfig:
   )
   # Controls how the TUI renders model thinking: True = full Rich Markdown,
-  # False = collapsed one-line excerpt (default, like OpenClaude).
+  # False = collapsed one-line excerpt.
   # Toggled at runtime via /thinking verbose|brief.
   show_full_thinking: bool = field(
-    default_factory=lambda: _truthy_env("GEMCODE_SHOW_FULL_THINKING", default=False)
+    default_factory=lambda: _truthy_env("GEMCODE_SHOW_FULL_THINKING", default=True)
   )
   # Enable ADK BuiltInCodeExecutor for safe sandboxed Python execution via

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/invoke.py RENAMED Viewed

@@ -6,6 +6,7 @@ CLI and tests call `run_turn` with a Runner already bound to app + session servi
 from __future__ import annotations
+import asyncio
 import os
 import sys
 from typing import Any
@@ -16,6 +17,11 @@ from google.adk.runners import Runner
 from google.genai import types
+# Delays (seconds) between successive transient-error retries: 2s, 5s, 12s.
+# Three retries = up to ~19 seconds of total wait before giving up.
+_TRANSIENT_RETRY_DELAYS = [2.0, 5.0, 12.0]
 _HITL_PROMPT_LOCK = Lock()
@@ -160,11 +166,43 @@ async def run_turn(
       # Runner handoff loop: if tools request confirmations, we pause here to
       # ask HITL, then send back function responses so ADK can re-execute the
       # tools.
+      #
+      # Transient API errors (HTTP 503, 429) are retried here with exponential
+      # backoff. on_model_error returns None for these, so the exception
+      # propagates from runner.run_async and we catch it below.
       do_reset = True
+      transient_attempts = 0
       while True:
-        events = await _await_runner_events(
-          next_message=current_message, do_reset=do_reset
-        )
+        try:
+          events = await _await_runner_events(
+            next_message=current_message, do_reset=do_reset
+          )
+        except Exception as _exc:
+          from gemcode.model_errors import is_transient_error
+          if is_transient_error(_exc) and transient_attempts < len(_TRANSIENT_RETRY_DELAYS):
+            delay = _TRANSIENT_RETRY_DELAYS[transient_attempts]
+            transient_attempts += 1
+            _tui_active = os.environ.get("GEMCODE_TUI_ACTIVE", "0").lower() in ("1", "true", "yes", "on")
+            _msg = (
+              f"\n[gemcode] Transient API error ({type(_exc).__name__}). "
+              f"Retrying in {delay:.0f}s (attempt {transient_attempts}/{len(_TRANSIENT_RETRY_DELAYS)})...\n"
+            )
+            print(_msg, file=sys.stderr)
+            # Surface retry notice in TUI if available.
+            if _tui_active:
+              try:
+                from gemcode.tui import scrollback as _sb
+                _sb._transient_retry_notice = _msg  # type: ignore[attr-defined]
+              except Exception:
+                pass
+            await asyncio.sleep(delay)
+            # Retry the same message from scratch (session history is intact in SQLite).
+            continue
+          # Non-transient or out of retries: re-raise so the TUI surfaces it.
+          raise
+        # Reset transient counter after a successful model call.
+        transient_attempts = 0
         collected.extend(events)
         confirmation_fcs = _get_confirmation_requests(events)

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/model_errors.py RENAMED Viewed

@@ -5,6 +5,38 @@ from __future__ import annotations
 import re
+def is_transient_error(error: Exception) -> bool:
+  """Return True for transient API errors that are safe to retry.
+
+  Transient means: the request was fine, the server was temporarily unavailable or
+  rate-limited. Retrying the same request (with backoff) will likely succeed.
+
+  Three checks are tried in order:
+    1. ``google.genai`` ``APIError`` with status 429 or any 5xx code.
+    2. Exception class name containing ``ResourceExhausted``,
+       ``ServiceUnavailable`` or ``DeadlineExceeded``.
+    3. Message text containing "503" or "429" together with a known phrase.
+       This match is case-insensitive, so "503 Service Unavailable" and
+       "429 Rate limit exceeded" are recognised.
+
+  Args:
+    error: The exception raised by the model call.
+
+  Returns:
+    True if the error looks transient, False otherwise.
+  """
+  try:
+    from google.genai import errors as genai_errors
+    if isinstance(error, genai_errors.APIError):
+      code = int(getattr(error, "code", None) or 0)
+      # 429 = rate limited; every 5xx status is a server-side failure.
+      if code == 429 or 500 <= code < 600:
+        return True
+  except Exception:
+    # google-genai missing or an unparsable code: fall through to the
+    # name/message heuristics below.
+    pass
+  # gRPC / google-api-core equivalents, matched by class name so neither
+  # package has to be importable.
+  et = type(error).__name__
+  if "ResourceExhausted" in et or "ServiceUnavailable" in et or "DeadlineExceeded" in et:
+    return True
+  # Lowercase once so the phrase checks do not depend on the server's casing.
+  msg = str(error).lower()
+  if "503" in msg and any(p in msg for p in ("high demand", "service unavailable", "overloaded")):
+    return True
+  if "429" in msg and any(p in msg for p in ("rate limit", "quota", "resource exhausted")):
+    return True
+  return False
 def _sanitize_api_text(s: str) -> str:
   """Strip likely API key material from strings shown to the user."""
   if not s:

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/plugins/terminal_hooks_plugin.py RENAMED Viewed

@@ -141,6 +141,19 @@ class GemCodeTerminalHooksPlugin(BasePlugin):
         },
       )
+      # Surface suggestion to the TUI by storing it on cfg.
+      # The TUI reads cfg._last_prompt_suggestion after each turn and displays it.
+      try:
+        object.__setattr__(self.cfg, "_last_prompt_suggestion", suggestion)
+      except Exception:
+        pass
+    else:
+      # Clear any stale suggestion from the previous turn.
+      try:
+        object.__setattr__(self.cfg, "_last_prompt_suggestion", None)
+      except Exception:
+        pass
     if getattr(self.cfg, "enable_memory", False):
       try:
         await callback_context.add_session_to_memory()

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/repl_commands.py RENAMED Viewed

@@ -253,7 +253,7 @@ def slash_help_lines() -> list[str]:
       "  Thinking:",
       "  /thinking             Show current thinking config",
       "  /thinking verbose     Show full thinking text each turn",
-      "  /thinking brief       Show collapsed one-line excerpt (default)",
+      "  /thinking brief       Show collapsed one-line excerpt",
       "  /thinking off         Disable model thinking",
       "  /thinking on          Re-enable thinking (auto budget/level)",
       "  /thinking budget <N>  Set thinking token budget (Gemini 2.5, 0=off, -1=dynamic)",

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/repl_slash.py RENAMED Viewed

@@ -1001,7 +1001,7 @@ async def process_repl_slash(
         out("  /thinking level <minimal|low|medium|high>")
       out("Display commands (all models):")
       out("  /thinking verbose  — show full thinking text each turn")
-      out("  /thinking brief    — show collapsed one-line excerpt (default)")
+      out("  /thinking brief    — show collapsed one-line excerpt")
       out()
       return ReplSlashResult(skip_model_turn=True)
@@ -1016,7 +1016,7 @@ async def process_repl_slash(
     if sub in ("brief", "short", "collapsed"):
       cfg.show_full_thinking = False
-      out("thinking display: brief — collapsed one-line excerpt (default)")
+      out("thinking display: brief — collapsed one-line excerpt")
       out()
       return ReplSlashResult(skip_model_turn=True)

{gemcode-0.3.59 → gemcode-0.3.65}/src/gemcode/session_runtime.py RENAMED Viewed

@@ -22,7 +22,7 @@ warnings.filterwarnings("ignore", category=UserWarning, message=".*EXPERIMENTAL.
 from google.adk.runners import Runner
 from google.adk.sessions.sqlite_session_service import SqliteSessionService
-from gemcode.agent import build_root_agent
+from gemcode.agent import build_global_instruction, build_root_agent
 from gemcode.config import GemCodeConfig
 from gemcode.modality_tools import build_extra_tools as build_modality_extra_tools
 from gemcode.memory.embedding_memory_service import EmbeddingFileMemoryService
@@ -31,6 +31,52 @@ from gemcode.plugins.terminal_hooks_plugin import GemCodeTerminalHooksPlugin
 from gemcode.plugins.tool_recovery_plugin import GemCodeReflectAndRetryToolPlugin
+# ---------------------------------------------------------------------------
+# ADK App-level feature helpers
+# ---------------------------------------------------------------------------
+def _build_context_cache_config():
+  """Build the ADK ``ContextCacheConfig``, or return None when caching is off.
+
+  Caching is on unless ``GEMCODE_CONTEXT_CACHE`` is set (case-insensitively) to
+  ``0``, ``false``, ``no`` or ``off``.  None is also returned when the ADK class
+  cannot be imported or constructed.
+  """
+  flag = os.environ.get("GEMCODE_CONTEXT_CACHE", "1").lower()
+  if flag in ("0", "false", "no", "off"):
+    return None
+  try:
+    from google.adk.agents.context_cache_config import ContextCacheConfig
+    settings = dict(
+      cache_intervals=10,   # refresh the cache every 10 invocations
+      ttl_seconds=1800,     # cache lives 30 minutes
+      min_tokens=1024,      # skip caching tiny sessions (< ~1 K tokens)
+    )
+    return ContextCacheConfig(**settings)
+  except Exception:
+    return None
+def _build_app(agent, plugins, cfg: GemCodeConfig):
+  """Wrap the root agent in an ADK ``App`` named "gemcode", or return None.
+
+  The App receives the agent, the plugin list, and whatever
+  ``_build_context_cache_config()`` returns.  ``cfg`` is accepted but not read
+  by this function.
+
+  Returns:
+    The constructed ``App``, or None if importing or constructing it raises,
+    in which case the caller is expected to fall back to the legacy Runner
+    kwargs.
+  """
+  try:
+    from google.adk.apps.app import App
+    app_kwargs = {
+      "name": "gemcode",
+      "root_agent": agent,
+      "plugins": plugins,
+      "context_cache_config": _build_context_cache_config(),
+    }
+    app = App(**app_kwargs)
+  except Exception:
+    # Fall back silently — Runner still accepts the legacy kwargs.
+    return None
+  return app
 def session_db_path(cfg: GemCodeConfig) -> Path:
   return cfg.project_root / ".gemcode" / "sessions.sqlite"
@@ -242,15 +288,23 @@ def _make_safe_computer_toolset(computer):
 def _build_artifact_service(cfg: GemCodeConfig):
-  """
-  Return an ADK ArtifactService for this session, or None if disabled.
+  """Return an ADK ArtifactService for this session, or None if disabled.
-  Uses InMemoryArtifactService so artifacts are available within the session
-  without requiring GCS credentials. The agent can save screenshots, generated
-  files, large reports, etc. as artifacts to avoid bloating session history.
+  Uses ``FileArtifactService`` backed by ``.gemcode/artifacts/`` so that
+  artifacts (screenshots, generated reports, diffs, etc.) survive session
+  restarts.  Falls back to ``InMemoryArtifactService`` if the file-based
+  service is unavailable (older ADK).
   """
   if not getattr(cfg, "enable_artifacts", True):
     return None
+  try:
+    from google.adk.artifacts import FileArtifactService
+    artifacts_dir = cfg.project_root / ".gemcode" / "artifacts"
+    artifacts_dir.mkdir(parents=True, exist_ok=True)
+    return FileArtifactService(root_dir=artifacts_dir)
+  except Exception:
+    pass
+  # Fallback for older ADK versions that don't have FileArtifactService.
   try:
     from google.adk.artifacts import InMemoryArtifactService
     return InMemoryArtifactService()
@@ -344,11 +398,32 @@ def create_runner(cfg: GemCodeConfig, extra_tools: list | None = None) -> Runner
   db.parent.mkdir(parents=True, exist_ok=True)
   session_service = SqliteSessionService(str(db))
-  plugins = [GemCodeTerminalHooksPlugin(cfg)]
-  # Place recovery plugin before terminal hooks so it can influence tool results
-  # during the invocation.
-  if True:
-    plugins.insert(0, GemCodeReflectAndRetryToolPlugin(cfg))
+  # ── Plugins ──────────────────────────────────────────────────────────────
+  # Recovery plugin first so it can intercept tool errors before terminal hooks.
+  plugins = [GemCodeReflectAndRetryToolPlugin(cfg), GemCodeTerminalHooksPlugin(cfg)]
+  # Global instruction is now applied via ADK's GlobalInstructionPlugin (the
+  # modern replacement for the deprecated LlmAgent.global_instruction field).
+  try:
+    from google.adk.plugins.global_instruction_plugin import GlobalInstructionPlugin
+    plugins.insert(0, GlobalInstructionPlugin(build_global_instruction()))
+  except Exception:
+    pass
+  # Optional: rich YAML debug log (every LLM request/response + tool calls).
+  # Enable with: GEMCODE_DEBUG_LOG=1
+  if os.environ.get("GEMCODE_DEBUG_LOG", "").lower() in ("1", "true", "yes", "on"):
+    try:
+      from google.adk.plugins.debug_logging_plugin import DebugLoggingPlugin
+      debug_log_path = cfg.project_root / ".gemcode" / "debug.yaml"
+      plugins.append(DebugLoggingPlugin(
+        output_path=str(debug_log_path),
+        include_session_state=True,
+      ))
+    except Exception:
+      pass
+  # ── Memory service ────────────────────────────────────────────────────────
   memory_service = None
   if getattr(cfg, "enable_memory", False):
     mem_path = cfg.project_root / ".gemcode" / "memories.jsonl"
@@ -361,14 +436,31 @@ def create_runner(cfg: GemCodeConfig, extra_tools: list | None = None) -> Runner
   artifact_service = _build_artifact_service(cfg)
-  runner_kwargs: dict = dict(
-      app_name="gemcode",
-      agent=agent,
-      session_service=session_service,
-      plugins=plugins,
-      memory_service=memory_service,
-      auto_create_session=True,
-  )
+  # ── Runner via ADK App (modern pattern) ──────────────────────────────────
+  # App is the recommended top-level container as of ADK 1.x.  It owns the
+  # plugin list and context-cache config so Runner stays clean.
+  # ``plugins=`` on Runner is officially deprecated; using App avoids the
+  # DeprecationWarning and enables context caching + future App-level features.
+  app = _build_app(agent, plugins, cfg)
+  if app is not None:
+    runner_kwargs: dict = dict(
+        app=app,
+        session_service=session_service,
+        memory_service=memory_service,
+        auto_create_session=True,
+    )
+  else:
+    # Legacy fallback if App is unavailable (very old ADK installs).
+    runner_kwargs = dict(
+        app_name="gemcode",
+        agent=agent,
+        session_service=session_service,
+        plugins=plugins,
+        memory_service=memory_service,
+        auto_create_session=True,
+    )
   if artifact_service is not None:
     runner_kwargs["artifact_service"] = artifact_service

gemcode 0.3.59__tar.gz → 0.3.65__tar.gz

gemcode 0.3.59__tar.gz → 0.3.65__tar.gz