PyPI - gemcode - Versions diffs - 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl - Mend

gemcode 0.3.74-py3-none-any.whl → 0.3.76-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

gemcode/agent.py +35 -2
gemcode/autotune.py +76 -0
gemcode/checkpoints.py +144 -0
gemcode/cli.py +50 -0
gemcode/config.py +23 -0
gemcode/curated_memory.py +110 -0
gemcode/evals/harness.py +126 -0
gemcode/ide_protocol.py +71 -0
gemcode/ide_stdio.py +206 -0
gemcode/learning.py +122 -0
gemcode/output_styles.py +78 -0
gemcode/paths.py +60 -0
gemcode/permissions.py +3 -0
gemcode/plugins/terminal_hooks_plugin.py +12 -0
gemcode/repl_commands.py +10 -0
gemcode/repl_slash.py +504 -0
gemcode/rules.py +115 -0
gemcode/session_runtime.py +9 -0
gemcode/skills.py +299 -0
gemcode/thinking.py +8 -13
gemcode/tools/__init__.py +26 -0
gemcode/tools/bash.py +22 -0
gemcode/tools/curated_memory.py +34 -0
gemcode/tools/edit.py +150 -2
gemcode/tools/filesystem.py +65 -13
gemcode/tools/repo_map.py +11 -0
gemcode/tools/search.py +63 -19
gemcode/tools/shell.py +26 -1
gemcode/tools/skills.py +61 -0
gemcode/tui/input_handler.py +7 -2
gemcode/web/claude_sse_adapter.py +15 -270
gemcode/web/sse_adapter.py +247 -0
gemcode/web/terminal_repl.py +3 -3
{gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/METADATA +14 -1
{gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/RECORD +39 -26
{gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/WHEEL +0 -0
{gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/entry_points.txt +0 -0
{gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/licenses/LICENSE +0 -0
{gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/top_level.txt +0 -0

gemcode/agent.py CHANGED Viewed

@@ -27,6 +27,9 @@ from gemcode.limits import make_before_model_limits_callback, make_before_model_
 from gemcode.thinking import build_thinking_config
 from gemcode.tools import build_function_tools
 from gemcode.tool_prompt_manifest import build_tool_manifest
+from gemcode.skills import build_skill_manifest_text
+from gemcode.output_styles import build_output_style_section
+from gemcode.rules import build_rules_section
 def build_global_instruction() -> str:
@@ -248,6 +251,19 @@ def _build_runtime_facts(cfg: GemCodeConfig) -> str:
   git_ctx = _get_git_context(root)
   git_section = f"\n\n## Git context (snapshot at session start)\n{git_ctx}" if git_ctx else ""
+  # ── Curated memory (safe-to-inject) ───────────────────────────────────────
+  curated_section = ""
+  try:
+    snap = getattr(cfg, "_curated_memory_snapshot", None)
+    if isinstance(snap, dict) and (snap.get("text") or "").strip():
+      curated_section = (
+        "\n\n## Curated memory (safe, persistent)\n"
+        "This is small, curated memory that should be treated as durable project/user facts.\n"
+        f"{snap.get('text')}\n"
+      )
+  except Exception:
+    curated_section = ""
   return f"""## Runtime facts (authoritative for this session)
 - **Today's date:** {today}
 - **Project root** — every filesystem tool path is relative to: `{root}`
@@ -261,7 +277,7 @@ def _build_runtime_facts(cfg: GemCodeConfig) -> str:
 {kairos_section}
 - **UI banner** phrases like "GemCode Pro" are terminal marketing, not a separate API tier.
 - **Env toggles** (`GEMCODE_ENABLE_COMPUTER_USE`, `GEMCODE_MODEL`, etc.) affect only the OS process that launched gemcode. Pasting `VAR=1` in chat does NOT reconfigure a running session—tell the user to export in their shell, use project `.env`, or restart the CLI.
-- **Working in subfolders** — call `list_directory("Desktop")`, `glob_files("**/query.ts")`, `read_file("testing/ai-edtech-app/src/app/page.tsx")` directly. Never claim access is blocked unless a tool returned an explicit error.{git_section}"""
+- **Working in subfolders** — call `list_directory(\"Desktop\")`, `glob_files(\"**/query.ts\")`, `read_file(\"testing/ai-edtech-app/src/app/page.tsx\")` directly. Never claim access is blocked unless a tool returned an explicit error.{git_section}{curated_section}"""
 def _build_memory_section(cfg: GemCodeConfig) -> str:
@@ -840,7 +856,12 @@ You have two tools to persist project insights across sessions, like Claude Code
   Call this **immediately** when you discover something useful — not just at the end of tasks.
   Notes are loaded at session start so future sessions inherit this knowledge.
-- **`read_project_notes()`** — read current notes **only when starting a real engineering task** (editing, debugging, building). Do NOT call this for greetings or general questions. If notes exist and you're about to work on a task, read them once to avoid re-discovering known information."""
+- **`read_project_notes()`** — read current notes **only when starting a real engineering task** (editing, debugging, building). Do NOT call this for greetings or general questions. If notes exist and you're about to work on a task, read them once to avoid re-discovering known information.
+## Do not create vendor-specific instruction files
+- Do NOT create or modify `CLAUDE.md` or `AGENTS.md`. GemCode does not use these.
+- If project instructions are needed and the user asked for it, use `GEMINI.md` (repo root).
+"""
   # Inject capability-specific strategy sections only when those caps are on.
   if getattr(cfg, "enable_computer_use", False):
@@ -855,6 +876,18 @@ You have two tools to persist project insights across sessions, like Claude Code
   tool_manifest = build_tool_manifest(cfg)
   if tool_manifest:
     base = f"{base}\n\n{tool_manifest}"
+  # Output style: small, user-selected formatting layer.
+  style_section = build_output_style_section(cfg.project_root, getattr(cfg, "output_style", None))
+  if style_section:
+    base = f"{base}\n\n{style_section}"
+  # Rules: project conventions (path-gated based on files the agent/user touched this session).
+  touched = sorted(getattr(cfg, "_touched_paths", set()) or set())
+  rules_section = build_rules_section(cfg.project_root, touched_paths=touched or None)
+  if rules_section:
+    base = f"{base}\n\n{rules_section}"
+  skill_manifest = build_skill_manifest_text(cfg.project_root)
+  if skill_manifest:
+    base = f"{base}\n\n{skill_manifest}"
   extra = _load_gemini_md(cfg.project_root)
   if extra.strip():
     return f"{base}\n\n## Project instructions (GEMINI.md)\n{extra}"

gemcode/autotune.py ADDED Viewed

@@ -0,0 +1,76 @@
+from __future__ import annotations
+import subprocess
+import time
+from pathlib import Path
+from typing import Any
+from gemcode.evals.harness import run_eval_suite, write_eval_record
+def _sh(cmd: list[str], *, cwd: Path) -> tuple[int, str]:
+  """Run `cmd` inside `cwd`; return (exit code, stdout followed by stderr)."""
+  completed = subprocess.run(cmd, cwd=str(cwd), capture_output=True, text=True)
+  captured = "".join(stream or "" for stream in (completed.stdout, completed.stderr))
+  return int(completed.returncode), captured
+def _git_head_sha(repo: Path) -> str | None:
+  """Return what `git rev-parse HEAD` prints for `repo`, or None when git fails or prints nothing."""
+  status, output = _sh(["git", "rev-parse", "HEAD"], cwd=repo)
+  if status != 0:
+    return None
+  trimmed = (output or "").strip()
+  if not trimmed:
+    return None
+  # Only the last line of the trimmed combined output is kept.
+  return trimmed.splitlines()[-1]
+def _git_branch(repo: Path) -> str | None:
+  """Return what `git rev-parse --abbrev-ref HEAD` prints for `repo`, or None when git fails or prints nothing."""
+  status, output = _sh(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd=repo)
+  if status != 0:
+    return None
+  trimmed = (output or "").strip()
+  if not trimmed:
+    return None
+  # Only the last line of the trimmed combined output is kept.
+  return trimmed.splitlines()[-1]
+def init_autotune(*, project_root: Path, tag: str) -> dict[str, Any]:
+  """
+  AutoResearch-style setup: make sure the local branch autotune/<tag> exists.
+  Returns one of:
+  - {"error": "not_a_git_repo"} when `project_root` has no `.git` entry
+  - {"error": "invalid_tag"} when `tag` is empty or only whitespace
+  - {"status": "exists", "branch": ...} when the branch already exists (it is not checked out)
+  - {"status": "created", "branch": ...} after `git checkout -b` succeeded
+  - {"error": "branch_create_failed", "output": ...} with the last 1200 chars of git's output
+  No results ledger is created here; this function only deals with the branch.
+  """
+  repo = project_root
+  if not (repo / ".git").exists():
+    return {"error": "not_a_git_repo"}
+  clean_tag = (tag or "").strip()
+  if not clean_tag:
+    # An empty tag would produce the invalid branch name "autotune/".
+    return {"error": "invalid_tag"}
+  branch = f"autotune/{clean_tag}"
+  # Ask for refs/heads/ explicitly: a bare name can also resolve to a tag or a
+  # remote-tracking ref, which would wrongly report the branch as existing.
+  rc, _ = _sh(["git", "rev-parse", "--verify", "--quiet", f"refs/heads/{branch}"], cwd=repo)
+  if rc == 0:
+    return {"status": "exists", "branch": branch}
+  rc2, out2 = _sh(["git", "checkout", "-b", branch], cwd=repo)
+  if rc2 != 0:
+    return {"error": "branch_create_failed", "output": out2[-1200:]}
+  return {"status": "created", "branch": branch}
+def run_autotune_eval(*, project_root: Path, include_llm: bool, model: str | None = None) -> dict[str, Any]:
+  """
+  Run the eval suite, persist the result to .gemcode/evals/last_eval.json and
+  append one JSON line to .gemcode/evals/autotune_ledger.jsonl.
+  Returns the suite result with "record_path" added, plus "ledger_path" when the
+  ledger append succeeded or "ledger_error" when it failed.
+  """
+  import json
+  res = run_eval_suite(project_root=project_root, include_llm=include_llm, model=model)
+  meta = {
+    "ts": time.time(),
+    "git_sha": _git_head_sha(project_root),
+    "git_branch": _git_branch(project_root),
+  }
+  p = write_eval_record(project_root, {**meta, **res})
+  res["record_path"] = str(p)
+  # Append ledger line (the original note states .gemcode/ is gitignored — not verifiable from here).
+  ledger = project_root / ".gemcode" / "evals" / "autotune_ledger.jsonl"
+  try:
+    ledger.parent.mkdir(parents=True, exist_ok=True)
+    # Append mode creates the file on first use, so no separate "touch" is needed.
+    with ledger.open("a", encoding="utf-8") as f:
+      f.write(json.dumps({**meta, **res}, ensure_ascii=False) + "\n")
+    res["ledger_path"] = str(ledger)
+  except (OSError, TypeError, ValueError) as e:
+    # Best-effort: a ledger failure must not fail the eval, but it is reported
+    # instead of being silently dropped.
+    res["ledger_error"] = f"{type(e).__name__}: {e}"
+  return res

gemcode/checkpoints.py ADDED Viewed

@@ -0,0 +1,144 @@
+"""
+Hermes-style checkpoints for GemCode.
+Goal: make file mutations reversible with an explicit, local checkpoint log.
+Storage:
+  <project>/.gemcode/checkpoints/<checkpoint_id>/manifest.json
+  <project>/.gemcode/checkpoints/<checkpoint_id>/files/<path>
+"""
+from __future__ import annotations
+import json
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+def _now_ms() -> int:
+  """Current wall-clock time as whole milliseconds since the epoch."""
+  seconds = time.time()
+  return int(seconds * 1000)
+def _checkpoints_dir(project_root: Path) -> Path:
+  """Directory under the project that holds all checkpoints."""
+  return project_root.joinpath(".gemcode", "checkpoints")
+def _safe_rel(project_root: Path, p: Path) -> str:
+  """Path of `p` relative to `project_root` (both resolved first); raises ValueError when `p` lies outside."""
+  target = p.resolve()
+  root = project_root.resolve()
+  return str(target.relative_to(root))
+@dataclass
+class CheckpointFile:
+  """One file entry recorded in a checkpoint."""
+  path: str  # path relative to the project root
+  existed: bool  # whether the file existed when the checkpoint was taken
+@dataclass
+class Checkpoint:
+  """In-memory description of a checkpoint; carries the same fields as its manifest.json."""
+  id: str  # checkpoint directory name, of the form "cp_<ts_ms>"
+  ts_ms: int  # timestamp in milliseconds
+  op: str  # caller-supplied label for the operation being checkpointed
+  files: list[CheckpointFile]  # entries for every file that could be placed under the project root
+def create_checkpoint(
+  *,
+  project_root: Path,
+  op: str,
+  file_snapshots: list[tuple[Path, bool]],
+) -> Checkpoint:
+  """
+  Create a checkpoint capturing the *previous* contents of the provided files.
+  file_snapshots entries are (absolute_path, existed_bool).
+  Paths that do not resolve to a location under `project_root` are skipped.
+  For entries with existed=True that are regular files, the bytes are copied to
+  <checkpoint>/files/<relative path>; every kept entry is listed in manifest.json.
+  Returns the Checkpoint describing what was recorded.
+  """
+  ts = _now_ms()
+  root_dir = _checkpoints_dir(project_root)
+  root_dir.mkdir(parents=True, exist_ok=True)
+  # Two checkpoints taken within the same millisecond must not share a directory
+  # (the later one would silently overwrite the earlier snapshot), so bump the
+  # timestamp until a fresh directory can be created.
+  while True:
+    cid = f"cp_{ts}"
+    base = root_dir / cid
+    try:
+      base.mkdir()
+      break
+    except FileExistsError:
+      ts += 1
+  files_dir = base / "files"
+  files_dir.mkdir()
+  out_files: list[CheckpointFile] = []
+  for abs_path, existed in file_snapshots:
+    try:
+      rel = _safe_rel(project_root, abs_path)
+    except Exception:
+      # Outside the project root (or unresolvable): not checkpointed.
+      continue
+    out_files.append(CheckpointFile(path=rel, existed=bool(existed)))
+    if existed and abs_path.is_file():
+      target = files_dir / rel
+      target.parent.mkdir(parents=True, exist_ok=True)
+      target.write_bytes(abs_path.read_bytes())
+  manifest = {
+    "id": cid,
+    "ts_ms": ts,
+    "op": op,
+    "files": [{"path": f.path, "existed": f.existed} for f in out_files],
+  }
+  (base / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
+  return Checkpoint(id=cid, ts_ms=ts, op=op, files=out_files)
+def list_checkpoints(project_root: Path, limit: int = 20) -> list[dict[str, Any]]:
+  """Return up to `limit` (at least one) parsed checkpoint manifests, ordered by directory name descending."""
+  root_dir = _checkpoints_dir(project_root)
+  if not root_dir.is_dir():
+    return []
+  manifests = []
+  by_name = sorted(root_dir.iterdir(), key=lambda entry: entry.name)
+  for entry in reversed(by_name):
+    manifest_file = entry / "manifest.json"
+    if manifest_file.is_file():
+      try:
+        manifests.append(json.loads(manifest_file.read_text(encoding="utf-8")))
+      except Exception:
+        # Unreadable or malformed manifests are skipped.
+        continue
+      if len(manifests) >= max(1, int(limit)):
+        break
+  return manifests
+def undo_checkpoint(project_root: Path, checkpoint_id: str | None = None) -> dict[str, Any]:
+  """
+  Restore the files recorded by a checkpoint.
+  With `checkpoint_id` the named checkpoint is used; otherwise the one whose
+  directory name sorts last. Files recorded as existing are rewritten from the
+  snapshot; files recorded as not existing are deleted if present now.
+  Returns {"checkpoint_id": ..., "restored": [relative paths]} or {"error": ...}.
+  Entries that would touch anything outside `project_root`, or read a snapshot
+  from outside the checkpoint's files/ directory, are ignored.
+  """
+  d = _checkpoints_dir(project_root)
+  if not d.is_dir():
+    return {"error": "no_checkpoints"}
+  if checkpoint_id:
+    base = d / checkpoint_id
+    # The id must name a direct child of the checkpoints directory; this rejects
+    # values such as "../x" or absolute paths.
+    if base.resolve().parent != d.resolve():
+      return {"error": "invalid_checkpoint_id"}
+  else:
+    # newest
+    items = [p for p in d.iterdir() if p.is_dir()]
+    if not items:
+      return {"error": "no_checkpoints"}
+    base = max(items, key=lambda x: x.name)
+  manifest_path = base / "manifest.json"
+  if not manifest_path.is_file():
+    return {"error": "checkpoint_missing_manifest"}
+  try:
+    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
+  except Exception as e:
+    return {"error": f"checkpoint_manifest_invalid:{e}"}
+  if not isinstance(manifest, dict):
+    return {"error": "checkpoint_manifest_invalid:not_an_object"}
+  files_dir = base / "files"
+  root = project_root.resolve()
+  snapshot_root = files_dir.resolve()
+  restored = []
+  for f in manifest.get("files") or []:
+    try:
+      rel = str(f.get("path") or "")
+      existed = bool(f.get("existed"))
+      abs_path = (project_root / rel).resolve()
+      # Never write or delete outside the project, whatever the manifest says.
+      if not rel or root not in abs_path.parents:
+        continue
+      if existed:
+        src = files_dir / rel
+        if snapshot_root not in src.resolve().parents:
+          continue
+        if src.is_file():
+          abs_path.parent.mkdir(parents=True, exist_ok=True)
+          abs_path.write_bytes(src.read_bytes())
+          restored.append(rel)
+      else:
+        # File did not exist previously; remove it if it exists now.
+        if abs_path.is_file():
+          abs_path.unlink()
+          restored.append(rel)
+    except Exception:
+      # Best-effort per file: one failing entry does not abort the rest.
+      continue
+  return {"checkpoint_id": manifest.get("id") or base.name, "restored": restored}

gemcode/cli.py CHANGED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 import argparse
 import asyncio
 import getpass
+import json
 import os
 import sys
 import uuid
@@ -19,6 +20,8 @@ from gemcode.capability_routing import apply_capability_routing
 from gemcode.session_runtime import create_runner
 from gemcode.trust import is_trusted_root, trust_root
 from gemcode.repl_slash import process_repl_slash
+from gemcode.ide_stdio import main as ide_stdio_main
+from gemcode.autotune import init_autotune, run_autotune_eval
 def _events_to_text(events) -> str:
@@ -339,6 +342,20 @@ def main() -> None:
       "enable Terminal for Desktop Folder (or grant Full Disk Access)."
     )
+  # Hidden IDE engine mode: `gemcode ide --stdio`
+  if len(sys.argv) >= 2 and sys.argv[1] == "ide":
+    ide_parser = argparse.ArgumentParser(prog="gemcode ide")
+    ide_parser.add_argument(
+      "--stdio",
+      action="store_true",
+      help="Run IDE engine over stdin/stdout (JSONL)",
+    )
+    ide_args = ide_parser.parse_args(sys.argv[2:])
+    if ide_args.stdio:
+      ide_stdio_main()
+      return
+    raise SystemExit("Usage: gemcode ide --stdio")
   # Persist or rotate API key (Claude Code–style `claude login`).
   if len(sys.argv) > 1 and sys.argv[1] == "login":
     load_cli_environment()
@@ -461,6 +478,39 @@ def main() -> None:
     print(f"smoke ok: {len(inspections)} tools validated")
     return
+  # Eval harness (AutoResearch-style gates).
+  if len(sys.argv) > 1 and sys.argv[1] == "eval":
+    eval_parser = argparse.ArgumentParser(prog="gemcode eval")
+    eval_parser.add_argument("-C", "--directory", type=Path, default=Path.cwd(), help="Project root")
+    eval_parser.add_argument("--llm", action="store_true", help="Include LLM golden prompts (costs tokens)")
+    eval_parser.add_argument("--model", default=None, help="Override model for LLM evals")
+    args = eval_parser.parse_args(sys.argv[2:])
+    from gemcode.evals.harness import run_eval_suite, write_eval_record
+    res = run_eval_suite(project_root=args.directory.resolve(), include_llm=bool(args.llm), model=args.model)
+    p = write_eval_record(args.directory.resolve(), res)
+    print(json.dumps(res, ensure_ascii=False, indent=2))
+    print(f"\n[gemcode eval] wrote {p}", file=sys.stderr)
+    raise SystemExit(0 if res.get("ok") else 1)
+  # Autotune scaffolding (AutoResearch-inspired).
+  if len(sys.argv) > 1 and sys.argv[1] == "autotune":
+    at_parser = argparse.ArgumentParser(prog="gemcode autotune")
+    at_parser.add_argument("subcommand", choices=("init", "eval"))
+    at_parser.add_argument("-C", "--directory", type=Path, default=Path.cwd(), help="Project root")
+    at_parser.add_argument("--tag", default=None, help="Run tag (e.g. apr7)")
+    at_parser.add_argument("--llm", action="store_true", help="Include LLM golden prompts (costs tokens)")
+    at_parser.add_argument("--model", default=None, help="Override model for LLM evals")
+    args = at_parser.parse_args(sys.argv[2:])
+    root = args.directory.resolve()
+    if args.subcommand == "init":
+      if not args.tag:
+        raise SystemExit("autotune init requires --tag")
+      print(json.dumps(init_autotune(project_root=root, tag=str(args.tag)), ensure_ascii=False, indent=2))
+      return
+    # eval
+    print(json.dumps(run_autotune_eval(project_root=root, include_llm=bool(args.llm), model=args.model), ensure_ascii=False, indent=2))
+    return
   # Live audio mode (Gemini Live API via ADK run_live()).
   if len(sys.argv) > 1 and sys.argv[1] == "live-audio":
     audio_parser = argparse.ArgumentParser(

gemcode/config.py CHANGED Viewed

@@ -206,6 +206,12 @@ class GemCodeConfig:
     default_factory=lambda: _truthy_env("GEMCODE_ENABLE_MEMORY", default=False)
   )
+  # Output style: optional extra system-prompt section loaded from
+  # `.gemcode/output-styles/<name>.md` or `~/.gemcode/output-styles/<name>.md`.
+  output_style: str | None = field(
+    default_factory=lambda: os.environ.get("GEMCODE_OUTPUT_STYLE") or None
+  )
   # Modality toggles (tool injection + routing).
   enable_deep_research: bool = field(
     default_factory=lambda: _truthy_env("GEMCODE_ENABLE_DEEP_RESEARCH", default=False)
@@ -340,6 +346,23 @@ class GemCodeConfig:
     default_factory=lambda: _truthy_env("GEMCODE_ENABLE_WEB_SEARCH", default=False)
   )
+  # IDE mode (VS Code extension): the engine should *propose* writes/commands,
+  # and the IDE applies them (WorkspaceEdit / terminal task) after user approval.
+  ide_proposal_mode: bool = False
+  ide_allow_write: bool = False
+  ide_allow_shell: bool = False
+  # Checkpointing (Hermes-style): snapshot files before mutating tools so users
+  # can undo accidental agent edits.
+  enable_checkpoints: bool = field(
+    default_factory=lambda: _truthy_env("GEMCODE_CHECKPOINTS", default=True)
+  )
+  # Background learner (Hermes-style): post-turn distillation into curated memory/notes.
+  enable_background_learner: bool = field(
+    default_factory=lambda: _truthy_env("GEMCODE_BACKGROUND_LEARNER", default=False)
+  )
   def __post_init__(self) -> None:
     self.project_root = self.project_root.resolve()
     # Default agentic depth when env omits GEMCODE_MAX_LLM_CALLS (was: None → SDK default).

gemcode/curated_memory.py ADDED Viewed

@@ -0,0 +1,110 @@
+"""
+Curated memory store (Hermes-style).
+This is distinct from ADK's `.gemcode/memories.jsonl`:
+- memories.jsonl: auto-generated, retrieval-oriented, noisy by design
+- curated memory: small, human/agent curated facts that are safe to re-inject
+Files:
+  <project>/.gemcode/GEMCODE_MEMORY.md  (project facts, conventions, commands)
+  <project>/.gemcode/GEMCODE_USER.md    (user preferences for this project)
+Backward compatibility:
+  - If older files exist, they are still read:
+    - .gemcode/MEMORY.md
+    - .gemcode/USER.md
+"""
+from __future__ import annotations
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+# Marker substrings: text containing one of these is rejected by _scan_safe()
+# with the reason "looks_sensitive".
+_SUSPICIOUS = [
+  "api_key",
+  "access key",
+  "secret",
+  "password",
+  "token",
+  "private key",
+  "-----BEGIN",
+]
+def memory_paths(project_root: Path) -> tuple[Path, Path]:
+  """Return (project memory file, user preferences file) under <project>/.gemcode."""
+  store = project_root / ".gemcode"
+  memory_file = store / "GEMCODE_MEMORY.md"
+  user_file = store / "GEMCODE_USER.md"
+  return memory_file, user_file
+def _legacy_memory_paths(project_root: Path) -> tuple[Path, Path]:
+  """Return the older (MEMORY.md, USER.md) locations under <project>/.gemcode."""
+  store = project_root / ".gemcode"
+  old_memory = store / "MEMORY.md"
+  old_user = store / "USER.md"
+  return old_memory, old_user
+def _scan_safe(text: str) -> str | None:
+  """
+  Decide whether `text` may be stored as curated memory.
+  Returns None when acceptable, otherwise a reason code: "empty", "too_long"
+  (more than 4000 chars after trimming), "looks_sensitive" (contains a
+  _SUSPICIOUS marker, compared case-insensitively) or "control_chars".
+  """
+  t = (text or "").strip()
+  if not t:
+    return "empty"
+  if len(t) > 4000:
+    return "too_long"
+  low = t.lower()
+  # Lowercase the markers as well: an entry such as "-----BEGIN" could otherwise
+  # never match the lowercased text.
+  if any(s.lower() in low for s in _SUSPICIOUS):
+    return "looks_sensitive"
+  # Block invisible control characters except newline/tab.
+  if any(ord(ch) < 32 and ch not in ("\n", "\t") for ch in t):
+    return "control_chars"
+  return None
+def load_snapshot(project_root: Path, *, max_chars: int = 6000) -> dict[str, Any]:
+  """
+  Read the curated memory and user files and return them as one text snapshot.
+  The result has the keys "exists", "memory_path", "user_path", "text" and "chars";
+  text longer than `max_chars` is cut and suffixed with a "(truncated)" marker.
+  """
+  def _text_of(candidate: Path) -> str:
+    return candidate.read_text(encoding="utf-8", errors="replace") if candidate.is_file() else ""
+  new_mem, new_user = memory_paths(project_root)
+  old_mem, old_user = _legacy_memory_paths(project_root)
+  # New filenames win; the legacy file is used when the new one yields no text.
+  memory_text = _text_of(new_mem) or _text_of(old_mem)
+  user_text = _text_of(new_user) or _text_of(old_user)
+  combined = "\n\n".join((memory_text.strip(), user_text.strip())).strip()
+  if len(combined) > max_chars:
+    combined = combined[:max_chars] + "\n\n(truncated)\n"
+  has_content = bool(memory_text.strip() or user_text.strip())
+  reported_mem = new_mem if new_mem.is_file() else old_mem
+  reported_user = new_user if new_user.is_file() else old_user
+  return {
+    "exists": has_content,
+    "memory_path": str(reported_mem),
+    "user_path": str(reported_user),
+    "text": combined,
+    "chars": len(combined),
+  }
+def append_fact(project_root: Path, *, target: str, text: str) -> dict[str, Any]:
+  """
+  Append a curated fact to GEMCODE_MEMORY.md or GEMCODE_USER.md under .gemcode/.
+  target: 'user' selects the user file; any other value selects the project memory file.
+  Returns {"error": "rejected:<reason>"} when _scan_safe() refuses the text,
+  {"status": "already_exists", "path": ...} when the trimmed text already occurs
+  anywhere in the file, otherwise {"status": "appended", "path": ...}.
+  """
+  err = _scan_safe(text)
+  if err:
+    return {"error": f"rejected:{err}"}
+  mem, user = memory_paths(project_root)
+  p = user if (target or "").strip().lower() == "user" else mem
+  p.parent.mkdir(parents=True, exist_ok=True)
+  if not p.exists():
+    hdr = "# Curated memory\n\nThis file is safe-to-inject project memory.\n\n"
+    p.write_text(hdr, encoding="utf-8")
+  cur = p.read_text(encoding="utf-8", errors="replace")
+  stripped = text.strip()
+  if stripped in cur:
+    return {"status": "already_exists", "path": str(p)}
+  ts = datetime.now().strftime("%Y-%m-%d %H:%M")
+  entry = f"\n<!-- {ts} -->\n- {stripped}\n"
+  # Append rather than rewrite the whole file: `cur` was decoded with
+  # errors="replace", so writing it back would permanently alter undecodable
+  # bytes, and an interrupted full rewrite could lose existing memory.
+  with p.open("a", encoding="utf-8") as f:
+    f.write(entry)
+  return {"status": "appended", "path": str(p)}

gemcode/evals/harness.py ADDED Viewed

@@ -0,0 +1,126 @@
+from __future__ import annotations
+import asyncio
+import json
+import os
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable
+from gemcode.config import GemCodeConfig, load_cli_environment
+from gemcode.invoke import run_turn
+from gemcode.session_runtime import create_runner
+@dataclass
+class EvalResult:
+  """Outcome of a single eval gate or golden prompt."""
+  name: str  # identifier of the gate/prompt, e.g. "tools_smoke" or "pytest"
+  ok: bool  # whether the check passed
+  score: float  # the checks in this file assign 1.0 on pass and 0.0 on failure
+  details: str = ""  # output tail, reason code, or response excerpt
+def _run_cmd(cmd: str, *, cwd: Path) -> tuple[int, str]:
+  """
+  Run the shell command line `cmd` in `cwd`; return (exit code, stdout followed by stderr).
+  The command goes through the shell (callers in this file use `VAR=value cmd` prefixes),
+  so it must only ever be given trusted, hard-coded strings.
+  If the process cannot be started at all (e.g. `cwd` does not exist), returns
+  (127, "<ErrorType>: <message>") so the caller records a failed gate instead of crashing.
+  """
+  import subprocess
+  try:
+    p = subprocess.run(cmd, cwd=str(cwd), shell=True, capture_output=True, text=True)
+  except OSError as e:
+    return 127, f"{type(e).__name__}: {e}"
+  out = (p.stdout or "") + (p.stderr or "")
+  return int(p.returncode), out
+def _events_to_text(events: list) -> str:
+  """Join the text of every part of every non-user event, then trim surrounding whitespace."""
+  chunks: list[str] = []
+  for ev in events:
+    try:
+      if not (ev.content and ev.content.parts):
+        continue
+      if getattr(ev, "author", None) == "user":
+        continue
+      for piece in ev.content.parts:
+        fragment = getattr(piece, "text", None)
+        if fragment:
+          chunks.append(fragment)
+    except Exception:
+      # Events that do not have the expected shape are ignored.
+      continue
+  return "".join(chunks).strip()
+async def _eval_golden_prompt(cfg: GemCodeConfig, prompt: str, *, name: str) -> EvalResult:
+  """
+  Send `prompt` through a fresh runner and grade the reply.
+  The reply fails when it is empty or contains "Traceback" / "SyntaxError";
+  otherwise it passes and its first 400 characters are kept as details.
+  """
+  runner = create_runner(cfg, extra_tools=None)
+  try:
+    # At most 32 model calls per golden prompt, or fewer if the config says so.
+    call_cap = min(int(getattr(cfg, "max_llm_calls", 256) or 256), 32)
+    events = await run_turn(
+      runner,
+      user_id="local",
+      session_id=f"eval:{name}",
+      prompt=prompt,
+      max_llm_calls=call_cap,
+      cfg=cfg,
+    )
+    text = _events_to_text(events)
+  finally:
+    await runner.close()
+  if not text:
+    verdict = EvalResult(name=name, ok=False, score=0.0, details="empty_output")
+  elif "Traceback" in text or "SyntaxError" in text:
+    verdict = EvalResult(name=name, ok=False, score=0.0, details="looks_like_exception_text")
+  else:
+    verdict = EvalResult(name=name, ok=True, score=1.0, details=text[:400])
+  return verdict
+def run_eval_suite(
+  *,
+  project_root: Path,
+  include_llm: bool,
+  model: str | None = None,
+) -> dict[str, Any]:
+  """
+  Fixed evaluation harness (AutoResearch-style): deterministic gates + optional LLM golden prompts.
+  Returns a dict with "ok" (both deterministic gates passed), "score" (mean of
+  all result scores), "elapsed_s" and "results" (one dict per EvalResult).
+  """
+  t0 = time.time()
+  load_cli_environment()
+  cfg = GemCodeConfig(project_root=project_root)
+  if model:
+    cfg.model = model
+    cfg.model_overridden = True
+  results: list[EvalResult] = []
+  # Gate 1: tool schema smoke
+  # Both gate commands run inside <project_root>/gemcode with PYTHONPATH=src.
+  rc, out = _run_cmd("PYTHONPATH=src python3 -m gemcode tools smoke", cwd=project_root / "gemcode")
+  results.append(EvalResult(name="tools_smoke", ok=(rc == 0), score=1.0 if rc == 0 else 0.0, details=out[-800:]))
+  # Gate 2: pytest if present
+  tests_dir = project_root / "gemcode" / "tests"
+  if tests_dir.is_dir():
+    rc2, out2 = _run_cmd("PYTHONPATH=src python3 -m pytest -q", cwd=project_root / "gemcode")
+    results.append(EvalResult(name="pytest", ok=(rc2 == 0), score=1.0 if rc2 == 0 else 0.0, details=out2[-1200:]))
+  if include_llm:
+    goldens = [
+      ("no_op_greeting", "hii"),
+      ("explain_mode", "Explain what tools you have available, briefly."),
+    ]
+    # Golden prompts run one after another; each appends its own EvalResult.
+    async def _run():
+      for n, p in goldens:
+        results.append(await _eval_golden_prompt(cfg, p, name=n))
+    asyncio.run(_run())
+  # Only the deterministic gates decide "ok"; LLM results influence "score" only.
+  ok_all = all(r.ok for r in results if r.name in ("tools_smoke", "pytest"))
+  score = float(sum(r.score for r in results)) / max(1, len(results))
+  elapsed = time.time() - t0
+  return {
+    "ok": bool(ok_all),
+    "score": score,
+    "elapsed_s": elapsed,
+    "results": [r.__dict__ for r in results],
+  }
+def write_eval_record(project_root: Path, record: dict[str, Any]) -> Path:
+  """Write `record` as indented JSON to <project>/.gemcode/evals/last_eval.json and return that path."""
+  evals_dir = project_root / ".gemcode" / "evals"
+  evals_dir.mkdir(parents=True, exist_ok=True)
+  target = evals_dir / "last_eval.json"
+  payload = json.dumps(record, ensure_ascii=False, indent=2)
+  target.write_text(f"{payload}\n", encoding="utf-8")
+  return target

gemcode 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl

gemcode 0.3.74-py3-none-any.whl → 0.3.76-py3-none-any.whl