gemcode 0.3.74__py3-none-any.whl → 0.3.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. gemcode/agent.py +35 -2
  2. gemcode/autotune.py +76 -0
  3. gemcode/checkpoints.py +144 -0
  4. gemcode/cli.py +50 -0
  5. gemcode/config.py +23 -0
  6. gemcode/curated_memory.py +110 -0
  7. gemcode/evals/harness.py +126 -0
  8. gemcode/ide_protocol.py +71 -0
  9. gemcode/ide_stdio.py +206 -0
  10. gemcode/learning.py +122 -0
  11. gemcode/output_styles.py +78 -0
  12. gemcode/paths.py +60 -0
  13. gemcode/permissions.py +3 -0
  14. gemcode/plugins/terminal_hooks_plugin.py +12 -0
  15. gemcode/repl_commands.py +10 -0
  16. gemcode/repl_slash.py +504 -0
  17. gemcode/rules.py +115 -0
  18. gemcode/session_runtime.py +9 -0
  19. gemcode/skills.py +299 -0
  20. gemcode/thinking.py +8 -13
  21. gemcode/tools/__init__.py +26 -0
  22. gemcode/tools/bash.py +22 -0
  23. gemcode/tools/curated_memory.py +34 -0
  24. gemcode/tools/edit.py +150 -2
  25. gemcode/tools/filesystem.py +65 -13
  26. gemcode/tools/repo_map.py +11 -0
  27. gemcode/tools/search.py +63 -19
  28. gemcode/tools/shell.py +26 -1
  29. gemcode/tools/skills.py +61 -0
  30. gemcode/tui/input_handler.py +7 -2
  31. gemcode/web/claude_sse_adapter.py +15 -270
  32. gemcode/web/sse_adapter.py +247 -0
  33. gemcode/web/terminal_repl.py +3 -3
  34. {gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/METADATA +14 -1
  35. {gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/RECORD +39 -26
  36. {gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/WHEEL +0 -0
  37. {gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/entry_points.txt +0 -0
  38. {gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/licenses/LICENSE +0 -0
  39. {gemcode-0.3.74.dist-info → gemcode-0.3.76.dist-info}/top_level.txt +0 -0
gemcode/agent.py CHANGED
@@ -27,6 +27,9 @@ from gemcode.limits import make_before_model_limits_callback, make_before_model_
27
27
  from gemcode.thinking import build_thinking_config
28
28
  from gemcode.tools import build_function_tools
29
29
  from gemcode.tool_prompt_manifest import build_tool_manifest
30
+ from gemcode.skills import build_skill_manifest_text
31
+ from gemcode.output_styles import build_output_style_section
32
+ from gemcode.rules import build_rules_section
30
33
 
31
34
 
32
35
  def build_global_instruction() -> str:
@@ -248,6 +251,19 @@ def _build_runtime_facts(cfg: GemCodeConfig) -> str:
248
251
  git_ctx = _get_git_context(root)
249
252
  git_section = f"\n\n## Git context (snapshot at session start)\n{git_ctx}" if git_ctx else ""
250
253
 
254
+ # ── Curated memory (safe-to-inject) ───────────────────────────────────────
255
+ curated_section = ""
256
+ try:
257
+ snap = getattr(cfg, "_curated_memory_snapshot", None)
258
+ if isinstance(snap, dict) and (snap.get("text") or "").strip():
259
+ curated_section = (
260
+ "\n\n## Curated memory (safe, persistent)\n"
261
+ "This is small, curated memory that should be treated as durable project/user facts.\n"
262
+ f"{snap.get('text')}\n"
263
+ )
264
+ except Exception:
265
+ curated_section = ""
266
+
251
267
  return f"""## Runtime facts (authoritative for this session)
252
268
  - **Today's date:** {today}
253
269
  - **Project root** — every filesystem tool path is relative to: `{root}`
@@ -261,7 +277,7 @@ def _build_runtime_facts(cfg: GemCodeConfig) -> str:
261
277
  {kairos_section}
262
278
  - **UI banner** phrases like "GemCode Pro" are terminal marketing, not a separate API tier.
263
279
  - **Env toggles** (`GEMCODE_ENABLE_COMPUTER_USE`, `GEMCODE_MODEL`, etc.) affect only the OS process that launched gemcode. Pasting `VAR=1` in chat does NOT reconfigure a running session—tell the user to export in their shell, use project `.env`, or restart the CLI.
264
- - **Working in subfolders** — call `list_directory("Desktop")`, `glob_files("**/query.ts")`, `read_file("testing/ai-edtech-app/src/app/page.tsx")` directly. Never claim access is blocked unless a tool returned an explicit error.{git_section}"""
280
+ - **Working in subfolders** — call `list_directory(\"Desktop\")`, `glob_files(\"**/query.ts\")`, `read_file(\"testing/ai-edtech-app/src/app/page.tsx\")` directly. Never claim access is blocked unless a tool returned an explicit error.{git_section}{curated_section}"""
265
281
 
266
282
 
267
283
  def _build_memory_section(cfg: GemCodeConfig) -> str:
@@ -840,7 +856,12 @@ You have two tools to persist project insights across sessions, like Claude Code
840
856
  Call this **immediately** when you discover something useful — not just at the end of tasks.
841
857
  Notes are loaded at session start so future sessions inherit this knowledge.
842
858
 
843
- - **`read_project_notes()`** — read current notes **only when starting a real engineering task** (editing, debugging, building). Do NOT call this for greetings or general questions. If notes exist and you're about to work on a task, read them once to avoid re-discovering known information."""
859
+ - **`read_project_notes()`** — read current notes **only when starting a real engineering task** (editing, debugging, building). Do NOT call this for greetings or general questions. If notes exist and you're about to work on a task, read them once to avoid re-discovering known information.
860
+
861
+ ## Do not create vendor-specific instruction files
862
+ - Do NOT create or modify `CLAUDE.md` or `AGENTS.md`. GemCode does not use these.
863
+ - If project instructions are needed and the user asked for it, use `GEMINI.md` (repo root).
864
+ """
844
865
 
845
866
  # Inject capability-specific strategy sections only when those caps are on.
846
867
  if getattr(cfg, "enable_computer_use", False):
@@ -855,6 +876,18 @@ You have two tools to persist project insights across sessions, like Claude Code
855
876
  tool_manifest = build_tool_manifest(cfg)
856
877
  if tool_manifest:
857
878
  base = f"{base}\n\n{tool_manifest}"
879
+ # Output style: small, user-selected formatting layer.
880
+ style_section = build_output_style_section(cfg.project_root, getattr(cfg, "output_style", None))
881
+ if style_section:
882
+ base = f"{base}\n\n{style_section}"
883
+ # Rules: project conventions (path-gated based on files the agent/user touched this session).
884
+ touched = sorted(getattr(cfg, "_touched_paths", set()) or set())
885
+ rules_section = build_rules_section(cfg.project_root, touched_paths=touched or None)
886
+ if rules_section:
887
+ base = f"{base}\n\n{rules_section}"
888
+ skill_manifest = build_skill_manifest_text(cfg.project_root)
889
+ if skill_manifest:
890
+ base = f"{base}\n\n{skill_manifest}"
858
891
  extra = _load_gemini_md(cfg.project_root)
859
892
  if extra.strip():
860
893
  return f"{base}\n\n## Project instructions (GEMINI.md)\n{extra}"
gemcode/autotune.py ADDED
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ import time
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from gemcode.evals.harness import run_eval_suite, write_eval_record
9
+
10
+
11
def _sh(cmd: list[str], *, cwd: Path) -> tuple[int, str]:
    """Run *cmd* in *cwd* and return ``(returncode, stdout + stderr)``.

    A command that cannot be started at all (executable not installed,
    working directory missing, ...) is reported as exit code 127 with the OS
    error text as output instead of raising, so callers only need to check
    the return code.
    """
    try:
        proc = subprocess.run(cmd, cwd=str(cwd), capture_output=True, text=True)
    except OSError as exc:
        # 127 mirrors the shell convention for "command not found".
        return 127, str(exc)
    return int(proc.returncode), (proc.stdout or "") + (proc.stderr or "")
15
+
16
+
17
def _git_head_sha(repo: Path) -> str | None:
    """Return the last output line of ``git rev-parse HEAD`` run in *repo*.

    Returns ``None`` when git exits non-zero or prints nothing.
    """
    status, raw = _sh(["git", "rev-parse", "HEAD"], cwd=repo)
    if status != 0:
        return None
    trimmed = (raw or "").strip()
    if not trimmed:
        return None
    return trimmed.splitlines()[-1]
22
+
23
+
24
def _git_branch(repo: Path) -> str | None:
    """Return the last output line of ``git rev-parse --abbrev-ref HEAD`` in *repo*.

    Returns ``None`` when git exits non-zero or prints nothing.
    """
    status, raw = _sh(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd=repo)
    if status != 0:
        return None
    text = (raw or "").strip()
    return text.splitlines()[-1] if text else None
29
+
30
+
31
def init_autotune(*, project_root: Path, tag: str) -> dict[str, Any]:
    """
    AutoResearch-style setup: make sure the local branch ``autotune/<tag>`` exists.

    Returns one of:
    - ``{"error": "not_a_git_repo"}`` when *project_root* has no ``.git`` entry
    - ``{"status": "exists", "branch": ...}`` when the branch is already there
      (it is reported, not checked out)
    - ``{"status": "created", "branch": ...}`` after ``git checkout -b`` succeeds
    - ``{"error": "branch_create_failed", "output": ...}`` with the tail of
      git's output otherwise

    No ledger file is created here.
    """
    repo = project_root
    if not (repo / ".git").exists():
        return {"error": "not_a_git_repo"}
    branch = f"autotune/{tag}"
    # Qualify the ref: a bare name would also match a tag or any other ref
    # called "autotune/<tag>", and the branch would then never be created.
    rc, _ = _sh(
        ["git", "rev-parse", "--verify", "--quiet", f"refs/heads/{branch}"],
        cwd=repo,
    )
    if rc == 0:
        return {"status": "exists", "branch": branch}
    rc_create, create_out = _sh(["git", "checkout", "-b", branch], cwd=repo)
    if rc_create != 0:
        return {"error": "branch_create_failed", "output": create_out[-1200:]}
    return {"status": "created", "branch": branch}
48
+
49
+
50
def run_autotune_eval(*, project_root: Path, include_llm: bool, model: str | None = None) -> dict[str, Any]:
    """
    Run the eval suite, persist the result, and append it to the autotune ledger.

    The suite result is merged with a timestamp and the current git sha/branch
    and written via ``write_eval_record``; the returned dict gains
    ``record_path``. The same merged record is then appended as one JSON line
    to ``.gemcode/evals/autotune_ledger.jsonl``. Ledger writing is best-effort:
    on success the result gains ``ledger_path``, on failure it gains
    ``ledger_error`` describing what went wrong (the eval result itself is
    still returned).
    """
    res = run_eval_suite(project_root=project_root, include_llm=include_llm, model=model)
    meta = {
        "ts": time.time(),
        "git_sha": _git_head_sha(project_root),
        "git_branch": _git_branch(project_root),
    }
    record_path = write_eval_record(project_root, {**meta, **res})
    res["record_path"] = str(record_path)

    # Append ledger line (the original comment notes .gemcode/ is expected to
    # be gitignored, so the ledger stays untracked).
    import json

    ledger = project_root / ".gemcode" / "evals" / "autotune_ledger.jsonl"
    try:
        ledger.parent.mkdir(parents=True, exist_ok=True)
        line = json.dumps({**meta, **res}, ensure_ascii=False)
        # Append mode creates the file when it does not exist yet.
        with ledger.open("a", encoding="utf-8") as f:
            f.write(line + "\n")
    except (OSError, TypeError, ValueError) as exc:
        # Keep the run usable, but surface why no ledger entry was written.
        res["ledger_error"] = f"{type(exc).__name__}: {exc}"
    else:
        res["ledger_path"] = str(ledger)
    return res
76
+
gemcode/checkpoints.py ADDED
@@ -0,0 +1,144 @@
1
+ """
2
+ Hermes-style checkpoints for GemCode.
3
+
4
+ Goal: make file mutations reversible with an explicit, local checkpoint log.
5
+
6
+ Storage:
7
+ <project>/.gemcode/checkpoints/<checkpoint_id>/manifest.json
8
+ <project>/.gemcode/checkpoints/<checkpoint_id>/files/<path>
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import time
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+
20
def _now_ms() -> int:
    """Return the current ``time.time()`` value as whole milliseconds."""
    seconds = time.time()
    return int(seconds * 1000)
22
+
23
+
24
def _checkpoints_dir(project_root: Path) -> Path:
    """Return ``<project_root>/.gemcode/checkpoints`` (the path is not created)."""
    return project_root.joinpath(".gemcode", "checkpoints")
26
+
27
+
28
def _safe_rel(project_root: Path, p: Path) -> str:
    """Return *p* relative to *project_root*, both resolved first.

    Raises ``ValueError`` (from ``Path.relative_to``) when the resolved *p*
    is not located under the resolved project root.
    """
    target = p.resolve()
    root = project_root.resolve()
    return str(target.relative_to(root))
30
+
31
+
32
@dataclass
class CheckpointFile:
    """One file entry recorded in a checkpoint manifest."""

    # Path of the file, stored as a string relative to the project root.
    path: str
    # Whether the file was reported as existing when the checkpoint was taken.
    existed: bool
36
+
37
+
38
@dataclass
class Checkpoint:
    """In-memory description of one checkpoint, mirroring its manifest fields."""

    # Checkpoint identifier; also the name of its directory.
    id: str
    # Creation time in milliseconds.
    ts_ms: int
    # Caller-supplied label of the operation being checkpointed.
    op: str
    # Entries for every file captured by this checkpoint.
    files: list[CheckpointFile]
44
+
45
+
46
def create_checkpoint(
    *,
    project_root: Path,
    op: str,
    file_snapshots: list[tuple[Path, bool]],
) -> Checkpoint:
    """
    Create a checkpoint capturing the *previous* contents of the provided files.

    file_snapshots entries are (absolute_path, existed_bool). Entries whose
    path cannot be expressed relative to project_root are skipped. For entries
    with existed=True that are regular files, the current bytes are copied to
    <checkpoint>/files/<relative path>. Every kept entry is listed in
    <checkpoint>/manifest.json.

    The checkpoint id is "cp_<milliseconds>". If a checkpoint with that id
    already exists (two checkpoints within the same millisecond), the
    timestamp is bumped until an unused id is found, so an earlier checkpoint
    is never overwritten.
    """
    ts = _now_ms()
    root_dir = _checkpoints_dir(project_root)
    root_dir.mkdir(parents=True, exist_ok=True)
    while True:
        cid = f"cp_{ts}"
        base = root_dir / cid
        try:
            # exist_ok=False makes claiming the directory the uniqueness check.
            base.mkdir(exist_ok=False)
        except FileExistsError:
            ts += 1
            continue
        break
    files_dir = base / "files"
    files_dir.mkdir(parents=True, exist_ok=True)
    out_files: list[CheckpointFile] = []

    for abs_path, existed in file_snapshots:
        try:
            rel = _safe_rel(project_root, abs_path)
        except Exception:
            # Path is outside the project (or cannot be resolved): not tracked.
            continue
        out_files.append(CheckpointFile(path=rel, existed=bool(existed)))
        if existed and abs_path.is_file():
            target = files_dir / rel
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(abs_path.read_bytes())

    manifest = {
        "id": cid,
        "ts_ms": ts,
        "op": op,
        "files": [{"path": f.path, "existed": f.existed} for f in out_files],
    }
    (base / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
    return Checkpoint(id=cid, ts_ms=ts, op=op, files=out_files)
83
+
84
+
85
def list_checkpoints(project_root: Path, limit: int = 20) -> list[dict[str, Any]]:
    """Return parsed checkpoint manifests, highest directory name first.

    Directories without a ``manifest.json`` and manifests that fail to parse
    are skipped. At most ``max(1, limit)`` manifests are returned; an empty
    list is returned when the checkpoints directory does not exist.
    """
    root_dir = _checkpoints_dir(project_root)
    if not root_dir.is_dir():
        return []
    found = []
    ordered = sorted(root_dir.iterdir(), key=lambda entry: entry.name, reverse=True)
    for entry in ordered:
        manifest_file = entry / "manifest.json"
        if manifest_file.is_file():
            try:
                found.append(json.loads(manifest_file.read_text(encoding="utf-8")))
            except Exception:
                continue
            if len(found) >= max(1, int(limit)):
                break
    return found
102
+
103
+
104
def undo_checkpoint(project_root: Path, checkpoint_id: str | None = None) -> dict[str, Any]:
    """
    Restore the files recorded by a checkpoint.

    With checkpoint_id=None the checkpoint directory with the highest name is
    used. For every manifest entry: if the file existed at checkpoint time its
    saved bytes are written back; otherwise the file is removed if it exists
    now.

    Returns {"checkpoint_id", "restored", "skipped"} on success, where
    "skipped" lists manifest paths that were refused or failed, or
    {"error": ...} when no usable checkpoint is found.

    Paths are confined: the checkpoint id must name a direct child of the
    checkpoints directory, and manifest paths must resolve inside the project
    root, so a crafted id or manifest cannot write or delete files elsewhere.
    """
    d = _checkpoints_dir(project_root)
    if not d.is_dir():
        return {"error": "no_checkpoints"}
    if checkpoint_id:
        base = d / checkpoint_id
        # Reject ids such as "../x" that would escape the checkpoints directory.
        if base.resolve().parent != d.resolve():
            return {"error": "invalid_checkpoint_id"}
    else:
        # newest
        items = [p for p in d.iterdir() if p.is_dir()]
        if not items:
            return {"error": "no_checkpoints"}
        base = max(items, key=lambda x: x.name)
    manifest_path = base / "manifest.json"
    if not manifest_path.is_file():
        return {"error": "checkpoint_missing_manifest"}
    try:
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    except Exception as e:
        return {"error": f"checkpoint_manifest_invalid:{e}"}
    if not isinstance(manifest, dict):
        return {"error": "checkpoint_manifest_invalid:not_an_object"}

    root = project_root.resolve()
    files_dir = base / "files"
    files_root = files_dir.resolve()
    restored = []
    skipped = []
    for f in manifest.get("files") or []:
        rel = ""
        try:
            rel = str(f.get("path") or "")
            existed = bool(f.get("existed"))
            abs_path = (project_root / rel).resolve()
            # Raises ValueError when the target is outside the project root.
            abs_path.relative_to(root)
            if existed:
                src = (files_dir / rel).resolve()
                # The snapshot must come from this checkpoint's own files/ tree.
                src.relative_to(files_root)
                if src.is_file():
                    abs_path.parent.mkdir(parents=True, exist_ok=True)
                    abs_path.write_bytes(src.read_bytes())
                    restored.append(rel)
            else:
                # File did not exist previously; remove it if it exists now.
                if abs_path.is_file():
                    abs_path.unlink()
                    restored.append(rel)
        except Exception:
            # Best-effort per file: record the entry and keep restoring the rest.
            skipped.append(rel)
            continue
    return {
        "checkpoint_id": manifest.get("id") or base.name,
        "restored": restored,
        "skipped": skipped,
    }
144
+
gemcode/cli.py CHANGED
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import argparse
6
6
  import asyncio
7
7
  import getpass
8
+ import json
8
9
  import os
9
10
  import sys
10
11
  import uuid
@@ -19,6 +20,8 @@ from gemcode.capability_routing import apply_capability_routing
19
20
  from gemcode.session_runtime import create_runner
20
21
  from gemcode.trust import is_trusted_root, trust_root
21
22
  from gemcode.repl_slash import process_repl_slash
23
+ from gemcode.ide_stdio import main as ide_stdio_main
24
+ from gemcode.autotune import init_autotune, run_autotune_eval
22
25
 
23
26
 
24
27
  def _events_to_text(events) -> str:
@@ -339,6 +342,20 @@ def main() -> None:
339
342
  "enable Terminal for Desktop Folder (or grant Full Disk Access)."
340
343
  )
341
344
 
345
+ # Hidden IDE engine mode: `gemcode ide --stdio`
346
+ if len(sys.argv) >= 2 and sys.argv[1] == "ide":
347
+ ide_parser = argparse.ArgumentParser(prog="gemcode ide")
348
+ ide_parser.add_argument(
349
+ "--stdio",
350
+ action="store_true",
351
+ help="Run IDE engine over stdin/stdout (JSONL)",
352
+ )
353
+ ide_args = ide_parser.parse_args(sys.argv[2:])
354
+ if ide_args.stdio:
355
+ ide_stdio_main()
356
+ return
357
+ raise SystemExit("Usage: gemcode ide --stdio")
358
+
342
359
  # Persist or rotate API key (Claude Code–style `claude login`).
343
360
  if len(sys.argv) > 1 and sys.argv[1] == "login":
344
361
  load_cli_environment()
@@ -461,6 +478,39 @@ def main() -> None:
461
478
  print(f"smoke ok: {len(inspections)} tools validated")
462
479
  return
463
480
 
481
+ # Eval harness (AutoResearch-style gates).
482
+ if len(sys.argv) > 1 and sys.argv[1] == "eval":
483
+ eval_parser = argparse.ArgumentParser(prog="gemcode eval")
484
+ eval_parser.add_argument("-C", "--directory", type=Path, default=Path.cwd(), help="Project root")
485
+ eval_parser.add_argument("--llm", action="store_true", help="Include LLM golden prompts (costs tokens)")
486
+ eval_parser.add_argument("--model", default=None, help="Override model for LLM evals")
487
+ args = eval_parser.parse_args(sys.argv[2:])
488
+ from gemcode.evals.harness import run_eval_suite, write_eval_record
489
+ res = run_eval_suite(project_root=args.directory.resolve(), include_llm=bool(args.llm), model=args.model)
490
+ p = write_eval_record(args.directory.resolve(), res)
491
+ print(json.dumps(res, ensure_ascii=False, indent=2))
492
+ print(f"\n[gemcode eval] wrote {p}", file=sys.stderr)
493
+ raise SystemExit(0 if res.get("ok") else 1)
494
+
495
+ # Autotune scaffolding (AutoResearch-inspired).
496
+ if len(sys.argv) > 1 and sys.argv[1] == "autotune":
497
+ at_parser = argparse.ArgumentParser(prog="gemcode autotune")
498
+ at_parser.add_argument("subcommand", choices=("init", "eval"))
499
+ at_parser.add_argument("-C", "--directory", type=Path, default=Path.cwd(), help="Project root")
500
+ at_parser.add_argument("--tag", default=None, help="Run tag (e.g. apr7)")
501
+ at_parser.add_argument("--llm", action="store_true", help="Include LLM golden prompts (costs tokens)")
502
+ at_parser.add_argument("--model", default=None, help="Override model for LLM evals")
503
+ args = at_parser.parse_args(sys.argv[2:])
504
+ root = args.directory.resolve()
505
+ if args.subcommand == "init":
506
+ if not args.tag:
507
+ raise SystemExit("autotune init requires --tag")
508
+ print(json.dumps(init_autotune(project_root=root, tag=str(args.tag)), ensure_ascii=False, indent=2))
509
+ return
510
+ # eval
511
+ print(json.dumps(run_autotune_eval(project_root=root, include_llm=bool(args.llm), model=args.model), ensure_ascii=False, indent=2))
512
+ return
513
+
464
514
  # Live audio mode (Gemini Live API via ADK run_live()).
465
515
  if len(sys.argv) > 1 and sys.argv[1] == "live-audio":
466
516
  audio_parser = argparse.ArgumentParser(
gemcode/config.py CHANGED
@@ -206,6 +206,12 @@ class GemCodeConfig:
206
206
  default_factory=lambda: _truthy_env("GEMCODE_ENABLE_MEMORY", default=False)
207
207
  )
208
208
 
209
+ # Output style: optional extra system-prompt section loaded from
210
+ # `.gemcode/output-styles/<name>.md` or `~/.gemcode/output-styles/<name>.md`.
211
+ output_style: str | None = field(
212
+ default_factory=lambda: os.environ.get("GEMCODE_OUTPUT_STYLE") or None
213
+ )
214
+
209
215
  # Modality toggles (tool injection + routing).
210
216
  enable_deep_research: bool = field(
211
217
  default_factory=lambda: _truthy_env("GEMCODE_ENABLE_DEEP_RESEARCH", default=False)
@@ -340,6 +346,23 @@ class GemCodeConfig:
340
346
  default_factory=lambda: _truthy_env("GEMCODE_ENABLE_WEB_SEARCH", default=False)
341
347
  )
342
348
 
349
+ # IDE mode (VS Code extension): the engine should *propose* writes/commands,
350
+ # and the IDE applies them (WorkspaceEdit / terminal task) after user approval.
351
+ ide_proposal_mode: bool = False
352
+ ide_allow_write: bool = False
353
+ ide_allow_shell: bool = False
354
+
355
+ # Checkpointing (Hermes-style): snapshot files before mutating tools so users
356
+ # can undo accidental agent edits.
357
+ enable_checkpoints: bool = field(
358
+ default_factory=lambda: _truthy_env("GEMCODE_CHECKPOINTS", default=True)
359
+ )
360
+
361
+ # Background learner (Hermes-style): post-turn distillation into curated memory/notes.
362
+ enable_background_learner: bool = field(
363
+ default_factory=lambda: _truthy_env("GEMCODE_BACKGROUND_LEARNER", default=False)
364
+ )
365
+
343
366
  def __post_init__(self) -> None:
344
367
  self.project_root = self.project_root.resolve()
345
368
  # Default agentic depth when env omits GEMCODE_MAX_LLM_CALLS (was: None → SDK default).
gemcode/curated_memory.py ADDED
@@ -0,0 +1,110 @@
1
+ """
2
+ Curated memory store (Hermes-style).
3
+
4
+ This is distinct from ADK's `.gemcode/memories.jsonl`:
5
+ - memories.jsonl: auto-generated, retrieval-oriented, noisy by design
6
+ - curated memory: small, human/agent curated facts that are safe to re-inject
7
+
8
+ Files:
9
+ <project>/.gemcode/GEMCODE_MEMORY.md (project facts, conventions, commands)
10
+ <project>/.gemcode/GEMCODE_USER.md (user preferences for this project)
11
+
12
+ Backward compatibility:
13
+ - If older files exist, they are still read:
14
+ - .gemcode/MEMORY.md
15
+ - .gemcode/USER.md
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from datetime import datetime
22
+ from pathlib import Path
23
+ from typing import Any
24
+
25
+
26
# Substrings that make a candidate memory entry look like it contains a
# credential. They are matched against lower-cased text, so every marker must
# itself be lower-case (an upper-case marker could never match).
_SUSPICIOUS = [
    "api_key",
    "access key",
    "secret",
    "password",
    "token",
    "private key",
    "-----begin",
]
35
+
36
+
37
def memory_paths(project_root: Path) -> tuple[Path, Path]:
    """Return ``(memory_file, user_file)`` under ``<project_root>/.gemcode``."""
    store = project_root / ".gemcode"
    memory_file = store / "GEMCODE_MEMORY.md"
    user_file = store / "GEMCODE_USER.md"
    return memory_file, user_file
40
+
41
+
42
def _legacy_memory_paths(project_root: Path) -> tuple[Path, Path]:
    """Return the older ``(MEMORY.md, USER.md)`` locations under ``.gemcode``."""
    store = project_root / ".gemcode"
    return tuple(store / name for name in ("MEMORY.md", "USER.md"))
45
+
46
+
47
def _scan_safe(text: str) -> str | None:
    """Return a rejection reason for *text*, or ``None`` when it is acceptable.

    Reasons, checked in this order on the stripped text:
    - "empty": nothing left after stripping
    - "too_long": more than 4000 characters
    - "looks_sensitive": contains one of the ``_SUSPICIOUS`` markers
      (case-insensitive)
    - "control_chars": contains a character below code point 32 other than
      newline or tab
    """
    t = (text or "").strip()
    if not t:
        return "empty"
    if len(t) > 4000:
        return "too_long"
    low = t.lower()
    # Lower-case the marker as well: the text is lower-cased, so a marker
    # containing upper-case letters would otherwise never match.
    if any(marker.lower() in low for marker in _SUSPICIOUS):
        return "looks_sensitive"
    # Block invisible control characters except newline/tab.
    if any(ord(ch) < 32 and ch not in ("\n", "\t") for ch in t):
        return "control_chars"
    return None
63
+
64
+
65
def load_snapshot(project_root: Path, *, max_chars: int = 6000) -> dict[str, Any]:
    """Read the curated memory files and return them as one text snapshot.

    For each of the memory and user files the current filename is read first;
    the legacy filename is used when that yields no text. The two texts are
    stripped, joined by a blank line, and cut to *max_chars* characters plus a
    "(truncated)" marker when longer.

    Returns a dict with "exists", "memory_path", "user_path", "text", "chars".
    """
    mem_file, user_file = memory_paths(project_root)
    old_mem_file, old_user_file = _legacy_memory_paths(project_root)

    def _text_of(candidate: Path) -> str:
        if candidate.is_file():
            return candidate.read_text(encoding="utf-8", errors="replace")
        return ""

    # Prefer new filenames; fall back to legacy if new yields nothing.
    memory_text = _text_of(mem_file) or _text_of(old_mem_file)
    user_text = _text_of(user_file) or _text_of(old_user_file)

    combined = (memory_text.strip() + "\n\n" + user_text.strip()).strip()
    if len(combined) > max_chars:
        combined = combined[:max_chars] + "\n\n(truncated)\n"

    snapshot: dict[str, Any] = {}
    snapshot["exists"] = bool(memory_text.strip() or user_text.strip())
    snapshot["memory_path"] = str(mem_file if mem_file.is_file() else old_mem_file)
    snapshot["user_path"] = str(user_file if user_file.is_file() else old_user_file)
    snapshot["text"] = combined
    snapshot["chars"] = len(combined)
    return snapshot
85
+
86
+
87
def append_fact(project_root: Path, *, target: str, text: str) -> dict[str, Any]:
    """
    Append a curated fact to GEMCODE_MEMORY.md or GEMCODE_USER.md.

    target: 'user' selects the user file; any other value selects the memory
    file.

    Returns {"error": "rejected:<reason>"} when the text fails the safety
    scan, {"status": "already_exists", "path": ...} when the fact is already
    recorded, or {"status": "appended", "path": ...}.

    A single-line fact counts as already recorded only when some line of the
    file equals it (ignoring surrounding whitespace and a leading "- " or
    "* " bullet); being a fragment of a longer entry does not count. A
    multi-line fact is looked up as a literal substring.
    """
    err = _scan_safe(text)
    if err:
        return {"error": f"rejected:{err}"}
    mem, user = memory_paths(project_root)
    p = user if (target or "").strip().lower() == "user" else mem
    p.parent.mkdir(parents=True, exist_ok=True)
    if not p.exists():
        hdr = "# Curated memory\n\nThis file is safe-to-inject project memory.\n\n"
        p.write_text(hdr, encoding="utf-8")
    cur = p.read_text(encoding="utf-8", errors="replace")
    stripped = text.strip()

    if "\n" in stripped:
        recorded = stripped in cur
    else:
        recorded = False
        for line in cur.splitlines():
            candidate = line.strip()
            if candidate.startswith(("- ", "* ")):
                candidate = candidate[2:].strip()
            if candidate == stripped:
                recorded = True
                break
    if recorded:
        return {"status": "already_exists", "path": str(p)}

    ts = datetime.now().strftime("%Y-%m-%d %H:%M")
    entry = f"\n<!-- {ts} -->\n- {stripped}\n"
    # Append instead of rewriting the whole file, so existing bytes are left
    # untouched (the read above decodes with errors="replace").
    with p.open("a", encoding="utf-8") as fh:
        fh.write(entry)
    return {"status": "appended", "path": str(p)}
110
+
gemcode/evals/harness.py ADDED
@@ -0,0 +1,126 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import json
5
+ import os
6
+ import time
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any, Callable
10
+
11
+ from gemcode.config import GemCodeConfig, load_cli_environment
12
+ from gemcode.invoke import run_turn
13
+ from gemcode.session_runtime import create_runner
14
+
15
+
16
@dataclass
class EvalResult:
    """Outcome of a single evaluation gate or golden prompt."""

    # Identifier of the gate/prompt (e.g. "tools_smoke", "pytest").
    name: str
    # Whether this check passed.
    ok: bool
    # Numeric score; the suite averages these across all results.
    score: float
    # Free-form diagnostic text (command output tail, failure reason, ...).
    details: str = ""
22
+
23
+
24
def _run_cmd(cmd: str, *, cwd: Path) -> tuple[int, str]:
    """Run the shell command string *cmd* in *cwd*.

    Returns ``(returncode, stdout followed by stderr)``.
    """
    import subprocess

    completed = subprocess.run(
        cmd,
        cwd=str(cwd),
        shell=True,
        capture_output=True,
        text=True,
    )
    combined = "".join((completed.stdout or "", completed.stderr or ""))
    return int(completed.returncode), combined
29
+
30
+
31
def _events_to_text(events: list) -> str:
    """Concatenate the text parts of all non-user events.

    Events without content/parts and events whose ``author`` is "user" are
    ignored; an event that raises while being inspected is skipped. The joined
    text is returned stripped.
    """
    chunks: list[str] = []
    for ev in events:
        try:
            content = ev.content
            if not content or not content.parts:
                continue
            if getattr(ev, "author", None) == "user":
                continue
            for piece in ev.content.parts:
                piece_text = getattr(piece, "text", None)
                if piece_text:
                    chunks.append(piece_text)
        except Exception:
            continue
    joined = "".join(chunks)
    return joined.strip()
46
+
47
+
48
async def _eval_golden_prompt(cfg: GemCodeConfig, prompt: str, *, name: str) -> EvalResult:
    """Run one prompt through a fresh runner and grade the reply.

    The turn is capped at 32 LLM calls (or ``cfg.max_llm_calls`` if lower).
    The result fails with "empty_output" when no text comes back and with
    "looks_like_exception_text" when the text mentions "Traceback" or
    "SyntaxError"; otherwise it passes and carries the first 400 characters.
    The runner is always closed.
    """
    runner = create_runner(cfg, extra_tools=None)
    try:
        events = await run_turn(
            runner,
            user_id="local",
            session_id=f"eval:{name}",
            prompt=prompt,
            max_llm_calls=min(int(getattr(cfg, "max_llm_calls", 256) or 256), 32),
            cfg=cfg,
        )
        reply = _events_to_text(events)
    finally:
        await runner.close()

    failure = None
    if not reply:
        failure = "empty_output"
    elif "Traceback" in reply or "SyntaxError" in reply:
        failure = "looks_like_exception_text"

    if failure is not None:
        return EvalResult(name=name, ok=False, score=0.0, details=failure)
    return EvalResult(name=name, ok=True, score=1.0, details=reply[:400])
68
+
69
+
70
def run_eval_suite(
    *,
    project_root: Path,
    include_llm: bool,
    model: str | None = None,
) -> dict[str, Any]:
    """
    Fixed evaluation harness (AutoResearch-style): deterministic gates + optional LLM golden prompts.

    Gates run inside ``<project_root>/gemcode``:
    - "tools_smoke": ``python3 -m gemcode tools smoke``
    - "pytest": ``python3 -m pytest -q``, only when a ``tests`` directory exists there

    When ``<project_root>/gemcode`` does not exist, "tools_smoke" is recorded
    as a failed gate with an explanatory message instead of raising.

    With include_llm=True each golden prompt is additionally run through the
    agent (optionally with *model* overriding the configured one).

    Returns {"ok", "score", "elapsed_s", "results"}: "ok" reflects only the
    deterministic gates, "score" is the mean score over all results.
    """
    t0 = time.time()
    load_cli_environment()
    cfg = GemCodeConfig(project_root=project_root)
    if model:
        cfg.model = model
        cfg.model_overridden = True

    results: list[EvalResult] = []
    pkg_dir = project_root / "gemcode"

    # Gate 1: tool schema smoke
    if pkg_dir.is_dir():
        rc, out = _run_cmd("PYTHONPATH=src python3 -m gemcode tools smoke", cwd=pkg_dir)
    else:
        # subprocess would raise on a missing cwd; report a failed gate instead.
        rc, out = 1, f"missing directory: {pkg_dir}"
    results.append(EvalResult(name="tools_smoke", ok=(rc == 0), score=1.0 if rc == 0 else 0.0, details=out[-800:]))

    # Gate 2: pytest if present
    tests_dir = pkg_dir / "tests"
    if tests_dir.is_dir():
        rc2, out2 = _run_cmd("PYTHONPATH=src python3 -m pytest -q", cwd=pkg_dir)
        results.append(EvalResult(name="pytest", ok=(rc2 == 0), score=1.0 if rc2 == 0 else 0.0, details=out2[-1200:]))

    if include_llm:
        goldens = [
            ("no_op_greeting", "hii"),
            ("explain_mode", "Explain what tools you have available, briefly."),
        ]

        async def _run():
            for n, p in goldens:
                results.append(await _eval_golden_prompt(cfg, p, name=n))

        asyncio.run(_run())

    # Only the deterministic gates decide pass/fail; LLM prompts affect the score.
    ok_all = all(r.ok for r in results if r.name in ("tools_smoke", "pytest"))
    score = float(sum(r.score for r in results)) / max(1, len(results))
    elapsed = time.time() - t0

    return {
        "ok": bool(ok_all),
        "score": score,
        "elapsed_s": elapsed,
        "results": [r.__dict__ for r in results],
    }
118
+
119
+
120
def write_eval_record(project_root: Path, record: dict[str, Any]) -> Path:
    """Write *record* as indented JSON to ``.gemcode/evals/last_eval.json``.

    The directory is created when missing and an existing file is
    overwritten. Returns the path of the written file.
    """
    evals_dir = project_root.joinpath(".gemcode", "evals")
    evals_dir.mkdir(parents=True, exist_ok=True)
    target = evals_dir / "last_eval.json"
    payload = json.dumps(record, ensure_ascii=False, indent=2)
    target.write_text(f"{payload}\n", encoding="utf-8")
    return target
126
+