gemcode 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. gemcode/__init__.py +3 -0
  2. gemcode/__main__.py +3 -0
  3. gemcode/agent.py +146 -0
  4. gemcode/audit.py +16 -0
  5. gemcode/callbacks.py +473 -0
  6. gemcode/capability_routing.py +137 -0
  7. gemcode/cli.py +658 -0
  8. gemcode/compaction.py +35 -0
  9. gemcode/computer_use/__init__.py +0 -0
  10. gemcode/computer_use/browser_computer.py +275 -0
  11. gemcode/config.py +247 -0
  12. gemcode/interactions.py +15 -0
  13. gemcode/invoke.py +151 -0
  14. gemcode/kairos_daemon.py +221 -0
  15. gemcode/limits.py +83 -0
  16. gemcode/live_audio_engine.py +124 -0
  17. gemcode/mcp_loader.py +57 -0
  18. gemcode/memory/__init__.py +0 -0
  19. gemcode/memory/embedding_memory_service.py +292 -0
  20. gemcode/memory/file_memory_service.py +176 -0
  21. gemcode/modality_tools.py +216 -0
  22. gemcode/model_routing.py +179 -0
  23. gemcode/paths.py +29 -0
  24. gemcode/permissions.py +5 -0
  25. gemcode/plugins/__init__.py +0 -0
  26. gemcode/plugins/terminal_hooks_plugin.py +168 -0
  27. gemcode/plugins/tool_recovery_plugin.py +135 -0
  28. gemcode/prompt_suggestions.py +80 -0
  29. gemcode/query/__init__.py +36 -0
  30. gemcode/query/config.py +35 -0
  31. gemcode/query/deps.py +20 -0
  32. gemcode/query/engine.py +55 -0
  33. gemcode/query/stop_hooks.py +63 -0
  34. gemcode/query/token_budget.py +109 -0
  35. gemcode/query/transitions.py +41 -0
  36. gemcode/session_runtime.py +81 -0
  37. gemcode/thinking.py +136 -0
  38. gemcode/tool_prompt_manifest.py +118 -0
  39. gemcode/tool_registry.py +50 -0
  40. gemcode/tools/__init__.py +25 -0
  41. gemcode/tools/edit.py +53 -0
  42. gemcode/tools/filesystem.py +73 -0
  43. gemcode/tools/search.py +85 -0
  44. gemcode/tools/shell.py +73 -0
  45. gemcode/tools_inspector.py +132 -0
  46. gemcode/trust.py +54 -0
  47. gemcode/tui/app.py +697 -0
  48. gemcode/tui/scrollback.py +312 -0
  49. gemcode/vertex.py +22 -0
  50. gemcode/web/__init__.py +2 -0
  51. gemcode/web/claude_sse_adapter.py +282 -0
  52. gemcode/web/terminal_repl.py +147 -0
  53. gemcode-0.2.2.dist-info/METADATA +440 -0
  54. gemcode-0.2.2.dist-info/RECORD +58 -0
  55. gemcode-0.2.2.dist-info/WHEEL +5 -0
  56. gemcode-0.2.2.dist-info/entry_points.txt +2 -0
  57. gemcode-0.2.2.dist-info/licenses/LICENSE +151 -0
  58. gemcode-0.2.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,292 @@
1
+ """
2
+ Embedding-backed memory service for GemCode.
3
+
4
+ This is a clean-room, local file-backed implementation of ADK's
5
+ `BaseMemoryService` that:
6
+ - persists memory events (JSONL) to `.gemcode/memories.jsonl`
7
+ - stores an embedding vector per memory record (MVP)
8
+ - returns relevant memories via cosine similarity in `search_memory()`
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import math
15
+ import os
16
+ import re
17
+ from pathlib import Path
18
+ from typing import Any
19
+ from typing import Iterable
20
+ from typing import Sequence
21
+
22
+ from google.adk.memory.base_memory_service import BaseMemoryService
23
+ from google.adk.memory.base_memory_service import SearchMemoryResponse
24
+ from google.adk.memory.memory_entry import MemoryEntry
25
+ from google.genai import types
26
+
27
+
28
+ _WORD_RE = re.compile(r"[A-Za-z]+")
29
+
30
+
31
+ def _words_lower(s: str) -> set[str]:
32
+ return {w.lower() for w in _WORD_RE.findall(s or "")}
33
+
34
+
35
+ def _extract_text_parts(content: Any) -> list[str]:
36
+ try:
37
+ parts = getattr(content, "parts", None)
38
+ if not parts:
39
+ return []
40
+ out: list[str] = []
41
+ for p in parts:
42
+ t = getattr(p, "text", None)
43
+ if isinstance(t, str) and t.strip():
44
+ out.append(t.strip())
45
+ return out
46
+ except Exception:
47
+ return []
48
+
49
+
def _concat_text(content: Any) -> str:
    """Join all text parts of *content* with newlines; '' when there are none."""
    return "\n".join(_extract_text_parts(content))
55
+
56
+
57
+ def _cosine_similarity(a: list[float], b: list[float]) -> float:
58
+ if not a or not b or len(a) != len(b):
59
+ return -1.0
60
+ dot = 0.0
61
+ na = 0.0
62
+ nb = 0.0
63
+ for x, y in zip(a, b):
64
+ dot += x * y
65
+ na += x * x
66
+ nb += y * y
67
+ denom = math.sqrt(na) * math.sqrt(nb)
68
+ if denom == 0:
69
+ return -1.0
70
+ return dot / denom
71
+
72
+
73
+ def _get_embedding_model() -> str:
74
+ return os.environ.get("GEMCODE_EMBEDDINGS_MODEL", "models/gemini-embedding-2-preview")
75
+
76
+
77
+ def _get_embedding_api_key() -> str | None:
78
+ return os.environ.get("GOOGLE_API_KEY")
79
+
80
+
async def _embed_texts(
    *,
    texts: Sequence[str],
    embedding_model: str,
) -> list[list[float]]:
    """Embed *texts* with the google-genai client, one vector per input text.

    `auto_truncate=True` lets the service clip over-long inputs instead of
    rejecting the request.
    """
    from google.genai import Client
    from google.genai.types import EmbedContentConfig

    client = Client(api_key=_get_embedding_api_key())
    response = await client.aio.models.embed_content(
        model=embedding_model,
        contents=list(texts),
        config=EmbedContentConfig(auto_truncate=True),
    )
    return [list(item.values) for item in response.embeddings]
97
+
98
+
class EmbeddingFileMemoryService(BaseMemoryService):
    """JSONL-backed memory service with embedding similarity search.

    Records are appended to a JSONL file (one JSON object per line). Each
    record may carry an embedding vector used by `search_memory` for cosine
    ranking; when the query embedding cannot be computed, a naive keyword
    fallback is used instead.
    """

    def __init__(
        self,
        memories_path: Path,
        *,
        embeddings_model: str | None = None,
        embedding_max_chars: int = 6000,
        embedding_batch_size: int = 16,
    ):
        # Location of the JSONL memory log (e.g. `.gemcode/memories.jsonl`).
        self.memories_path = memories_path
        # Per-instance override; falls back to env var / built-in default.
        self.embeddings_model = embeddings_model or _get_embedding_model()
        # Texts are truncated to this many chars before being embedded.
        self.embedding_max_chars = embedding_max_chars
        # Number of texts sent per embed_content request.
        self.embedding_batch_size = embedding_batch_size

    def _ensure_parent(self) -> None:
        """Create the parent directory of the memories file if missing."""
        self.memories_path.parent.mkdir(parents=True, exist_ok=True)

    def _iter_records(self) -> Iterable[dict[str, Any]]:
        """Yield parsed JSONL records; blank and corrupt lines are skipped."""
        if not self.memories_path.is_file():
            # Bare return ends the generator (previously `return []`, whose
            # value is silently discarded inside a generator anyway).
            return
        with self.memories_path.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    yield json.loads(line)
                except json.JSONDecodeError:
                    # Best-effort parse: one corrupt line must not poison
                    # the rest of the log.
                    continue

    async def add_session_to_memory(self, session) -> None:  # type: ignore[override]
        """Persist every event of an ADK session as memory records."""
        await self.add_events_to_memory(
            app_name=session.app_name,
            user_id=session.user_id,
            session_id=session.id,
            events=session.events,
        )

    async def add_events_to_memory(  # type: ignore[override]
        self,
        *,
        app_name: str,
        user_id: str,
        events,
        session_id: str | None = None,
        custom_metadata: Any = None,
    ) -> None:
        """Append new (unseen) text-bearing events as memory records.

        Events are deduplicated by event id against records already stored
        for the same app/user. Embeddings are computed in batches; on
        embedding failure the record is still persisted without a vector so
        the keyword fallback can find it.
        """
        _ = custom_metadata  # accepted for interface parity; unused
        self._ensure_parent()

        # Ids already persisted for this app/user — used to skip duplicates.
        existing_ids: set[str] = set()
        for r in self._iter_records():
            if r.get("app_name") == app_name and r.get("user_id") == user_id:
                mid = r.get("id")
                if isinstance(mid, str) and mid:
                    existing_ids.add(mid)

        # First pass: build records for new, text-bearing events.
        # (A previously-dead `texts_to_embed` accumulator was removed; batch
        # texts are taken from the records themselves below.)
        new_records: list[dict[str, Any]] = []
        for ev in events:
            author = getattr(ev, "author", None)
            content = getattr(ev, "content", None)
            if content is None:
                continue
            text = _concat_text(content)
            if not text.strip():
                continue

            ev_id = getattr(ev, "id", None)
            if not isinstance(ev_id, str) or not ev_id:
                continue
            if ev_id in existing_ids:
                continue

            ts = getattr(ev, "timestamp", None)
            # NOTE(review): only string timestamps are preserved; any other
            # type becomes None — confirm ADK always emits strings here.
            ts_out = ts if isinstance(ts, str) else None

            # Bound the embedding input; the full text is still stored.
            truncated = text[: self.embedding_max_chars]
            rec: dict[str, Any] = {
                "id": ev_id,
                "app_name": app_name,
                "user_id": user_id,
                "session_id": session_id,
                "author": author if isinstance(author, str) else None,
                "timestamp": ts_out,
                "text": text,
                "embedding_text": truncated,  # transient; stripped on write
                "embedding": None,
            }
            new_records.append(rec)
            existing_ids.add(ev_id)

        if not new_records:
            return

        # Embed in batches to avoid too-large requests.
        for i in range(0, len(new_records), self.embedding_batch_size):
            batch_records = new_records[i : i + self.embedding_batch_size]
            batch_texts = [r["embedding_text"] for r in batch_records]
            try:
                vectors = await _embed_texts(
                    texts=batch_texts, embedding_model=self.embeddings_model
                )
                for r, vec in zip(batch_records, vectors):
                    r["embedding"] = vec
            except Exception:
                # Best-effort: keep record but without embedding.
                for r in batch_records:
                    r["embedding"] = None

        # Persist: one JSON object per line, dropping the transient
        # `embedding_text` field.
        with self.memories_path.open("a", encoding="utf-8") as f:
            for rec in new_records:
                rec_out = dict(rec)
                rec_out.pop("embedding_text", None)
                f.write(json.dumps(rec_out, ensure_ascii=False) + "\n")

    async def search_memory(  # type: ignore[override]
        self,
        *,
        app_name: str,
        user_id: str,
        query: str,
    ) -> SearchMemoryResponse:
        """Return up to 12 memories ranked by cosine similarity to *query*.

        Falls back to naive keyword overlap when the query embedding cannot
        be computed (e.g. missing API key or network failure).
        """
        response = SearchMemoryResponse()
        q = (query or "").strip()
        if not q:
            return response

        # Compute query embedding.
        try:
            q_vecs = await _embed_texts(
                texts=[q[: self.embedding_max_chars]], embedding_model=self.embeddings_model
            )
            q_vec = q_vecs[0]
        except Exception:
            # Fallback to naive keyword search if embedding fails.
            q_words = _words_lower(q)
            if not q_words:
                return response
            for rec in self._iter_records():
                if rec.get("app_name") != app_name or rec.get("user_id") != user_id:
                    continue
                text = rec.get("text")
                if not isinstance(text, str):
                    continue
                event_words = _words_lower(text)
                if event_words and any(w in event_words for w in q_words):
                    content = types.Content(role="user", parts=[types.Part(text=text)])
                    response.memories.append(
                        MemoryEntry(
                            content=content,
                            author=rec.get("author") if isinstance(rec.get("author"), str) else None,
                            timestamp=rec.get("timestamp") if isinstance(rec.get("timestamp"), str) else None,
                        )
                    )
            return response

        # Rank stored vectors by cosine similarity to the query vector.
        scored: list[tuple[float, dict[str, Any]]] = []
        for rec in self._iter_records():
            if rec.get("app_name") != app_name or rec.get("user_id") != user_id:
                continue
            vec = rec.get("embedding")
            text = rec.get("text")
            if not isinstance(vec, list) or not isinstance(text, str) or not vec:
                continue
            # Skip records whose stored vector is malformed.
            if not all(isinstance(x, (int, float)) for x in vec):
                continue
            v = [float(x) for x in vec]
            score = _cosine_similarity(q_vec, v)
            if score >= 0:
                scored.append((score, rec))

        # key= keeps the sort on the float score only (dicts aren't orderable).
        scored.sort(key=lambda x: x[0], reverse=True)
        for _score, rec in scored[:12]:
            text = rec.get("text")
            if not isinstance(text, str):
                continue
            content = types.Content(role="user", parts=[types.Part(text=text)])
            response.memories.append(
                MemoryEntry(
                    content=content,
                    author=rec.get("author") if isinstance(rec.get("author"), str) else None,
                    timestamp=rec.get("timestamp") if isinstance(rec.get("timestamp"), str) else None,
                )
            )

        return response
292
+
@@ -0,0 +1,176 @@
1
+ """
2
+ Persistent, clean-room memory service for GemCode.
3
+
4
+ This complements ADK's memory integration by providing a file-backed
5
+ implementation of `BaseMemoryService` so memory survives across CLI runs.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ from pathlib import Path
13
+ from typing import Any, Iterable
14
+
15
+ from google.adk.memory.base_memory_service import BaseMemoryService
16
+ from google.adk.memory.base_memory_service import SearchMemoryResponse
17
+ from google.adk.memory.memory_entry import MemoryEntry
18
+ from google.genai import types
19
+
20
+
21
+ _WORD_RE = re.compile(r"[A-Za-z]+")
22
+
23
+
24
+ def _words_lower(s: str) -> set[str]:
25
+ return {w.lower() for w in _WORD_RE.findall(s or "")}
26
+
27
+
28
+ def _extract_text_parts(content: Any) -> list[str]:
29
+ # `google.genai.types.Content.parts` is a list of Part-like objects.
30
+ # We store only text parts for retrieval.
31
+ try:
32
+ parts = getattr(content, "parts", None)
33
+ if not parts:
34
+ return []
35
+ out: list[str] = []
36
+ for p in parts:
37
+ t = getattr(p, "text", None)
38
+ if isinstance(t, str) and t.strip():
39
+ out.append(t.strip())
40
+ return out
41
+ except Exception:
42
+ return []
43
+
44
+
def _concat_text(content: Any) -> str:
    """Newline-join the text parts of *content*; '' when there are none."""
    return "\n".join(_extract_text_parts(content))
50
+
51
+
class FileMemoryService(BaseMemoryService):
    """JSONL-backed memory service with naive keyword matching.

    Records are appended to a JSONL file (one JSON object per line) so that
    memory survives across CLI runs; `search_memory` matches on word overlap.
    """

    def __init__(self, memories_path: Path):
        # Location of the JSONL memory log, e.g. `.gemcode/memories.jsonl`.
        self.memories_path = memories_path

    def _ensure_parent(self) -> None:
        """Create the parent directory of the memories file if missing."""
        self.memories_path.parent.mkdir(parents=True, exist_ok=True)

    def _iter_records(self) -> Iterable[dict[str, Any]]:
        """Yield parsed JSONL records; blank and corrupt lines are skipped."""
        if not self.memories_path.is_file():
            return
        # Best-effort JSONL parse; skip corrupt lines.
        with self.memories_path.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    yield json.loads(line)
                except json.JSONDecodeError:
                    continue

    async def add_session_to_memory(self, session) -> None:  # type: ignore[override]
        """Persist every event of an ADK session into the memory log."""
        await self.add_events_to_memory(
            app_name=session.app_name,
            user_id=session.user_id,
            session_id=session.id,
            events=session.events,
        )

    async def add_events_to_memory(  # type: ignore[override]
        self,
        *,
        app_name: str,
        user_id: str,
        events,
        session_id: str | None = None,
        custom_metadata: Any = None,
    ) -> None:
        """Append new text-bearing events, deduplicated by event id."""
        _ = custom_metadata  # accepted for interface parity; unused
        self._ensure_parent()

        # Ids already stored for this app/user, used for deduplication.
        existing_ids: set[str] = set()
        for r in self._iter_records():
            if r.get("app_name") == app_name and r.get("user_id") == user_id:
                mid = r.get("id")
                if isinstance(mid, str) and mid:
                    existing_ids.add(mid)

        to_append: list[dict[str, Any]] = []
        for ev in events:
            author = getattr(ev, "author", None)
            content = getattr(ev, "content", None)
            if content is None:
                continue
            text = _concat_text(content)
            if not text.strip():
                continue

            ev_id = getattr(ev, "id", None)
            if not isinstance(ev_id, str) or not ev_id:
                continue
            if ev_id in existing_ids:
                continue

            ts = getattr(ev, "timestamp", None)
            # ADK event.timestamp is typically a string; preserve best-effort.
            ts_out = ts if isinstance(ts, str) else None

            to_append.append(
                {
                    "id": ev_id,
                    "app_name": app_name,
                    "user_id": user_id,
                    "session_id": session_id,
                    # Normalize to str-or-None: a non-string author object
                    # would make json.dumps below raise TypeError and abort
                    # persistence; search_memory drops non-string authors
                    # on read anyway (matches EmbeddingFileMemoryService).
                    "author": author if isinstance(author, str) else None,
                    "timestamp": ts_out,
                    "text": text,
                }
            )
            existing_ids.add(ev_id)

        if not to_append:
            return

        with self.memories_path.open("a", encoding="utf-8") as f:
            for rec in to_append:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")

    async def search_memory(  # type: ignore[override]
        self, *, app_name: str, user_id: str, query: str
    ) -> SearchMemoryResponse:
        """Return memories whose text shares at least one word with *query*."""
        response = SearchMemoryResponse()
        query_words = _words_lower(query)
        if not query_words:
            return response

        for rec in self._iter_records():
            if rec.get("app_name") != app_name or rec.get("user_id") != user_id:
                continue
            text = rec.get("text")
            if not isinstance(text, str) or not text:
                continue
            event_words = _words_lower(text)
            if not event_words:
                continue
            if any(w in event_words for w in query_words):
                ts = rec.get("timestamp")
                author = rec.get("author")
                # Recreate MemoryEntry with a single text part.
                content = types.Content(
                    role="user",
                    parts=[types.Part(text=text)],
                )
                response.memories.append(
                    MemoryEntry(
                        content=content,
                        author=author if isinstance(author, str) else None,
                        timestamp=ts if isinstance(ts, str) else None,
                    )
                )

        return response
176
+
@@ -0,0 +1,216 @@
1
+ """
2
+ Modality tool injection for GemCode.
3
+
4
+ Claude Code–style: outer loop + inner tool orchestration remains ADK-driven,
5
+ but we choose which tools to expose based on user flags / prompt heuristics.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import math
11
+ import os
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from gemcode.config import GemCodeConfig
16
+
17
+
def _get_embedding_client():
    """Build a google-genai Client for embedding calls.

    google-genai picks up credentials from GOOGLE_API_KEY by default, but we
    pass the key explicitly so this works in tests/processes with a
    different environment.
    """
    from google.genai import Client

    return Client(api_key=os.environ.get("GOOGLE_API_KEY"))
25
+
26
+
27
+ def _cosine_similarity(a: list[float], b: list[float]) -> float:
28
+ if not a or not b or len(a) != len(b):
29
+ return -1.0
30
+ dot = 0.0
31
+ na = 0.0
32
+ nb = 0.0
33
+ for x, y in zip(a, b):
34
+ dot += x * y
35
+ na += x * x
36
+ nb += y * y
37
+ denom = math.sqrt(na) * math.sqrt(nb)
38
+ if denom == 0:
39
+ return -1.0
40
+ return dot / denom
41
+
42
+
43
+ def _chunk_text(text: str, *, chunk_size: int = 1200, max_chunks: int = 8) -> list[str]:
44
+ t = (text or "").strip()
45
+ if not t:
46
+ return []
47
+ # Simple fixed-size chunks (MVP): fast, deterministic, and good enough for
48
+ # semantic retrieval at small scales.
49
+ out: list[str] = []
50
+ for i in range(0, len(t), chunk_size):
51
+ if len(out) >= max_chunks:
52
+ break
53
+ out.append(t[i : i + chunk_size])
54
+ return out
55
+
56
+
async def semantic_search_files(
    query: str,
    path_glob: str = "**/*",
    *,
    max_files: int = 25,
    max_chunks_per_file: int = 6,
    max_total_chunks: int = 40,
    max_file_bytes: int = 200_000,
    max_results: int = 8,
    embedding_model: str | None = None,
    project_root: str | None = None,
) -> dict[str, Any]:
    """
    Embeddings-powered semantic search across files under the project root.

    Returns a dict: either {"error": ...} or
    {"query": ..., "backend": "embeddings", "matches": [{"path", "snippet", "score"}, ...]}.

    Notes:
    - This MVP performs per-call embedding (no persistent vector index).
    - It is intentionally bounded (max_files/max_total_chunks) to limit API
      calls and latency.
    """
    # Guard: the model may call the tool with a missing/blank query.
    if not isinstance(query, str) or not query.strip():
        return {"error": "query must be a non-empty string"}

    root = Path(project_root).resolve() if project_root else None
    if root is None:
        # When invoked as a GemCode tool, `project_root` is supplied by ADK via
        # closure (see build_extra_tools).
        return {"error": "project_root not provided"}

    # Cheap traversal guard: reject parent references and absolute globs so
    # the search stays inside the project root.
    if ".." in path_glob or path_glob.startswith("/"):
        return {"error": "Invalid path_glob"}

    embedding_model = embedding_model or os.environ.get(
        "GEMCODE_EMBEDDINGS_MODEL", "models/gemini-embedding-2-preview"
    )

    # Collect candidate chunks (flat lists; chunk_meta[i] describes chunks[i]).
    chunks: list[str] = []
    chunk_meta: list[dict[str, str]] = []

    files_seen = 0
    for fp in root.glob(path_glob):
        if files_seen >= max_files:
            break
        if not fp.is_file():
            continue
        files_seen += 1

        try:
            data = fp.read_bytes()
        except OSError:
            continue
        # Bound per-file read before decoding.
        if len(data) > max_file_bytes:
            data = data[:max_file_bytes]
        try:
            # errors="ignore" drops undecodable bytes (binary files shrink
            # to whatever UTF-8 runs they contain).
            text = data.decode("utf-8", errors="ignore")
        except Exception:
            continue

        file_chunks = _chunk_text(text, max_chunks=max_chunks_per_file)
        if not file_chunks:
            continue
        for c in file_chunks:
            if len(chunks) >= max_total_chunks:
                break
            chunks.append(c)
            # NOTE(review): relative_to raises ValueError if fp resolves
            # outside root (e.g. via symlink); that would propagate to the
            # caller uncaught — confirm acceptable.
            rel = fp.resolve().relative_to(root)
            chunk_meta.append({"path": str(rel)})
        if len(chunks) >= max_total_chunks:
            break

    if not chunks:
        return {"query": query, "matches": [], "backend": "embeddings"}

    client = _get_embedding_client()

    # Embed query and chunks (two API calls; any failure is reported, not raised).
    try:
        from google.genai.types import EmbedContentConfig

        config = EmbedContentConfig()
        q_emb = await client.aio.models.embed_content(
            model=embedding_model,
            contents=[query],
            config=config,
        )
        q_vec = list(q_emb.embeddings[0].values)

        c_emb = await client.aio.models.embed_content(
            model=embedding_model,
            contents=chunks,
            config=config,
        )
        c_vecs = [list(e.values) for e in c_emb.embeddings]
    except Exception as e:
        return {"error": f"embedding failed: {type(e).__name__}: {e}"}

    # Score every chunk against the query vector.
    scored: list[tuple[float, int]] = []
    for i, vec in enumerate(c_vecs):
        score = _cosine_similarity(q_vec, vec)
        scored.append((score, i))

    # Best-first; drop sentinel scores (-1.0 means similarity undefined).
    scored.sort(key=lambda x: x[0], reverse=True)
    matches: list[dict[str, Any]] = []
    for score, idx in scored[: max_results]:
        if score < 0:
            continue
        rel = chunk_meta[idx]["path"]
        snippet = chunks[idx][:500].replace("\n", " ")
        matches.append({"path": rel, "snippet": snippet, "score": score})

    return {"query": query, "backend": "embeddings", "matches": matches}
169
+
170
+
def build_extra_tools(cfg: GemCodeConfig) -> list[Any]:
    """Return ADK tool unions to expose for enabled modalities.

    Which tools are appended depends on feature flags read (via getattr, so
    older configs without the attribute default to off) from *cfg*:
    `enable_deep_research`, `enable_maps_grounding`, `enable_embeddings`.
    """
    extra: list[Any] = []

    if getattr(cfg, "enable_deep_research", False):
        # Imported lazily so the flag-off path never touches these modules.
        from google.adk.tools import google_search, url_context
        extra.append(google_search)
        extra.append(url_context)
    # Google Maps grounding can be incompatible with other built-in tools
    # (e.g., google_search) depending on the request/model tooling layer.
    # Make it opt-in so deep-research stays reliable by default.
    if getattr(cfg, "enable_maps_grounding", False):
        from google.adk.tools.google_maps_grounding_tool import google_maps_grounding

        extra.append(google_maps_grounding)

    if getattr(cfg, "enable_embeddings", False):
        # Provide a closure so the embedding tool can resolve project_root.
        # NOTE(review): the wrapper's signature (incl. keyword-only params)
        # presumably drives ADK's tool-schema generation — keep it in sync
        # with semantic_search_files.
        async def _semantic_search_files(
            query: str,
            path_glob: str = "**/*",
            *,
            max_files: int = 25,
            max_chunks_per_file: int = 6,
            max_total_chunks: int = 40,
            max_file_bytes: int = 200_000,
            max_results: int = 8,
            embedding_model: str | None = None,
        ):
            # Delegate, pinning project_root from the captured config.
            return await semantic_search_files(
                query,
                path_glob,
                max_files=max_files,
                max_chunks_per_file=max_chunks_per_file,
                max_total_chunks=max_total_chunks,
                max_file_bytes=max_file_bytes,
                max_results=max_results,
                embedding_model=embedding_model,
                project_root=str(cfg.project_root),
            )

        # Present the wrapper under the public tool name.
        _semantic_search_files.__name__ = "semantic_search_files"
        extra.append(_semantic_search_files)

    return extra
216
+