gemcode 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gemcode/__init__.py +3 -0
- gemcode/__main__.py +3 -0
- gemcode/agent.py +146 -0
- gemcode/audit.py +16 -0
- gemcode/callbacks.py +473 -0
- gemcode/capability_routing.py +137 -0
- gemcode/cli.py +658 -0
- gemcode/compaction.py +35 -0
- gemcode/computer_use/__init__.py +0 -0
- gemcode/computer_use/browser_computer.py +275 -0
- gemcode/config.py +247 -0
- gemcode/interactions.py +15 -0
- gemcode/invoke.py +151 -0
- gemcode/kairos_daemon.py +221 -0
- gemcode/limits.py +83 -0
- gemcode/live_audio_engine.py +124 -0
- gemcode/mcp_loader.py +57 -0
- gemcode/memory/__init__.py +0 -0
- gemcode/memory/embedding_memory_service.py +292 -0
- gemcode/memory/file_memory_service.py +176 -0
- gemcode/modality_tools.py +216 -0
- gemcode/model_routing.py +179 -0
- gemcode/paths.py +29 -0
- gemcode/permissions.py +5 -0
- gemcode/plugins/__init__.py +0 -0
- gemcode/plugins/terminal_hooks_plugin.py +168 -0
- gemcode/plugins/tool_recovery_plugin.py +135 -0
- gemcode/prompt_suggestions.py +80 -0
- gemcode/query/__init__.py +36 -0
- gemcode/query/config.py +35 -0
- gemcode/query/deps.py +20 -0
- gemcode/query/engine.py +55 -0
- gemcode/query/stop_hooks.py +63 -0
- gemcode/query/token_budget.py +109 -0
- gemcode/query/transitions.py +41 -0
- gemcode/session_runtime.py +81 -0
- gemcode/thinking.py +136 -0
- gemcode/tool_prompt_manifest.py +118 -0
- gemcode/tool_registry.py +50 -0
- gemcode/tools/__init__.py +25 -0
- gemcode/tools/edit.py +53 -0
- gemcode/tools/filesystem.py +73 -0
- gemcode/tools/search.py +85 -0
- gemcode/tools/shell.py +73 -0
- gemcode/tools_inspector.py +132 -0
- gemcode/trust.py +54 -0
- gemcode/tui/app.py +697 -0
- gemcode/tui/scrollback.py +312 -0
- gemcode/vertex.py +22 -0
- gemcode/web/__init__.py +2 -0
- gemcode/web/claude_sse_adapter.py +282 -0
- gemcode/web/terminal_repl.py +147 -0
- gemcode-0.2.2.dist-info/METADATA +440 -0
- gemcode-0.2.2.dist-info/RECORD +58 -0
- gemcode-0.2.2.dist-info/WHEEL +5 -0
- gemcode-0.2.2.dist-info/entry_points.txt +2 -0
- gemcode-0.2.2.dist-info/licenses/LICENSE +151 -0
- gemcode-0.2.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Embedding-backed memory service for GemCode.
|
|
3
|
+
|
|
4
|
+
This is a clean-room, local file-backed implementation of ADK's
|
|
5
|
+
`BaseMemoryService` that:
|
|
6
|
+
- persists memory events (JSONL) to `.gemcode/memories.jsonl`
|
|
7
|
+
- stores an embedding vector per memory record (MVP)
|
|
8
|
+
- returns relevant memories via cosine similarity in `search_memory()`
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import math
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
from typing import Iterable
|
|
20
|
+
from typing import Sequence
|
|
21
|
+
|
|
22
|
+
from google.adk.memory.base_memory_service import BaseMemoryService
|
|
23
|
+
from google.adk.memory.base_memory_service import SearchMemoryResponse
|
|
24
|
+
from google.adk.memory.memory_entry import MemoryEntry
|
|
25
|
+
from google.genai import types
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
_WORD_RE = re.compile(r"[A-Za-z]+")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _words_lower(s: str) -> set[str]:
|
|
32
|
+
return {w.lower() for w in _WORD_RE.findall(s or "")}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _extract_text_parts(content: Any) -> list[str]:
|
|
36
|
+
try:
|
|
37
|
+
parts = getattr(content, "parts", None)
|
|
38
|
+
if not parts:
|
|
39
|
+
return []
|
|
40
|
+
out: list[str] = []
|
|
41
|
+
for p in parts:
|
|
42
|
+
t = getattr(p, "text", None)
|
|
43
|
+
if isinstance(t, str) and t.strip():
|
|
44
|
+
out.append(t.strip())
|
|
45
|
+
return out
|
|
46
|
+
except Exception:
|
|
47
|
+
return []
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _concat_text(content: Any) -> str:
|
|
51
|
+
pieces = _extract_text_parts(content)
|
|
52
|
+
if not pieces:
|
|
53
|
+
return ""
|
|
54
|
+
return "\n".join(pieces)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _cosine_similarity(a: list[float], b: list[float]) -> float:
|
|
58
|
+
if not a or not b or len(a) != len(b):
|
|
59
|
+
return -1.0
|
|
60
|
+
dot = 0.0
|
|
61
|
+
na = 0.0
|
|
62
|
+
nb = 0.0
|
|
63
|
+
for x, y in zip(a, b):
|
|
64
|
+
dot += x * y
|
|
65
|
+
na += x * x
|
|
66
|
+
nb += y * y
|
|
67
|
+
denom = math.sqrt(na) * math.sqrt(nb)
|
|
68
|
+
if denom == 0:
|
|
69
|
+
return -1.0
|
|
70
|
+
return dot / denom
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _get_embedding_model() -> str:
|
|
74
|
+
return os.environ.get("GEMCODE_EMBEDDINGS_MODEL", "models/gemini-embedding-2-preview")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _get_embedding_api_key() -> str | None:
|
|
78
|
+
return os.environ.get("GOOGLE_API_KEY")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
async def _embed_texts(
    *,
    texts: Sequence[str],
    embedding_model: str,
) -> list[list[float]]:
    """Embed *texts* with the google-genai async client; one vector per input.

    Imports are local so google-genai is only required when embeddings are
    actually used. `auto_truncate=True` lets the service clip over-long inputs
    instead of erroring.
    """
    from google.genai import Client
    from google.genai.types import EmbedContentConfig

    genai_client = Client(api_key=_get_embedding_api_key())
    response = await genai_client.aio.models.embed_content(
        model=embedding_model,
        contents=list(texts),
        config=EmbedContentConfig(auto_truncate=True),
    )
    return [list(emb.values) for emb in response.embeddings]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class EmbeddingFileMemoryService(BaseMemoryService):
    """JSONL-backed memory service with embedding similarity search.

    Each memory is one JSON object per line in `memories_path`. A record
    stores the raw text plus an embedding vector; `search_memory()` ranks
    records by cosine similarity against the query embedding and falls back
    to naive keyword overlap when embedding the query fails.

    Fix vs. previous revision: removed the dead `texts_to_embed` local in
    `add_events_to_memory` (it was appended to but never read — batch texts
    are recomputed from the records themselves).
    """

    def __init__(
        self,
        memories_path: Path,
        *,
        embeddings_model: str | None = None,
        embedding_max_chars: int = 6000,
        embedding_batch_size: int = 16,
    ):
        # Path to the JSONL store (created lazily on first write).
        self.memories_path = memories_path
        # Model used for both record and query embeddings.
        self.embeddings_model = embeddings_model or _get_embedding_model()
        # Texts are truncated to this many characters before embedding.
        self.embedding_max_chars = embedding_max_chars
        # Number of texts sent per embed_content request.
        self.embedding_batch_size = embedding_batch_size

    def _ensure_parent(self) -> None:
        """Create the parent directory of the JSONL file if missing."""
        self.memories_path.parent.mkdir(parents=True, exist_ok=True)

    def _iter_records(self) -> Iterable[dict[str, Any]]:
        """Yield parsed records from the JSONL file, skipping blank/corrupt lines.

        This is a generator, so the early `return []` simply ends iteration
        when the file does not exist.
        """
        if not self.memories_path.is_file():
            return []
        with self.memories_path.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    yield json.loads(line)
                except json.JSONDecodeError:
                    continue

    async def add_session_to_memory(self, session) -> None:  # type: ignore[override]
        """Persist all events of an ADK session (delegates to add_events_to_memory)."""
        await self.add_events_to_memory(
            app_name=session.app_name,
            user_id=session.user_id,
            session_id=session.id,
            events=session.events,
        )

    async def add_events_to_memory(  # type: ignore[override]
        self,
        *,
        app_name: str,
        user_id: str,
        events,
        session_id: str | None = None,
        custom_metadata: Any = None,
    ) -> None:
        """Append new (unseen) text-bearing events to the JSONL store.

        Events without text, without a non-empty string id, or whose id is
        already stored for this (app_name, user_id) are skipped. Embedding
        failures are tolerated: records are still written with
        `embedding=None`.
        """
        _ = custom_metadata  # accepted for interface compatibility; unused
        self._ensure_parent()

        # De-duplicate against ids already stored for this app/user.
        existing_ids: set[str] = set()
        for r in self._iter_records():
            if r.get("app_name") == app_name and r.get("user_id") == user_id:
                mid = r.get("id")
                if isinstance(mid, str) and mid:
                    existing_ids.add(mid)

        # First pass: build records for events that carry new text.
        new_records: list[dict[str, Any]] = []
        for ev in events:
            author = getattr(ev, "author", None)
            content = getattr(ev, "content", None)
            if content is None:
                continue
            text = _concat_text(content)
            if not text.strip():
                continue

            ev_id = getattr(ev, "id", None)
            if not isinstance(ev_id, str) or not ev_id:
                continue
            if ev_id in existing_ids:
                continue

            ts = getattr(ev, "timestamp", None)
            # NOTE(review): only string timestamps are preserved; any other
            # timestamp type is stored as None — confirm against ADK events.
            ts_out = ts if isinstance(ts, str) else None

            truncated = text[: self.embedding_max_chars]
            rec: dict[str, Any] = {
                "id": ev_id,
                "app_name": app_name,
                "user_id": user_id,
                "session_id": session_id,
                "author": author if isinstance(author, str) else None,
                "timestamp": ts_out,
                "text": text,
                # Transient field: source text for the embedding request;
                # dropped before persisting below.
                "embedding_text": truncated,
                "embedding": None,
            }
            new_records.append(rec)
            existing_ids.add(ev_id)

        if not new_records:
            return

        # Embed in batches to avoid too-large requests.
        for i in range(0, len(new_records), self.embedding_batch_size):
            batch_records = new_records[i : i + self.embedding_batch_size]
            batch_texts = [r["embedding_text"] for r in batch_records]
            try:
                vectors = await _embed_texts(
                    texts=batch_texts, embedding_model=self.embeddings_model
                )
                for r, vec in zip(batch_records, vectors):
                    r["embedding"] = vec
            except Exception:
                # Best-effort: keep record but without embedding.
                for r in batch_records:
                    r["embedding"] = None

        # Persist (strip the transient embedding_text field).
        with self.memories_path.open("a", encoding="utf-8") as f:
            for rec in new_records:
                rec_out = dict(rec)
                rec_out.pop("embedding_text", None)
                f.write(json.dumps(rec_out, ensure_ascii=False) + "\n")

    async def search_memory(  # type: ignore[override]
        self,
        *,
        app_name: str,
        user_id: str,
        query: str,
    ) -> SearchMemoryResponse:
        """Return memories relevant to *query* for this (app_name, user_id).

        Primary path: cosine similarity between the query embedding and each
        stored record embedding, top 12 results. Fallback path (when the
        query embedding fails): naive case-insensitive word overlap.
        """
        response = SearchMemoryResponse()
        q = (query or "").strip()
        if not q:
            return response

        # Compute query embedding.
        try:
            q_vecs = await _embed_texts(
                texts=[q[: self.embedding_max_chars]], embedding_model=self.embeddings_model
            )
            q_vec = q_vecs[0]
        except Exception:
            # Fallback to naive keyword search if embedding fails.
            q_words = _words_lower(q)
            if not q_words:
                return response
            for rec in self._iter_records():
                if rec.get("app_name") != app_name or rec.get("user_id") != user_id:
                    continue
                text = rec.get("text")
                if not isinstance(text, str):
                    continue
                event_words = _words_lower(text)
                if event_words and any(w in event_words for w in q_words):
                    content = types.Content(role="user", parts=[types.Part(text=text)])
                    response.memories.append(
                        MemoryEntry(
                            content=content,
                            author=rec.get("author") if isinstance(rec.get("author"), str) else None,
                            timestamp=rec.get("timestamp") if isinstance(rec.get("timestamp"), str) else None,
                        )
                    )
            return response

        # Rank by cosine similarity. Records without a well-formed numeric
        # embedding vector are ignored.
        scored: list[tuple[float, dict[str, Any]]] = []
        for rec in self._iter_records():
            if rec.get("app_name") != app_name or rec.get("user_id") != user_id:
                continue
            vec = rec.get("embedding")
            text = rec.get("text")
            if not isinstance(vec, list) or not isinstance(text, str) or not vec:
                continue
            if not all(isinstance(x, (int, float)) for x in vec):
                continue
            v = [float(x) for x in vec]
            score = _cosine_similarity(q_vec, v)
            if score >= 0:
                scored.append((score, rec))

        # Sort by score only (the dict is never compared thanks to key=).
        scored.sort(key=lambda x: x[0], reverse=True)
        # Cap results to keep injected context small.
        for _score, rec in scored[:12]:
            text = rec.get("text")
            if not isinstance(text, str):
                continue
            content = types.Content(role="user", parts=[types.Part(text=text)])
            response.memories.append(
                MemoryEntry(
                    content=content,
                    author=rec.get("author") if isinstance(rec.get("author"), str) else None,
                    timestamp=rec.get("timestamp") if isinstance(rec.get("timestamp"), str) else None,
                )
            )

        return response
|
|
292
|
+
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Persistent, clean-room memory service for GemCode.
|
|
3
|
+
|
|
4
|
+
This complements ADK's memory integration by providing a file-backed
|
|
5
|
+
implementation of `BaseMemoryService` so memory survives across CLI runs.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Iterable
|
|
14
|
+
|
|
15
|
+
from google.adk.memory.base_memory_service import BaseMemoryService
|
|
16
|
+
from google.adk.memory.base_memory_service import SearchMemoryResponse
|
|
17
|
+
from google.adk.memory.memory_entry import MemoryEntry
|
|
18
|
+
from google.genai import types
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
_WORD_RE = re.compile(r"[A-Za-z]+")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _words_lower(s: str) -> set[str]:
|
|
25
|
+
return {w.lower() for w in _WORD_RE.findall(s or "")}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _extract_text_parts(content: Any) -> list[str]:
|
|
29
|
+
# `google.genai.types.Content.parts` is a list of Part-like objects.
|
|
30
|
+
# We store only text parts for retrieval.
|
|
31
|
+
try:
|
|
32
|
+
parts = getattr(content, "parts", None)
|
|
33
|
+
if not parts:
|
|
34
|
+
return []
|
|
35
|
+
out: list[str] = []
|
|
36
|
+
for p in parts:
|
|
37
|
+
t = getattr(p, "text", None)
|
|
38
|
+
if isinstance(t, str) and t.strip():
|
|
39
|
+
out.append(t.strip())
|
|
40
|
+
return out
|
|
41
|
+
except Exception:
|
|
42
|
+
return []
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _concat_text(content: Any) -> str:
|
|
46
|
+
pieces = _extract_text_parts(content)
|
|
47
|
+
if not pieces:
|
|
48
|
+
return ""
|
|
49
|
+
return "\n".join(pieces)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class FileMemoryService(BaseMemoryService):
    """JSONL-backed memory service with naive keyword matching.

    Each memory is one JSON object per line in `memories_path`. Retrieval is
    a case-insensitive word-overlap match between the query and stored text.

    Fix vs. previous revision: `author` is normalized to str-or-None before
    persisting, so `json.dumps` cannot fail on non-serializable author
    objects; this also matches the read side (and the sibling
    `EmbeddingFileMemoryService`), which already discards non-string authors.
    """

    def __init__(self, memories_path: Path):
        # Path to the JSONL store (created lazily on first write).
        self.memories_path = memories_path

    def _ensure_parent(self) -> None:
        """Create the store's parent directory if it does not exist."""
        self.memories_path.parent.mkdir(parents=True, exist_ok=True)

    def _iter_records(self) -> Iterable[dict[str, Any]]:
        """Yield parsed records; skip blank/corrupt lines (best-effort JSONL).

        This is a generator, so the early `return []` simply ends iteration
        when the file is missing.
        """
        if not self.memories_path.is_file():
            return []
        # Best-effort JSONL parse; skip corrupt lines.
        with self.memories_path.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    yield json.loads(line)
                except json.JSONDecodeError:
                    continue

    async def add_session_to_memory(self, session) -> None:  # type: ignore[override]
        """Persist every event of an ADK session (delegates to add_events_to_memory)."""
        await self.add_events_to_memory(
            app_name=session.app_name,
            user_id=session.user_id,
            session_id=session.id,
            events=session.events,
        )

    async def add_events_to_memory(  # type: ignore[override]
        self,
        *,
        app_name: str,
        user_id: str,
        events,
        session_id: str | None = None,
        custom_metadata: Any = None,
    ) -> None:
        """Append unseen, text-bearing events to the JSONL store.

        Events lacking text or a non-empty string id, or whose id was
        already stored for this (app_name, user_id), are skipped.
        """
        _ = custom_metadata  # accepted for interface compatibility; unused
        self._ensure_parent()

        # Collect ids already stored for this app/user to avoid duplicates.
        existing_ids: set[str] = set()
        for r in self._iter_records():
            if r.get("app_name") == app_name and r.get("user_id") == user_id:
                mid = r.get("id")
                if isinstance(mid, str) and mid:
                    existing_ids.add(mid)

        to_append: list[dict[str, Any]] = []
        for ev in events:
            author = getattr(ev, "author", None)
            content = getattr(ev, "content", None)
            if content is None:
                continue
            text = _concat_text(content)
            if not text.strip():
                continue

            ev_id = getattr(ev, "id", None)
            if not isinstance(ev_id, str) or not ev_id:
                continue
            if ev_id in existing_ids:
                continue

            ts = getattr(ev, "timestamp", None)
            # ADK event.timestamp is typically a string; preserve best-effort.
            ts_out = ts if isinstance(ts, str) else None

            to_append.append(
                {
                    "id": ev_id,
                    "app_name": app_name,
                    "user_id": user_id,
                    "session_id": session_id,
                    # Normalize so json.dumps below cannot fail on exotic
                    # author objects; the read side discards non-strings too.
                    "author": author if isinstance(author, str) else None,
                    "timestamp": ts_out,
                    "text": text,
                }
            )
            existing_ids.add(ev_id)

        if not to_append:
            return

        with self.memories_path.open("a", encoding="utf-8") as f:
            for rec in to_append:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")

    async def search_memory(  # type: ignore[override]
        self, *, app_name: str, user_id: str, query: str
    ) -> SearchMemoryResponse:
        """Return stored memories whose text shares at least one word with *query*."""
        response = SearchMemoryResponse()
        query_words = _words_lower(query)
        if not query_words:
            return response

        for rec in self._iter_records():
            if rec.get("app_name") != app_name or rec.get("user_id") != user_id:
                continue
            text = rec.get("text")
            if not isinstance(text, str) or not text:
                continue
            event_words = _words_lower(text)
            if not event_words:
                continue
            if any(w in event_words for w in query_words):
                ts = rec.get("timestamp")
                author = rec.get("author")
                # Recreate MemoryEntry with a single text part.
                content = types.Content(
                    role="user",
                    parts=[types.Part(text=text)],
                )
                response.memories.append(
                    MemoryEntry(
                        content=content,
                        author=author if isinstance(author, str) else None,
                        timestamp=ts if isinstance(ts, str) else None,
                    )
                )

        return response
|
|
176
|
+
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Modality tool injection for GemCode.
|
|
3
|
+
|
|
4
|
+
Claude Code–style: outer loop + inner tool orchestration remains ADK-driven,
|
|
5
|
+
but we choose which tools to expose based on user flags / prompt heuristics.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import math
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from gemcode.config import GemCodeConfig
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _get_embedding_client():
    """Build a google-genai Client authenticated via GOOGLE_API_KEY.

    The key is passed explicitly (rather than relying on the client's own
    env discovery) so behavior is predictable in tests and subprocesses
    with a different environment.
    """
    from google.genai import Client

    return Client(api_key=os.environ.get("GOOGLE_API_KEY"))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _cosine_similarity(a: list[float], b: list[float]) -> float:
|
|
28
|
+
if not a or not b or len(a) != len(b):
|
|
29
|
+
return -1.0
|
|
30
|
+
dot = 0.0
|
|
31
|
+
na = 0.0
|
|
32
|
+
nb = 0.0
|
|
33
|
+
for x, y in zip(a, b):
|
|
34
|
+
dot += x * y
|
|
35
|
+
na += x * x
|
|
36
|
+
nb += y * y
|
|
37
|
+
denom = math.sqrt(na) * math.sqrt(nb)
|
|
38
|
+
if denom == 0:
|
|
39
|
+
return -1.0
|
|
40
|
+
return dot / denom
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _chunk_text(text: str, *, chunk_size: int = 1200, max_chunks: int = 8) -> list[str]:
|
|
44
|
+
t = (text or "").strip()
|
|
45
|
+
if not t:
|
|
46
|
+
return []
|
|
47
|
+
# Simple fixed-size chunks (MVP): fast, deterministic, and good enough for
|
|
48
|
+
# semantic retrieval at small scales.
|
|
49
|
+
out: list[str] = []
|
|
50
|
+
for i in range(0, len(t), chunk_size):
|
|
51
|
+
if len(out) >= max_chunks:
|
|
52
|
+
break
|
|
53
|
+
out.append(t[i : i + chunk_size])
|
|
54
|
+
return out
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
async def semantic_search_files(
    query: str,
    path_glob: str = "**/*",
    *,
    max_files: int = 25,
    max_chunks_per_file: int = 6,
    max_total_chunks: int = 40,
    max_file_bytes: int = 200_000,
    max_results: int = 8,
    embedding_model: str | None = None,
    project_root: str | None = None,
) -> dict[str, Any]:
    """
    Embeddings-powered semantic search across files under the project root.

    Returns a dict with either an "error" key, or "query"/"backend"/"matches"
    where each match has "path" (relative to root), "snippet" (first 500 chars
    of the chunk, newlines flattened), and "score" (cosine similarity).

    Notes:
    - This MVP performs per-call embedding (no persistent vector index).
    - It is intentionally bounded (max_files/max_total_chunks) to limit API
      calls and latency.
    """
    if not isinstance(query, str) or not query.strip():
        return {"error": "query must be a non-empty string"}

    root = Path(project_root).resolve() if project_root else None
    if root is None:
        # When invoked as a GemCode tool, `project_root` is supplied by ADK via
        # closure (see build_extra_tools).
        return {"error": "project_root not provided"}

    # Basic traversal guard: reject parent-directory segments and absolute
    # glob patterns so the scan stays under `root`.
    if ".." in path_glob or path_glob.startswith("/"):
        return {"error": "Invalid path_glob"}

    embedding_model = embedding_model or os.environ.get(
        "GEMCODE_EMBEDDINGS_MODEL", "models/gemini-embedding-2-preview"
    )

    # Collect candidate chunks. chunk_meta[i] holds the source path of
    # chunks[i]; the two lists stay index-aligned.
    chunks: list[str] = []
    chunk_meta: list[dict[str, str]] = []

    # NOTE(review): files_seen counts every regular file visited, including
    # ones later skipped as unreadable/empty — the max_files cap is on files
    # examined, not files contributing chunks.
    files_seen = 0
    for fp in root.glob(path_glob):
        if files_seen >= max_files:
            break
        if not fp.is_file():
            continue
        files_seen += 1

        try:
            data = fp.read_bytes()
        except OSError:
            continue
        # Truncate large files before decoding; binary bytes are dropped by
        # errors="ignore".
        if len(data) > max_file_bytes:
            data = data[:max_file_bytes]
        try:
            text = data.decode("utf-8", errors="ignore")
        except Exception:
            continue

        file_chunks = _chunk_text(text, max_chunks=max_chunks_per_file)
        if not file_chunks:
            continue
        for c in file_chunks:
            if len(chunks) >= max_total_chunks:
                break
            chunks.append(c)
            rel = fp.resolve().relative_to(root)
            chunk_meta.append({"path": str(rel)})
        if len(chunks) >= max_total_chunks:
            break

    if not chunks:
        return {"query": query, "matches": [], "backend": "embeddings"}

    client = _get_embedding_client()

    # Embed query and chunks (two API calls; any failure is surfaced as an
    # "error" payload rather than an exception).
    try:
        from google.genai.types import EmbedContentConfig

        config = EmbedContentConfig()
        q_emb = await client.aio.models.embed_content(
            model=embedding_model,
            contents=[query],
            config=config,
        )
        q_vec = list(q_emb.embeddings[0].values)

        c_emb = await client.aio.models.embed_content(
            model=embedding_model,
            contents=chunks,
            config=config,
        )
        c_vecs = [list(e.values) for e in c_emb.embeddings]
    except Exception as e:
        return {"error": f"embedding failed: {type(e).__name__}: {e}"}

    # Score every chunk against the query vector.
    scored: list[tuple[float, int]] = []
    for i, vec in enumerate(c_vecs):
        score = _cosine_similarity(q_vec, vec)
        scored.append((score, i))

    # Highest similarity first; negative scores (-1.0 sentinel for
    # uncomputable pairs) are filtered below.
    scored.sort(key=lambda x: x[0], reverse=True)
    matches: list[dict[str, Any]] = []
    for score, idx in scored[:max_results]:
        if score < 0:
            continue
        rel = chunk_meta[idx]["path"]
        snippet = chunks[idx][:500].replace("\n", " ")
        matches.append({"path": rel, "snippet": snippet, "score": score})

    return {"query": query, "backend": "embeddings", "matches": matches}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def build_extra_tools(cfg: GemCodeConfig) -> list[Any]:
    """Return ADK tool unions to expose for enabled modalities.

    Tool selection is driven by boolean flags read from *cfg* via getattr
    (missing flags default to disabled): `enable_deep_research`,
    `enable_maps_grounding`, and `enable_embeddings`. Imports of the
    google.adk tool modules are local so disabled modalities cost nothing.
    """
    extra: list[Any] = []

    if getattr(cfg, "enable_deep_research", False):
        from google.adk.tools import google_search, url_context

        extra.append(google_search)
        extra.append(url_context)
        # Google Maps grounding can be incompatible with other built-in tools
        # (e.g., google_search) depending on the request/model tooling layer.
        # Make it opt-in so deep-research stays reliable by default.
        # NOTE(review): nesting under enable_deep_research means maps
        # grounding is only ever added alongside deep research — confirm
        # that is the intended gating.
        if getattr(cfg, "enable_maps_grounding", False):
            from google.adk.tools.google_maps_grounding_tool import google_maps_grounding

            extra.append(google_maps_grounding)

    if getattr(cfg, "enable_embeddings", False):
        # Provide a closure so the embedding tool can resolve project_root
        # from cfg without the model having to pass it.
        async def _semantic_search_files(
            query: str,
            path_glob: str = "**/*",
            *,
            max_files: int = 25,
            max_chunks_per_file: int = 6,
            max_total_chunks: int = 40,
            max_file_bytes: int = 200_000,
            max_results: int = 8,
            embedding_model: str | None = None,
        ):
            # Thin wrapper: forwards everything and pins project_root.
            return await semantic_search_files(
                query,
                path_glob,
                max_files=max_files,
                max_chunks_per_file=max_chunks_per_file,
                max_total_chunks=max_total_chunks,
                max_file_bytes=max_file_bytes,
                max_results=max_results,
                embedding_model=embedding_model,
                project_root=str(cfg.project_root),
            )

        # Expose the public tool name (closures would otherwise surface the
        # leading-underscore name).
        _semantic_search_files.__name__ = "semantic_search_files"
        extra.append(_semantic_search_files)

    return extra
|
|
216
|
+
|