gdmcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gdmcode-0.1.0.dist-info/METADATA +240 -0
- gdmcode-0.1.0.dist-info/RECORD +131 -0
- gdmcode-0.1.0.dist-info/WHEEL +4 -0
- gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
- src/__init__.py +1 -0
- src/_internal/__init__.py +0 -0
- src/_internal/constants.py +244 -0
- src/_internal/domain_skills.py +339 -0
- src/agent/__init__.py +0 -0
- src/agent/commit_classifier.py +91 -0
- src/agent/context_budget.py +391 -0
- src/agent/daemon.py +681 -0
- src/agent/dag_validator.py +153 -0
- src/agent/debug_loop.py +473 -0
- src/agent/impact_analyzer.py +149 -0
- src/agent/impact_graph.py +117 -0
- src/agent/loop.py +1410 -0
- src/agent/orchestrator.py +141 -0
- src/agent/regression_guard.py +251 -0
- src/agent/review_gate.py +648 -0
- src/agent/risk_scorer.py +169 -0
- src/agent/self_healing.py +145 -0
- src/agent/smart_test_selector.py +89 -0
- src/agent/system_prompt.py +226 -0
- src/agent/task_tracker.py +320 -0
- src/agent/test_validator.py +210 -0
- src/agent/tool_orchestrator.py +402 -0
- src/agent/transcript.py +230 -0
- src/agent/verification_loop.py +133 -0
- src/agent/work_director.py +136 -0
- src/agent/worktree_manager.py +53 -0
- src/artifacts/__init__.py +16 -0
- src/artifacts/artifact_store.py +456 -0
- src/artifacts/verification_graph.py +75 -0
- src/auth.py +411 -0
- src/cli.py +1290 -0
- src/commands.py +1398 -0
- src/config.py +762 -0
- src/cost_tracker.py +348 -0
- src/db/__init__.py +4 -0
- src/db/migrations.py +337 -0
- src/enterprise/__init__.py +3 -0
- src/enterprise/audit_log.py +182 -0
- src/enterprise/identity.py +90 -0
- src/enterprise/rbac.py +100 -0
- src/enterprise/team_config.py +125 -0
- src/enterprise/usage_analytics.py +261 -0
- src/exceptions.py +207 -0
- src/git_workflow.py +651 -0
- src/integrations/__init__.py +6 -0
- src/integrations/github_actions.py +106 -0
- src/integrations/mcp_server.py +333 -0
- src/integrations/sentry_integration.py +100 -0
- src/integrations/sentry_server.py +82 -0
- src/integrations/webhook_security.py +19 -0
- src/main.py +27 -0
- src/memory/__init__.py +0 -0
- src/memory/code_index.py +376 -0
- src/memory/compressor.py +378 -0
- src/memory/context_memory.py +135 -0
- src/memory/continuous_memory.py +234 -0
- src/memory/conventions.py +495 -0
- src/memory/db.py +1119 -0
- src/memory/document_index.py +205 -0
- src/memory/file_cache.py +128 -0
- src/memory/project_scanner.py +178 -0
- src/memory/session_store.py +201 -0
- src/models/__init__.py +0 -0
- src/models/client.py +715 -0
- src/models/definitions.py +459 -0
- src/models/router.py +418 -0
- src/models/schemas.py +389 -0
- src/permissions.py +294 -0
- src/remote/__init__.py +5 -0
- src/remote/command_filter.py +33 -0
- src/remote/models.py +31 -0
- src/remote/permission_handler.py +79 -0
- src/remote/phone_ui.py +48 -0
- src/remote/protocol.py +59 -0
- src/remote/qr.py +65 -0
- src/remote/server.py +586 -0
- src/remote/token_manager.py +61 -0
- src/remote/tunnel.py +212 -0
- src/repl.py +475 -0
- src/runtime/__init__.py +1 -0
- src/runtime/branch_farm.py +372 -0
- src/runtime/replay.py +351 -0
- src/sandbox/__init__.py +2 -0
- src/sandbox/hermetic.py +214 -0
- src/sandbox/policy.py +44 -0
- src/sdk/__init__.py +3 -0
- src/sdk/plugin_base.py +39 -0
- src/sdk/plugin_host.py +100 -0
- src/sdk/plugin_loader.py +101 -0
- src/security.py +409 -0
- src/server/__init__.py +7 -0
- src/server/bridge.py +427 -0
- src/server/bridge_cli.py +103 -0
- src/server/bridge_client.py +170 -0
- src/server/protocol_version.py +103 -0
- src/session/__init__.py +10 -0
- src/session/event_fanout.py +46 -0
- src/session/input_broker.py +38 -0
- src/session/permission_bridge.py +100 -0
- src/tools/__init__.py +160 -0
- src/tools/_atomic.py +72 -0
- src/tools/agent_tools.py +423 -0
- src/tools/ask_user_tool.py +83 -0
- src/tools/bash_tool.py +384 -0
- src/tools/browser_tool.py +352 -0
- src/tools/browser_tools.py +179 -0
- src/tools/dep_tools.py +210 -0
- src/tools/document_reader.py +167 -0
- src/tools/document_tool.py +240 -0
- src/tools/document_writer.py +171 -0
- src/tools/impact_tools.py +240 -0
- src/tools/playwright_tool.py +172 -0
- src/tools/quality_tools.py +366 -0
- src/tools/read_tools.py +318 -0
- src/tools/result_cache.py +157 -0
- src/tools/search_tools.py +310 -0
- src/tools/shell_tools.py +311 -0
- src/tools/write_tools.py +337 -0
- src/voice/__init__.py +25 -0
- src/voice/audio_capture.py +92 -0
- src/voice/audio_playback.py +68 -0
- src/voice/errors.py +14 -0
- src/voice/models.py +35 -0
- src/voice/providers.py +143 -0
- src/voice/vad.py +55 -0
- src/voice/voice_loop.py +156 -0
src/memory/compressor.py
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
"""Session compressor — summarises old conversation turns via the Scout model.
|
|
2
|
+
|
|
3
|
+
Called by ``loop._maybe_compress()`` when the context budget exceeds the
|
|
4
|
+
compression trigger threshold. The resulting digest is injected into the
|
|
5
|
+
transcript as a ``system`` message that replaces the compressed turns.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from src.agent.context_budget import count_tokens
|
|
14
|
+
from src.config import GdmConfig
|
|
15
|
+
from src.models.definitions import PROVIDER_BASE_URLS, ModelTier, Provider, get_model
|
|
16
|
+
|
|
17
|
+
# Public names exported by this module.
__all__ = ["CompressResult", "SessionCompressor"]

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# ── Constants ────────────────────────────────────────────────────────────────

# Passed as ``max_tokens`` to the Tier 3 (full digest) chat-completion request.
_MAX_OUTPUT_TOKENS: int = 300
# Number of leading characters kept from each droppable turn by
# ``SessionCompressor._fallback_compress``.
_FALLBACK_CHAR_LIMIT: int = 200

# Prompt sent for the Tier 3 digest.  ``{task_description}`` and
# ``{formatted_turns}`` are filled in by
# ``SessionCompressor.build_compression_prompt`` via ``str.format``.
# NOTE(review): the "≤300 tokens" wording is hard-coded in the text and is not
# derived from ``_MAX_OUTPUT_TOKENS`` — keep the two in sync by hand.
_COMPRESSION_PROMPT_TEMPLATE: str = """\
Summarize this conversation segment into ≤300 tokens, preserving:
- Key decisions made
- Files modified (with line numbers if mentioned)
- Errors encountered and how they were resolved
- User preferences expressed
- Current task state

Format as plain prose. Be maximally dense.
Task context: {task_description}

Conversation to summarize:
{formatted_turns}"""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ── Data model ───────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class CompressResult:
    """Result of a single compression operation.

    Returned by ``SessionCompressor.compress`` and
    ``SessionCompressor._fallback_compress``.
    """

    # Digest text.  Starts with ``[DIGEST: turns 1-N compressed]`` followed by
    # the compressed/summarised content; empty string when there was nothing
    # to compress.
    digest: str
    # Approximate tokens saved: token count of the formatted original turns
    # minus the token count of the digest, clamped at zero.
    tokens_freed: int
    # Number of input turns covered by the digest (non-droppable turns that are
    # carried over verbatim are included in this count).
    turns_compressed: int
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ── Main class ───────────────────────────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SessionCompressor:
    """Compress old conversation turns into a single digest string.

    Compression escalates through three tiers (see :meth:`compress`) and only
    contacts the model API when the free tier does not shrink the droppable
    turns enough.  Turns flagged ``non_droppable=True`` are never rewritten;
    they are carried into the digest verbatim.

    Usage::

        compressor = SessionCompressor(cfg)
        result = compressor.compress(turns_to_compress, task_description)
        # Inject result.digest into transcript as a system message
    """

    # A tier is "sufficient" once the droppable turns have shrunk to at most
    # this fraction of their own original token count.
    _SUFFICIENT_RATIO: float = 0.70
    # Label separating summarised content from verbatim non-droppable turns.
    _PRESERVED_HEADER: str = "\n[PRESERVED NON-DROPPABLE TURNS]:\n"

    def __init__(self, cfg: GdmConfig) -> None:
        """Resolve the API key, base URL and model id used for summarisation.

        Grok credentials (``cfg.xai_api_key``) are preferred.  Without them the
        configured provider is used, falling back to the Grok base URL / Grok
        Scout model id when the provider has no entry.

        Args:
            cfg: application configuration; ``xai_api_key``, ``api_key`` and
                ``provider`` are read.
        """
        if cfg.xai_api_key:
            self._api_key: str = cfg.xai_api_key
            self._base_url: str = PROVIDER_BASE_URLS[Provider.GROK]
            self._model_id: str = get_model(ModelTier.SCOUT, Provider.GROK).id
        else:
            self._api_key = cfg.api_key
            self._base_url = PROVIDER_BASE_URLS.get(
                cfg.provider, PROVIDER_BASE_URLS[Provider.GROK]
            )
            try:
                self._model_id = get_model(ModelTier.SCOUT, cfg.provider).id
            except KeyError:
                # NOTE(review): this pairs the Grok Scout model id with whatever
                # base URL was resolved above — confirm that is intended when
                # the provider has a base URL but no Scout-tier model.
                self._model_id = get_model(ModelTier.SCOUT, Provider.GROK).id

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def compress(
        self,
        turns: list[dict[str, Any]],
        task_description: str = "",
    ) -> CompressResult:
        """Summarise ``turns`` into a compact digest using a 3-tier strategy.

        Tier 1 (no API call): replace tool-result payloads with 1-line stubs.
        Tier 2 (API): summarise older, long assistant turns one by one.
        Tier 3 (API): one full digest of everything droppable (last resort).

        A tier is accepted when the droppable turns shrink to at most
        ``_SUFFICIENT_RATIO`` of their original token count.  The baseline is
        the droppable turns only: non-droppable turns cannot shrink, so
        counting them would let a tier "succeed" without freeing anything.

        If Tier 3 fails for any reason (missing ``openai`` package, API error,
        empty reply) the result of :meth:`_fallback_compress` is returned.

        Args:
            turns: raw turn dicts (``role``, ``content``, optional ``name``,
                ``tool_calls``, ``non_droppable``).
            task_description: short description of the current task, included
                in the summarisation prompts.

        Returns:
            A :class:`CompressResult`; an empty digest when ``turns`` is empty.
        """
        if not turns:
            return CompressResult(digest="", tokens_freed=0, turns_compressed=0)

        turn_count = len(turns)
        original_tokens = count_tokens(self._format_turns(turns))
        prefix = f"[DIGEST: turns 1-{turn_count} compressed]\n"

        # Separate non-droppable turns (preserved verbatim) from droppable ones
        non_droppable = [t for t in turns if t.get("non_droppable")]
        droppable = [t for t in turns if not t.get("non_droppable")]

        droppable_tokens = self._count_turn_tokens(droppable)
        target_tokens = droppable_tokens * self._SUFFICIENT_RATIO

        # ── Tier 1: Drop raw tool result payloads (free, no API call) ─────────
        tier1_turns = self._tier1_compress(droppable)
        tier1_tokens = self._count_turn_tokens(tier1_turns)
        if tier1_tokens <= target_tokens:
            log.debug("Tier 1 sufficient: freed ~%d tokens", droppable_tokens - tier1_tokens)
            digest = self._build_tiered_digest(prefix, tier1_turns, non_droppable, turn_count)
            return self._make_result(digest, original_tokens, turn_count)

        # ── Tier 2: Summarize old assistant turns via API ──────────────────────
        try:
            tier2_turns = self._tier2_compress(tier1_turns, task_description)
            tier2_tokens = self._count_turn_tokens(tier2_turns)
        except Exception as exc:
            # Best effort: any Tier 2 failure simply escalates to Tier 3.
            log.debug("Tier 2 compression failed: %s", exc)
            tier2_turns, tier2_tokens = tier1_turns, tier1_tokens

        if tier2_tokens <= target_tokens:
            log.debug("Tier 2 sufficient: freed ~%d tokens", droppable_tokens - tier2_tokens)
            digest = self._build_tiered_digest(prefix, tier2_turns, non_droppable, turn_count)
            return self._make_result(digest, original_tokens, turn_count)

        # ── Tier 3: Full Scout digest (last resort) ────────────────────────────
        try:
            summary = self._request_digest(tier2_turns, task_description)
            digest = self._with_preserved(prefix + summary, non_droppable)
            result = self._make_result(digest, original_tokens, turn_count)
            log.debug(
                "Tier 3 (Scout): compressed %d turns, freed ~%d tokens",
                turn_count,
                result.tokens_freed,
            )
            return result
        except ImportError:
            return self._fallback_compress(turns)
        except Exception as exc:
            log.warning("Tier 3 Scout failed, using fallback: %s", exc)
            return self._fallback_compress(turns)

    def build_compression_prompt(
        self, turns: list[dict[str, Any]], task: str
    ) -> str:
        """Build the prompt asking the model to summarise ``turns``.

        Args:
            turns: raw turn dicts.
            task: short description of the current task; ``"(not specified)"``
                is substituted when empty.

        Returns:
            The module's prompt template with both placeholders filled in.
        """
        return _COMPRESSION_PROMPT_TEMPLATE.format(
            task_description=task or "(not specified)",
            formatted_turns=self._format_turns(turns),
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _count_turn_tokens(self, turns: list[dict[str, Any]]) -> int:
        """Token count of the formatted transcript of ``turns`` (0 if empty)."""
        return count_tokens(self._format_turns(turns)) if turns else 0

    def _make_result(
        self, digest: str, original_tokens: int, turn_count: int
    ) -> CompressResult:
        """Wrap ``digest`` in a result, computing the (non-negative) saving."""
        return CompressResult(
            digest=digest,
            tokens_freed=max(0, original_tokens - count_tokens(digest)),
            turns_compressed=turn_count,
        )

    def _with_preserved(
        self, digest: str, non_droppable_turns: list[dict[str, Any]]
    ) -> str:
        """Append the verbatim non-droppable section to ``digest``, if any."""
        if not non_droppable_turns:
            return digest
        return digest + self._PRESERVED_HEADER + self._format_turns(non_droppable_turns)

    def _request_digest(
        self, turns: list[dict[str, Any]], task_description: str
    ) -> str:
        """Ask the model for one digest of ``turns`` (Tier 3).

        Raises:
            ImportError: if the ``openai`` package is not installed.
            ValueError: if the model returns no text — an empty digest would
                silently discard every droppable turn.
            Exception: whatever the API client raises on failure.
        """
        import openai as _openai  # lazy import — avoids crash if not installed

        prompt = self.build_compression_prompt(turns, task_description)
        client = _openai.OpenAI(api_key=self._api_key, base_url=self._base_url)
        response = client.chat.completions.create(
            model=self._model_id,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=_MAX_OUTPUT_TOKENS,
        )
        summary = (response.choices[0].message.content or "").strip()
        if not summary:
            raise ValueError("model returned an empty summary")
        return summary

    def _fallback_compress(
        self, turns: list[dict[str, Any]]
    ) -> CompressResult:
        """Non-API fallback: truncate each droppable turn's text.

        Droppable turns keep their first ``_FALLBACK_CHAR_LIMIT`` characters;
        non-droppable turns (``non_droppable=True``) are kept in full.  Makes
        no API call, so it can also be invoked independently.
        """
        lines: list[str] = []
        for turn in turns:
            role = turn.get("role", "unknown")
            text = self._extract_content_text(turn.get("content", ""))
            if not turn.get("non_droppable"):
                text = text[:_FALLBACK_CHAR_LIMIT]
            lines.append(f"{role}: {text}")

        digest = f"[DIGEST: turns 1-{len(turns)} compressed]\n" + "\n".join(lines)
        original_tokens = count_tokens(self._format_turns(turns))
        return self._make_result(digest, original_tokens, len(turns))

    def _tier1_compress(
        self, turns: list[dict[str, Any]]
    ) -> list[dict[str, Any]]:
        """Tier 1: replace tool-result payloads with 1-line placeholders.

        Makes no API call.  Turns with ``role == "tool"`` are shallow-copied
        with their content replaced by a stub recording the tool name and the
        original length; every other turn (and any non-droppable turn) is
        returned as the same object, unmodified.
        """
        result: list[dict[str, Any]] = []
        for turn in turns:
            if turn.get("non_droppable") or turn.get("role") != "tool":
                result.append(turn)
                continue
            payload = self._extract_content_text(turn.get("content", ""))
            tool_name = turn.get("name", "tool")
            stub = f"[tool: {tool_name} → {len(payload)} chars truncated]"
            result.append({**turn, "content": stub})
        return result

    def _tier2_compress(
        self, turns: list[dict[str, Any]], task_description: str = ""
    ) -> list[dict[str, Any]]:
        """Tier 2: summarise older assistant turns via the API.

        The last three turns are left alone.  Among the earlier ones, each
        assistant turn of at least 50 tokens that is not non-droppable is
        replaced by a shallow copy whose content is a 1-paragraph summary.

        Failures are best-effort: if the client cannot be created the turns
        are returned unchanged, and a failed or empty summary for one turn
        leaves that turn unchanged (logged at debug level).

        Raises:
            ImportError: if the ``openai`` package is not installed.
        """
        import openai as _openai  # lazy import — avoids crash if not installed

        result = list(turns)
        cutoff = max(0, len(turns) - 3)  # only process turns >3 from the end

        try:
            client = _openai.OpenAI(api_key=self._api_key, base_url=self._base_url)
        except Exception as exc:
            log.debug("Tier 2: failed to create OpenAI client: %s", exc)
            return result

        for i, turn in enumerate(turns[:cutoff]):
            if turn.get("non_droppable"):
                continue  # preserve verbatim
            if turn.get("role") != "assistant":
                continue
            orig_content = self._extract_content_text(turn.get("content", ""))
            if not orig_content or count_tokens(orig_content) < 50:
                continue  # already short — not worth an API call
            try:
                prompt = (
                    f"Summarize in 1 paragraph "
                    f"(task: {task_description or '(not specified)'}):\n"
                    f"{orig_content[:2000]}"
                )
                resp = client.chat.completions.create(
                    model=self._model_id,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=100,
                )
                summary = (resp.choices[0].message.content or "").strip()
                if summary:
                    result[i] = {**turn, "content": summary}
            except Exception as exc:  # noqa: BLE001
                log.debug("Tier 2: failed to summarize turn %d: %s", i, exc)
        return result

    def _build_tiered_digest(
        self,
        prefix: str,
        compressed_turns: list[dict[str, Any]],
        non_droppable_turns: list[dict[str, Any]],
        original_count: int,
    ) -> str:
        """Build a digest string from Tier 1/2 compressed turns.

        The result is ``prefix`` + the formatted ``compressed_turns``, followed
        by a labelled section holding ``non_droppable_turns`` verbatim when
        there are any.  ``original_count`` is accepted for interface
        compatibility and is not used.
        """
        return self._with_preserved(
            prefix + self._format_turns(compressed_turns), non_droppable_turns
        )

    @staticmethod
    def _extract_content_text(content: Any) -> str:
        """Coerce a message ``content`` value to plain text.

        Handles raw strings, ``None`` and the list-of-parts format.  ``None``
        (e.g. an assistant message that only carries tool calls) becomes an
        empty string rather than the literal ``"None"``.  In a list, dict
        parts contribute their ``"text"`` value, other parts their ``str()``;
        parts without text are skipped.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces: list[str] = []
            for part in content:
                piece = part.get("text") if isinstance(part, dict) else part
                if piece is None or piece == "":
                    continue  # non-text part (or explicit null) — nothing to add
                pieces.append(piece if isinstance(piece, str) else str(piece))
            return " ".join(pieces)
        return str(content)

    def _format_turns(self, turns: list[dict[str, Any]]) -> str:
        """Format ``turns`` as a numbered transcript, one line per turn.

        Each line is ``[Turn i] role: text`` (numbered from 1 within the given
        list), or ``[Turn i] tool_result(name): text`` for tool turns.  Names of
        any ``tool_calls`` are appended to the text as ``[tool_calls: a, b]``.
        """
        lines: list[str] = []
        for i, turn in enumerate(turns, 1):
            role = turn.get("role", "unknown")
            text = self._extract_content_text(turn.get("content", ""))
            tool_calls: list[Any] = turn.get("tool_calls") or []
            if tool_calls:
                names = [
                    # ``function`` or its ``name`` may be missing/null.
                    str((tc.get("function") or {}).get("name") or "?")
                    if isinstance(tc, dict)
                    else str(tc)
                    for tc in tool_calls
                ]
                text = f"{text} [tool_calls: {', '.join(names)}]".strip()
            if role == "tool":
                tool_name = turn.get("name", "tool")
                lines.append(f"[Turn {i}] tool_result({tool_name}): {text}")
            else:
                lines.append(f"[Turn {i}] {role}: {text}")
        return "\n".join(lines)
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
""".context-memory/ directory manager — handles FAISS index paths and daemon IPC."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from src._internal.constants import _CONTEXT_MEMORY_DIR
|
|
12
|
+
|
|
13
|
+
# Public names exported by this module.
__all__ = ["ContextMemoryManager"]

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# File and directory names used inside the context-memory directory.
_FAISS_FILENAME: str = "code_index.faiss"
_INBOX_DIR: str = "inbox"
_OUTBOX_DIR: str = "outbox"
# Job file name; ``{job_id}`` is filled in with ``str.format``.
_JOB_FILENAME_PATTERN: str = "job-{job_id}.json"
# Allowed job ids: 1-64 ASCII letters, digits, "_" or "-".
# NOTE(review): ``$`` also matches just before a trailing newline, so
# ``.match()`` accepts "abc\n" — callers that need a strict check should use
# ``.fullmatch()``.
_SAFE_JOB_ID_RE: re.Pattern[str] = re.compile(r"^[a-zA-Z0-9_-]{1,64}$")

# Content written to the directory's ``.gitignore`` when none exists yet.
_GITIGNORE_CONTENT: str = (
    "# gdm code \u2014 managed state, do not commit\n"
    "gdm.db\n"
    "code_index.faiss\n"
    "inbox/\n"
    "outbox/\n"
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ContextMemoryManager:
    """Manages the .context-memory/ directory structure.

    Responsibilities:
    - Ensure required subdirectories exist
    - Manage .gitignore for .context-memory/
    - Provide typed paths for FAISS index, daemon IPC (inbox/outbox)
    - Never touches gdm.db (that's GdmDatabase's job)

    Directory layout::

        .context-memory/
        \u251c\u2500\u2500 gdm.db              (managed by GdmDatabase, not here)
        \u251c\u2500\u2500 code_index.faiss    (managed by code_index.py)
        \u251c\u2500\u2500 inbox/              (daemon job queue)
        \u2514\u2500\u2500 outbox/             (daemon results)
    """

    def __init__(self, project_root: Path) -> None:
        """Remember ``project_root`` and derive the managed directory path.

        Nothing is created on disk here; call :meth:`ensure_structure`.
        """
        self._project_root = project_root
        self._root = project_root / _CONTEXT_MEMORY_DIR

    def ensure_structure(self) -> None:
        """Create directories and .gitignore if missing. Idempotent."""
        self._root.mkdir(parents=True, exist_ok=True)
        self.inbox_dir.mkdir(exist_ok=True)
        self.outbox_dir.mkdir(exist_ok=True)
        self._ensure_gitignore()

    @property
    def root(self) -> Path:
        """Path to .context-memory/ directory."""
        return self._root

    @property
    def faiss_index_path(self) -> Path:
        """Path to code_index.faiss file."""
        return self._root / _FAISS_FILENAME

    @property
    def inbox_dir(self) -> Path:
        """Path to daemon job inbox directory."""
        return self._root / _INBOX_DIR

    @property
    def outbox_dir(self) -> Path:
        """Path to daemon results outbox directory."""
        return self._root / _OUTBOX_DIR

    def get_inbox_path(self, job_id: str) -> Path:
        """Path for a specific job's inbox file: inbox/job-{job_id}.json

        Raises:
            ValueError: if ``job_id`` is not a safe identifier.
        """
        self._validate_job_id(job_id)
        return self.inbox_dir / _JOB_FILENAME_PATTERN.format(job_id=job_id)

    def get_outbox_path(self, job_id: str) -> Path:
        """Path for a specific job's outbox file: outbox/job-{job_id}.json

        Raises:
            ValueError: if ``job_id`` is not a safe identifier.
        """
        self._validate_job_id(job_id)
        return self.outbox_dir / _JOB_FILENAME_PATTERN.format(job_id=job_id)

    def write_job(self, job_id: str, payload: dict[str, Any]) -> None:
        """Write a job to the inbox as JSON via a temp file + ``os.replace``.

        The inbox directory is created on demand, so this works even when
        :meth:`ensure_structure` has not been called.  The payload is
        serialised before anything touches the disk, and a partially written
        temp file is removed if the write or rename fails.

        Raises:
            ValueError: if ``job_id`` is not a safe identifier.
            TypeError: if ``payload`` is not JSON-serialisable.
            OSError: if the file cannot be written.
        """
        dest = self.get_inbox_path(job_id)
        body = json.dumps(payload, indent=2)

        dest.parent.mkdir(parents=True, exist_ok=True)
        # Same directory as ``dest`` so the final rename stays on one filesystem;
        # the ".tmp" suffix keeps it out of the "job-*.json" glob.
        tmp = dest.with_suffix(".tmp")
        try:
            tmp.write_text(body, encoding="utf-8")
            os.replace(tmp, dest)
        except OSError:
            tmp.unlink(missing_ok=True)
            raise
        log.debug("Job %s written to inbox", job_id)

    def read_result(self, job_id: str) -> dict[str, Any] | None:
        """Read a completed job result from outbox. Returns None if not ready.

        A missing file and a file that is not valid JSON are both reported as
        "not ready" (the latter with a warning).

        Raises:
            ValueError: if ``job_id`` is not a safe identifier.
        """
        outbox_path = self.get_outbox_path(job_id)
        # Read directly instead of exists()-then-read: the file may appear or
        # vanish between the two calls.
        try:
            raw = outbox_path.read_text(encoding="utf-8")
        except FileNotFoundError:
            return None
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            log.warning("Corrupt result for job %s — treating as not ready", job_id)
            return None

    def list_pending_jobs(self) -> list[str]:
        """Return job IDs that are in inbox but not yet in outbox.

        Inbox files whose name does not yield a safe job id are skipped.  The
        result is sorted so the order does not depend on the filesystem.
        """
        if not self.inbox_dir.exists():
            return []
        pending: list[str] = []
        for job_file in self.inbox_dir.glob("job-*.json"):
            job_id = job_file.stem.removeprefix("job-")
            # fullmatch: ``$`` alone would accept an id ending in a newline.
            if not _SAFE_JOB_ID_RE.fullmatch(job_id):
                continue  # skip malformed filenames
            outbox_file = self.outbox_dir / _JOB_FILENAME_PATTERN.format(job_id=job_id)
            if not outbox_file.exists():
                pending.append(job_id)
        return sorted(pending)

    def _ensure_gitignore(self) -> None:
        """Write .gitignore into .context-memory/ if not already present."""
        gitignore = self._root / ".gitignore"
        if not gitignore.exists():
            gitignore.write_text(_GITIGNORE_CONTENT, encoding="utf-8")

    def _validate_job_id(self, job_id: str) -> None:
        """Raise ValueError if job_id is not a safe identifier (path traversal guard).

        The whole string must consist of 1-64 characters from
        ``[a-zA-Z0-9_-]``.  ``fullmatch`` is used because ``$`` in the pattern
        would otherwise let an id with a trailing newline through; non-string
        values are rejected with the same ``ValueError``.
        """
        if not isinstance(job_id, str) or not _SAFE_JOB_ID_RE.fullmatch(job_id):
            raise ValueError(
                f"Invalid job_id {job_id!r}: must match [a-zA-Z0-9_-]{{1,64}}"
            )
|