gdmcode-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. gdmcode-0.1.0.dist-info/METADATA +240 -0
  2. gdmcode-0.1.0.dist-info/RECORD +131 -0
  3. gdmcode-0.1.0.dist-info/WHEEL +4 -0
  4. gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/_internal/__init__.py +0 -0
  7. src/_internal/constants.py +244 -0
  8. src/_internal/domain_skills.py +339 -0
  9. src/agent/__init__.py +0 -0
  10. src/agent/commit_classifier.py +91 -0
  11. src/agent/context_budget.py +391 -0
  12. src/agent/daemon.py +681 -0
  13. src/agent/dag_validator.py +153 -0
  14. src/agent/debug_loop.py +473 -0
  15. src/agent/impact_analyzer.py +149 -0
  16. src/agent/impact_graph.py +117 -0
  17. src/agent/loop.py +1410 -0
  18. src/agent/orchestrator.py +141 -0
  19. src/agent/regression_guard.py +251 -0
  20. src/agent/review_gate.py +648 -0
  21. src/agent/risk_scorer.py +169 -0
  22. src/agent/self_healing.py +145 -0
  23. src/agent/smart_test_selector.py +89 -0
  24. src/agent/system_prompt.py +226 -0
  25. src/agent/task_tracker.py +320 -0
  26. src/agent/test_validator.py +210 -0
  27. src/agent/tool_orchestrator.py +402 -0
  28. src/agent/transcript.py +230 -0
  29. src/agent/verification_loop.py +133 -0
  30. src/agent/work_director.py +136 -0
  31. src/agent/worktree_manager.py +53 -0
  32. src/artifacts/__init__.py +16 -0
  33. src/artifacts/artifact_store.py +456 -0
  34. src/artifacts/verification_graph.py +75 -0
  35. src/auth.py +411 -0
  36. src/cli.py +1290 -0
  37. src/commands.py +1398 -0
  38. src/config.py +762 -0
  39. src/cost_tracker.py +348 -0
  40. src/db/__init__.py +4 -0
  41. src/db/migrations.py +337 -0
  42. src/enterprise/__init__.py +3 -0
  43. src/enterprise/audit_log.py +182 -0
  44. src/enterprise/identity.py +90 -0
  45. src/enterprise/rbac.py +100 -0
  46. src/enterprise/team_config.py +125 -0
  47. src/enterprise/usage_analytics.py +261 -0
  48. src/exceptions.py +207 -0
  49. src/git_workflow.py +651 -0
  50. src/integrations/__init__.py +6 -0
  51. src/integrations/github_actions.py +106 -0
  52. src/integrations/mcp_server.py +333 -0
  53. src/integrations/sentry_integration.py +100 -0
  54. src/integrations/sentry_server.py +82 -0
  55. src/integrations/webhook_security.py +19 -0
  56. src/main.py +27 -0
  57. src/memory/__init__.py +0 -0
  58. src/memory/code_index.py +376 -0
  59. src/memory/compressor.py +378 -0
  60. src/memory/context_memory.py +135 -0
  61. src/memory/continuous_memory.py +234 -0
  62. src/memory/conventions.py +495 -0
  63. src/memory/db.py +1119 -0
  64. src/memory/document_index.py +205 -0
  65. src/memory/file_cache.py +128 -0
  66. src/memory/project_scanner.py +178 -0
  67. src/memory/session_store.py +201 -0
  68. src/models/__init__.py +0 -0
  69. src/models/client.py +715 -0
  70. src/models/definitions.py +459 -0
  71. src/models/router.py +418 -0
  72. src/models/schemas.py +389 -0
  73. src/permissions.py +294 -0
  74. src/remote/__init__.py +5 -0
  75. src/remote/command_filter.py +33 -0
  76. src/remote/models.py +31 -0
  77. src/remote/permission_handler.py +79 -0
  78. src/remote/phone_ui.py +48 -0
  79. src/remote/protocol.py +59 -0
  80. src/remote/qr.py +65 -0
  81. src/remote/server.py +586 -0
  82. src/remote/token_manager.py +61 -0
  83. src/remote/tunnel.py +212 -0
  84. src/repl.py +475 -0
  85. src/runtime/__init__.py +1 -0
  86. src/runtime/branch_farm.py +372 -0
  87. src/runtime/replay.py +351 -0
  88. src/sandbox/__init__.py +2 -0
  89. src/sandbox/hermetic.py +214 -0
  90. src/sandbox/policy.py +44 -0
  91. src/sdk/__init__.py +3 -0
  92. src/sdk/plugin_base.py +39 -0
  93. src/sdk/plugin_host.py +100 -0
  94. src/sdk/plugin_loader.py +101 -0
  95. src/security.py +409 -0
  96. src/server/__init__.py +7 -0
  97. src/server/bridge.py +427 -0
  98. src/server/bridge_cli.py +103 -0
  99. src/server/bridge_client.py +170 -0
  100. src/server/protocol_version.py +103 -0
  101. src/session/__init__.py +10 -0
  102. src/session/event_fanout.py +46 -0
  103. src/session/input_broker.py +38 -0
  104. src/session/permission_bridge.py +100 -0
  105. src/tools/__init__.py +160 -0
  106. src/tools/_atomic.py +72 -0
  107. src/tools/agent_tools.py +423 -0
  108. src/tools/ask_user_tool.py +83 -0
  109. src/tools/bash_tool.py +384 -0
  110. src/tools/browser_tool.py +352 -0
  111. src/tools/browser_tools.py +179 -0
  112. src/tools/dep_tools.py +210 -0
  113. src/tools/document_reader.py +167 -0
  114. src/tools/document_tool.py +240 -0
  115. src/tools/document_writer.py +171 -0
  116. src/tools/impact_tools.py +240 -0
  117. src/tools/playwright_tool.py +172 -0
  118. src/tools/quality_tools.py +366 -0
  119. src/tools/read_tools.py +318 -0
  120. src/tools/result_cache.py +157 -0
  121. src/tools/search_tools.py +310 -0
  122. src/tools/shell_tools.py +311 -0
  123. src/tools/write_tools.py +337 -0
  124. src/voice/__init__.py +25 -0
  125. src/voice/audio_capture.py +92 -0
  126. src/voice/audio_playback.py +68 -0
  127. src/voice/errors.py +14 -0
  128. src/voice/models.py +35 -0
  129. src/voice/providers.py +143 -0
  130. src/voice/vad.py +55 -0
  131. src/voice/voice_loop.py +156 -0
@@ -0,0 +1,378 @@
1
+ """Session compressor — summarises old conversation turns via the Scout model.
2
+
3
+ Called by ``loop._maybe_compress()`` when the context budget exceeds the
4
+ compression trigger threshold. The resulting digest is injected into the
5
+ transcript as a ``system`` message that replaces the compressed turns.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from dataclasses import dataclass
11
+ from typing import Any
12
+
13
+ from src.agent.context_budget import count_tokens
14
+ from src.config import GdmConfig
15
+ from src.models.definitions import PROVIDER_BASE_URLS, ModelTier, Provider, get_model
16
+
17
+ __all__ = ["CompressResult", "SessionCompressor"]
18
+
19
+ log = logging.getLogger(__name__)
20
+
21
+ # ── Constants ────────────────────────────────────────────────────────────────
22
+
23
# Upper bound passed as ``max_tokens`` on the Tier 3 (full digest) API call.
# Keep in sync with the "≤300 tokens" wording in the prompt template below.
_MAX_OUTPUT_TOKENS: int = 300
# Number of leading characters kept per droppable turn by the non-API fallback.
_FALLBACK_CHAR_LIMIT: int = 200

# Prompt sent to the summarisation model. Filled via ``str.format`` with
# ``task_description`` and ``formatted_turns``.
_COMPRESSION_PROMPT_TEMPLATE: str = """\
Summarize this conversation segment into ≤300 tokens, preserving:
- Key decisions made
- Files modified (with line numbers if mentioned)
- Errors encountered and how they were resolved
- User preferences expressed
- Current task state

Format as plain prose. Be maximally dense.
Task context: {task_description}

Conversation to summarize:
{formatted_turns}"""
39
+
40
+
41
+ # ── Data model ───────────────────────────────────────────────────────────────
42
+
43
+
44
@dataclass
class CompressResult:
    """Outcome of one compression pass over a batch of turns.

    Attributes:
        digest: Summary text, prefixed with ``[DIGEST: turns X-Y compressed]``.
        tokens_freed: Approximate number of tokens freed by removing the
            compressed turns.
        turns_compressed: Number of turns that were summarised.
    """

    digest: str
    tokens_freed: int
    turns_compressed: int
52
+
53
+ # ── Main class ───────────────────────────────────────────────────────────────
54
+
55
+
56
class SessionCompressor:
    """Compresses old conversation turns into a digest using the Scout model.

    Uses grok-4-1-fast-non-reasoning (Scout) to generate a dense summary
    of old turns. The digest replaces the compressed turns in context.

    Cost: ~500 input tokens → ~$0.0001 per compression (effectively free).

    Usage::

        compressor = SessionCompressor(cfg)
        result = compressor.compress(turns_to_compress, task_description)
        # Inject result.digest into transcript as a system message
    """

    # A tier is "sufficient" when the digest it would emit is at most this
    # fraction of the original token count (i.e. at least 30% is freed).
    _SUFFICIENT_RATIO: float = 0.70
    # Label that introduces the verbatim copy of non-droppable turns.
    _PRESERVED_HEADER: str = "\n[PRESERVED NON-DROPPABLE TURNS]:\n"

    def __init__(self, cfg: GdmConfig) -> None:
        """Resolve API key, base URL and model id used for summarisation.

        Args:
            cfg: project configuration; ``xai_api_key``, ``api_key`` and
                ``provider`` are read.
        """
        # Prefer Grok for compression — Scout is a Grok-native model.
        # Fall back to the configured provider if Grok credentials are absent.
        if cfg.xai_api_key:
            self._api_key: str = cfg.xai_api_key
            self._base_url: str = PROVIDER_BASE_URLS[Provider.GROK]
            self._model_id: str = get_model(ModelTier.SCOUT, Provider.GROK).id
        else:
            self._api_key = cfg.api_key
            self._base_url = PROVIDER_BASE_URLS.get(
                cfg.provider, PROVIDER_BASE_URLS[Provider.GROK]
            )
            try:
                self._model_id = get_model(ModelTier.SCOUT, cfg.provider).id
            except KeyError:
                # NOTE(review): this pairs the Grok Scout model id with the
                # configured provider's key/base URL — confirm that is intended.
                self._model_id = get_model(ModelTier.SCOUT, Provider.GROK).id

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def compress(
        self,
        turns: list[dict[str, Any]],
        task_description: str = "",
    ) -> CompressResult:
        """Summarize old turns into a compact digest using 3-tier strategy.

        Tier 1 (free): replace raw tool result payloads with 1-line summaries.
        Tier 2 (~$0.001): summarize old assistant turns via API (1 paragraph each).
        Tier 3 (~$0.003): full Scout digest of all droppable turns (last resort).

        A tier is accepted when the digest it would emit (including the
        verbatim non-droppable turns) is at most 70% of the original size.

        Non-droppable turns (``non_droppable=True``) are preserved verbatim at
        every tier — their content is never truncated or summarised.

        Falls back to :meth:`_fallback_compress` if the API is unavailable or
        returns an empty summary.

        Args:
            turns: raw turn dicts, typically from ``turn.to_compress_dict()``.
            task_description: short description of the current task for context.

        Returns:
            A :class:`CompressResult` with the digest and token savings.
        """
        if not turns:
            return CompressResult(digest="", tokens_freed=0, turns_compressed=0)

        total = len(turns)
        original_tokens = count_tokens(self._format_turns(turns))
        prefix = f"[DIGEST: turns 1-{total} compressed]\n"

        # Separate non-droppable turns (preserved verbatim) from droppable ones
        non_droppable = [t for t in turns if t.get("non_droppable")]
        droppable = [t for t in turns if not t.get("non_droppable")]

        if not droppable:
            # Nothing may be shortened; do not spend API calls on turns that
            # will be copied verbatim anyway.
            digest = self._build_tiered_digest(prefix, [], non_droppable, total)
            return self._make_result(digest, original_tokens, total)

        # ── Tier 1: Drop raw tool result payloads (free, no API call) ─────────
        tier1_turns = self._tier1_compress(droppable)
        result = self._result_if_sufficient(
            prefix, tier1_turns, non_droppable, original_tokens, total
        )
        if result is not None:
            log.debug("Tier 1 sufficient: freed ~%d tokens", result.tokens_freed)
            return result

        # ── Tier 2: Summarize old assistant turns via API ──────────────────────
        try:
            tier2_turns = self._tier2_compress(tier1_turns, task_description)
        except Exception as exc:  # noqa: BLE001 - any failure falls through to Tier 3
            log.debug("Tier 2 compression failed: %s", exc)
            tier2_turns = tier1_turns
        else:
            result = self._result_if_sufficient(
                prefix, tier2_turns, non_droppable, original_tokens, total
            )
            if result is not None:
                log.debug("Tier 2 sufficient: freed ~%d tokens", result.tokens_freed)
                return result

        # ── Tier 3: Full Scout digest (last resort) ────────────────────────────
        try:
            import openai as _openai  # lazy import — avoids crash if not installed

            prompt = self.build_compression_prompt(tier2_turns, task_description)
            client = _openai.OpenAI(api_key=self._api_key, base_url=self._base_url)
            response = client.chat.completions.create(
                model=self._model_id,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=_MAX_OUTPUT_TOKENS,
            )
            summary = (response.choices[0].message.content or "").strip()
        except ImportError:
            return self._fallback_compress(turns)
        except Exception as exc:  # noqa: BLE001 - API boundary, degrade gracefully
            log.warning("Tier 3 Scout failed, using fallback: %s", exc)
            return self._fallback_compress(turns)

        if not summary:
            # An empty reply would yield a digest holding only the prefix and
            # silently discard the whole history.
            log.warning("Tier 3 Scout returned an empty summary, using fallback")
            return self._fallback_compress(turns)

        # Preserve non-droppable turns verbatim in the digest
        digest = prefix + summary + self._preserved_section(non_droppable)
        result = self._make_result(digest, original_tokens, total)
        log.debug(
            "Tier 3 (Scout): compressed %d turns, freed ~%d tokens",
            total,
            result.tokens_freed,
        )
        return result

    def build_compression_prompt(
        self, turns: list[dict[str, Any]], task: str
    ) -> str:
        """Build the prompt asking Scout to summarize the turns.

        Args:
            turns: raw turn dicts.
            task: short description of the current task.

        Returns:
            A formatted prompt string ready to send to the model.
        """
        return _COMPRESSION_PROMPT_TEMPLATE.format(
            task_description=task or "(not specified)",
            formatted_turns=self._format_turns(turns),
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _make_result(
        self, digest: str, original_tokens: int, turns_compressed: int
    ) -> CompressResult:
        """Wrap *digest* in a result, computing the (non-negative) token savings."""
        return CompressResult(
            digest=digest,
            tokens_freed=max(0, original_tokens - count_tokens(digest)),
            turns_compressed=turns_compressed,
        )

    def _result_if_sufficient(
        self,
        prefix: str,
        compressed_turns: list[dict[str, Any]],
        non_droppable_turns: list[dict[str, Any]],
        original_tokens: int,
        original_count: int,
    ) -> CompressResult | None:
        """Return a result if this tier's digest is small enough, else ``None``.

        The size check is made on the digest that would actually be emitted,
        so verbatim non-droppable turns count against the budget on both
        sides of the comparison.
        """
        digest = self._build_tiered_digest(
            prefix, compressed_turns, non_droppable_turns, original_count
        )
        digest_tokens = count_tokens(digest)
        if digest_tokens > original_tokens * self._SUFFICIENT_RATIO:
            return None
        return CompressResult(
            digest=digest,
            tokens_freed=max(0, original_tokens - digest_tokens),
            turns_compressed=original_count,
        )

    def _preserved_section(self, non_droppable_turns: list[dict[str, Any]]) -> str:
        """Return the labelled verbatim section, or ``""`` when there is none."""
        if not non_droppable_turns:
            return ""
        return self._PRESERVED_HEADER + self._format_turns(non_droppable_turns)

    def _fallback_compress(
        self, turns: list[dict[str, Any]]
    ) -> CompressResult:
        """Simple non-API fallback: truncate droppable turns to first 200 chars.

        Non-droppable turns (``non_droppable=True``) are preserved verbatim —
        their content is never truncated regardless of length.

        Called when the Scout API is unavailable or returns an error.
        Self-contained so it can also be invoked independently.
        """
        lines: list[str] = []
        for turn in turns:
            role = turn.get("role", "unknown")
            text = self._extract_content_text(turn.get("content", ""))
            if not turn.get("non_droppable"):
                text = text[:_FALLBACK_CHAR_LIMIT]
            lines.append(f"{role}: {text}")

        prefix = f"[DIGEST: turns 1-{len(turns)} compressed]\n"
        digest = prefix + "\n".join(lines)
        original_tokens = count_tokens(self._format_turns(turns))
        return self._make_result(digest, original_tokens, len(turns))

    def _tier1_compress(
        self, turns: list[dict[str, Any]]
    ) -> list[dict[str, Any]]:
        """Tier 1: Replace raw tool result payloads with 1-line summaries.

        Free — no API call required. Input dicts are never mutated; replaced
        turns are shallow copies with a new ``content``.

        Non-droppable turns are never modified.
        """
        result: list[dict[str, Any]] = []
        for turn in turns:
            if turn.get("non_droppable") or turn.get("role") != "tool":
                result.append(turn)
                continue
            orig = self._extract_content_text(turn.get("content", ""))
            tool_name = turn.get("name") or "tool"
            short = f"[tool: {tool_name} → {len(orig)} chars truncated]"
            result.append({**turn, "content": short})
        return result

    def _tier2_compress(
        self, turns: list[dict[str, Any]], task_description: str = ""
    ) -> list[dict[str, Any]]:
        """Tier 2: Summarize old assistant turns (>3 turns back) via API.

        Each qualifying assistant turn is replaced with a 1-paragraph summary.
        Short turns (< 50 tokens) and non-droppable turns are skipped.

        Failures are best-effort: if the client cannot be created the turns
        are returned unchanged, and a turn whose API call fails keeps its
        original content.

        Raises:
            ImportError: if the ``openai`` package is not installed.
        """
        import openai as _openai  # lazy import — avoids crash if not installed

        result = list(turns)
        cutoff = max(0, len(turns) - 3)  # only process turns >3 from the end

        try:
            client = _openai.OpenAI(api_key=self._api_key, base_url=self._base_url)
        except Exception as exc:  # noqa: BLE001
            log.debug("Tier 2: failed to create OpenAI client: %s", exc)
            return result

        for i, turn in enumerate(turns[:cutoff]):
            if turn.get("non_droppable"):
                continue  # preserve verbatim
            if turn.get("role") != "assistant":
                continue
            orig_content = self._extract_content_text(turn.get("content", ""))
            if not orig_content or count_tokens(orig_content) < 50:
                continue  # already short — not worth an API call
            try:
                prompt = (
                    f"Summarize in 1 paragraph "
                    f"(task: {task_description or '(not specified)'}):\n"
                    f"{orig_content[:2000]}"
                )
                resp = client.chat.completions.create(
                    model=self._model_id,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=100,
                )
                summary = (resp.choices[0].message.content or "").strip()
                if summary:
                    result[i] = {**turn, "content": summary}
            except Exception as exc:  # noqa: BLE001
                log.debug("Tier 2: failed to summarize turn %d: %s", i, exc)
        return result

    def _build_tiered_digest(
        self,
        prefix: str,
        compressed_turns: list[dict[str, Any]],
        non_droppable_turns: list[dict[str, Any]],
        original_count: int,
    ) -> str:
        """Build a digest string from Tier 1/2 compressed turns.

        Non-droppable turns are appended verbatim in a clearly-labelled section.
        ``original_count`` is accepted for interface compatibility and is not
        used when building the text.
        """
        return (
            prefix
            + self._format_turns(compressed_turns)
            + self._preserved_section(non_droppable_turns)
        )

    @staticmethod
    def _extract_content_text(content: Any) -> str:
        """Coerce a message ``content`` value to plain text.

        Handles raw strings, the OpenAI list-of-parts format, and ``None``
        (which is returned as an empty string rather than the text "None").
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            parts: list[str] = []
            for part in content:
                if isinstance(part, dict):
                    text = part.get("text", "")
                    # A null/non-string "text" must not break the join below.
                    parts.append("" if text is None else str(text))
                else:
                    parts.append(str(part))
            return " ".join(parts)
        return str(content)

    @staticmethod
    def _tool_call_name(tool_call: Any) -> str:
        """Return the function name of a tool call, or ``"?"`` if unavailable."""
        if not isinstance(tool_call, dict):
            return str(tool_call)
        function = tool_call.get("function")
        name = function.get("name") if isinstance(function, dict) else None
        return str(name) if name else "?"

    def _format_turns(self, turns: list[dict[str, Any]]) -> str:
        """Format a list of turns as a numbered transcript string.

        Serialises text content and any tool call names so the compressor
        receives a faithful record of what happened in each turn. Numbering
        restarts at 1 for whatever list is passed in.
        """
        lines: list[str] = []
        for i, turn in enumerate(turns, 1):
            role = turn.get("role", "unknown")
            text = self._extract_content_text(turn.get("content", ""))
            tool_calls: list[Any] = turn.get("tool_calls") or []
            if tool_calls:
                names = [self._tool_call_name(tc) for tc in tool_calls]
                text = f"{text} [tool_calls: {', '.join(names)}]".strip()
            if role == "tool":
                tool_name = turn.get("name") or "tool"
                lines.append(f"[Turn {i}] tool_result({tool_name}): {text}")
            else:
                lines.append(f"[Turn {i}] {role}: {text}")
        return "\n".join(lines)
@@ -0,0 +1,135 @@
1
+ """.context-memory/ directory manager — handles FAISS index paths and daemon IPC."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ import os
7
+ import re
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from src._internal.constants import _CONTEXT_MEMORY_DIR
12
+
13
+ __all__ = ["ContextMemoryManager"]
14
+
15
log = logging.getLogger(__name__)

_FAISS_FILENAME: str = "code_index.faiss"
_INBOX_DIR: str = "inbox"
_OUTBOX_DIR: str = "outbox"
_JOB_FILENAME_PATTERN: str = "job-{job_id}.json"
# Always apply with ``fullmatch``: with ``match`` the trailing ``$`` also
# matches just before a final newline, so "abc\n" would be accepted.
_SAFE_JOB_ID_RE: re.Pattern[str] = re.compile(r"^[a-zA-Z0-9_-]{1,64}$")

_GITIGNORE_CONTENT: str = (
    "# gdm code \u2014 managed state, do not commit\n"
    "gdm.db\n"
    "code_index.faiss\n"
    "inbox/\n"
    "outbox/\n"
)


class ContextMemoryManager:
    """Manages the .context-memory/ directory structure.

    Responsibilities:
    - Ensure required subdirectories exist
    - Manage .gitignore for .context-memory/
    - Provide typed paths for FAISS index, daemon IPC (inbox/outbox)
    - Never touches gdm.db (that's GdmDatabase's job)

    Directory layout::

        .context-memory/
        \u251c\u2500\u2500 gdm.db              (managed by GdmDatabase, not here)
        \u251c\u2500\u2500 code_index.faiss    (managed by code_index.py)
        \u251c\u2500\u2500 inbox/              (daemon job queue)
        \u2514\u2500\u2500 outbox/             (daemon results)
    """

    def __init__(self, project_root: Path) -> None:
        """Bind the manager to *project_root*; nothing is created on disk yet."""
        self._project_root = project_root
        self._root = project_root / _CONTEXT_MEMORY_DIR

    def ensure_structure(self) -> None:
        """Create directories and .gitignore if missing. Idempotent."""
        self._root.mkdir(parents=True, exist_ok=True)
        self.inbox_dir.mkdir(exist_ok=True)
        self.outbox_dir.mkdir(exist_ok=True)
        self._ensure_gitignore()

    @property
    def root(self) -> Path:
        """Path to .context-memory/ directory."""
        return self._root

    @property
    def faiss_index_path(self) -> Path:
        """Path to code_index.faiss file."""
        return self._root / _FAISS_FILENAME

    @property
    def inbox_dir(self) -> Path:
        """Path to daemon job inbox directory."""
        return self._root / _INBOX_DIR

    @property
    def outbox_dir(self) -> Path:
        """Path to daemon results outbox directory."""
        return self._root / _OUTBOX_DIR

    def get_inbox_path(self, job_id: str) -> Path:
        """Path for a specific job's inbox file: inbox/job-{job_id}.json

        Raises:
            ValueError: if *job_id* is not a safe identifier.
        """
        self._validate_job_id(job_id)
        return self.inbox_dir / _JOB_FILENAME_PATTERN.format(job_id=job_id)

    def get_outbox_path(self, job_id: str) -> Path:
        """Path for a specific job's outbox file: outbox/job-{job_id}.json

        Raises:
            ValueError: if *job_id* is not a safe identifier.
        """
        self._validate_job_id(job_id)
        return self.outbox_dir / _JOB_FILENAME_PATTERN.format(job_id=job_id)

    def write_job(self, job_id: str, payload: dict[str, Any]) -> None:
        """Write a job to the inbox as JSON. Uses atomic write via temp file.

        The payload is written to ``job-{job_id}.tmp`` and then renamed over
        the final path, so a reader globbing ``job-*.json`` never sees a
        partial file. The temp file is removed if the write or rename fails.

        Raises:
            ValueError: if *job_id* is not a safe identifier.
            TypeError: if *payload* is not JSON-serialisable.
            OSError: if the inbox directory is missing or not writable.
        """
        dest = self.get_inbox_path(job_id)
        # Serialise first so a bad payload never touches the filesystem.
        data = json.dumps(payload, indent=2)
        tmp = dest.with_suffix(".tmp")
        try:
            tmp.write_text(data, encoding="utf-8")
            os.replace(tmp, dest)
        except BaseException:
            # Do not leave a half-written temp file behind; cleanup is
            # best-effort and must not mask the original error.
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                pass
            raise
        log.debug("Job %s written to inbox", job_id)

    def read_result(self, job_id: str) -> dict[str, Any] | None:
        """Read a completed job result from outbox. Returns None if not ready.

        A missing file, undecodable bytes, invalid JSON, or a JSON value that
        is not an object are all treated as "not ready".

        Raises:
            ValueError: if *job_id* is not a safe identifier.
        """
        outbox_path = self.get_outbox_path(job_id)
        try:
            # Read directly instead of exists()-then-read: the file may be
            # removed between the two calls.
            result = json.loads(outbox_path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            return None
        except (json.JSONDecodeError, UnicodeDecodeError):
            log.warning("Corrupt result for job %s — treating as not ready", job_id)
            return None
        if not isinstance(result, dict):
            log.warning(
                "Result for job %s is not a JSON object — treating as not ready",
                job_id,
            )
            return None
        return result

    def list_pending_jobs(self) -> list[str]:
        """Return job IDs that are in inbox but not yet in outbox.

        IDs are returned in sorted order; files whose names do not carry a
        safe job id are ignored.
        """
        if not self.inbox_dir.exists():
            return []
        pending: list[str] = []
        for f in self.inbox_dir.glob("job-*.json"):
            job_id = f.stem.removeprefix("job-")
            if not _SAFE_JOB_ID_RE.fullmatch(job_id):
                continue  # skip malformed filenames
            outbox_file = self.outbox_dir / _JOB_FILENAME_PATTERN.format(job_id=job_id)
            if not outbox_file.exists():
                pending.append(job_id)
        return sorted(pending)

    def _ensure_gitignore(self) -> None:
        """Write .gitignore into .context-memory/ if not already present."""
        gitignore = self._root / ".gitignore"
        if not gitignore.exists():
            gitignore.write_text(_GITIGNORE_CONTENT, encoding="utf-8")

    def _validate_job_id(self, job_id: str) -> None:
        """Raise ValueError if job_id contains unsafe characters (path traversal guard)."""
        # fullmatch, not match: "$" alone would let a trailing newline through.
        if not _SAFE_JOB_ID_RE.fullmatch(job_id):
            raise ValueError(
                f"Invalid job_id {job_id!r}: must match [a-zA-Z0-9_-]{{1,64}}"
            )