gdmcode-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. gdmcode-0.1.0.dist-info/METADATA +240 -0
  2. gdmcode-0.1.0.dist-info/RECORD +131 -0
  3. gdmcode-0.1.0.dist-info/WHEEL +4 -0
  4. gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/_internal/__init__.py +0 -0
  7. src/_internal/constants.py +244 -0
  8. src/_internal/domain_skills.py +339 -0
  9. src/agent/__init__.py +0 -0
  10. src/agent/commit_classifier.py +91 -0
  11. src/agent/context_budget.py +391 -0
  12. src/agent/daemon.py +681 -0
  13. src/agent/dag_validator.py +153 -0
  14. src/agent/debug_loop.py +473 -0
  15. src/agent/impact_analyzer.py +149 -0
  16. src/agent/impact_graph.py +117 -0
  17. src/agent/loop.py +1410 -0
  18. src/agent/orchestrator.py +141 -0
  19. src/agent/regression_guard.py +251 -0
  20. src/agent/review_gate.py +648 -0
  21. src/agent/risk_scorer.py +169 -0
  22. src/agent/self_healing.py +145 -0
  23. src/agent/smart_test_selector.py +89 -0
  24. src/agent/system_prompt.py +226 -0
  25. src/agent/task_tracker.py +320 -0
  26. src/agent/test_validator.py +210 -0
  27. src/agent/tool_orchestrator.py +402 -0
  28. src/agent/transcript.py +230 -0
  29. src/agent/verification_loop.py +133 -0
  30. src/agent/work_director.py +136 -0
  31. src/agent/worktree_manager.py +53 -0
  32. src/artifacts/__init__.py +16 -0
  33. src/artifacts/artifact_store.py +456 -0
  34. src/artifacts/verification_graph.py +75 -0
  35. src/auth.py +411 -0
  36. src/cli.py +1290 -0
  37. src/commands.py +1398 -0
  38. src/config.py +762 -0
  39. src/cost_tracker.py +348 -0
  40. src/db/__init__.py +4 -0
  41. src/db/migrations.py +337 -0
  42. src/enterprise/__init__.py +3 -0
  43. src/enterprise/audit_log.py +182 -0
  44. src/enterprise/identity.py +90 -0
  45. src/enterprise/rbac.py +100 -0
  46. src/enterprise/team_config.py +125 -0
  47. src/enterprise/usage_analytics.py +261 -0
  48. src/exceptions.py +207 -0
  49. src/git_workflow.py +651 -0
  50. src/integrations/__init__.py +6 -0
  51. src/integrations/github_actions.py +106 -0
  52. src/integrations/mcp_server.py +333 -0
  53. src/integrations/sentry_integration.py +100 -0
  54. src/integrations/sentry_server.py +82 -0
  55. src/integrations/webhook_security.py +19 -0
  56. src/main.py +27 -0
  57. src/memory/__init__.py +0 -0
  58. src/memory/code_index.py +376 -0
  59. src/memory/compressor.py +378 -0
  60. src/memory/context_memory.py +135 -0
  61. src/memory/continuous_memory.py +234 -0
  62. src/memory/conventions.py +495 -0
  63. src/memory/db.py +1119 -0
  64. src/memory/document_index.py +205 -0
  65. src/memory/file_cache.py +128 -0
  66. src/memory/project_scanner.py +178 -0
  67. src/memory/session_store.py +201 -0
  68. src/models/__init__.py +0 -0
  69. src/models/client.py +715 -0
  70. src/models/definitions.py +459 -0
  71. src/models/router.py +418 -0
  72. src/models/schemas.py +389 -0
  73. src/permissions.py +294 -0
  74. src/remote/__init__.py +5 -0
  75. src/remote/command_filter.py +33 -0
  76. src/remote/models.py +31 -0
  77. src/remote/permission_handler.py +79 -0
  78. src/remote/phone_ui.py +48 -0
  79. src/remote/protocol.py +59 -0
  80. src/remote/qr.py +65 -0
  81. src/remote/server.py +586 -0
  82. src/remote/token_manager.py +61 -0
  83. src/remote/tunnel.py +212 -0
  84. src/repl.py +475 -0
  85. src/runtime/__init__.py +1 -0
  86. src/runtime/branch_farm.py +372 -0
  87. src/runtime/replay.py +351 -0
  88. src/sandbox/__init__.py +2 -0
  89. src/sandbox/hermetic.py +214 -0
  90. src/sandbox/policy.py +44 -0
  91. src/sdk/__init__.py +3 -0
  92. src/sdk/plugin_base.py +39 -0
  93. src/sdk/plugin_host.py +100 -0
  94. src/sdk/plugin_loader.py +101 -0
  95. src/security.py +409 -0
  96. src/server/__init__.py +7 -0
  97. src/server/bridge.py +427 -0
  98. src/server/bridge_cli.py +103 -0
  99. src/server/bridge_client.py +170 -0
  100. src/server/protocol_version.py +103 -0
  101. src/session/__init__.py +10 -0
  102. src/session/event_fanout.py +46 -0
  103. src/session/input_broker.py +38 -0
  104. src/session/permission_bridge.py +100 -0
  105. src/tools/__init__.py +160 -0
  106. src/tools/_atomic.py +72 -0
  107. src/tools/agent_tools.py +423 -0
  108. src/tools/ask_user_tool.py +83 -0
  109. src/tools/bash_tool.py +384 -0
  110. src/tools/browser_tool.py +352 -0
  111. src/tools/browser_tools.py +179 -0
  112. src/tools/dep_tools.py +210 -0
  113. src/tools/document_reader.py +167 -0
  114. src/tools/document_tool.py +240 -0
  115. src/tools/document_writer.py +171 -0
  116. src/tools/impact_tools.py +240 -0
  117. src/tools/playwright_tool.py +172 -0
  118. src/tools/quality_tools.py +366 -0
  119. src/tools/read_tools.py +318 -0
  120. src/tools/result_cache.py +157 -0
  121. src/tools/search_tools.py +310 -0
  122. src/tools/shell_tools.py +311 -0
  123. src/tools/write_tools.py +337 -0
  124. src/voice/__init__.py +25 -0
  125. src/voice/audio_capture.py +92 -0
  126. src/voice/audio_playback.py +68 -0
  127. src/voice/errors.py +14 -0
  128. src/voice/models.py +35 -0
  129. src/voice/providers.py +143 -0
  130. src/voice/vad.py +55 -0
  131. src/voice/voice_loop.py +156 -0
@@ -0,0 +1,391 @@
1
+ """Context budget — tracks tokens and triggers compression.
2
+
3
+ The golden rule: never send more than 80% of the model's context window
4
+ in a single request. This module enforces that constraint and provides
5
+ chunked file reading so large files never blow the budget in one shot.
6
+
7
+ Token counting uses tiktoken cl100k_base (close enough for Grok/Gemini).
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+
15
+ import tiktoken
16
+
17
+ from src._internal.constants import _COMPRESSION_TRIGGER_RATIO
18
+ from src.agent.transcript import TranscriptStore
19
+
20
+ __all__ = ["ContextBudget", "count_tokens", "chunk_file", "WholeCodebaseMode"]
21
+
22
+ log = logging.getLogger(__name__)
23
+
24
+ # cl100k_base is OpenAI's GPT-4-era encoding; Claude/Grok/Gemini use their own tokenizers, so counts are a good-enough approximation
25
+ _ENCODER = tiktoken.get_encoding("cl100k_base")
26
+
27
+ # Chars-per-token approximation (fallback when tiktoken fails; also used for file-size estimates)
28
+ _CHARS_PER_TOKEN = 3.5
29
+
30
+
31
def count_tokens(text: str) -> int:
    """Return the number of cl100k_base tokens in *text*.

    The encoder is called with ``disallowed_special=()`` so that text which
    happens to contain special-token markers is encoded instead of rejected.

    If the encoder raises for any reason, the count is approximated as
    ``len(text) / _CHARS_PER_TOKEN`` and is never reported as less than 1.
    """
    try:
        encoded = _ENCODER.encode(text, disallowed_special=())
    except Exception as err:
        # Best-effort: a rough estimate is better than failing the caller.
        log.debug("tiktoken encode failed, using estimate: %s", err)
        estimate = int(len(text) / _CHARS_PER_TOKEN)
        return estimate if estimate > 1 else 1
    return len(encoded)
41
+
42
+
43
def chunk_file(path: Path, chunk_tokens: int = 4_000) -> list[str]:
    """Split a file into token-sized chunks for incremental injection.

    The file is read as UTF-8 (undecodable bytes are replaced) and packed
    line by line: lines are accumulated until adding the next one would push
    the running per-line token total above *chunk_tokens*, at which point the
    accumulated text is emitted as a chunk.

    A single line that is larger than *chunk_tokens* on its own (minified
    bundles, one-line JSON, ...) is cut into smaller pieces first, so it can
    no longer produce an oversized chunk.

    Args:
        path: file to read
        chunk_tokens: max tokens per chunk. A non-positive value disables the
            splitting of long lines; every line then ends up in its own chunk.

    Returns:
        list of text chunks whose concatenation equals the decoded file
        content. Each chunk's summed token count is ≤ chunk_tokens, except
        that a single character is never split further. Returns an empty
        list when the file cannot be read.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        log.warning("Could not read %s for chunking: %s", path, exc)
        return []

    chunks: list[str] = []
    current: list[str] = []
    current_tokens = 0

    for line in text.splitlines(keepends=True):
        line_tokens = count_tokens(line)
        if line_tokens > chunk_tokens > 0:
            # The line alone would blow the budget: break it up first.
            segments = _split_oversized_line(line, chunk_tokens)
        else:
            segments = [(line, line_tokens)]

        for segment, segment_tokens in segments:
            # Flush before adding when the segment would overflow the chunk;
            # the `current` guard avoids emitting empty chunks.
            if current and current_tokens + segment_tokens > chunk_tokens:
                chunks.append("".join(current))
                current = []
                current_tokens = 0
            current.append(segment)
            current_tokens += segment_tokens

    if current:
        chunks.append("".join(current))

    return chunks


def _split_oversized_line(line: str, chunk_tokens: int) -> list[tuple[str, int]]:
    """Cut *line* into in-order ``(piece, token_count)`` pairs that fit the limit.

    Pieces are halved repeatedly until each one counts ≤ *chunk_tokens* tokens
    or is a single character (which is kept as-is even if it is still over).
    """
    pieces: list[tuple[str, int]] = []
    pending = [line]  # stack; the left half is always pushed last => popped first
    while pending:
        piece = pending.pop()
        tokens = count_tokens(piece)
        if tokens <= chunk_tokens or len(piece) <= 1:
            pieces.append((piece, tokens))
            continue
        middle = len(piece) // 2
        pending.append(piece[middle:])
        pending.append(piece[:middle])
    return pieces
78
+
79
+
80
@dataclass
class BudgetSnapshot:
    """Token-usage figures captured at a single moment.

    A plain value record; ``ContextBudget.snapshot()`` fills one in from the
    budget's state at the time of the call.
    """

    # Tokens counted as in use when the snapshot was taken.
    used_tokens: int
    # Size of the context window the usage is measured against.
    max_tokens: int
    # Fraction of the window in use (used_tokens / max_tokens).
    budget_ratio: float
    # Whether the usage had reached the compression trigger.
    needs_compression: bool
88
+
89
+
90
class ContextBudget:
    """Tracks token usage against a model context window.

    Compression is signalled once usage reaches ``compression_ratio`` of the
    window (``_COMPRESSION_TRIGGER_RATIO`` unless overridden).

    Usage::

        budget = ContextBudget(model_context_window=131_072)
        tokens = count_tokens(user_message)
        budget.add(tokens)

        if budget.needs_compression:
            transcript.maybe_evict()
            budget.subtract(n_tokens_freed)  # or budget.sync_from_transcript(transcript)
    """

    def __init__(
        self,
        model_context_window: int = 131_072,
        compression_ratio: float = _COMPRESSION_TRIGGER_RATIO,
    ) -> None:
        """Create a budget.

        Args:
            model_context_window: size of the model's context window in tokens.
            compression_ratio: fraction of the window at which
                ``needs_compression`` becomes true.
        """
        self._max = model_context_window
        self._threshold = compression_ratio
        self._used: int = 0
        # Running total of everything reported through subtract().
        self._evicted_total: int = 0

    # ------------------------------------------------------------------
    # Mutation
    # ------------------------------------------------------------------

    def add(self, tokens: int) -> None:
        """Record *tokens* added to the context. Negative values are ignored."""
        self._used += max(0, tokens)

    def subtract(self, tokens: int) -> None:
        """Record *tokens* removed from the context (eviction/compression).

        Negative values are ignored, mirroring ``add``; previously a negative
        argument silently *increased* the usage. Usage never drops below zero.
        """
        freed = max(0, tokens)
        self._used = max(0, self._used - freed)
        self._evicted_total += freed

    def reset(self) -> None:
        """Reset the used counter (e.g., new turn). The evicted total is kept."""
        self._used = 0

    def sync_from_transcript(self, store: TranscriptStore) -> None:
        """Re-sync token count from the authoritative TranscriptStore."""
        self._used = store.token_count

    # ------------------------------------------------------------------
    # Query
    # ------------------------------------------------------------------

    @property
    def used_tokens(self) -> int:
        """Tokens currently counted as in use."""
        return self._used

    @property
    def max_tokens(self) -> int:
        """Size of the context window in tokens."""
        return self._max

    @property
    def remaining_tokens(self) -> int:
        """Tokens left before the window is full (never negative)."""
        return max(0, self._max - self._used)

    @property
    def budget_ratio(self) -> float:
        """Fraction of the window in use; 0.0 when the window size is zero."""
        return self._used / self._max if self._max else 0.0

    @property
    def needs_compression(self) -> bool:
        """True when usage has reached (>=) the compression trigger ratio."""
        return self.budget_ratio >= self._threshold

    def snapshot(self) -> BudgetSnapshot:
        """Return the current usage figures as a ``BudgetSnapshot``."""
        return BudgetSnapshot(
            used_tokens=self._used,
            max_tokens=self._max,
            budget_ratio=self.budget_ratio,
            needs_compression=self.needs_compression,
        )

    def is_near_limit(self, threshold: float = 0.8) -> bool:
        """True when usage has reached *threshold* fraction of max tokens.

        Independent of ``needs_compression``: the caller picks the threshold,
        so it can differ from the compression trigger. Goes through
        ``budget_ratio`` so a zero-sized window yields a ratio of 0.0 instead
        of raising ``ZeroDivisionError``.
        """
        return self.budget_ratio >= threshold

    def can_fit(self, tokens: int) -> bool:
        """Check whether *tokens* more can be added without hitting the threshold.

        A zero-sized window can hold nothing, so the answer is ``False`` there
        (the unguarded division used to raise ``ZeroDivisionError``).
        """
        if not self._max:
            return False
        return (self._used + tokens) / self._max < self._threshold
179
+
180
+
181
class WholeCodebaseMode:
    """Whole-codebase loading — loads all project source files into context upfront.

    When a project's estimated token count fits within 75 % of the model's context
    window, all source files are loaded in one shot. Deterministic inclusion rules
    prevent binary/generated/vendor files from polluting the context.

    Usage::

        budget = ContextBudget(model_context_window=2_000_000)
        wcm = WholeCodebaseMode(budget)
        files = wcm.load_whole_codebase(root, tech_stack, mode="auto")
    """

    # Directory names that are never descended into.
    _EXCLUDE_DIRS: frozenset[str] = frozenset({
        "node_modules", ".git", "dist", "build", "__pycache__",
        "vendor", "third_party", "site-packages",
    })

    # File extensions considered source for each recognised tech stack.
    _STACK_EXTENSIONS: dict[str, frozenset[str]] = {
        "python": frozenset({".py", ".pyi", ".toml", ".cfg", ".md", ".txt"}),
        "nodejs": frozenset({".ts", ".tsx", ".js", ".jsx", ".json", ".md"}),
        "typescript": frozenset({".ts", ".tsx", ".js", ".jsx", ".json", ".md"}),
        "rust": frozenset({".rs", ".toml"}),
        "golang": frozenset({".go", ".mod"}),
    }

    # Dependency lock files: always skipped by exact file name.
    _LOCK_FILES: frozenset[str] = frozenset({
        "package-lock.json", "yarn.lock", "pnpm-lock.yaml",
        "poetry.lock", "Cargo.lock",
    })

    _MAX_FILE_BYTES: int = 200 * 1024  # 200 KB

    def __init__(self, budget: ContextBudget) -> None:
        """Bind the loader to *budget*, whose window size sets the load threshold."""
        self._budget = budget

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def estimate_token_count(self, root: Path, tech_stack: list[str]) -> int:
        """Estimate total token count for *root* without per-file content reads.

        Sums the on-disk size of every eligible file and divides by
        ``_CHARS_PER_TOKEN``. Files whose size cannot be read are skipped.
        Intended to stay fast (target: <3 s for projects with up to 10 K files).

        Returns:
            The estimate, never less than 1.
        """
        allowed_ext = self._get_allowed_extensions(tech_stack)
        ignore_spec = self._load_ignore_spec(root)
        total_bytes = 0
        for fp in self._iter_files(root, allowed_ext, ignore_spec):
            try:
                total_bytes += fp.stat().st_size
            except OSError:
                continue
        return max(1, int(total_bytes / _CHARS_PER_TOKEN))

    def load_whole_codebase(
        self,
        root: Path,
        tech_stack: list[str],
        mode: str = "auto",
    ) -> list[tuple[Path, str]]:
        """Load all source files for the project.

        Args:
            root: project root directory.
            tech_stack: detected tech stack (e.g. ``["python", "docker"]``).
            mode: ``"auto"`` checks budget threshold; ``"always"`` forces load;
                ``"never"`` returns an empty list immediately. Any other value
                loads unconditionally, like ``"always"`` but without the log line.

        Returns:
            List of ``(path, content)`` tuples in sorted path order (each
            directory's entries are sorted, directories are walked depth-first).
            Binary and unreadable files are left out. Empty when *mode* is
            ``"never"`` or when ``"auto"`` finds the project over the threshold.
        """
        if mode == "never":
            return []

        allowed_ext = self._get_allowed_extensions(tech_stack)
        ignore_spec = self._load_ignore_spec(root)
        threshold = self._get_threshold()

        if mode == "auto":
            estimated = self.estimate_token_count(root, tech_stack)
            if estimated > threshold:
                log.debug(
                    "Project too large for whole-codebase mode (%dK / %dK threshold).",
                    estimated // 1000,
                    threshold // 1000,
                )
                return []
            log.info(
                "Project fits in context (%dK / %dK) — whole-codebase mode active.",
                estimated // 1000,
                threshold // 1000,
            )

        results: list[tuple[Path, str]] = []
        for fp in self._iter_files(root, allowed_ext, ignore_spec):
            if self._is_binary(fp):
                continue
            try:
                text = fp.read_text(encoding="utf-8", errors="replace")
            except OSError:
                continue
            results.append((fp, text))

        if mode == "always":
            log.info(
                "Whole-codebase mode active (forced) — loaded %d files.", len(results)
            )
        return results

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _get_threshold(self) -> int:
        """75 % of model context window — leaves room for history + system prompt."""
        # Use the public property rather than reaching into ContextBudget._max.
        return int(self._budget.max_tokens * 0.75)

    def _get_allowed_extensions(self, tech_stack: list[str]) -> frozenset[str] | None:
        """Return the set of allowed extensions for *tech_stack*, or None for all.

        Unknown stack names are ignored; when none of the names is known (or
        the list is empty) no extension filter applies.
        """
        if not tech_stack:
            return None
        extensions: set[str] = set()
        found = False
        for stack in tech_stack:
            if stack in self._STACK_EXTENSIONS:
                extensions.update(self._STACK_EXTENSIONS[stack])
                found = True
        return frozenset(extensions) if found else None

    def _load_ignore_spec(self, root: Path):
        """Load .gdmignore, falling back to .gitignore, via pathspec.

        ``pathspec`` is an optional dependency and is only imported once an
        ignore file is actually present, so projects without one work without
        it. When the package is missing the patterns are skipped with a
        warning instead of aborting the scan. An ignore file that cannot be
        read is skipped in favour of the next candidate.

        Returns:
            A ``pathspec.PathSpec`` for the first usable ignore file, or None.
        """
        for name in (".gdmignore", ".gitignore"):
            ignore_file = root / name
            if not ignore_file.exists():
                continue
            try:
                import pathspec  # local import — optional dependency
            except ImportError:
                log.warning(
                    "pathspec is not installed; patterns in %s are not applied.",
                    ignore_file,
                )
                return None
            try:
                text = ignore_file.read_text(encoding="utf-8", errors="replace")
            except OSError as exc:
                log.warning("Could not read %s: %s", ignore_file, exc)
                continue
            return pathspec.PathSpec.from_lines("gitignore", text.splitlines())
        return None

    def _is_binary(self, path: Path) -> bool:
        """Return True when the first 8 192 bytes of *path* contain a null byte.

        Files that cannot be opened are reported as binary so callers skip them.
        """
        try:
            with path.open("rb") as fh:
                return b"\x00" in fh.read(8192)
        except OSError:
            return True

    def _is_eligible_file(
        self,
        entry: Path,
        root: Path,
        allowed_ext: frozenset[str] | None,
        ignore_spec,
    ) -> bool:
        """Apply the per-file inclusion rules of ``_iter_files`` to *entry*."""
        try:
            if entry.stat().st_size > self._MAX_FILE_BYTES:
                return False
        except OSError:
            return False

        name = entry.name
        if name.endswith((".min.js", ".min.css", ".pyc")):
            return False
        if name in self._LOCK_FILES:
            return False

        if allowed_ext is not None and entry.suffix.lower() not in allowed_ext:
            return False

        if ignore_spec is not None:
            try:
                # Ignore patterns are matched against POSIX-style relative paths.
                rel = str(entry.relative_to(root)).replace("\\", "/")
            except ValueError:
                # Not located under root: no relative path to match, keep it.
                return True
            if ignore_spec.match_file(rel):
                return False

        return True

    def _iter_files(self, root: Path, allowed_ext: frozenset[str] | None, ignore_spec):
        """Yield all eligible source files under *root*.

        Applies:
        - Directory exclusions (node_modules, .git, dist, …)
        - File-size limit (>200 KB skipped)
        - Extension filter derived from tech_stack
        - Name-based exclusions (.pyc, .min.js, lock files)
        - Ignore-spec patterns (.gdmignore / .gitignore)
        - Resolved-path tracking to prevent symlink loops

        Entries of each directory are visited in sorted order, so the output
        order is deterministic. Unreadable directories are skipped silently.
        """
        visited: set[Path] = set()

        def _walk(directory: Path):
            try:
                real = directory.resolve()
            except OSError:
                return
            # A directory reached again through a symlink is walked only once.
            if real in visited:
                return
            visited.add(real)

            try:
                entries = sorted(directory.iterdir())
            except OSError:
                return

            for entry in entries:
                if entry.is_dir():
                    if entry.name in self._EXCLUDE_DIRS:
                        continue
                    yield from _walk(entry)
                elif entry.is_file():
                    if self._is_eligible_file(entry, root, allowed_ext, ignore_spec):
                        yield entry

        yield from _walk(root)