gdmcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. gdmcode-0.1.0.dist-info/METADATA +240 -0
  2. gdmcode-0.1.0.dist-info/RECORD +131 -0
  3. gdmcode-0.1.0.dist-info/WHEEL +4 -0
  4. gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/_internal/__init__.py +0 -0
  7. src/_internal/constants.py +244 -0
  8. src/_internal/domain_skills.py +339 -0
  9. src/agent/__init__.py +0 -0
  10. src/agent/commit_classifier.py +91 -0
  11. src/agent/context_budget.py +391 -0
  12. src/agent/daemon.py +681 -0
  13. src/agent/dag_validator.py +153 -0
  14. src/agent/debug_loop.py +473 -0
  15. src/agent/impact_analyzer.py +149 -0
  16. src/agent/impact_graph.py +117 -0
  17. src/agent/loop.py +1410 -0
  18. src/agent/orchestrator.py +141 -0
  19. src/agent/regression_guard.py +251 -0
  20. src/agent/review_gate.py +648 -0
  21. src/agent/risk_scorer.py +169 -0
  22. src/agent/self_healing.py +145 -0
  23. src/agent/smart_test_selector.py +89 -0
  24. src/agent/system_prompt.py +226 -0
  25. src/agent/task_tracker.py +320 -0
  26. src/agent/test_validator.py +210 -0
  27. src/agent/tool_orchestrator.py +402 -0
  28. src/agent/transcript.py +230 -0
  29. src/agent/verification_loop.py +133 -0
  30. src/agent/work_director.py +136 -0
  31. src/agent/worktree_manager.py +53 -0
  32. src/artifacts/__init__.py +16 -0
  33. src/artifacts/artifact_store.py +456 -0
  34. src/artifacts/verification_graph.py +75 -0
  35. src/auth.py +411 -0
  36. src/cli.py +1290 -0
  37. src/commands.py +1398 -0
  38. src/config.py +762 -0
  39. src/cost_tracker.py +348 -0
  40. src/db/__init__.py +4 -0
  41. src/db/migrations.py +337 -0
  42. src/enterprise/__init__.py +3 -0
  43. src/enterprise/audit_log.py +182 -0
  44. src/enterprise/identity.py +90 -0
  45. src/enterprise/rbac.py +100 -0
  46. src/enterprise/team_config.py +125 -0
  47. src/enterprise/usage_analytics.py +261 -0
  48. src/exceptions.py +207 -0
  49. src/git_workflow.py +651 -0
  50. src/integrations/__init__.py +6 -0
  51. src/integrations/github_actions.py +106 -0
  52. src/integrations/mcp_server.py +333 -0
  53. src/integrations/sentry_integration.py +100 -0
  54. src/integrations/sentry_server.py +82 -0
  55. src/integrations/webhook_security.py +19 -0
  56. src/main.py +27 -0
  57. src/memory/__init__.py +0 -0
  58. src/memory/code_index.py +376 -0
  59. src/memory/compressor.py +378 -0
  60. src/memory/context_memory.py +135 -0
  61. src/memory/continuous_memory.py +234 -0
  62. src/memory/conventions.py +495 -0
  63. src/memory/db.py +1119 -0
  64. src/memory/document_index.py +205 -0
  65. src/memory/file_cache.py +128 -0
  66. src/memory/project_scanner.py +178 -0
  67. src/memory/session_store.py +201 -0
  68. src/models/__init__.py +0 -0
  69. src/models/client.py +715 -0
  70. src/models/definitions.py +459 -0
  71. src/models/router.py +418 -0
  72. src/models/schemas.py +389 -0
  73. src/permissions.py +294 -0
  74. src/remote/__init__.py +5 -0
  75. src/remote/command_filter.py +33 -0
  76. src/remote/models.py +31 -0
  77. src/remote/permission_handler.py +79 -0
  78. src/remote/phone_ui.py +48 -0
  79. src/remote/protocol.py +59 -0
  80. src/remote/qr.py +65 -0
  81. src/remote/server.py +586 -0
  82. src/remote/token_manager.py +61 -0
  83. src/remote/tunnel.py +212 -0
  84. src/repl.py +475 -0
  85. src/runtime/__init__.py +1 -0
  86. src/runtime/branch_farm.py +372 -0
  87. src/runtime/replay.py +351 -0
  88. src/sandbox/__init__.py +2 -0
  89. src/sandbox/hermetic.py +214 -0
  90. src/sandbox/policy.py +44 -0
  91. src/sdk/__init__.py +3 -0
  92. src/sdk/plugin_base.py +39 -0
  93. src/sdk/plugin_host.py +100 -0
  94. src/sdk/plugin_loader.py +101 -0
  95. src/security.py +409 -0
  96. src/server/__init__.py +7 -0
  97. src/server/bridge.py +427 -0
  98. src/server/bridge_cli.py +103 -0
  99. src/server/bridge_client.py +170 -0
  100. src/server/protocol_version.py +103 -0
  101. src/session/__init__.py +10 -0
  102. src/session/event_fanout.py +46 -0
  103. src/session/input_broker.py +38 -0
  104. src/session/permission_bridge.py +100 -0
  105. src/tools/__init__.py +160 -0
  106. src/tools/_atomic.py +72 -0
  107. src/tools/agent_tools.py +423 -0
  108. src/tools/ask_user_tool.py +83 -0
  109. src/tools/bash_tool.py +384 -0
  110. src/tools/browser_tool.py +352 -0
  111. src/tools/browser_tools.py +179 -0
  112. src/tools/dep_tools.py +210 -0
  113. src/tools/document_reader.py +167 -0
  114. src/tools/document_tool.py +240 -0
  115. src/tools/document_writer.py +171 -0
  116. src/tools/impact_tools.py +240 -0
  117. src/tools/playwright_tool.py +172 -0
  118. src/tools/quality_tools.py +366 -0
  119. src/tools/read_tools.py +318 -0
  120. src/tools/result_cache.py +157 -0
  121. src/tools/search_tools.py +310 -0
  122. src/tools/shell_tools.py +311 -0
  123. src/tools/write_tools.py +337 -0
  124. src/voice/__init__.py +25 -0
  125. src/voice/audio_capture.py +92 -0
  126. src/voice/audio_playback.py +68 -0
  127. src/voice/errors.py +14 -0
  128. src/voice/models.py +35 -0
  129. src/voice/providers.py +143 -0
  130. src/voice/vad.py +55 -0
  131. src/voice/voice_loop.py +156 -0
@@ -0,0 +1,495 @@
1
+ """Convention extractor — scans project files to learn style rules.
2
+
3
+ Conventions are stored in ``gdm.db`` and injected into the system prompt
4
+ for every write operation so the agent follows the project's existing style.
5
+
6
+ Detection is purely pattern-based (no LLM calls). The extractor re-runs
7
+ at most once per ``_CACHE_TTL_HOURS`` unless ``force=True`` is passed.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import re
13
+ from dataclasses import dataclass, field
14
+ from datetime import datetime, timedelta, timezone
15
+ from pathlib import Path
16
+
17
+ from src.memory.db import GdmDatabase
18
+
19
+ __all__ = ["Convention", "ConventionExtractor", "ConventionDrift", "DriftReport"]
20
+
21
+ log = logging.getLogger(__name__)
22
+
23
+ # ── Convention keys ──────────────────────────────────────────────────────────
24
+
25
_KEY_NAMING_STYLE: str = "naming_style"
_KEY_IMPORT_STYLE: str = "import_style"
_KEY_TEST_STRUCTURE: str = "test_structure"
_KEY_ASYNC_PATTERN: str = "async_pattern"
_KEY_TYPE_ANNOTATIONS: str = "type_annotations"
_KEY_DOCSTRING_FORMAT: str = "docstring_format"

# ── Tuning constants ─────────────────────────────────────────────────────────

_TOP_FILES_LIMIT: int = 40  # cap on files returned per scan (newest first)
_NAMING_SAMPLE_LIMIT: int = 20  # files sampled by the naming and docstring detectors
_CACHE_TTL_HOURS: int = 24  # age after which stored conventions are re-extracted
_MAX_FILE_READ_BYTES: int = 8_192  # only this many leading bytes of a file are inspected
_MIN_DEF_COUNT: int = 3  # minimum defs before a detector asserts a convention

# ── Excluded directory names ─────────────────────────────────────────────────

_EXCLUDED_DIRS: frozenset[str] = frozenset(
    {".git", "__pycache__", "node_modules", ".venv", "venv", ".mypy_cache", "dist", "build"}
)

# ── Compiled regexes ─────────────────────────────────────────────────────────

# Leading underscores are accepted so private helpers and dunder methods
# (``_helper``, ``__init__``) count towards the naming statistics, and the
# name may be a single character (``def f(``).
_SNAKE_DEF_RE = re.compile(r"\bdef\s+_*[a-z][a-z0-9_]*\s*\(")
_PASCAL_DEF_RE = re.compile(r"\bdef\s+_*[A-Z][a-zA-Z0-9]*\s*\(")
# Import style is judged only on ``from src.`` versus ``from .`` lines.
_ABS_IMPORT_RE = re.compile(r"^from\s+src\.", re.MULTILINE)
_REL_IMPORT_RE = re.compile(r"^from\s+\.", re.MULTILINE)
_ASYNC_DEF_RE = re.compile(r"\basync\s+def\s+")
# Matches every ``def``, including the ``def`` inside ``async def``.
_ALL_DEF_RE = re.compile(r"\bdef\s+\w+")
_RETURN_ANNOT_RE = re.compile(r"\)\s*->")
_GOOGLE_DOC_RE = re.compile(r"\bArgs:\s*\n|\bReturns:\s*\n")
_NUMPY_DOC_RE = re.compile(r"\bParameters\s*\n\s*-{3,}")

# ── Header template ───────────────────────────────────────────────────────────

# Prepended to the bullet list produced for system-prompt injection.
_CONVENTIONS_HEADER: str = (
    "## Project Conventions\n\n"
    "The following conventions were detected in this codebase. "
    "ALL code you write MUST follow these exactly:\n"
)
65
+
66
+
67
+ # ── Data model ────────────────────────────────────────────────────────────────
68
+
69
+
70
@dataclass
class Convention:
    """One style rule inferred for a project.

    Detectors return instances of this class, and the same three values
    are what gets handed to the database when conventions are persisted.
    """

    # Identifier of the rule, e.g. ``"naming_style"``.
    key: str
    # Human-readable description of the detected style.
    value: str
    # Detector's certainty; 1.0 when the caller does not supply one.
    confidence: float = 1.0
77
+
78
+
79
@dataclass
class ConventionDrift:
    """One place where inspected source departs from an expected convention."""

    # Key of the convention that was violated (e.g. ``"import_style"``).
    convention: str
    # Style the convention calls for.
    expected: str
    # Style that was actually seen in the source.
    found: str
    # 1-based line on which the offending construct starts.
    line_number: int
    # Certainty attached to this finding by the check that produced it.
    confidence: float
88
+
89
+
90
@dataclass
class DriftReport:
    """Outcome of checking one file's content against a set of conventions."""

    # Individual deviations that were reported for the file.
    drifts: list[ConventionDrift]
    # Summary flag supplied by the producer of the report.
    has_high_confidence_drift: bool
96
+
97
+
98
+ # ── Main class ────────────────────────────────────────────────────────────────
99
+
100
+
101
+ class ConventionExtractor:
102
+ """Extracts coding conventions from project source files.
103
+
104
+ Scans the top-40 most recently modified files and infers style rules
105
+ by pattern matching. Results are stored in ``gdm.db`` conventions table.
106
+
107
+ Usage::
108
+
109
+ extractor = ConventionExtractor(db, project_root=root)
110
+ conventions = extractor.extract(project_root)
111
+ block = extractor.build_conventions_block() # for system prompt injection
112
+ """
113
+
114
+ def __init__(
115
+ self,
116
+ db: GdmDatabase,
117
+ project_id: str | None = None,
118
+ *,
119
+ project_root: Path | None = None,
120
+ ) -> None:
121
+ self._db = db
122
+ self._project_root = project_root
123
+ if project_id is not None:
124
+ self._project_id = project_id
125
+ elif project_root is not None:
126
+ self._project_id = str(project_root)
127
+ else:
128
+ self._project_id = "default"
129
+
130
+ # ------------------------------------------------------------------
131
+ # Public API
132
+ # ------------------------------------------------------------------
133
+
134
def extract(self, project_root: Path, *, force: bool = False) -> list[Convention]:
    """Detect the project's conventions, reusing stored results when recent.

    Args:
        project_root: directory tree to scan.
        force: when ``True`` the freshness check is skipped and the tree
            is always re-scanned.

    Returns:
        One :class:`Convention` per key. On a cache hit the list is built
        from stored rows; otherwise it is freshly detected and persisted
        before being returned.
    """
    cache_usable = not force and self._is_cache_fresh()
    if cache_usable:
        log.debug("Conventions cache is fresh for %s, skipping scan", self._project_id)
        return self._load_from_cache()

    found: list[Convention] = [
        *self._scan_python_files(project_root),
        *self._scan_typescript_files(project_root),
    ]
    test_structure = self._detect_test_structure(project_root)
    if test_structure is not None:
        found.append(test_structure)

    # Collapse duplicates: a later detection replaces an earlier one with
    # the same key.
    deduped: dict[str, Convention] = {}
    for conv in found:
        deduped[conv.key] = conv
    unique = list(deduped.values())

    self._upsert_conventions(unique)
    log.info("Extracted %d conventions for project %s", len(unique), self._project_id)
    return unique
162
+
163
+ def build_conventions_block(self) -> str:
164
+ """Build a system-prompt-ready conventions block from cached data.
165
+
166
+ Returns:
167
+ Formatted string ready for injection into a system prompt,
168
+ or an empty string if no conventions have been detected yet.
169
+ """
170
+ rows = self._db.get_conventions(self._project_id)
171
+ if not rows:
172
+ return ""
173
+ lines: list[str] = []
174
+ for row in rows:
175
+ lines.append(f"- {row['key']}: {row['value']}")
176
+ return _CONVENTIONS_HEADER + "\n".join(lines) + "\n"
177
+
178
+ def get_convention(self, key: str) -> str | None:
179
+ """Return the value for a specific convention key, or ``None``.
180
+
181
+ Args:
182
+ key: one of the ``_KEY_*`` constants (e.g. ``"naming_style"``).
183
+
184
+ Returns:
185
+ The stored convention value, or ``None`` if not yet detected.
186
+ """
187
+ row = self._db.execute_one(
188
+ "SELECT value FROM conventions WHERE project_id = ? AND key = ?",
189
+ (self._project_id, key),
190
+ )
191
+ return str(row["value"]) if row is not None else None
192
+
193
+ # ------------------------------------------------------------------
194
+ # Drift detection
195
+ # ------------------------------------------------------------------
196
+
197
+ def _extract_error_handling(self, files: list[Path]) -> str:
198
+ """Returns 'try_except' | 'result_type' | 'mixed'"""
199
+ try_count = 0
200
+ result_count = 0
201
+ for f in files[:50]:
202
+ try:
203
+ content = f.read_text(errors="ignore")
204
+ except OSError:
205
+ continue
206
+ try_count += len(re.findall(r'\btry\b\s*:', content))
207
+ result_count += len(re.findall(r'\b(?:Ok|Err|Result)\s*[\[\(]', content))
208
+ if try_count > result_count * 3:
209
+ return "try_except"
210
+ if result_count > try_count * 3:
211
+ return "result_type"
212
+ return "mixed"
213
+
214
+ def _is_cache_stale(self) -> bool:
215
+ try:
216
+ cached_at = self._db.get_convention_cached_at()
217
+ except Exception:
218
+ return True
219
+ if not cached_at:
220
+ return True
221
+ if (datetime.now() - cached_at).total_seconds() > 86400:
222
+ return True
223
+ try:
224
+ recent_changes = sum(
225
+ 1 for f in self._project_root.rglob("*.py")
226
+ if f.stat().st_mtime > cached_at.timestamp()
227
+ )
228
+ return recent_changes > 100
229
+ except (OSError, AttributeError):
230
+ return False
231
+
232
+ def check_drift(self, file_content: str, conventions: dict) -> DriftReport:
233
+ """Check file content for deviations from the given conventions dict.
234
+
235
+ Args:
236
+ file_content: source text to inspect.
237
+ conventions: mapping of convention key → expected value string.
238
+
239
+ Returns:
240
+ :class:`DriftReport` with all high-confidence drifts found.
241
+ """
242
+ drifts: list[ConventionDrift] = []
243
+
244
+ if conventions.get("naming_style") == "snake_case":
245
+ for m in re.finditer(r'\bdef\s+([a-z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]*)\b', file_content):
246
+ line_no = file_content[:m.start()].count('\n') + 1
247
+ drifts.append(ConventionDrift("naming_style", "snake_case", "camelCase", line_no, 0.9))
248
+
249
+ if conventions.get("error_handling") == "result_type":
250
+ for m in re.finditer(r'\btry\s*:', file_content):
251
+ line_no = file_content[:m.start()].count('\n') + 1
252
+ drifts.append(ConventionDrift("error_handling", "result_type", "try_except", line_no, 0.85))
253
+
254
+ if conventions.get("import_style") == "absolute":
255
+ for m in re.finditer(r'^from\s+\.', file_content, re.MULTILINE):
256
+ line_no = file_content[:m.start()].count('\n') + 1
257
+ drifts.append(ConventionDrift("import_style", "absolute", "relative", line_no, 0.95))
258
+
259
+ high = [d for d in drifts if d.confidence > 0.8]
260
+ return DriftReport(drifts=high, has_high_confidence_drift=bool(high))
261
+
262
+ # ------------------------------------------------------------------
263
+ # File-type scanners
264
+ # ------------------------------------------------------------------
265
+
266
def _scan_python_files(self, root: Path) -> list[Convention]:
    """Run every Python detector over the most recent ``*.py`` files.

    Args:
        root: directory tree to search.

    Returns:
        The conventions the detectors produced, in detector order;
        detectors that returned ``None`` contribute nothing. Empty when
        no ``*.py`` file was found.
    """
    py_files = self._get_recent_files(root, ["*.py"])
    if not py_files:
        return []

    detectors = (
        self._detect_naming_style,
        self._detect_import_style,
        self._detect_async_pattern,
        self._detect_type_annotations,
        self._detect_docstring_format,
    )
    outcomes = (detect(py_files) for detect in detectors)
    return [conv for conv in outcomes if conv is not None]
284
+
285
def _scan_typescript_files(self, root: Path) -> list[Convention]:
    """Run the async-usage detector over recent ``*.ts`` / ``*.tsx`` files.

    Args:
        root: directory tree to search.

    Returns:
        A single-element list holding the async convention, or an empty
        list when no matching file exists or the detector abstained.
    """
    ts_files = self._get_recent_files(root, ["*.ts", "*.tsx"])
    if not ts_files:
        return []

    # NOTE(review): the detector called here counts ``def`` / ``async def``
    # keywords, so it presumably abstains on typical TypeScript sources —
    # confirm whether a TypeScript-specific pattern was intended.
    async_conv = self._detect_async_pattern(ts_files)
    return [] if async_conv is None else [async_conv]
296
+
297
+ # ------------------------------------------------------------------
298
+ # Detectors (pattern-based, no LLM)
299
+ # ------------------------------------------------------------------
300
+
301
def _detect_naming_style(self, files: list[Path]) -> Convention | None:
    """Infer the function naming style from ``def`` statements.

    Only the first ``_NAMING_SAMPLE_LIMIT`` files are sampled.

    Args:
        files: candidate source files.

    Returns:
        ``None`` when fewer than ``_MIN_DEF_COUNT`` definitions matched;
        a "mixed" convention when the dominant style covers less than
        65 % of matches; otherwise a convention naming the dominant style
        with its share as confidence.
    """
    sample = [self._read_file_text(path) for path in files[:_NAMING_SAMPLE_LIMIT]]
    snake_count = sum(len(_SNAKE_DEF_RE.findall(text)) for text in sample)
    pascal_count = sum(len(_PASCAL_DEF_RE.findall(text)) for text in sample)

    total = snake_count + pascal_count
    if total < _MIN_DEF_COUNT:
        return None

    dominant_share = max(snake_count, pascal_count) / total
    if dominant_share >= 0.65:
        if snake_count >= pascal_count:
            value = "snake_case functions, PascalCase classes, SCREAMING_SNAKE constants"
        else:
            value = "PascalCase functions and classes, SCREAMING_SNAKE constants"
        return Convention(key=_KEY_NAMING_STYLE, value=value, confidence=round(dominant_share, 2))

    # Neither style clearly dominates, so no single rule is asserted.
    return Convention(
        key=_KEY_NAMING_STYLE,
        value="mixed naming styles detected",
        confidence=round(1.0 - dominant_share, 2),
    )
327
+
328
+ def _detect_import_style(self, files: list[Path]) -> Convention | None:
329
+ """Detect import style: absolute (``from src.``) vs relative (``from .``)."""
330
+ abs_count = 0
331
+ rel_count = 0
332
+ for path in files:
333
+ text = self._read_file_text(path)
334
+ abs_count += len(_ABS_IMPORT_RE.findall(text))
335
+ rel_count += len(_REL_IMPORT_RE.findall(text))
336
+
337
+ total = abs_count + rel_count
338
+ if total == 0:
339
+ return None
340
+
341
+ if abs_count >= rel_count:
342
+ value = "absolute imports (from src. prefix)"
343
+ else:
344
+ value = "relative imports (from . prefix)"
345
+ confidence = min(1.0, max(abs_count, rel_count) / total)
346
+ return Convention(key=_KEY_IMPORT_STYLE, value=value, confidence=confidence)
347
+
348
+ def _detect_test_structure(self, root: Path) -> Convention | None:
349
+ """Detect test framework from test files and config presence."""
350
+ tests_dir = root / "tests"
351
+ if (root / "jest.config.js").exists() or (root / "jest.config.ts").exists():
352
+ return Convention(key=_KEY_TEST_STRUCTURE, value="jest", confidence=1.0)
353
+ if (root / "vitest.config.ts").exists() or (root / "vitest.config.js").exists():
354
+ return Convention(key=_KEY_TEST_STRUCTURE, value="vitest", confidence=1.0)
355
+ if tests_dir.exists() and (tests_dir / "conftest.py").exists():
356
+ return Convention(key=_KEY_TEST_STRUCTURE, value="pytest with conftest.py", confidence=1.0)
357
+ if tests_dir.exists() and any(tests_dir.rglob("test_*.py")):
358
+ return Convention(key=_KEY_TEST_STRUCTURE, value="pytest", confidence=0.9)
359
+ return None
360
+
361
def _detect_async_pattern(self, files: list[Path]) -> Convention | None:
    """Describe how much of the sampled code is ``async def``.

    Args:
        files: candidate source files; all of them are read.

    Returns:
        ``None`` when fewer than ``_MIN_DEF_COUNT`` definitions were
        found; otherwise a convention classifying the async share as
        primarily async (> 50 %), mixed (> 10 %) or primarily synchronous.
    """
    async_count = 0
    total_count = 0
    for text in (self._read_file_text(path) for path in files):
        async_count += len(_ASYNC_DEF_RE.findall(text))
        total_count += len(_ALL_DEF_RE.findall(text))

    if total_count < _MIN_DEF_COUNT:
        return None

    async_share = async_count / total_count
    if async_share <= 0.1:
        value = "primarily synchronous (minimal async usage)"
    elif async_share <= 0.5:
        value = f"mixed sync/async ({async_share:.0%} of functions are async)"
    else:
        value = f"primarily async/await ({async_share:.0%} of functions are async)"
    return Convention(key=_KEY_ASYNC_PATTERN, value=value, confidence=0.9)
381
+
382
def _detect_type_annotations(self, files: list[Path]) -> Convention | None:
    """Estimate how consistently functions declare a return type.

    The share is the number of ``) ->`` occurrences divided by the number
    of ``def`` statements, capped at 1.0.

    Args:
        files: candidate source files; all of them are read.

    Returns:
        ``None`` when fewer than ``_MIN_DEF_COUNT`` definitions were
        found; otherwise a convention rating annotations as required
        (> 80 %), partial (> 40 %) or optional/minimal.
    """
    def_total = 0
    arrow_total = 0
    for text in (self._read_file_text(path) for path in files):
        def_total += len(_ALL_DEF_RE.findall(text))
        arrow_total += len(_RETURN_ANNOT_RE.findall(text))

    if def_total < _MIN_DEF_COUNT:
        return None

    share = min(1.0, arrow_total / def_total)
    if share <= 0.4:
        value = "optional/minimal type annotations"
    elif share <= 0.8:
        value = f"partial — {share:.0%} of functions carry return annotations"
    else:
        value = f"required — {share:.0%} of functions carry return annotations"
    return Convention(key=_KEY_TYPE_ANNOTATIONS, value=value, confidence=0.85)
402
+
403
def _detect_docstring_format(self, files: list[Path]) -> Convention | None:
    """Tell Google-style from NumPy-style docstrings.

    Each of the first ``_NAMING_SAMPLE_LIMIT`` files counts at most once
    per style, however many docstrings it contains.

    Args:
        files: candidate source files.

    Returns:
        ``None`` when neither style was seen; otherwise a convention for
        the style found in more files (ties go to Google), with
        confidence growing by 0.2 per file up to 1.0.
    """
    sample = [self._read_file_text(path) for path in files[:_NAMING_SAMPLE_LIMIT]]
    google_count = sum(1 for text in sample if _GOOGLE_DOC_RE.search(text))
    numpy_count = sum(1 for text in sample if _NUMPY_DOC_RE.search(text))

    if not (google_count or numpy_count):
        return None

    if google_count >= numpy_count:
        value, hits = "Google style (Args:, Returns: sections)", google_count
    else:
        value, hits = "NumPy style (Parameters/dashes sections)", numpy_count
    return Convention(key=_KEY_DOCSTRING_FORMAT, value=value, confidence=min(1.0, hits / 5))
424
+
425
+ # ------------------------------------------------------------------
426
+ # Storage helpers
427
+ # ------------------------------------------------------------------
428
+
429
+ def _upsert_conventions(self, conventions: list[Convention]) -> None:
430
+ """Persist detected conventions to the DB (upsert by project+key).
431
+
432
+ Each upsert is committed individually. The ``upsert_convention``
433
+ helper on :class:`GdmDatabase` handles its own commit, so wrapping
434
+ in an explicit transaction would conflict.
435
+ """
436
+ for conv in conventions:
437
+ self._db.upsert_convention(
438
+ self._project_id, conv.key, conv.value, conv.confidence
439
+ )
440
+
441
+ # ------------------------------------------------------------------
442
+ # Cache helpers
443
+ # ------------------------------------------------------------------
444
+
445
+ def _is_cache_fresh(self) -> bool:
446
+ """Return ``True`` if the newest convention row is within the TTL."""
447
+ row = self._db.execute_one(
448
+ "SELECT last_updated FROM conventions WHERE project_id = ? "
449
+ "ORDER BY last_updated DESC LIMIT 1",
450
+ (self._project_id,),
451
+ )
452
+ if row is None:
453
+ return False
454
+ last_updated = datetime.fromisoformat(str(row["last_updated"]))
455
+ if last_updated.tzinfo is None:
456
+ last_updated = last_updated.replace(tzinfo=timezone.utc)
457
+ return datetime.now(timezone.utc) - last_updated < timedelta(hours=_CACHE_TTL_HOURS)
458
+
459
+ def _load_from_cache(self) -> list[Convention]:
460
+ """Build a :class:`Convention` list from DB rows."""
461
+ return [
462
+ Convention(key=str(r["key"]), value=str(r["value"]), confidence=float(r["confidence"]))
463
+ for r in self._db.get_conventions(self._project_id)
464
+ ]
465
+
466
+ # ------------------------------------------------------------------
467
+ # File system helpers
468
+ # ------------------------------------------------------------------
469
+
470
+ def _get_recent_files(self, root: Path, patterns: list[str]) -> list[Path]:
471
+ """Return up to ``_TOP_FILES_LIMIT`` most recently modified matching files."""
472
+ candidates: list[Path] = []
473
+ for pattern in patterns:
474
+ for path in root.rglob(pattern):
475
+ if not any(part in _EXCLUDED_DIRS for part in path.parts):
476
+ candidates.append(path)
477
+
478
+ def _safe_mtime(p: Path) -> float:
479
+ try:
480
+ return p.stat().st_mtime
481
+ except OSError:
482
+ return 0.0
483
+
484
+ candidates.sort(key=_safe_mtime, reverse=True)
485
+ return candidates[:_TOP_FILES_LIMIT]
486
+
487
+ @staticmethod
488
+ def _read_file_text(path: Path) -> str:
489
+ """Read up to ``_MAX_FILE_READ_BYTES`` bytes from a file as UTF-8 text."""
490
+ try:
491
+ raw = path.read_bytes()[:_MAX_FILE_READ_BYTES]
492
+ return raw.decode("utf-8", errors="replace")
493
+ except OSError as exc:
494
+ log.debug("Cannot read %s: %s", path, exc)
495
+ return ""