gdmcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gdmcode-0.1.0.dist-info/METADATA +240 -0
- gdmcode-0.1.0.dist-info/RECORD +131 -0
- gdmcode-0.1.0.dist-info/WHEEL +4 -0
- gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
- src/__init__.py +1 -0
- src/_internal/__init__.py +0 -0
- src/_internal/constants.py +244 -0
- src/_internal/domain_skills.py +339 -0
- src/agent/__init__.py +0 -0
- src/agent/commit_classifier.py +91 -0
- src/agent/context_budget.py +391 -0
- src/agent/daemon.py +681 -0
- src/agent/dag_validator.py +153 -0
- src/agent/debug_loop.py +473 -0
- src/agent/impact_analyzer.py +149 -0
- src/agent/impact_graph.py +117 -0
- src/agent/loop.py +1410 -0
- src/agent/orchestrator.py +141 -0
- src/agent/regression_guard.py +251 -0
- src/agent/review_gate.py +648 -0
- src/agent/risk_scorer.py +169 -0
- src/agent/self_healing.py +145 -0
- src/agent/smart_test_selector.py +89 -0
- src/agent/system_prompt.py +226 -0
- src/agent/task_tracker.py +320 -0
- src/agent/test_validator.py +210 -0
- src/agent/tool_orchestrator.py +402 -0
- src/agent/transcript.py +230 -0
- src/agent/verification_loop.py +133 -0
- src/agent/work_director.py +136 -0
- src/agent/worktree_manager.py +53 -0
- src/artifacts/__init__.py +16 -0
- src/artifacts/artifact_store.py +456 -0
- src/artifacts/verification_graph.py +75 -0
- src/auth.py +411 -0
- src/cli.py +1290 -0
- src/commands.py +1398 -0
- src/config.py +762 -0
- src/cost_tracker.py +348 -0
- src/db/__init__.py +4 -0
- src/db/migrations.py +337 -0
- src/enterprise/__init__.py +3 -0
- src/enterprise/audit_log.py +182 -0
- src/enterprise/identity.py +90 -0
- src/enterprise/rbac.py +100 -0
- src/enterprise/team_config.py +125 -0
- src/enterprise/usage_analytics.py +261 -0
- src/exceptions.py +207 -0
- src/git_workflow.py +651 -0
- src/integrations/__init__.py +6 -0
- src/integrations/github_actions.py +106 -0
- src/integrations/mcp_server.py +333 -0
- src/integrations/sentry_integration.py +100 -0
- src/integrations/sentry_server.py +82 -0
- src/integrations/webhook_security.py +19 -0
- src/main.py +27 -0
- src/memory/__init__.py +0 -0
- src/memory/code_index.py +376 -0
- src/memory/compressor.py +378 -0
- src/memory/context_memory.py +135 -0
- src/memory/continuous_memory.py +234 -0
- src/memory/conventions.py +495 -0
- src/memory/db.py +1119 -0
- src/memory/document_index.py +205 -0
- src/memory/file_cache.py +128 -0
- src/memory/project_scanner.py +178 -0
- src/memory/session_store.py +201 -0
- src/models/__init__.py +0 -0
- src/models/client.py +715 -0
- src/models/definitions.py +459 -0
- src/models/router.py +418 -0
- src/models/schemas.py +389 -0
- src/permissions.py +294 -0
- src/remote/__init__.py +5 -0
- src/remote/command_filter.py +33 -0
- src/remote/models.py +31 -0
- src/remote/permission_handler.py +79 -0
- src/remote/phone_ui.py +48 -0
- src/remote/protocol.py +59 -0
- src/remote/qr.py +65 -0
- src/remote/server.py +586 -0
- src/remote/token_manager.py +61 -0
- src/remote/tunnel.py +212 -0
- src/repl.py +475 -0
- src/runtime/__init__.py +1 -0
- src/runtime/branch_farm.py +372 -0
- src/runtime/replay.py +351 -0
- src/sandbox/__init__.py +2 -0
- src/sandbox/hermetic.py +214 -0
- src/sandbox/policy.py +44 -0
- src/sdk/__init__.py +3 -0
- src/sdk/plugin_base.py +39 -0
- src/sdk/plugin_host.py +100 -0
- src/sdk/plugin_loader.py +101 -0
- src/security.py +409 -0
- src/server/__init__.py +7 -0
- src/server/bridge.py +427 -0
- src/server/bridge_cli.py +103 -0
- src/server/bridge_client.py +170 -0
- src/server/protocol_version.py +103 -0
- src/session/__init__.py +10 -0
- src/session/event_fanout.py +46 -0
- src/session/input_broker.py +38 -0
- src/session/permission_bridge.py +100 -0
- src/tools/__init__.py +160 -0
- src/tools/_atomic.py +72 -0
- src/tools/agent_tools.py +423 -0
- src/tools/ask_user_tool.py +83 -0
- src/tools/bash_tool.py +384 -0
- src/tools/browser_tool.py +352 -0
- src/tools/browser_tools.py +179 -0
- src/tools/dep_tools.py +210 -0
- src/tools/document_reader.py +167 -0
- src/tools/document_tool.py +240 -0
- src/tools/document_writer.py +171 -0
- src/tools/impact_tools.py +240 -0
- src/tools/playwright_tool.py +172 -0
- src/tools/quality_tools.py +366 -0
- src/tools/read_tools.py +318 -0
- src/tools/result_cache.py +157 -0
- src/tools/search_tools.py +310 -0
- src/tools/shell_tools.py +311 -0
- src/tools/write_tools.py +337 -0
- src/voice/__init__.py +25 -0
- src/voice/audio_capture.py +92 -0
- src/voice/audio_playback.py +68 -0
- src/voice/errors.py +14 -0
- src/voice/models.py +35 -0
- src/voice/providers.py +143 -0
- src/voice/vad.py +55 -0
- src/voice/voice_loop.py +156 -0
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
"""Convention extractor — scans project files to learn style rules.
|
|
2
|
+
|
|
3
|
+
Conventions are stored in ``gdm.db`` and injected into the system prompt
|
|
4
|
+
for every write operation so the agent follows the project's existing style.
|
|
5
|
+
|
|
6
|
+
Detection is purely pattern-based (no LLM calls). The extractor re-runs
|
|
7
|
+
at most once per ``_CACHE_TTL_HOURS`` unless ``force=True`` is passed.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import re
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from datetime import datetime, timedelta, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from src.memory.db import GdmDatabase
|
|
18
|
+
|
|
19
|
+
__all__ = ["Convention", "ConventionExtractor", "ConventionDrift", "DriftReport"]
|
|
20
|
+
|
|
21
|
+
log = logging.getLogger(__name__)

# ── Convention keys ──────────────────────────────────────────────────────────
# Identifiers stored as the ``key`` of each detected convention.

_KEY_NAMING_STYLE: str = "naming_style"
_KEY_IMPORT_STYLE: str = "import_style"
_KEY_TEST_STRUCTURE: str = "test_structure"
_KEY_ASYNC_PATTERN: str = "async_pattern"
_KEY_TYPE_ANNOTATIONS: str = "type_annotations"
_KEY_DOCSTRING_FORMAT: str = "docstring_format"

# ── Tuning constants ─────────────────────────────────────────────────────────

_TOP_FILES_LIMIT: int = 40  # most recently modified files kept per scan
_NAMING_SAMPLE_LIMIT: int = 20  # files sampled by the naming and docstring detectors
_CACHE_TTL_HOURS: int = 24  # stored conventions newer than this are reused
_MAX_FILE_READ_BYTES: int = 8_192  # leading bytes of each file that get inspected
_MIN_DEF_COUNT: int = 3  # minimum defs to make a confident judgement

# ── Excluded directory names ─────────────────────────────────────────────────
# A candidate file is skipped when any component of its path is in this set.

_EXCLUDED_DIRS: frozenset[str] = frozenset(
    (
        ".git",
        "__pycache__",
        "node_modules",
        ".venv",
        "venv",
        ".mypy_cache",
        "dist",
        "build",
    )
)

# ── Compiled regexes ─────────────────────────────────────────────────────────

# Function definitions whose name is lowercase/underscore vs. capitalised.
_SNAKE_DEF_RE = re.compile(r"\bdef\s+[a-z][a-z0-9_]+\s*\(")
_PASCAL_DEF_RE = re.compile(r"\bdef\s+[A-Z][a-zA-Z0-9]+\s*\(")
# ``from src.…`` (absolute) vs. ``from .…`` (relative) at the start of a line.
_ABS_IMPORT_RE = re.compile(r"^from\s+src\.", re.MULTILINE)
_REL_IMPORT_RE = re.compile(r"^from\s+\.", re.MULTILINE)
# ``async def`` vs. any ``def`` (the latter also matches inside ``async def``).
_ASYNC_DEF_RE = re.compile(r"\basync\s+def\s+")
_ALL_DEF_RE = re.compile(r"\bdef\s+\w+")
# Closing parenthesis followed by a ``->`` return annotation.
_RETURN_ANNOT_RE = re.compile(r"\)\s*->")
# Section markers of Google-style and NumPy-style docstrings.
_GOOGLE_DOC_RE = re.compile(r"\bArgs:\s*\n|\bReturns:\s*\n")
_NUMPY_DOC_RE = re.compile(r"\bParameters\s*\n\s*-{3,}")

# ── Header template ───────────────────────────────────────────────────────────
# Prefix of the conventions block that is injected into the system prompt.

_CONVENTIONS_HEADER: str = (
    "## Project Conventions\n"
    "\n"
    "The following conventions were detected in this codebase. ALL code you write MUST follow these exactly:\n"
)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ── Data model ────────────────────────────────────────────────────────────────
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class Convention:
    """One style rule inferred from the project, held as a key/value pair."""

    key: str  # convention identifier, e.g. "naming_style"
    value: str  # human-readable description of the detected rule
    confidence: float = 1.0  # score assigned by the detector; 1.0 when omitted
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class ConventionDrift:
    """One place where inspected source departs from an expected convention."""

    convention: str  # key of the convention that was violated
    expected: str  # style the convention calls for
    found: str  # style actually observed in the source
    line_number: int  # 1-based line on which the deviation starts
    confidence: float  # score attached to this finding by the drift check
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class DriftReport:
    """Outcome of checking a single file's content for convention drift."""

    drifts: list[ConventionDrift]  # deviations that were reported
    has_high_confidence_drift: bool  # flag set by the producer of the report
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ── Main class ────────────────────────────────────────────────────────────────
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class ConventionExtractor:
|
|
102
|
+
"""Extracts coding conventions from project source files.
|
|
103
|
+
|
|
104
|
+
Scans the top-40 most recently modified files and infers style rules
|
|
105
|
+
by pattern matching. Results are stored in ``gdm.db`` conventions table.
|
|
106
|
+
|
|
107
|
+
Usage::
|
|
108
|
+
|
|
109
|
+
extractor = ConventionExtractor(db, project_root=root)
|
|
110
|
+
conventions = extractor.extract(project_root)
|
|
111
|
+
block = extractor.build_conventions_block() # for system prompt injection
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
def __init__(
|
|
115
|
+
self,
|
|
116
|
+
db: GdmDatabase,
|
|
117
|
+
project_id: str | None = None,
|
|
118
|
+
*,
|
|
119
|
+
project_root: Path | None = None,
|
|
120
|
+
) -> None:
|
|
121
|
+
self._db = db
|
|
122
|
+
self._project_root = project_root
|
|
123
|
+
if project_id is not None:
|
|
124
|
+
self._project_id = project_id
|
|
125
|
+
elif project_root is not None:
|
|
126
|
+
self._project_id = str(project_root)
|
|
127
|
+
else:
|
|
128
|
+
self._project_id = "default"
|
|
129
|
+
|
|
130
|
+
# ------------------------------------------------------------------
|
|
131
|
+
# Public API
|
|
132
|
+
# ------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
def extract(self, project_root: Path, *, force: bool = False) -> list[Convention]:
    """Detect the project's conventions, reusing stored results when fresh.

    Args:
        project_root: root directory of the project to scan.
        force: when ``True``, scan even if the stored data is still fresh.

    Returns:
        The detected :class:`Convention` objects, one per key. On a cache
        hit these are rebuilt from the database instead of re-detected.
    """
    cache_hit = (not force) and self._is_cache_fresh()
    if cache_hit:
        log.debug("Conventions cache is fresh for %s, skipping scan", self._project_id)
        return self._load_from_cache()

    detected: list[Convention] = [
        *self._scan_python_files(project_root),
        *self._scan_typescript_files(project_root),
    ]
    test_structure = self._detect_test_structure(project_root)
    if test_structure is not None:
        detected.append(test_structure)

    # Collapse duplicates: a later entry replaces an earlier one with the
    # same key, while the key keeps its first-seen position.
    latest_by_key: dict[str, Convention] = {}
    for item in detected:
        latest_by_key[item.key] = item
    unique = list(latest_by_key.values())

    self._upsert_conventions(unique)
    log.info("Extracted %d conventions for project %s", len(unique), self._project_id)
    return unique
|
|
162
|
+
|
|
163
|
+
def build_conventions_block(self) -> str:
|
|
164
|
+
"""Build a system-prompt-ready conventions block from cached data.
|
|
165
|
+
|
|
166
|
+
Returns:
|
|
167
|
+
Formatted string ready for injection into a system prompt,
|
|
168
|
+
or an empty string if no conventions have been detected yet.
|
|
169
|
+
"""
|
|
170
|
+
rows = self._db.get_conventions(self._project_id)
|
|
171
|
+
if not rows:
|
|
172
|
+
return ""
|
|
173
|
+
lines: list[str] = []
|
|
174
|
+
for row in rows:
|
|
175
|
+
lines.append(f"- {row['key']}: {row['value']}")
|
|
176
|
+
return _CONVENTIONS_HEADER + "\n".join(lines) + "\n"
|
|
177
|
+
|
|
178
|
+
def get_convention(self, key: str) -> str | None:
|
|
179
|
+
"""Return the value for a specific convention key, or ``None``.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
key: one of the ``_KEY_*`` constants (e.g. ``"naming_style"``).
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
The stored convention value, or ``None`` if not yet detected.
|
|
186
|
+
"""
|
|
187
|
+
row = self._db.execute_one(
|
|
188
|
+
"SELECT value FROM conventions WHERE project_id = ? AND key = ?",
|
|
189
|
+
(self._project_id, key),
|
|
190
|
+
)
|
|
191
|
+
return str(row["value"]) if row is not None else None
|
|
192
|
+
|
|
193
|
+
# ------------------------------------------------------------------
|
|
194
|
+
# Drift detection
|
|
195
|
+
# ------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
def _extract_error_handling(self, files: list[Path]) -> str:
|
|
198
|
+
"""Returns 'try_except' | 'result_type' | 'mixed'"""
|
|
199
|
+
try_count = 0
|
|
200
|
+
result_count = 0
|
|
201
|
+
for f in files[:50]:
|
|
202
|
+
try:
|
|
203
|
+
content = f.read_text(errors="ignore")
|
|
204
|
+
except OSError:
|
|
205
|
+
continue
|
|
206
|
+
try_count += len(re.findall(r'\btry\b\s*:', content))
|
|
207
|
+
result_count += len(re.findall(r'\b(?:Ok|Err|Result)\s*[\[\(]', content))
|
|
208
|
+
if try_count > result_count * 3:
|
|
209
|
+
return "try_except"
|
|
210
|
+
if result_count > try_count * 3:
|
|
211
|
+
return "result_type"
|
|
212
|
+
return "mixed"
|
|
213
|
+
|
|
214
|
+
def _is_cache_stale(self) -> bool:
|
|
215
|
+
try:
|
|
216
|
+
cached_at = self._db.get_convention_cached_at()
|
|
217
|
+
except Exception:
|
|
218
|
+
return True
|
|
219
|
+
if not cached_at:
|
|
220
|
+
return True
|
|
221
|
+
if (datetime.now() - cached_at).total_seconds() > 86400:
|
|
222
|
+
return True
|
|
223
|
+
try:
|
|
224
|
+
recent_changes = sum(
|
|
225
|
+
1 for f in self._project_root.rglob("*.py")
|
|
226
|
+
if f.stat().st_mtime > cached_at.timestamp()
|
|
227
|
+
)
|
|
228
|
+
return recent_changes > 100
|
|
229
|
+
except (OSError, AttributeError):
|
|
230
|
+
return False
|
|
231
|
+
|
|
232
|
+
def check_drift(self, file_content: str, conventions: dict) -> DriftReport:
    """Check file content for deviations from the given conventions dict.

    Three checks are applied, each only when the matching convention is
    present:

    * ``naming_style`` starting with ``snake_case``: camelCase ``def`` names.
    * ``error_handling`` starting with ``result_type``: ``try:`` statements.
    * ``import_style`` starting with ``absolute``: ``from .`` imports.

    Values are matched by prefix so that both the bare tokens and the
    descriptive values stored by the extractor (e.g.
    ``"snake_case functions, PascalCase classes, ..."``) enable a check.

    Args:
        file_content: source text to inspect.
        conventions: mapping of convention key → expected value string.

    Returns:
        :class:`DriftReport` with all drifts whose confidence exceeds 0.8,
        grouped by check in the order listed above.
    """

    def expects(key: str, token: str) -> bool:
        value = conventions.get(key)
        return isinstance(value, str) and value.startswith(token)

    def line_of(offset: int) -> int:
        # Count newlines before the match without copying the prefix.
        return file_content.count("\n", 0, offset) + 1

    # (convention key, expected, found, offending pattern, regex flags, confidence)
    rules = (
        (
            "naming_style", "snake_case", "camelCase",
            r'\bdef\s+([a-z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]*)\b', 0, 0.9,
        ),
        ("error_handling", "result_type", "try_except", r'\btry\s*:', 0, 0.85),
        ("import_style", "absolute", "relative", r'^from\s+\.', re.MULTILINE, 0.95),
    )

    drifts: list[ConventionDrift] = []
    for key, expected, found, pattern, flags, confidence in rules:
        if not expects(key, expected):
            continue
        for match in re.finditer(pattern, file_content, flags):
            drifts.append(
                ConventionDrift(key, expected, found, line_of(match.start()), confidence)
            )

    high = [d for d in drifts if d.confidence > 0.8]
    return DriftReport(drifts=high, has_high_confidence_drift=bool(high))
|
|
261
|
+
|
|
262
|
+
# ------------------------------------------------------------------
|
|
263
|
+
# File-type scanners
|
|
264
|
+
# ------------------------------------------------------------------
|
|
265
|
+
|
|
266
|
+
def _scan_python_files(self, root: Path) -> list[Convention]:
|
|
267
|
+
"""Extract Python-specific conventions from .py files."""
|
|
268
|
+
files = self._get_recent_files(root, ["*.py"])
|
|
269
|
+
if not files:
|
|
270
|
+
return []
|
|
271
|
+
|
|
272
|
+
result: list[Convention] = []
|
|
273
|
+
for detector in (
|
|
274
|
+
self._detect_naming_style,
|
|
275
|
+
self._detect_import_style,
|
|
276
|
+
self._detect_async_pattern,
|
|
277
|
+
self._detect_type_annotations,
|
|
278
|
+
self._detect_docstring_format,
|
|
279
|
+
):
|
|
280
|
+
conv = detector(files)
|
|
281
|
+
if conv is not None:
|
|
282
|
+
result.append(conv)
|
|
283
|
+
return result
|
|
284
|
+
|
|
285
|
+
def _scan_typescript_files(self, root: Path) -> list[Convention]:
|
|
286
|
+
"""Extract TypeScript-specific conventions from .ts/.tsx files."""
|
|
287
|
+
files = self._get_recent_files(root, ["*.ts", "*.tsx"])
|
|
288
|
+
if not files:
|
|
289
|
+
return []
|
|
290
|
+
|
|
291
|
+
result: list[Convention] = []
|
|
292
|
+
async_conv = self._detect_async_pattern(files)
|
|
293
|
+
if async_conv is not None:
|
|
294
|
+
result.append(async_conv)
|
|
295
|
+
return result
|
|
296
|
+
|
|
297
|
+
# ------------------------------------------------------------------
|
|
298
|
+
# Detectors (pattern-based, no LLM)
|
|
299
|
+
# ------------------------------------------------------------------
|
|
300
|
+
|
|
301
|
+
def _detect_naming_style(self, files: list[Path]) -> Convention | None:
    """Infer the function naming style from a sample of ``def`` statements.

    Only the first ``_NAMING_SAMPLE_LIMIT`` files are read.

    Args:
        files: candidate source files.

    Returns:
        ``None`` when fewer than ``_MIN_DEF_COUNT`` definitions matched;
        a "mixed" convention when the dominant style covers less than 65%
        of them; otherwise a convention describing the dominant style with
        that share as its confidence.
    """
    texts = [self._read_file_text(path) for path in files[:_NAMING_SAMPLE_LIMIT]]
    snake_count = sum(len(_SNAKE_DEF_RE.findall(text)) for text in texts)
    pascal_count = sum(len(_PASCAL_DEF_RE.findall(text)) for text in texts)

    total = snake_count + pascal_count
    if total < _MIN_DEF_COUNT:
        return None

    ratio = max(snake_count, pascal_count) / total
    if ratio < 0.65:
        # Neither style dominates clearly enough to assert one.
        return Convention(
            key=_KEY_NAMING_STYLE,
            value="mixed naming styles detected",
            confidence=round(1.0 - ratio, 2),
        )

    value = (
        "snake_case functions, PascalCase classes, SCREAMING_SNAKE constants"
        if snake_count >= pascal_count
        else "PascalCase functions and classes, SCREAMING_SNAKE constants"
    )
    return Convention(key=_KEY_NAMING_STYLE, value=value, confidence=round(ratio, 2))
|
|
327
|
+
|
|
328
|
+
def _detect_import_style(self, files: list[Path]) -> Convention | None:
    """Decide between absolute (``from src.``) and relative (``from .``) imports.

    Args:
        files: source files to inspect.

    Returns:
        ``None`` when neither kind of import was seen; otherwise a
        convention naming the more frequent kind (absolute wins a tie),
        with its share of the matches as confidence.
    """
    texts = [self._read_file_text(path) for path in files]
    abs_count = sum(len(_ABS_IMPORT_RE.findall(text)) for text in texts)
    rel_count = sum(len(_REL_IMPORT_RE.findall(text)) for text in texts)

    total = abs_count + rel_count
    if total == 0:
        return None

    prefers_absolute = abs_count >= rel_count
    value = (
        "absolute imports (from src. prefix)"
        if prefers_absolute
        else "relative imports (from . prefix)"
    )
    share = max(abs_count, rel_count) / total
    return Convention(key=_KEY_IMPORT_STYLE, value=value, confidence=min(1.0, share))
|
|
347
|
+
|
|
348
|
+
def _detect_test_structure(self, root: Path) -> Convention | None:
|
|
349
|
+
"""Detect test framework from test files and config presence."""
|
|
350
|
+
tests_dir = root / "tests"
|
|
351
|
+
if (root / "jest.config.js").exists() or (root / "jest.config.ts").exists():
|
|
352
|
+
return Convention(key=_KEY_TEST_STRUCTURE, value="jest", confidence=1.0)
|
|
353
|
+
if (root / "vitest.config.ts").exists() or (root / "vitest.config.js").exists():
|
|
354
|
+
return Convention(key=_KEY_TEST_STRUCTURE, value="vitest", confidence=1.0)
|
|
355
|
+
if tests_dir.exists() and (tests_dir / "conftest.py").exists():
|
|
356
|
+
return Convention(key=_KEY_TEST_STRUCTURE, value="pytest with conftest.py", confidence=1.0)
|
|
357
|
+
if tests_dir.exists() and any(tests_dir.rglob("test_*.py")):
|
|
358
|
+
return Convention(key=_KEY_TEST_STRUCTURE, value="pytest", confidence=0.9)
|
|
359
|
+
return None
|
|
360
|
+
|
|
361
|
+
def _detect_async_pattern(self, files: list[Path]) -> Convention | None:
    """Measure how much of the sampled code is written as ``async def``.

    Args:
        files: source files to inspect.

    Returns:
        ``None`` when fewer than ``_MIN_DEF_COUNT`` definitions were found;
        otherwise a convention (confidence 0.9) describing the code as
        primarily async (>50%), mixed (>10%) or primarily synchronous.
    """
    async_count = 0
    total_count = 0
    for text in map(self._read_file_text, files):
        async_count += len(_ASYNC_DEF_RE.findall(text))
        total_count += len(_ALL_DEF_RE.findall(text))

    if total_count < _MIN_DEF_COUNT:
        return None

    ratio = async_count / total_count
    if ratio <= 0.1:
        value = "primarily synchronous (minimal async usage)"
    elif ratio <= 0.5:
        value = f"mixed sync/async ({ratio:.0%} of functions are async)"
    else:
        value = f"primarily async/await ({ratio:.0%} of functions are async)"
    return Convention(key=_KEY_ASYNC_PATTERN, value=value, confidence=0.9)
|
|
381
|
+
|
|
382
|
+
def _detect_type_annotations(self, files: list[Path]) -> Convention | None:
    """Estimate how consistently functions carry ``->`` return annotations.

    Args:
        files: source files to inspect.

    Returns:
        ``None`` when fewer than ``_MIN_DEF_COUNT`` definitions were found;
        otherwise a convention (confidence 0.85) rating annotations as
        required (>80%), partial (>40%) or optional/minimal.
    """
    total = 0
    annotated = 0
    for text in map(self._read_file_text, files):
        total += len(_ALL_DEF_RE.findall(text))
        annotated += len(_RETURN_ANNOT_RE.findall(text))

    if total < _MIN_DEF_COUNT:
        return None

    # The two patterns are counted independently, so cap the share at 100%.
    ratio = min(1.0, annotated / total)
    if ratio <= 0.4:
        value = "optional/minimal type annotations"
    elif ratio <= 0.8:
        value = f"partial — {ratio:.0%} of functions carry return annotations"
    else:
        value = f"required — {ratio:.0%} of functions carry return annotations"
    return Convention(key=_KEY_TYPE_ANNOTATIONS, value=value, confidence=0.85)
|
|
402
|
+
|
|
403
|
+
def _detect_docstring_format(self, files: list[Path]) -> Convention | None:
    """Tell Google-style from NumPy-style docstrings in a file sample.

    Each of the first ``_NAMING_SAMPLE_LIMIT`` files counts at most once
    per style.

    Args:
        files: candidate source files.

    Returns:
        ``None`` when no file shows either style; otherwise a convention
        for the style seen in more files (Google wins a tie), with
        confidence growing with the file count and capped at 1.0.
    """
    texts = [self._read_file_text(path) for path in files[:_NAMING_SAMPLE_LIMIT]]
    google_count = sum(1 for text in texts if _GOOGLE_DOC_RE.search(text))
    numpy_count = sum(1 for text in texts if _NUMPY_DOC_RE.search(text))

    if not google_count and not numpy_count:
        return None

    if numpy_count > google_count:
        value = "NumPy style (Parameters/dashes sections)"
        winner = numpy_count
    else:
        value = "Google style (Args:, Returns: sections)"
        winner = google_count
    # Five or more matching files give full confidence.
    return Convention(key=_KEY_DOCSTRING_FORMAT, value=value, confidence=min(1.0, winner / 5))
|
|
424
|
+
|
|
425
|
+
# ------------------------------------------------------------------
|
|
426
|
+
# Storage helpers
|
|
427
|
+
# ------------------------------------------------------------------
|
|
428
|
+
|
|
429
|
+
def _upsert_conventions(self, conventions: list[Convention]) -> None:
|
|
430
|
+
"""Persist detected conventions to the DB (upsert by project+key).
|
|
431
|
+
|
|
432
|
+
Each upsert is committed individually. The ``upsert_convention``
|
|
433
|
+
helper on :class:`GdmDatabase` handles its own commit, so wrapping
|
|
434
|
+
in an explicit transaction would conflict.
|
|
435
|
+
"""
|
|
436
|
+
for conv in conventions:
|
|
437
|
+
self._db.upsert_convention(
|
|
438
|
+
self._project_id, conv.key, conv.value, conv.confidence
|
|
439
|
+
)
|
|
440
|
+
|
|
441
|
+
# ------------------------------------------------------------------
|
|
442
|
+
# Cache helpers
|
|
443
|
+
# ------------------------------------------------------------------
|
|
444
|
+
|
|
445
|
+
def _is_cache_fresh(self) -> bool:
|
|
446
|
+
"""Return ``True`` if the newest convention row is within the TTL."""
|
|
447
|
+
row = self._db.execute_one(
|
|
448
|
+
"SELECT last_updated FROM conventions WHERE project_id = ? "
|
|
449
|
+
"ORDER BY last_updated DESC LIMIT 1",
|
|
450
|
+
(self._project_id,),
|
|
451
|
+
)
|
|
452
|
+
if row is None:
|
|
453
|
+
return False
|
|
454
|
+
last_updated = datetime.fromisoformat(str(row["last_updated"]))
|
|
455
|
+
if last_updated.tzinfo is None:
|
|
456
|
+
last_updated = last_updated.replace(tzinfo=timezone.utc)
|
|
457
|
+
return datetime.now(timezone.utc) - last_updated < timedelta(hours=_CACHE_TTL_HOURS)
|
|
458
|
+
|
|
459
|
+
def _load_from_cache(self) -> list[Convention]:
|
|
460
|
+
"""Build a :class:`Convention` list from DB rows."""
|
|
461
|
+
return [
|
|
462
|
+
Convention(key=str(r["key"]), value=str(r["value"]), confidence=float(r["confidence"]))
|
|
463
|
+
for r in self._db.get_conventions(self._project_id)
|
|
464
|
+
]
|
|
465
|
+
|
|
466
|
+
# ------------------------------------------------------------------
|
|
467
|
+
# File system helpers
|
|
468
|
+
# ------------------------------------------------------------------
|
|
469
|
+
|
|
470
|
+
def _get_recent_files(self, root: Path, patterns: list[str]) -> list[Path]:
|
|
471
|
+
"""Return up to ``_TOP_FILES_LIMIT`` most recently modified matching files."""
|
|
472
|
+
candidates: list[Path] = []
|
|
473
|
+
for pattern in patterns:
|
|
474
|
+
for path in root.rglob(pattern):
|
|
475
|
+
if not any(part in _EXCLUDED_DIRS for part in path.parts):
|
|
476
|
+
candidates.append(path)
|
|
477
|
+
|
|
478
|
+
def _safe_mtime(p: Path) -> float:
|
|
479
|
+
try:
|
|
480
|
+
return p.stat().st_mtime
|
|
481
|
+
except OSError:
|
|
482
|
+
return 0.0
|
|
483
|
+
|
|
484
|
+
candidates.sort(key=_safe_mtime, reverse=True)
|
|
485
|
+
return candidates[:_TOP_FILES_LIMIT]
|
|
486
|
+
|
|
487
|
+
@staticmethod
|
|
488
|
+
def _read_file_text(path: Path) -> str:
|
|
489
|
+
"""Read up to ``_MAX_FILE_READ_BYTES`` bytes from a file as UTF-8 text."""
|
|
490
|
+
try:
|
|
491
|
+
raw = path.read_bytes()[:_MAX_FILE_READ_BYTES]
|
|
492
|
+
return raw.decode("utf-8", errors="replace")
|
|
493
|
+
except OSError as exc:
|
|
494
|
+
log.debug("Cannot read %s: %s", path, exc)
|
|
495
|
+
return ""
|