gdmcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. gdmcode-0.1.0.dist-info/METADATA +240 -0
  2. gdmcode-0.1.0.dist-info/RECORD +131 -0
  3. gdmcode-0.1.0.dist-info/WHEEL +4 -0
  4. gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/_internal/__init__.py +0 -0
  7. src/_internal/constants.py +244 -0
  8. src/_internal/domain_skills.py +339 -0
  9. src/agent/__init__.py +0 -0
  10. src/agent/commit_classifier.py +91 -0
  11. src/agent/context_budget.py +391 -0
  12. src/agent/daemon.py +681 -0
  13. src/agent/dag_validator.py +153 -0
  14. src/agent/debug_loop.py +473 -0
  15. src/agent/impact_analyzer.py +149 -0
  16. src/agent/impact_graph.py +117 -0
  17. src/agent/loop.py +1410 -0
  18. src/agent/orchestrator.py +141 -0
  19. src/agent/regression_guard.py +251 -0
  20. src/agent/review_gate.py +648 -0
  21. src/agent/risk_scorer.py +169 -0
  22. src/agent/self_healing.py +145 -0
  23. src/agent/smart_test_selector.py +89 -0
  24. src/agent/system_prompt.py +226 -0
  25. src/agent/task_tracker.py +320 -0
  26. src/agent/test_validator.py +210 -0
  27. src/agent/tool_orchestrator.py +402 -0
  28. src/agent/transcript.py +230 -0
  29. src/agent/verification_loop.py +133 -0
  30. src/agent/work_director.py +136 -0
  31. src/agent/worktree_manager.py +53 -0
  32. src/artifacts/__init__.py +16 -0
  33. src/artifacts/artifact_store.py +456 -0
  34. src/artifacts/verification_graph.py +75 -0
  35. src/auth.py +411 -0
  36. src/cli.py +1290 -0
  37. src/commands.py +1398 -0
  38. src/config.py +762 -0
  39. src/cost_tracker.py +348 -0
  40. src/db/__init__.py +4 -0
  41. src/db/migrations.py +337 -0
  42. src/enterprise/__init__.py +3 -0
  43. src/enterprise/audit_log.py +182 -0
  44. src/enterprise/identity.py +90 -0
  45. src/enterprise/rbac.py +100 -0
  46. src/enterprise/team_config.py +125 -0
  47. src/enterprise/usage_analytics.py +261 -0
  48. src/exceptions.py +207 -0
  49. src/git_workflow.py +651 -0
  50. src/integrations/__init__.py +6 -0
  51. src/integrations/github_actions.py +106 -0
  52. src/integrations/mcp_server.py +333 -0
  53. src/integrations/sentry_integration.py +100 -0
  54. src/integrations/sentry_server.py +82 -0
  55. src/integrations/webhook_security.py +19 -0
  56. src/main.py +27 -0
  57. src/memory/__init__.py +0 -0
  58. src/memory/code_index.py +376 -0
  59. src/memory/compressor.py +378 -0
  60. src/memory/context_memory.py +135 -0
  61. src/memory/continuous_memory.py +234 -0
  62. src/memory/conventions.py +495 -0
  63. src/memory/db.py +1119 -0
  64. src/memory/document_index.py +205 -0
  65. src/memory/file_cache.py +128 -0
  66. src/memory/project_scanner.py +178 -0
  67. src/memory/session_store.py +201 -0
  68. src/models/__init__.py +0 -0
  69. src/models/client.py +715 -0
  70. src/models/definitions.py +459 -0
  71. src/models/router.py +418 -0
  72. src/models/schemas.py +389 -0
  73. src/permissions.py +294 -0
  74. src/remote/__init__.py +5 -0
  75. src/remote/command_filter.py +33 -0
  76. src/remote/models.py +31 -0
  77. src/remote/permission_handler.py +79 -0
  78. src/remote/phone_ui.py +48 -0
  79. src/remote/protocol.py +59 -0
  80. src/remote/qr.py +65 -0
  81. src/remote/server.py +586 -0
  82. src/remote/token_manager.py +61 -0
  83. src/remote/tunnel.py +212 -0
  84. src/repl.py +475 -0
  85. src/runtime/__init__.py +1 -0
  86. src/runtime/branch_farm.py +372 -0
  87. src/runtime/replay.py +351 -0
  88. src/sandbox/__init__.py +2 -0
  89. src/sandbox/hermetic.py +214 -0
  90. src/sandbox/policy.py +44 -0
  91. src/sdk/__init__.py +3 -0
  92. src/sdk/plugin_base.py +39 -0
  93. src/sdk/plugin_host.py +100 -0
  94. src/sdk/plugin_loader.py +101 -0
  95. src/security.py +409 -0
  96. src/server/__init__.py +7 -0
  97. src/server/bridge.py +427 -0
  98. src/server/bridge_cli.py +103 -0
  99. src/server/bridge_client.py +170 -0
  100. src/server/protocol_version.py +103 -0
  101. src/session/__init__.py +10 -0
  102. src/session/event_fanout.py +46 -0
  103. src/session/input_broker.py +38 -0
  104. src/session/permission_bridge.py +100 -0
  105. src/tools/__init__.py +160 -0
  106. src/tools/_atomic.py +72 -0
  107. src/tools/agent_tools.py +423 -0
  108. src/tools/ask_user_tool.py +83 -0
  109. src/tools/bash_tool.py +384 -0
  110. src/tools/browser_tool.py +352 -0
  111. src/tools/browser_tools.py +179 -0
  112. src/tools/dep_tools.py +210 -0
  113. src/tools/document_reader.py +167 -0
  114. src/tools/document_tool.py +240 -0
  115. src/tools/document_writer.py +171 -0
  116. src/tools/impact_tools.py +240 -0
  117. src/tools/playwright_tool.py +172 -0
  118. src/tools/quality_tools.py +366 -0
  119. src/tools/read_tools.py +318 -0
  120. src/tools/result_cache.py +157 -0
  121. src/tools/search_tools.py +310 -0
  122. src/tools/shell_tools.py +311 -0
  123. src/tools/write_tools.py +337 -0
  124. src/voice/__init__.py +25 -0
  125. src/voice/audio_capture.py +92 -0
  126. src/voice/audio_playback.py +68 -0
  127. src/voice/errors.py +14 -0
  128. src/voice/models.py +35 -0
  129. src/voice/providers.py +143 -0
  130. src/voice/vad.py +55 -0
  131. src/voice/voice_loop.py +156 -0
@@ -0,0 +1,205 @@
1
+ """DocumentIndex — SQLite FTS5 full-text index for local documents.
2
+
3
+ The index DB lives at ~/.config/gdm/document_index.db by default.
4
+ Pass db_path= in tests to use a temp database.
5
+ """
6
+ from __future__ import annotations
7
+ import logging, sqlite3, threading, time
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Names exported by ``from <module> import *``.
__all__ = ["DocumentIndex", "IndexedChunk", "SearchResult"]

# Default index database location; DocumentIndex falls back to this path
# when it is constructed without an explicit ``db_path``.
INDEX_DB = Path.home() / ".config" / "gdm" / "document_index.db"
# Word budget per text chunk: DocumentIndex._chunk starts a new chunk
# before the running word count would exceed this value.
CHUNK_MAX_WORDS = 500
18
+
19
+
20
@dataclass
class IndexedChunk:
    """One chunk of document text, as produced by ``DocumentIndex._chunk``."""

    # DocumentIndex._chunk always passes 0 here; presumably reserved for the
    # owning indexed_documents row id — TODO confirm intended use.
    doc_id: int
    # 0-based position of the chunk within its document.
    chunk_index: int
    # The chunk's text; this is what gets inserted into the FTS table.
    text: str
    source_label: str  # e.g. "Sheet: Revenue" or "Page 3"
26
+
27
+
28
@dataclass
class SearchResult:
    """One hit returned by ``DocumentIndex.search``."""

    # Path string exactly as it was stored when the document was indexed.
    file_path: str
    # Label of the matching chunk (e.g. "Sheet: <name>" or "Page <n>").
    source_label: str
    # FTS5 snippet() output; matched terms are wrapped in '[' and ']' and
    # elided text is shown as '...'.
    snippet: str
    # Absolute value of the FTS5 ``rank`` column for the matching row.
    score: float
    # Position of the matching chunk within its document.
    chunk_index: int
35
+
36
+
37
class DocumentIndex:
    """Full-text index of local documents backed by SQLite FTS5.

    Two tables are maintained: ``indexed_documents`` (one row per file with
    its format, mtime and chunk count) and the FTS5 virtual table
    ``document_chunks`` (one row per text chunk).  A single connection is
    opened lazily with ``check_same_thread=False`` and every access to it is
    serialised through an internal lock.

    Instances can be used as context managers; the connection is closed on
    exit::

        with DocumentIndex(db_path=some_dir / "index.db") as index:
            index.index_document(some_file)
    """

    def __init__(self, db_path: Path | str | None = None):
        """Open (creating if needed) the index database.

        Args:
            db_path: Database file to use.  Falls back to ``INDEX_DB`` when
                omitted or empty.  A plain string is accepted and converted.
        """
        self._path = Path(db_path) if db_path else INDEX_DB
        self._lock = threading.Lock()
        self._conn: Optional[sqlite3.Connection] = None
        self._init_db()

    def __enter__(self) -> DocumentIndex:
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _get_conn(self) -> sqlite3.Connection:
        """Return the shared connection, opening it on first use.

        Callers are expected to hold ``self._lock``.
        """
        if self._conn is None:
            self._path.parent.mkdir(parents=True, exist_ok=True)
            self._conn = sqlite3.connect(str(self._path), check_same_thread=False)
            # Rows are accessed by column name throughout this class.
            self._conn.row_factory = sqlite3.Row
        return self._conn

    def _init_db(self) -> None:
        """Create the metadata table and the FTS5 chunk table if missing."""
        with self._lock:
            conn = self._get_conn()
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS indexed_documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    file_path TEXT UNIQUE NOT NULL,
                    format TEXT NOT NULL,
                    file_mtime REAL NOT NULL,
                    chunk_count INTEGER NOT NULL DEFAULT 0,
                    indexed_at REAL NOT NULL
                );
                CREATE VIRTUAL TABLE IF NOT EXISTS document_chunks USING fts5(
                    file_path UNINDEXED,
                    source_label UNINDEXED,
                    chunk_index UNINDEXED,
                    text,
                    tokenize='porter unicode61'
                );
            """)
            conn.commit()

    def index_document(self, path: Path | str, force: bool = False) -> int:
        """Index the document at ``path`` and return the number of chunks.

        Args:
            path: File to index.  It is stored under ``str(Path(path))``, so
                the same spelling must be used later with :meth:`remove`.
            force: Re-index even when the stored mtime matches the file.

        Returns:
            The number of chunks written; ``0`` when the stored mtime is
            within 1 ms of the file's mtime and ``force`` is false; ``-1``
            when the file cannot be stat'ed or the reader reports failure.

        Raises:
            sqlite3.Error: If the database write fails.  The replacement of
                the document's rows is rolled back in that case.
        """
        # Function-scope import, presumably to avoid an import cycle or a
        # heavy import at module load — TODO confirm.
        from src.tools.document_reader import DocumentReader

        path = Path(path)
        path_str = str(path)
        try:
            mtime = path.stat().st_mtime
        except OSError as exc:
            log.warning("Cannot stat %s: %s", path, exc)
            return -1

        with self._lock:
            existing = self._get_conn().execute(
                "SELECT file_mtime FROM indexed_documents WHERE file_path=?",
                (path_str,),
            ).fetchone()
        if existing and not force and abs(existing["file_mtime"] - mtime) < 0.001:
            return 0  # up to date

        # Reading and chunking happen outside the lock so that slow document
        # parsing does not block searches.
        content = DocumentReader().read(path)
        if not content.success:
            log.warning("Cannot index %s: %s", path, content.error)
            return -1

        chunks = self._chunk(content)
        chunk_rows = [
            (path_str, c.source_label, c.chunk_index, c.text) for c in chunks
        ]
        with self._lock:
            conn = self._get_conn()
            # ``with conn`` commits on success and rolls back on any error,
            # so a failure can never leave the old rows deleted without the
            # new ones inserted (which a later commit would make permanent).
            with conn:
                conn.execute(
                    "DELETE FROM document_chunks WHERE file_path=?", (path_str,)
                )
                conn.execute(
                    "DELETE FROM indexed_documents WHERE file_path=?", (path_str,)
                )
                conn.executemany(
                    "INSERT INTO document_chunks (file_path, source_label, chunk_index, text) VALUES (?,?,?,?)",
                    chunk_rows,
                )
                conn.execute(
                    "INSERT INTO indexed_documents (file_path, format, file_mtime, chunk_count, indexed_at) VALUES (?,?,?,?,?)",
                    (path_str, content.format, mtime, len(chunks), time.time()),
                )
        return len(chunks)

    def search(self, query: str, limit: int = 10) -> list[SearchResult]:
        """Full-text search across all indexed documents.

        Args:
            query: An FTS5 MATCH expression.
            limit: Maximum number of rows to return.

        Returns:
            Hits ordered by the FTS5 ``rank`` column.  ``score`` is the
            absolute value of that rank.  An empty list is returned (and a
            warning logged) when SQLite rejects the query, e.g. for invalid
            MATCH syntax.
        """
        try:
            with self._lock:
                conn = self._get_conn()
                # Column 3 of document_chunks is ``text``; snippets are built
                # from it with '[' ']' around matches and at most 20 tokens.
                rows = conn.execute(
                    """SELECT file_path, source_label, chunk_index,
                              snippet(document_chunks, 3, '[', ']', '...', 20) as snip,
                              rank
                       FROM document_chunks
                       WHERE document_chunks MATCH ?
                       ORDER BY rank
                       LIMIT ?""",
                    (query, limit),
                ).fetchall()
        except sqlite3.OperationalError as exc:
            log.warning("FTS5 search error: %s", exc)
            return []
        return [
            SearchResult(
                file_path=row["file_path"],
                source_label=row["source_label"],
                snippet=row["snip"],
                score=abs(row["rank"]),
                chunk_index=row["chunk_index"],
            )
            for row in rows
        ]

    def list_indexed(self) -> list[dict]:
        """Return one dict per indexed document, most recently indexed first.

        Each dict has the keys ``file_path``, ``format``, ``chunk_count`` and
        ``indexed_at``.
        """
        with self._lock:
            rows = self._get_conn().execute(
                "SELECT file_path, format, chunk_count, indexed_at FROM indexed_documents ORDER BY indexed_at DESC"
            ).fetchall()
        return [dict(r) for r in rows]

    def remove(self, path: Path | str) -> None:
        """Delete a document's chunks and metadata row from the index.

        Removing a path that is not indexed is a no-op.  Both deletes are
        applied atomically: if either fails, neither is persisted.
        """
        path_str = str(Path(path))
        with self._lock:
            conn = self._get_conn()
            with conn:  # commit on success, roll back on error
                conn.execute(
                    "DELETE FROM document_chunks WHERE file_path=?", (path_str,)
                )
                conn.execute(
                    "DELETE FROM indexed_documents WHERE file_path=?", (path_str,)
                )

    @staticmethod
    def _text_chunk(parts: list[str], index: int) -> IndexedChunk:
        """Build the text chunk at position ``index`` from its paragraphs."""
        return IndexedChunk(
            doc_id=0,
            chunk_index=index,
            text="\n\n".join(parts),
            # The label is a running chunk counter (1-based), not a page
            # number read from the document.
            source_label=f"Page {index + 1}",
        )

    def _chunk(self, content) -> list[IndexedChunk]:
        """Split reader output into chunks for the FTS table.

        Spreadsheets (``content.sheets`` truthy) yield one chunk per
        non-blank sheet, truncated to 8000 characters.  Anything else is
        split on blank lines and the paragraphs are grouped greedily so that
        a chunk is closed before it would exceed ``CHUNK_MAX_WORDS`` words.

        NOTE(review): a single paragraph longer than ``CHUNK_MAX_WORDS`` is
        kept whole, so text without blank lines becomes one large chunk.
        """
        chunks: list[IndexedChunk] = []
        # Spreadsheets: one chunk per sheet
        if content.sheets:
            for sheet in content.sheets:
                text = sheet.to_text()
                if text.strip():
                    chunks.append(IndexedChunk(
                        doc_id=0,
                        chunk_index=len(chunks),
                        text=text[:8000],
                        source_label=f"Sheet: {sheet.name}",
                    ))
            return chunks

        # Text/PDF/DOCX: split on paragraphs, group into ~500 word chunks.
        # ``or ""`` tolerates a reader result whose text is None.
        raw_text = content.text or ""
        paragraphs = [p.strip() for p in raw_text.split("\n\n") if p.strip()]
        pending: list[str] = []
        pending_words = 0
        for para in paragraphs:
            words = len(para.split())
            if pending and pending_words + words > CHUNK_MAX_WORDS:
                chunks.append(self._text_chunk(pending, len(chunks)))
                pending = []
                pending_words = 0
            pending.append(para)
            pending_words += words
        if pending:
            chunks.append(self._text_chunk(pending, len(chunks)))
        return chunks

    def close(self) -> None:
        """Close the connection if open.  Safe to call more than once.

        Takes the same lock as every other method so the connection is not
        closed underneath a query running on another thread.
        """
        with self._lock:
            if self._conn is not None:
                self._conn.close()
                self._conn = None
@@ -0,0 +1,128 @@
1
+ """mtime-based file freshness cache — anti-hallucination core.
2
+
3
+ Tracks when each file was last read and its mtime at that time.
4
+ Used to detect stale file content in context and force re-reads.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from src.memory.db import GdmDatabase
15
+
16
# Names exported by ``from <module> import *``.
__all__ = ["CacheEntry", "FileCache"]

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Maximum number of paths FileCache._delete_batch places in a single
# ``DELETE ... IN (...)`` statement.
_PURGE_BATCH_SIZE: int = 500
21
+
22
+
23
@dataclass
class CacheEntry:
    """A single file-cache record, as returned by ``FileCache.get_entry``."""

    # Resolved absolute path of the tracked file.
    path: Path
    # Modification time stored for the file in the cache row.
    mtime: float
    # Optional summary text stored alongside the mtime.
    summary: str | None
    last_read_at: str  # ISO timestamp
31
+
32
+
33
class FileCache:
    """Freshness tracker for file reads, keyed on modification time.

    For every file that is read, the mtime observed at that moment is stored
    per project.  Comparing the stored mtime with the file's current mtime
    tells whether content already held in context is stale and must be
    re-read.

    Usage::

        cache = FileCache(db, project_id)
        cache.mark_read(Path("src/auth.py"))
        is_fresh = cache.is_fresh(Path("src/auth.py"))  # False if file changed
    """

    def __init__(self, db: GdmDatabase, project_id: str) -> None:
        self._project_id = project_id
        self._db = db

    @staticmethod
    def _normalize(path: Path) -> Path:
        """Resolve ``path`` to the absolute form used as the storage key."""
        return path.resolve()

    def mark_read(self, path: Path, summary: str | None = None) -> None:
        """Store the file's current mtime (and optional summary).

        If the file cannot be stat'ed a warning is logged and nothing is
        stored.
        """
        resolved = self._normalize(path)
        try:
            stat_result = resolved.stat()
        except OSError:
            log.warning("mark_read: cannot stat %s", resolved)
            return
        self._db.upsert_file_cache(
            self._project_id, str(resolved), stat_result.st_mtime, summary
        )

    def is_fresh(self, path: Path) -> bool:
        """Tell whether the file is unchanged since it was last recorded.

        Returns:
            ``True`` when no record exists for the path (unknown files are
            treated as fresh) or when the stored mtime equals the current
            one; ``False`` when the mtime differs or the file can no longer
            be stat'ed.
        """
        resolved = self._normalize(path)
        cached = self._db.get_file_cache(self._project_id, str(resolved))
        if cached is None:
            # never read — assumed fresh per spec
            return True
        try:
            on_disk = resolved.stat().st_mtime
        except OSError:
            return False
        return on_disk == float(cached["mtime"])

    def get_stale_paths(self, paths: list[Path]) -> list[Path]:
        """Return, in input order, the paths for which ``is_fresh`` is false."""
        stale: list[Path] = []
        for candidate in paths:
            if not self.is_fresh(candidate):
                stale.append(candidate)
        return stale

    def invalidate(self, path: Path) -> None:
        """Drop the stored record for ``path`` (e.g. right after a write)."""
        key = str(self._normalize(path))
        self._db.execute(
            "DELETE FROM file_cache WHERE project_id = ? AND path = ?",
            (self._project_id, key),
        )

    def get_entry(self, path: Path) -> CacheEntry | None:
        """Return the stored record for ``path``, or ``None`` if untracked."""
        resolved = self._normalize(path)
        cached = self._db.get_file_cache(self._project_id, str(resolved))
        if cached is not None:
            return CacheEntry(
                path=resolved,
                mtime=float(cached["mtime"]),
                summary=cached["summary"],
                last_read_at=cached["last_read_at"],
            )
        return None

    def purge_deleted(self) -> int:
        """Delete records whose file no longer exists; return how many."""
        tracked = self._db.execute_all(
            "SELECT path FROM file_cache WHERE project_id = ?",
            (self._project_id,),
        )
        missing: list[str] = []
        for record in tracked:
            if not Path(record["path"]).exists():
                missing.append(record["path"])
        return self._delete_batch(missing) if missing else 0

    def _delete_batch(self, paths: list[str]) -> int:
        """Delete rows for ``paths`` in slices of ``_PURGE_BATCH_SIZE``.

        Returns the number of paths submitted for deletion.
        """
        total = 0
        for start in range(0, len(paths), _PURGE_BATCH_SIZE):
            chunk = paths[start : start + _PURGE_BATCH_SIZE]
            marks = ",".join(["?"] * len(chunk))
            self._db.execute(
                f"DELETE FROM file_cache WHERE project_id = ? AND path IN ({marks})",
                (self._project_id, *chunk),
            )
            total += len(chunk)
        return total
@@ -0,0 +1,178 @@
1
+ """Initial project record builder — runs once on first invocation.
2
+
3
+ Updates on /memory refresh. Detects tech stack from marker files.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import hashlib
8
+ import json
9
+ import logging
10
+ import sqlite3
11
+ from dataclasses import dataclass
12
+ from datetime import datetime, timezone
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING
15
+
16
+ if TYPE_CHECKING:
17
+ from src.memory.db import GdmDatabase
18
+
19
# Names exported by ``from <module> import *``.
__all__ = ["ProjectRecord", "ProjectScanner", "estimate_token_count"]

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# ProjectScanner._refresh_record re-detects the tech stack when the stored
# ``last_seen`` timestamp is at least this many days old.
_REFRESH_DAYS: int = 7

# Marker name (relative to the project root) -> technology tag.
# ProjectScanner._detect_tech_stack tests each marker with
# ``(root / marker).exists()``, so a marker may be a file or a directory
# (e.g. ".terraform").  Several markers may map to the same tag.
_TECH_MARKERS: dict[str, str] = {
    "pyproject.toml": "python",
    "requirements.txt": "python",
    "setup.py": "python",
    "package.json": "nodejs",
    "tsconfig.json": "typescript",
    "Cargo.toml": "rust",
    "go.mod": "golang",
    "pom.xml": "java",
    "build.gradle": "java",
    "Dockerfile": "docker",
    "docker-compose.yml": "docker",
    ".terraform": "terraform",
    "pubspec.yaml": "dart",
    "mix.exs": "elixir",
    "Gemfile": "ruby",
    "composer.json": "php",
}
43
+
44
+
45
@dataclass
class ProjectRecord:
    """Lightweight descriptor for a scanned project."""

    # Identifier stored in the ``projects`` table for this root.
    project_id: str
    # Resolved project root directory.
    root_path: Path
    # Project name; ProjectScanner.scan uses the root directory's name.
    name: str
    tech_stack: list[str]  # e.g. ["python", "typescript", "docker"]
53
+
54
+
55
class ProjectScanner:
    """Builds and refreshes the project record stored in the ``projects`` table.

    The tech stack is detected purely from the presence of marker files or
    directories in the project root; no file contents are read.

    Usage::

        scanner = ProjectScanner(db)
        record = scanner.ensure_project(root_path)  # creates or updates
    """

    def __init__(self, db: GdmDatabase) -> None:
        self._db = db

    def ensure_project(self, root: Path) -> ProjectRecord:
        """Return the record for ``root``, creating it on first sight.

        An existing record always gets its ``last_seen`` bumped; its tech
        stack is re-detected when the record is stale (see
        :meth:`_refresh_record`).
        """
        normalized = root.resolve()
        row = self._db.execute_one(
            "SELECT project_id, root_path, name, tech_stack, last_seen "
            "FROM projects WHERE root_path = ?",
            (str(normalized),),
        )
        if row is None:
            return self.scan(normalized)
        return self._refresh_record(row, normalized)

    def scan(self, root: Path) -> ProjectRecord:
        """Detect the tech stack for ``root`` and upsert its DB record.

        If a row for the same root path already exists, its name, tech stack
        and ``last_seen`` are updated and its existing ``project_id`` is
        kept; the returned record carries whichever id the table holds.
        """
        normalized = root.resolve()
        project_id = self._generate_project_id(normalized)
        name = normalized.name
        tech_stack = self._detect_tech_stack(normalized)
        self._db.execute(
            "INSERT INTO projects (project_id, root_path, name, tech_stack) "
            "VALUES (?, ?, ?, ?) "
            "ON CONFLICT(root_path) DO UPDATE SET "
            "name = excluded.name, "
            "tech_stack = excluded.tech_stack, "
            "last_seen = datetime('now')",
            (project_id, str(normalized), name, json.dumps(tech_stack)),
        )
        # Read the id back: on conflict the pre-existing id wins over the
        # freshly generated one.
        row = self._db.execute_one(
            "SELECT project_id FROM projects WHERE root_path = ?",
            (str(normalized),),
        )
        actual_id = row["project_id"] if row else project_id
        log.info("Scanned project '%s' id=%s stack=%s", name, actual_id, tech_stack)
        return ProjectRecord(
            project_id=actual_id,
            root_path=normalized,
            name=name,
            tech_stack=tech_stack,
        )

    def get_project_id(self, root: Path) -> str | None:
        """Return the stored project id for ``root``, or ``None`` if unknown."""
        normalized = root.resolve()
        row = self._db.execute_one(
            "SELECT project_id FROM projects WHERE root_path = ?",
            (str(normalized),),
        )
        return row["project_id"] if row is not None else None

    @staticmethod
    def _parse_last_seen(value: object) -> datetime | None:
        """Parse a stored timestamp into a naive UTC datetime.

        Returns ``None`` for NULL, non-string or unparseable values.  A
        timezone-aware value is converted to UTC and made naive so it can be
        subtracted from the naive "now" used by :meth:`_refresh_record`.
        """
        if not isinstance(value, str):
            return None
        try:
            parsed = datetime.fromisoformat(value)
        except ValueError:
            return None
        if parsed.tzinfo is not None:
            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
        return parsed

    @staticmethod
    def _parse_tech_stack(value: object) -> list[str] | None:
        """Decode the stored JSON tech stack; ``None`` if missing or invalid."""
        if not isinstance(value, (str, bytes, bytearray)):
            return None
        try:
            decoded = json.loads(value)
        except ValueError:  # JSONDecodeError and UnicodeDecodeError
            return None
        return decoded if isinstance(decoded, list) else None

    def _refresh_record(self, row: sqlite3.Row, root: Path) -> ProjectRecord:
        """Bump ``last_seen`` and re-detect the tech stack when needed.

        The stack is re-detected when ``last_seen`` is at least
        ``_REFRESH_DAYS`` days old, or when the stored ``last_seen`` or
        ``tech_stack`` value is missing or unreadable (instead of raising).

        NOTE(review): staleness is measured on ``last_seen``, which every
        call bumps, so a project opened more often than every
        ``_REFRESH_DAYS`` days never has its stack re-detected here.  Fixing
        that needs a separate "scanned at" column.
        """
        last_seen = self._parse_last_seen(row["last_seen"])
        tech_stack = self._parse_tech_stack(row["tech_stack"])
        now = datetime.now(timezone.utc).replace(tzinfo=None)  # naive UTC, matches SQLite
        stale = last_seen is None or (now - last_seen).days >= _REFRESH_DAYS
        if stale or tech_stack is None:
            tech_stack = self._detect_tech_stack(root)
            self._db.execute(
                "UPDATE projects SET tech_stack = ?, last_seen = datetime('now') "
                "WHERE project_id = ?",
                (json.dumps(tech_stack), row["project_id"]),
            )
        else:
            self._db.execute(
                "UPDATE projects SET last_seen = datetime('now') WHERE project_id = ?",
                (row["project_id"],),
            )
        return ProjectRecord(
            project_id=row["project_id"],
            root_path=root,
            name=row["name"],
            tech_stack=tech_stack,
        )

    def _detect_tech_stack(self, root: Path) -> list[str]:
        """Return the sorted, de-duplicated tags whose marker exists in ``root``."""
        return sorted(
            {
                tech
                for marker, tech in _TECH_MARKERS.items()
                if (root / marker).exists()
            }
        )

    def _generate_project_id(self, root: Path) -> str:
        """Return a stable 16-hex-digit id derived from the root path.

        The path string is hashed as UTF-8.  ``surrogateescape`` lets paths
        containing undecodable bytes be hashed instead of raising
        ``UnicodeEncodeError``; ids for ordinary paths are unchanged.
        """
        raw = str(root).encode("utf-8", "surrogateescape")
        return hashlib.sha256(raw).hexdigest()[:16]
158
+
159
+
160
def estimate_token_count(root: Path, tech_stack: list[str] | None = None) -> int:
    """Estimate how many tokens the project under ``root`` amounts to.

    This is a thin convenience wrapper: it builds a default
    :class:`~src.agent.context_budget.ContextBudget`, wraps it in a
    :class:`~src.agent.context_budget.WholeCodebaseMode`, and delegates to
    that object's ``estimate_token_count``.

    Args:
        root: project root directory.
        tech_stack: detected technologies (e.g. ``["python", "nodejs"]``).
            ``None`` is forwarded as an empty list.

    Returns:
        Whatever ``WholeCodebaseMode.estimate_token_count`` returns.  The
        previous documentation describes it as a char-based approximation
        that is always ≥ 1; that guarantee lives in the delegate, not in
        this function — TODO confirm.
    """
    # Function-scope import keeps src.agent.context_budget out of this
    # module's import-time dependencies.
    from src.agent.context_budget import ContextBudget, WholeCodebaseMode

    return WholeCodebaseMode(ContextBudget()).estimate_token_count(
        root, tech_stack or []
    )