codevira 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. codevira-1.6.0.dist-info/LICENSE +21 -0
  2. codevira-1.6.0.dist-info/METADATA +477 -0
  3. codevira-1.6.0.dist-info/RECORD +58 -0
  4. codevira-1.6.0.dist-info/WHEEL +5 -0
  5. codevira-1.6.0.dist-info/entry_points.txt +2 -0
  6. codevira-1.6.0.dist-info/top_level.txt +2 -0
  7. indexer/__init__.py +1 -0
  8. indexer/chunker.py +428 -0
  9. indexer/global_db.py +197 -0
  10. indexer/graph_generator.py +380 -0
  11. indexer/index_codebase.py +588 -0
  12. indexer/outcome_tracker.py +172 -0
  13. indexer/rule_learner.py +186 -0
  14. indexer/sqlite_graph.py +640 -0
  15. indexer/treesitter_parser.py +423 -0
  16. mcp_server/__init__.py +1 -0
  17. mcp_server/__main__.py +20 -0
  18. mcp_server/auto_init.py +257 -0
  19. mcp_server/cli.py +622 -0
  20. mcp_server/crash_logger.py +236 -0
  21. mcp_server/data/__init__.py +1 -0
  22. mcp_server/data/agents/builder.md +84 -0
  23. mcp_server/data/agents/developer.md +111 -0
  24. mcp_server/data/agents/documenter.md +138 -0
  25. mcp_server/data/agents/orchestrator.md +96 -0
  26. mcp_server/data/agents/planner.md +106 -0
  27. mcp_server/data/agents/reviewer.md +82 -0
  28. mcp_server/data/agents/tester.md +83 -0
  29. mcp_server/data/config.example.yaml +33 -0
  30. mcp_server/data/rules/coding-standards.md +48 -0
  31. mcp_server/data/rules/engineering-excellence.md +28 -0
  32. mcp_server/data/rules/git-cicd-governance.md +32 -0
  33. mcp_server/data/rules/git_commits.md +130 -0
  34. mcp_server/data/rules/incremental-updates.md +5 -0
  35. mcp_server/data/rules/master_rule.md +187 -0
  36. mcp_server/data/rules/multi-language.md +19 -0
  37. mcp_server/data/rules/persistence.md +21 -0
  38. mcp_server/data/rules/resilience-observability.md +17 -0
  39. mcp_server/data/rules/smoke-testing.md +48 -0
  40. mcp_server/data/rules/testing-standards.md +23 -0
  41. mcp_server/detect.py +284 -0
  42. mcp_server/gitignore.py +284 -0
  43. mcp_server/global_sync.py +187 -0
  44. mcp_server/http_server.py +341 -0
  45. mcp_server/ide_inject.py +444 -0
  46. mcp_server/launchd.py +156 -0
  47. mcp_server/migrate.py +215 -0
  48. mcp_server/paths.py +256 -0
  49. mcp_server/prompts.py +136 -0
  50. mcp_server/server.py +1049 -0
  51. mcp_server/tools/__init__.py +0 -0
  52. mcp_server/tools/changesets.py +223 -0
  53. mcp_server/tools/code_reader.py +335 -0
  54. mcp_server/tools/graph.py +637 -0
  55. mcp_server/tools/learning.py +238 -0
  56. mcp_server/tools/playbook.py +89 -0
  57. mcp_server/tools/roadmap.py +599 -0
  58. mcp_server/tools/search.py +145 -0
indexer/chunker.py ADDED
@@ -0,0 +1,428 @@
1
+ """
2
+ Multi-language source chunker for codebase indexing.
3
+ Splits source files into function/class/module chunks for semantic search.
4
+
5
+ Language support:
6
+ - Python: stdlib ast module (full support)
7
+ - TypeScript, Go, Rust: tree-sitter grammars via treesitter_parser
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import ast
12
+ import functools
13
+ import os
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+ from typing import Iterator
17
+
18
+ from indexer.treesitter_parser import (
19
+ parse_file as ts_parse_file,
20
+ get_language as ts_get_language,
21
+ EXTENSION_MAP as TS_EXTENSION_MAP,
22
+ )
23
+
24
+
25
def _load_config() -> dict:
    """Load ``config.yaml`` from the data directory.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the
        file does not exist, cannot be read or parsed, PyYAML is not
        importable, or the document's top level is not a mapping -- so
        callers can always rely on ``.get``.
    """
    from mcp_server.paths import get_data_dir

    config_path = get_data_dir() / "config.yaml"
    if not config_path.exists():
        return {}

    try:
        import yaml

        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(config_path, encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception:
        # Deliberately best-effort: a missing PyYAML or a malformed file
        # falls back to the built-in defaults instead of aborting.
        return {}

    # safe_load returns None for an empty file and may return a list or a
    # scalar; only a mapping is usable as configuration.
    return loaded if isinstance(loaded, dict) else {}
36
+
37
+
38
# Directory names pruned from the walk in iter_source_files().
SKIP_DIRS = {
    "__pycache__",
    ".venv",
    "venv",
    ".git",
    "node_modules",
    "migrations",
}

# File names that iter_source_files() never yields.
SKIP_FILES = {
    "__init__.py",
}

# Extensions handled by the tree-sitter code path; any other extension is
# routed to the Python ast code path.
_TS_SUPPORTED_EXTENSIONS = set(TS_EXTENSION_MAP.keys())
43
+
44
+
45
@functools.lru_cache(maxsize=None)
def _get_project_config() -> tuple[frozenset[str], tuple[str, ...]]:
    """Return ``(target_dirs, file_extensions)`` from ``config.yaml``.

    Settings are read from the ``project`` section when present, otherwise
    from the top level of the config. ``watched_dirs`` defaults to
    ``["src"]`` and ``file_extensions`` to ``[".py"]``.

    The result is memoised with ``lru_cache``, so the config file is read at
    most once per process.

    Returns:
        A frozenset of watched directory names and a tuple of file
        extensions (a tuple so it can be passed directly to
        ``str.endswith``).
    """
    cfg = _load_config()
    if not isinstance(cfg, dict):
        cfg = {}

    project_cfg = cfg.get("project", cfg)
    if not isinstance(project_cfg, dict):
        # e.g. a bare ``project:`` key parses to None.
        project_cfg = {}

    def _string_list(key: str, default: list[str]) -> list[str]:
        """Read *key* as a list of strings, tolerating null and scalar values."""
        value = project_cfg.get(key, default)
        if value is None:
            return default
        if isinstance(value, str):
            # A scalar such as ``watched_dirs: src`` must not be iterated
            # character by character.
            return [value]
        return [str(item) for item in value]

    target_dirs: frozenset[str] = frozenset(_string_list("watched_dirs", ["src"]))
    file_extensions: tuple[str, ...] = tuple(_string_list("file_extensions", [".py"]))
    return target_dirs, file_extensions
57
+
58
+
59
@dataclass
class CodeChunk:
    """A single unit of source code produced by the chunker.

    Attributes:
        file_path: Path of the source file, relative to the project root.
        chunk_type: "function", "class" or "module" for Python files; for
            tree-sitter files the parser's symbol kind is stored as-is.
        name: Function/class name, or the file stem for module chunks.
        source_text: The source code (or docstring text for module chunks).
        start_line: First line of the chunk.
        end_line: Last line of the chunk.
        docstring: First docstring if present, else "".
        layer: Layer name inferred from the file path.
    """

    file_path: str
    chunk_type: str
    name: str
    source_text: str
    start_line: int
    end_line: int
    docstring: str
    layer: str
69
+
70
+
71
+ def _infer_layer(file_path: str) -> str:
72
+ parts = Path(file_path).parts
73
+ for i, part in enumerate(parts):
74
+ if part in {"generator", "assembler", "indexer", "scanner", "drift", "graph", "context"}:
75
+ return part
76
+ if part in {"api", "routes"}:
77
+ return "api"
78
+ if part in {"core", "datastore", "schemas"}:
79
+ return part
80
+ if part in {"contexts", "application", "providers", "control", "services", "handlers"}:
81
+ return part
82
+ return "unknown"
83
+
84
+
85
+ def _get_docstring(node: ast.AST) -> str:
86
+ try:
87
+ return ast.get_docstring(node) or ""
88
+ except Exception:
89
+ return ""
90
+
91
+
92
+ def _extract_source_lines(source_lines: list[str], start: int, end: int) -> str:
93
+ return "".join(source_lines[start - 1:end])
94
+
95
+
96
def extract_imports(file_path: str, project_root: str) -> list[str]:
    """
    Return the project-local files imported by a source file.

    The file extension (case-insensitive) selects the implementation:
    extensions known to the tree-sitter parser go through
    _extract_imports_treesitter, everything else is handled as Python by
    _extract_imports_python.

    Returns a list of paths relative to the project root
    (e.g. 'src/services/provider.py').
    """
    suffix = Path(file_path).suffix.lower()
    extractor = (
        _extract_imports_treesitter
        if suffix in _TS_SUPPORTED_EXTENSIONS
        else _extract_imports_python
    )
    return extractor(file_path, project_root)
113
+
114
+
115
def _extract_imports_treesitter(file_path: str, project_root: str) -> list[str]:
    """
    Extract import paths from a non-Python file using tree-sitter.

    Each import's module string is passed to _resolve_ts_import; imports
    that do not resolve to a project file are dropped. The result keeps
    first-seen order and contains no duplicates. An empty list is returned
    when the parser raises FileNotFoundError or ValueError.
    """
    try:
        parsed = ts_parse_file(file_path)
    except (FileNotFoundError, ValueError):
        return []

    root = Path(project_root)
    importer_dir = Path(file_path).parent

    resolved_paths = (
        _resolve_ts_import(imp.module, importer_dir, root)
        for imp in parsed.imports
    )
    # dict.fromkeys() de-duplicates while preserving insertion order.
    return list(dict.fromkeys(path for path in resolved_paths if path))
137
+
138
+
139
+ def _resolve_ts_import(raw_module: str, file_dir: Path, project_root: Path) -> str | None:
140
+ """
141
+ Try to resolve a tree-sitter import string to a relative file path.
142
+ Handles TypeScript/JS relative imports, Go package imports, and Rust use paths.
143
+ """
144
+ # TypeScript/JS: relative imports like './foo' or '../bar'
145
+ if raw_module.startswith('.'):
146
+ # Resolve relative to the importing file's directory
147
+ candidates = [
148
+ file_dir / f"{raw_module}.ts",
149
+ file_dir / f"{raw_module}.tsx",
150
+ file_dir / f"{raw_module}.js",
151
+ file_dir / f"{raw_module}.jsx",
152
+ file_dir / raw_module / "index.ts",
153
+ file_dir / raw_module / "index.tsx",
154
+ file_dir / raw_module / "index.js",
155
+ ]
156
+ for c in candidates:
157
+ resolved = c.resolve()
158
+ if resolved.exists():
159
+ try:
160
+ return str(resolved.relative_to(project_root))
161
+ except ValueError:
162
+ continue
163
+ return None
164
+
165
+ # Non-relative: try as a project-local path (e.g. 'src/utils/foo')
166
+ # Check common extensions
167
+ for ext in ['.ts', '.tsx', '.js', '.go', '.rs']:
168
+ candidate = project_root / f"{raw_module}{ext}"
169
+ if candidate.exists():
170
+ return str(candidate.relative_to(project_root))
171
+
172
+ # Try as directory with index file
173
+ for index in ['index.ts', 'index.tsx', 'index.js', 'mod.rs']:
174
+ candidate = project_root / raw_module / index
175
+ if candidate.exists():
176
+ return str(candidate.relative_to(project_root))
177
+
178
+ # Go: package paths like 'project/internal/services'
179
+ # Try mapping to directory with .go files
180
+ candidate_dir = project_root / raw_module
181
+ if candidate_dir.is_dir():
182
+ go_files = list(candidate_dir.glob('*.go'))
183
+ if go_files:
184
+ return str(go_files[0].relative_to(project_root))
185
+
186
+ return None
187
+
188
+
189
+ def _extract_imports_python(file_path: str, project_root: str) -> list[str]:
190
+ """
191
+ Parse a Python file's import statements and return relative paths of
192
+ project-local imports only (skips stdlib and third-party packages).
193
+ """
194
+ try:
195
+ with open(file_path, "r", encoding="utf-8") as f:
196
+ source = f.read()
197
+ except (OSError, UnicodeDecodeError):
198
+ return []
199
+
200
+ try:
201
+ tree = ast.parse(source, filename=file_path)
202
+ except SyntaxError:
203
+ return []
204
+
205
+ project_root_path = Path(project_root)
206
+ target_dirs, _ = _get_project_config()
207
+ project_packages = set(target_dirs)
208
+
209
+ results: list[str] = []
210
+
211
+ def _module_to_path(module: str) -> str | None:
212
+ """Convert a dotted module name to a relative file path if project-local."""
213
+ parts = module.split(".")
214
+ if not parts or parts[0] not in project_packages:
215
+ return None
216
+ candidates = [
217
+ project_root_path / Path(*parts) / "__init__.py",
218
+ project_root_path / Path(*parts[:-1]) / f"{parts[-1]}.py",
219
+ project_root_path / Path(*parts).with_suffix(".py"),
220
+ ]
221
+ for candidate in candidates:
222
+ if candidate.exists():
223
+ return str(candidate.relative_to(project_root_path))
224
+ direct = project_root_path / Path(*parts[:-1]) / f"{parts[-1]}.py"
225
+ rel = str(direct.relative_to(project_root_path))
226
+ if (project_root_path / Path(*parts[:-1])).exists():
227
+ return rel
228
+ return None
229
+
230
+ for node in ast.walk(tree):
231
+ if isinstance(node, ast.Import):
232
+ for alias in node.names:
233
+ path = _module_to_path(alias.name)
234
+ if path and path not in results:
235
+ results.append(path)
236
+ elif isinstance(node, ast.ImportFrom):
237
+ if node.level and node.level > 0:
238
+ file_rel = os.path.relpath(file_path, project_root)
239
+ file_parts = Path(file_rel).parts
240
+ base_parts = list(file_parts[:-node.level]) if node.level < len(file_parts) else []
241
+ if node.module:
242
+ module_parts = base_parts + str(node.module).split(".")
243
+ else:
244
+ module_parts = base_parts
245
+ abs_module = ".".join(module_parts)
246
+ elif node.module:
247
+ abs_module = str(node.module)
248
+ else:
249
+ continue
250
+ path = _module_to_path(abs_module)
251
+ if path and path not in results:
252
+ results.append(path)
253
+
254
+ return results
255
+
256
+
257
def chunk_file(file_path: str, project_root: str) -> list[CodeChunk]:
    """
    Split one source file into code chunks.

    The file extension (case-insensitive) selects the implementation:
    extensions known to the tree-sitter parser use _chunk_file_treesitter,
    all others are chunked as Python by _chunk_file_python.
    """
    suffix = Path(file_path).suffix.lower()
    chunker = (
        _chunk_file_treesitter
        if suffix in _TS_SUPPORTED_EXTENSIONS
        else _chunk_file_python
    )
    return chunker(file_path, project_root)
270
+
271
+
272
def _chunk_file_treesitter(file_path: str, project_root: str) -> list[CodeChunk]:
    """Chunk a non-Python file from the symbols reported by tree-sitter.

    Emits an optional "module" chunk holding the module docstring, then one
    chunk per symbol whose line span (end - start) is at least 3. Container
    kinds (class, struct, impl, interface, trait, enum) keep only their
    leading lines as source text, while start/end lines still describe the
    whole symbol.

    Returns an empty list when parsing raises FileNotFoundError/ValueError
    or the file cannot be read as UTF-8.
    """
    rel_path = os.path.relpath(file_path, project_root)
    layer = _infer_layer(rel_path)

    try:
        parsed = ts_parse_file(file_path)
    except (FileNotFoundError, ValueError):
        return []

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            source_lines = f.read().splitlines(keepends=True)
    except (OSError, UnicodeDecodeError):
        return []

    container_kinds = ("class", "struct", "impl", "interface", "trait", "enum")
    chunks: list[CodeChunk] = []

    module_doc = parsed.module_docstring
    if module_doc:
        # The docstring text doubles as the chunk's source; it is always
        # reported at line 1.
        chunks.append(CodeChunk(
            file_path=rel_path,
            chunk_type="module",
            name=Path(file_path).stem,
            source_text=module_doc,
            start_line=1,
            end_line=1,
            docstring=module_doc,
            layer=layer,
        ))

    for sym in parsed.symbols:
        first, last = sym.start_line, sym.end_line
        if last - first < 3:
            # Too short to be worth indexing.
            continue

        kind = sym.kind
        # Containers are cut off at start_line + 15 (inclusive) so that only
        # the head of the definition is captured.
        text_last = min(first + 15, last) if kind in container_kinds else last

        chunks.append(CodeChunk(
            file_path=rel_path,
            chunk_type=kind,
            name=sym.name,
            source_text=_extract_source_lines(source_lines, first, text_last),
            start_line=first,
            end_line=last,
            docstring=sym.docstring or "",
            layer=layer,
        ))

    return chunks
328
+
329
+
330
def _chunk_file_python(file_path: str, project_root: str) -> list[CodeChunk]:
    """Parse a Python file and return all meaningful code chunks.

    Produces:
      * one "module" chunk when the module has a docstring (the docstring
        text is used as the chunk source and it is reported at line 1);
      * one "function" chunk per (async) function found anywhere in the
        tree -- methods and nested functions included -- except dunder
        names and functions whose line span (end - start) is below 3;
      * one "class" chunk per class, with source text cut off at
        ``lineno + 15`` (inclusive).

    Returns an empty list when the file cannot be read, decoded or parsed.
    """
    rel_path = os.path.relpath(file_path, project_root)
    layer = _infer_layer(rel_path)

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            source = f.read()
    except (OSError, UnicodeDecodeError):
        return []
    source_lines = source.splitlines(keepends=True)

    try:
        tree = ast.parse(source, filename=file_path)
    except (SyntaxError, ValueError):
        # ValueError: source containing null bytes (raised instead of
        # SyntaxError on some Python versions).
        return []

    chunks: list[CodeChunk] = []

    # Module-level docstring chunk
    module_doc = _get_docstring(tree)
    if module_doc:
        chunks.append(CodeChunk(
            file_path=rel_path,
            chunk_type="module",
            name=Path(file_path).stem,
            source_text=module_doc,
            start_line=1,
            end_line=1,
            docstring=module_doc,
            layer=layer,
        ))

    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            if node.name.startswith("__") and node.name.endswith("__"):
                continue
            end_line = getattr(node, "end_lineno", node.lineno)
            # Filter before slicing so skipped functions cost nothing.
            if end_line - node.lineno < 3:
                continue
            chunks.append(CodeChunk(
                file_path=rel_path,
                chunk_type="function",
                name=node.name,
                source_text=_extract_source_lines(source_lines, node.lineno, end_line),
                start_line=node.lineno,
                end_line=end_line,
                docstring=_get_docstring(node),
                layer=layer,
            ))

        elif isinstance(node, ast.ClassDef):
            end_line = getattr(node, "end_lineno", node.lineno)
            # Only the head of the class is captured as text; methods get
            # their own function chunks from the walk above.
            sig_end = min(node.lineno + 15, end_line)
            chunks.append(CodeChunk(
                file_path=rel_path,
                chunk_type="class",
                name=node.name,
                source_text=_extract_source_lines(source_lines, node.lineno, sig_end),
                start_line=node.lineno,
                end_line=end_line,
                docstring=_get_docstring(node),
                layer=layer,
            ))

    return chunks
398
+
399
+
400
def iter_source_files(project_root: str) -> Iterator[str]:
    """Yield absolute paths of source files under the configured directories.

    Each watched directory that exists below *project_root* is walked;
    directories named in SKIP_DIRS are pruned, files named in SKIP_FILES are
    ignored, and only files ending in a configured extension are yielded.
    A file is yielded at most once, even if several watched directories
    reach it.
    """
    target_dirs, file_extensions = _get_project_config()
    already_yielded = set()

    for target_dir in target_dirs:
        base = os.path.join(project_root, target_dir)
        if not os.path.exists(base):
            continue

        for dirpath, dirnames, filenames in os.walk(base):
            # In-place assignment so os.walk does not descend into them.
            dirnames[:] = [name for name in dirnames if name not in SKIP_DIRS]

            for filename in filenames:
                if not filename.endswith(file_extensions) or filename in SKIP_FILES:
                    continue
                absolute = os.path.abspath(os.path.join(dirpath, filename))
                if absolute in already_yielded:
                    continue
                already_yielded.add(absolute)
                yield absolute
421
+
422
+
423
def chunk_project(project_root: str) -> list[CodeChunk]:
    """Chunk every source file of the project into one flat list.

    Files come from iter_source_files(); each is passed to chunk_file() and
    the resulting chunks are concatenated in that order.
    """
    return [
        chunk
        for source_file in iter_source_files(project_root)
        for chunk in chunk_file(source_file, project_root)
    ]
indexer/global_db.py ADDED
@@ -0,0 +1,197 @@
1
+ """
2
+ global_db.py — Global SQLite database for cross-project intelligence.
3
+
4
+ Stores aggregated preferences, learned rules, and project registry in
5
+ ~/.codevira/global.db. Enables new projects to inherit intelligence from
6
+ all past projects on day 1.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import sqlite3
13
+ from pathlib import Path
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class GlobalDB:
    """Lightweight SQLite wrapper for the global cross-project database.

    Holds three tables: ``projects`` (registry keyed by path),
    ``global_preferences`` (unique per category + signal, with an aggregated
    frequency) and ``global_rules`` (unique per rule text, with a blended
    confidence). ``source_projects`` columns store a JSON list of strings.

    Instances can be used as context managers; the connection is closed on
    exit.
    """

    def __init__(self, db_path: str | Path):
        """Open (creating if needed) the database at *db_path*.

        The parent directory is created when missing, the connection uses
        WAL journaling and ``sqlite3.Row`` rows, and the schema is created
        or upgraded before the constructor returns.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(str(self.db_path), timeout=5)
        try:
            self.conn.row_factory = sqlite3.Row
            self.conn.execute("PRAGMA journal_mode=WAL")
            self.conn.execute("PRAGMA foreign_keys=ON")
            self._init_schema()
        except Exception:
            # Do not leak the connection when setup fails.
            self.conn.close()
            raise

    def __enter__(self) -> "GlobalDB":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _init_schema(self) -> None:
        """Create missing tables and upgrade older databases in place."""
        self.conn.executescript("""
            CREATE TABLE IF NOT EXISTS projects (
                path TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                language TEXT,
                git_remote TEXT,
                last_synced_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );

            CREATE TABLE IF NOT EXISTS global_preferences (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                category TEXT NOT NULL,
                signal TEXT NOT NULL,
                example TEXT,
                frequency INTEGER DEFAULT 1,
                source_projects TEXT DEFAULT '[]',
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                UNIQUE(category, signal)
            );

            CREATE TABLE IF NOT EXISTS global_rules (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                rule_text TEXT NOT NULL UNIQUE,
                confidence REAL DEFAULT 0.5,
                source_projects TEXT DEFAULT '[]',
                category TEXT,
                language TEXT,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
        """)
        self.conn.commit()
        self._ensure_git_remote_column()

    def _ensure_git_remote_column(self) -> None:
        """Add ``projects.git_remote`` to databases created before v1.6.

        Runs once at open time so that both register_project() and
        find_project_by_remote() can rely on the column. A failure is logged
        rather than raised, keeping the migration best-effort.
        """
        try:
            cols = {row[1] for row in self.conn.execute("PRAGMA table_info(projects)")}
            if "git_remote" not in cols:
                self.conn.execute("ALTER TABLE projects ADD COLUMN git_remote TEXT")
                self.conn.commit()
        except sqlite3.Error as exc:
            logging.getLogger(__name__).warning(
                "Could not add git_remote column to %s: %s", self.db_path, exc
            )

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    @staticmethod
    def _merge_source_project(raw: str | None, source_project: str) -> str:
        """Return the JSON list *raw* with *source_project* appended if absent.

        A missing, malformed or non-list value is treated as an empty list so
        one corrupt row cannot break later upserts.
        """
        try:
            projects = json.loads(raw or "[]")
        except (TypeError, ValueError):
            projects = []
        if not isinstance(projects, list):
            projects = []
        if source_project not in projects:
            projects.append(source_project)
        return json.dumps(projects)

    # ------------------------------------------------------------------
    # Project registry
    # ------------------------------------------------------------------

    def register_project(self, path: str, name: str, language: str,
                         git_remote: str | None = None) -> None:
        """Insert or replace the registry row for *path*.

        ``last_synced_at`` is set to the current timestamp on every call.
        """
        self.conn.execute(
            "INSERT OR REPLACE INTO projects (path, name, language, git_remote, last_synced_at) "
            "VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)",
            (path, name, language, git_remote),
        )
        self.conn.commit()

    def find_project_by_remote(self, remote_url: str) -> str | None:
        """Return the registered path for a project matching the given git remote URL, or None.

        Database errors are also reported as None.
        """
        try:
            row = self.conn.execute(
                "SELECT path FROM projects WHERE git_remote = ? LIMIT 1",
                (remote_url,),
            ).fetchone()
        except sqlite3.Error:
            return None
        return row["path"] if row else None

    def get_project_count(self) -> int:
        """Return the number of registered projects."""
        row = self.conn.execute("SELECT COUNT(*) FROM projects").fetchone()
        return row[0] if row else 0

    # ------------------------------------------------------------------
    # Preferences
    # ------------------------------------------------------------------

    def upsert_preference(self, category: str, signal: str, example: str | None,
                          source_project: str, frequency: int = 1) -> None:
        """Insert or update a global preference. Aggregates frequency across projects.

        For an existing (category, signal) row, *frequency* is added to the
        stored value, *source_project* is added to the project list, and
        *example* replaces the stored example only when it is not None.
        """
        existing = self.conn.execute(
            "SELECT id, frequency, source_projects FROM global_preferences WHERE category = ? AND signal = ?",
            (category, signal),
        ).fetchone()

        if existing:
            self.conn.execute(
                "UPDATE global_preferences SET frequency = ?, source_projects = ?, example = COALESCE(?, example), "
                "updated_at = CURRENT_TIMESTAMP WHERE id = ?",
                (
                    existing["frequency"] + frequency,
                    self._merge_source_project(existing["source_projects"], source_project),
                    example,
                    existing["id"],
                ),
            )
        else:
            self.conn.execute(
                "INSERT INTO global_preferences (category, signal, example, frequency, source_projects) "
                "VALUES (?, ?, ?, ?, ?)",
                (category, signal, example, frequency, json.dumps([source_project])),
            )
        self.conn.commit()

    def get_preferences(self, min_frequency: int = 3, language: str | None = None) -> list[dict]:
        """Get global preferences above the frequency threshold.

        Rows are ordered by descending frequency. *language* is accepted but
        currently has no effect: the preferences table has no language
        column to filter on.
        """
        rows = self.conn.execute(
            "SELECT category, signal, example, frequency, source_projects FROM global_preferences "
            "WHERE frequency >= ? ORDER BY frequency DESC",
            (min_frequency,),
        ).fetchall()
        return [dict(r) for r in rows]

    # ------------------------------------------------------------------
    # Rules
    # ------------------------------------------------------------------

    def upsert_rule(self, rule_text: str, confidence: float, source_project: str,
                    category: str | None = None, language: str | None = None) -> None:
        """Insert or update a global rule. Merges confidence via weighted average.

        For an existing rule the new confidence is
        ``0.6 * stored + 0.4 * confidence`` and *source_project* is added to
        the project list; the stored category and language are left
        untouched. *category* and *language* are only used on insert.
        """
        existing = self.conn.execute(
            "SELECT id, confidence, source_projects FROM global_rules WHERE rule_text = ?",
            (rule_text,),
        ).fetchone()

        if existing:
            new_conf = existing["confidence"] * 0.6 + confidence * 0.4
            self.conn.execute(
                "UPDATE global_rules SET confidence = ?, source_projects = ?, "
                "updated_at = CURRENT_TIMESTAMP WHERE id = ?",
                (
                    new_conf,
                    self._merge_source_project(existing["source_projects"], source_project),
                    existing["id"],
                ),
            )
        else:
            self.conn.execute(
                "INSERT INTO global_rules (rule_text, confidence, source_projects, category, language) "
                "VALUES (?, ?, ?, ?, ?)",
                (rule_text, confidence, json.dumps([source_project]), category, language),
            )
        self.conn.commit()

    def get_rules(self, min_confidence: float = 0.6, language: str | None = None) -> list[dict]:
        """Get global rules above confidence threshold, optionally filtered by language.

        With *language* given, rules for that language and rules without any
        language are returned. Rows are ordered by descending confidence.
        """
        sql = (
            "SELECT rule_text, confidence, source_projects, category, language FROM global_rules "
            "WHERE confidence >= ?"
        )
        params: list = [min_confidence]
        if language:
            sql += " AND (language = ? OR language IS NULL)"
            params.append(language)
        sql += " ORDER BY confidence DESC"
        rows = self.conn.execute(sql, params).fetchall()
        return [dict(r) for r in rows]

    # ------------------------------------------------------------------
    # Stats
    # ------------------------------------------------------------------

    def get_stats(self) -> dict:
        """Return summary stats for the global database."""
        return {
            "project_count": self.get_project_count(),
            "total_preferences": self.conn.execute("SELECT COUNT(*) FROM global_preferences").fetchone()[0],
            "total_rules": self.conn.execute("SELECT COUNT(*) FROM global_rules").fetchone()[0],
        }