luckyd-code 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. luckyd_code/__init__.py +54 -0
  2. luckyd_code/__main__.py +5 -0
  3. luckyd_code/_agent_loop.py +551 -0
  4. luckyd_code/_data_dir.py +73 -0
  5. luckyd_code/agent.py +38 -0
  6. luckyd_code/analytics/__init__.py +18 -0
  7. luckyd_code/analytics/reporter.py +195 -0
  8. luckyd_code/analytics/scanner.py +443 -0
  9. luckyd_code/analytics/smells.py +316 -0
  10. luckyd_code/analytics/trends.py +303 -0
  11. luckyd_code/api.py +473 -0
  12. luckyd_code/audit_daemon.py +845 -0
  13. luckyd_code/autonomous_fixer.py +473 -0
  14. luckyd_code/background.py +159 -0
  15. luckyd_code/backup.py +237 -0
  16. luckyd_code/brain/__init__.py +84 -0
  17. luckyd_code/brain/assembler.py +100 -0
  18. luckyd_code/brain/chunker.py +345 -0
  19. luckyd_code/brain/constants.py +73 -0
  20. luckyd_code/brain/embedder.py +163 -0
  21. luckyd_code/brain/graph.py +311 -0
  22. luckyd_code/brain/indexer.py +316 -0
  23. luckyd_code/brain/parser.py +140 -0
  24. luckyd_code/brain/retriever.py +234 -0
  25. luckyd_code/cli.py +894 -0
  26. luckyd_code/cli_commands/__init__.py +1 -0
  27. luckyd_code/cli_commands/audit.py +120 -0
  28. luckyd_code/cli_commands/background.py +83 -0
  29. luckyd_code/cli_commands/brain.py +87 -0
  30. luckyd_code/cli_commands/config.py +75 -0
  31. luckyd_code/cli_commands/dispatcher.py +695 -0
  32. luckyd_code/cli_commands/sessions.py +41 -0
  33. luckyd_code/cli_entry.py +147 -0
  34. luckyd_code/cli_utils.py +112 -0
  35. luckyd_code/config.py +205 -0
  36. luckyd_code/context.py +214 -0
  37. luckyd_code/cost_tracker.py +209 -0
  38. luckyd_code/error_reporter.py +508 -0
  39. luckyd_code/exceptions.py +39 -0
  40. luckyd_code/export.py +126 -0
  41. luckyd_code/feedback_analyzer.py +290 -0
  42. luckyd_code/file_watcher.py +258 -0
  43. luckyd_code/git/__init__.py +11 -0
  44. luckyd_code/git/auto_commit.py +157 -0
  45. luckyd_code/git/tools.py +85 -0
  46. luckyd_code/hooks.py +236 -0
  47. luckyd_code/indexer.py +280 -0
  48. luckyd_code/init.py +39 -0
  49. luckyd_code/keybindings.py +77 -0
  50. luckyd_code/log.py +55 -0
  51. luckyd_code/mcp/__init__.py +6 -0
  52. luckyd_code/mcp/client.py +184 -0
  53. luckyd_code/memory/__init__.py +19 -0
  54. luckyd_code/memory/manager.py +339 -0
  55. luckyd_code/metrics/__init__.py +5 -0
  56. luckyd_code/model_registry.py +131 -0
  57. luckyd_code/orchestrator.py +204 -0
  58. luckyd_code/permissions/__init__.py +1 -0
  59. luckyd_code/permissions/manager.py +103 -0
  60. luckyd_code/planner.py +361 -0
  61. luckyd_code/plugins.py +91 -0
  62. luckyd_code/py.typed +0 -0
  63. luckyd_code/retry.py +57 -0
  64. luckyd_code/router.py +417 -0
  65. luckyd_code/sandbox.py +156 -0
  66. luckyd_code/self_critique.py +2 -0
  67. luckyd_code/self_improve.py +274 -0
  68. luckyd_code/sessions.py +114 -0
  69. luckyd_code/settings.py +72 -0
  70. luckyd_code/skills/__init__.py +8 -0
  71. luckyd_code/skills/review.py +22 -0
  72. luckyd_code/skills/security.py +17 -0
  73. luckyd_code/tasks/__init__.py +1 -0
  74. luckyd_code/tasks/manager.py +102 -0
  75. luckyd_code/templates/icon-192.png +0 -0
  76. luckyd_code/templates/icon-512.png +0 -0
  77. luckyd_code/templates/index.html +1965 -0
  78. luckyd_code/templates/manifest.json +14 -0
  79. luckyd_code/templates/src/app.js +694 -0
  80. luckyd_code/templates/src/body.html +767 -0
  81. luckyd_code/templates/src/cdn.txt +2 -0
  82. luckyd_code/templates/src/style.css +474 -0
  83. luckyd_code/templates/sw.js +31 -0
  84. luckyd_code/templates/test.html +6 -0
  85. luckyd_code/themes.py +48 -0
  86. luckyd_code/tools/__init__.py +97 -0
  87. luckyd_code/tools/agent_tools.py +65 -0
  88. luckyd_code/tools/bash.py +360 -0
  89. luckyd_code/tools/brain_tools.py +137 -0
  90. luckyd_code/tools/browser.py +369 -0
  91. luckyd_code/tools/datetime_tool.py +34 -0
  92. luckyd_code/tools/dockerfile_gen.py +212 -0
  93. luckyd_code/tools/file_ops.py +381 -0
  94. luckyd_code/tools/game_gen.py +360 -0
  95. luckyd_code/tools/git_tools.py +130 -0
  96. luckyd_code/tools/git_worktree.py +63 -0
  97. luckyd_code/tools/path_validate.py +64 -0
  98. luckyd_code/tools/project_gen.py +187 -0
  99. luckyd_code/tools/readme_gen.py +227 -0
  100. luckyd_code/tools/registry.py +157 -0
  101. luckyd_code/tools/shell_detect.py +109 -0
  102. luckyd_code/tools/web.py +89 -0
  103. luckyd_code/tools/youtube.py +187 -0
  104. luckyd_code/tools_bridge.py +144 -0
  105. luckyd_code/undo.py +126 -0
  106. luckyd_code/update.py +60 -0
  107. luckyd_code/verify.py +360 -0
  108. luckyd_code/web_app.py +176 -0
  109. luckyd_code/web_routes/__init__.py +23 -0
  110. luckyd_code/web_routes/background.py +73 -0
  111. luckyd_code/web_routes/brain.py +109 -0
  112. luckyd_code/web_routes/cost.py +12 -0
  113. luckyd_code/web_routes/files.py +133 -0
  114. luckyd_code/web_routes/memories.py +94 -0
  115. luckyd_code/web_routes/misc.py +67 -0
  116. luckyd_code/web_routes/project.py +48 -0
  117. luckyd_code/web_routes/review.py +20 -0
  118. luckyd_code/web_routes/sessions.py +44 -0
  119. luckyd_code/web_routes/settings.py +43 -0
  120. luckyd_code/web_routes/static.py +70 -0
  121. luckyd_code/web_routes/update.py +19 -0
  122. luckyd_code/web_routes/ws.py +237 -0
  123. luckyd_code-1.2.2.dist-info/METADATA +297 -0
  124. luckyd_code-1.2.2.dist-info/RECORD +127 -0
  125. luckyd_code-1.2.2.dist-info/WHEEL +4 -0
  126. luckyd_code-1.2.2.dist-info/entry_points.txt +3 -0
  127. luckyd_code-1.2.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,311 @@
1
+ """Knowledge graph — stores and queries codebase structure across sessions."""
2
+
3
+ import json
4
+ import time
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from ..log import get_logger
9
+ from .constants import BRAIN_DIR
10
+
11
+
12
+ GRAPH_FILE = BRAIN_DIR / "graph.json"
13
+
14
+ Node = dict[str, Any]
15
+ Edge = dict[str, str]
16
+
17
+
18
class KnowledgeGraph:
    """Persistent knowledge graph of codebase structure.

    Nodes: modules, classes, functions
    Edges: imports, contains, calls, inherits
    """

    def __init__(self) -> None:
        # Keyed by node id, e.g. "class:<rel_path>:<ClassName>".
        self.nodes: dict[str, Node] = {}
        self.edges: list[Edge] = []
        self.stats: dict[str, Any] = {
            "node_count": 0,
            "edge_count": 0,
            "last_built": 0,
            "files_parsed": 0,
            "errors": 0,
        }

    def build(self, project_root: str, parsed_files: list[dict[str, Any]]) -> None:
        """Rebuild the whole graph from parser output.

        Args:
            project_root: Accepted for API compatibility; not currently used
                (node ids are keyed on the parser-provided module paths).
            parsed_files: One dict per source file as produced by the brain
                parser (keys: module, size, errors, imports, classes, functions).
        """
        self.nodes = {}
        self.edges = []
        self.stats["last_built"] = time.time()
        self.stats["files_parsed"] = len(parsed_files)
        self.stats["errors"] = 0

        for pf in parsed_files:
            # Files that failed to parse contribute only to the error count.
            if pf["errors"]:
                self.stats["errors"] += len(pf["errors"])
                continue

            rel_path = pf["module"]
            module_id = f"module:{rel_path}"

            self.nodes[module_id] = {
                "type": "module",
                "name": Path(rel_path).name,
                "file": rel_path,
                "line": 1,
                "doc": "",
                "size": pf["size"],
            }

            for imp in pf["imports"]:
                import_id = f"import:{imp['module']}:{imp['name']}"
                # The same import may appear in many files; create the node once.
                if import_id not in self.nodes:
                    self.nodes[import_id] = {
                        "type": "import",
                        "name": imp["name"],
                        "module": imp["module"],
                        "alias": imp.get("alias"),
                        "file": rel_path,
                        "line": 0,
                        "doc": "",
                    }
                self.edges.append({"from": module_id, "to": import_id, "type": "imports"})

            for cls in pf["classes"]:
                cls_id = f"class:{rel_path}:{cls['name']}"
                self.nodes[cls_id] = {
                    "type": "class",
                    "name": cls["name"],
                    "file": rel_path,
                    "line": cls["line"],
                    "end_line": cls["end_line"],
                    "bases": cls["base_names"],
                    "decorators": cls["decorators"],
                    # Guard against a None docstring from the parser.
                    "doc": (cls["docstring"] or "")[:200],
                }
                self.edges.append({"from": module_id, "to": cls_id, "type": "contains"})

                for base in cls["base_names"]:
                    if base and base != "object":
                        # The base class's file is unknown here; "??" is a
                        # placeholder resolved by name in find_dependents().
                        self.edges.append({
                            "from": cls_id, "to": f"class:??:{base}", "type": "inherits"
                        })

                for method in cls["methods"]:
                    method_id = f"method:{rel_path}:{cls['name']}.{method['name']}"
                    self.nodes[method_id] = {
                        "type": "method",
                        "name": method["name"],
                        "class": cls["name"],
                        "file": rel_path,
                        "line": method["line"],
                        "end_line": method["end_line"],
                        "decorators": method["decorators"],
                        "doc": (method["docstring"] or "")[:200],
                    }
                    self.edges.append({"from": cls_id, "to": method_id, "type": "contains"})
                    for call in method["calls"]:
                        self.edges.append({"from": method_id, "to": f"func:??:{call}", "type": "calls"})

            for func in pf["functions"]:
                func_id = f"func:{rel_path}:{func['name']}"
                self.nodes[func_id] = {
                    "type": "function",
                    "name": func["name"],
                    "file": rel_path,
                    "line": func["line"],
                    "end_line": func["end_line"],
                    "decorators": func["decorators"],
                    "doc": (func["docstring"] or "")[:200],
                }
                self.edges.append({"from": module_id, "to": func_id, "type": "contains"})
                for call in func["calls"]:
                    self.edges.append({"from": func_id, "to": f"func:??:{call}", "type": "calls"})

        self.stats["node_count"] = len(self.nodes)
        self.stats["edge_count"] = len(self.edges)

    # --- Persistence ---

    def save(self) -> None:
        """Write nodes, edges and stats to GRAPH_FILE as pretty-printed JSON."""
        BRAIN_DIR.mkdir(parents=True, exist_ok=True)
        data = {"nodes": self.nodes, "edges": self.edges, "stats": self.stats}
        GRAPH_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")

    def load(self) -> bool:
        """Load graph state from GRAPH_FILE.

        Returns:
            True if the file existed and parsed; False otherwise.
        """
        if GRAPH_FILE.exists():
            try:
                data: Any = json.loads(GRAPH_FILE.read_text(encoding="utf-8"))
                self.nodes = data.get("nodes", {})
                # self.edges must be a list of edge dicts. A corrupt or
                # legacy file could store anything; fall back to an empty
                # list instead of carrying a wrong-typed value around.
                edges = data.get("edges", [])
                self.edges = edges if isinstance(edges, list) else []
                # Merge on top of the defaults so missing keys keep sane values.
                self.stats = {**self.stats, **data.get("stats", {})}
                return True
            except (json.JSONDecodeError, OSError):
                get_logger().warning("Could not load knowledge graph from %s", GRAPH_FILE, exc_info=True)
        return False

    def search(self, query: str, max_results: int = 15) -> "list[Node]":
        """Rank nodes by a simple weighted substring match against *query*.

        Name matches score highest, then file path, docstring, and the
        module/class fields. Only nodes with a positive score are returned.
        """
        q = query.lower()
        scored: list[tuple[int, str, Node]] = []

        for nid, node in self.nodes.items():
            score = 0
            if q in node.get("name", "").lower():
                score += 10
            if q in node.get("file", "").lower():
                score += 5
            if q in node.get("doc", "").lower():
                score += 3
            if q in node.get("module", "").lower():
                score += 2
            if q in node.get("class", "").lower():
                score += 2
            if score > 0:
                scored.append((score, nid, node))

        scored.sort(key=lambda x: -x[0])
        seen: set[str] = set()
        top: list[Node] = []
        for _score, nid, node in scored[:max_results]:
            if nid not in seen:
                top.append(node)
                seen.add(nid)
        return top

    def get_related(self, node_id: str, max_depth: int = 1) -> "list[Node]":
        """Return nodes reachable from *node_id* within *max_depth* edge hops.

        Edges are treated as undirected for this traversal. Ids that have no
        node (e.g. "func:??:<name>" placeholders) come back as stub dicts.
        """
        related: set[str] = set()
        current = {node_id}

        for _ in range(max_depth):
            next_set: set[str] = set()
            for edge in self.edges:
                if isinstance(edge, dict):
                    if edge["from"] in current:
                        next_set.add(edge["to"])
                    if edge["to"] in current:
                        next_set.add(edge["from"])
            current = next_set
            related.update(current)

        return [
            self.nodes.get(nid, {"name": nid, "type": "unknown", "file": ""})
            for nid in related if nid != node_id
        ]

    def get_by_file(self, filepath: str) -> "list[Node]":
        """Return all nodes whose file path ends with *filepath* (suffix match)."""
        return [
            node for node in self.nodes.values()
            if node.get("file", "").endswith(filepath)
        ]

    def get_by_type(self, node_type: str) -> "list[Node]":
        """Return all nodes of the given type (module/class/method/function/import)."""
        return [
            node for node in self.nodes.values()
            if node.get("type") == node_type
        ]

    def find_dependents(self, symbol_name: str, max_results: int = 15) -> list[dict]:
        """Find all nodes that depend on a symbol by traversing incoming edges.

        Matches both fully-qualified node ids and the "<prefix>:??:<name>"
        placeholder ids that call/inherit edges use when the target's file
        could not be resolved at build time.
        """
        matches = self.search(symbol_name, max_results=5)
        if not matches:
            return []

        # Node-id prefixes do not always equal the node "type":
        # function nodes are stored under the "func:" prefix.
        prefix_for_type = {"function": "func"}

        dependents: list[dict] = []
        seen: set[str] = set()

        for match in matches:
            name = match["name"]
            prefix = prefix_for_type.get(match["type"], match["type"])
            target_ids = {
                f"{prefix}:{match.get('file', '')}:{name}",
                # Unresolved placeholder targets used by call/inherit edges.
                f"func:??:{name}",
                f"method:??:{name}",
                f"class:??:{name}",
            }
            # Method ids carry the class qualifier: "method:<file>:<Cls>.<name>".
            if match["type"] == "method" and match.get("class"):
                target_ids.add(f"method:{match.get('file', '')}:{match['class']}.{name}")

            for edge in self.edges:
                if isinstance(edge, dict) and edge.get("to") in target_ids:
                    src = self.nodes.get(edge["from"])
                    if src and edge["from"] not in seen:
                        seen.add(edge["from"])
                        dependents.append({
                            "name": f"{src.get('type', '?')}:{src.get('name', '?')}",
                            "file": src.get("file", ""),
                            "relation": edge.get("type", ""),
                            "line": src.get("line", 0),
                        })

        dependents.sort(key=lambda x: (x["file"], x["line"]))
        return dependents[:max_results]

    def summarize(self, max_modules: int = 20) -> str:
        """Return a compact <knowledge-graph> text block for prompt injection.

        Lists up to *max_modules* files with their class and function names.
        """
        lines = ["<knowledge-graph>"]
        lines.append(f"Graph: {self.stats.get('node_count', 0)} symbols, "
                     f"{self.stats.get('edge_count', 0)} relationships, "
                     f"{self.stats.get('files_parsed', 0)} files")

        by_file: dict[str, list[Node]] = {}
        for node in self.nodes.values():
            f = node.get("file", "")
            if f:
                by_file.setdefault(f, []).append(node)

        count = 0
        for filepath, nodes in sorted(by_file.items()):
            if count >= max_modules:
                break
            count += 1
            classes = [n for n in nodes if n.get("type") == "class"]
            functions = [n for n in nodes if n.get("type") == "function"]

            short_path = Path(filepath).name
            parts = [short_path]
            if classes:
                parts.append(f"classes={{{','.join(c['name'] for c in classes)}}}")
            if functions:
                parts.append(f"functions={{{','.join(f['name'] for f in functions)}}}")
            lines.append(f"  {' | '.join(parts)}")

        lines.append("</knowledge-graph>")
        return "\n".join(lines)

    def stats_text(self) -> str:
        """Return human-readable statistics (counts by type, by file, last build)."""
        by_type: dict[str, int] = {}
        for node in self.nodes.values():
            t = node.get("type", "unknown")
            by_type[t] = by_type.get(t, 0) + 1

        by_file: dict[str, int] = {}
        for node in self.nodes.values():
            f = node.get("file", "")
            if f:
                by_file[f] = by_file.get(f, 0) + 1

        lines = [
            f"Nodes: {self.stats.get('node_count', 0)}",
            f"Edges: {self.stats.get('edge_count', 0)}",
            f"Files parsed: {self.stats.get('files_parsed', 0)}",
            f"Parse errors: {self.stats.get('errors', 0)}",
        ]
        if by_type:
            lines.append("\nBy type:")
            for t, c in sorted(by_type.items(), key=lambda x: -x[1]):
                lines.append(f"  {t}: {c}")
        if by_file:
            lines.append("\nBy file:")
            # Only the 20 busiest files, to keep the report short.
            for f, c in sorted(by_file.items(), key=lambda x: -x[1])[:20]:
                lines.append(f"  {f}: {c} symbols")
        if self.stats.get("last_built"):
            last = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self.stats["last_built"]))
            lines.append(f"\nLast built: {last}")

        return "\n".join(lines)
@@ -0,0 +1,316 @@
1
+ """Vector indexer — builds and queries FAISS vector index for code chunks."""
2
+
3
+ import json
4
+ import os
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from ..log import get_logger
10
+ from .constants import BRAIN_DIR, LANGUAGE_MAP, SKIP_DIRS
11
+
12
+ INDEX_FILE = BRAIN_DIR / "index.faiss"
13
+ CHUNKS_FILE = BRAIN_DIR / "chunks.json"
14
+ MTIMES_FILE = BRAIN_DIR / "mtimes.json"
15
+ STATS_FILE = BRAIN_DIR / "stats.json"
16
+
17
+ CHUNK_SIZE = 384 # all-MiniLM-L6-v2 dimension
18
+
19
+
20
class VectorIndexer:
    """Manages the FAISS vector index with mtime tracking."""

    def __init__(self) -> None:
        self.index: Any = None  # FAISS index object (None until built/loaded)
        self.chunks: list[dict[str, Any]] = []
        # path -> (mtime, size) for each indexed file, used for change detection.
        self.file_mtimes: dict[str, tuple[float, int]] = {}
        self.stats: dict[str, Any] = {
            "chunks": 0,
            "files": 0,
            "languages": {},
            "last_indexed": 0,
            "dimension": 0,
            "index_size_bytes": 0,
        }
        self._faiss_available = False
        # Lazily bound module handles; set by _check_deps() on success so the
        # attributes always exist even when faiss is not installed.
        self._faiss: Any = None
        self._np: Any = None

    def _check_deps(self) -> bool:
        """Import faiss/numpy on first use; return True if both are available."""
        if self._faiss_available:
            return True
        try:
            import faiss
            import numpy as np
        except ImportError:
            get_logger().info(
                "faiss-cpu not available. Vector search disabled. "
                "Install with: pip install faiss-cpu"
            )
            return False
        self._faiss = faiss
        self._np = np
        self._faiss_available = True
        return True

    def _record_degraded(self, chunks: list[dict[str, Any]]) -> dict[str, Any]:
        """Record chunk/file counts only (no vectors) when embedding is unavailable."""
        self.stats["chunks"] = len(chunks)
        self.stats["files"] = len({c["file_path"] for c in chunks})
        self.stats["last_indexed"] = time.time()
        return self.stats

    def build(self, chunks: list[dict[str, Any]]) -> dict[str, Any]:
        """Build the FAISS index from chunks.

        Args:
            chunks: List of chunk dicts from chunker. The caller's list is
                not modified (it is copied before sorting).

        Returns:
            Stats dict.
        """
        from .embedder import get_embedder

        # Without faiss or an embedder we can still report counts.
        if not self._check_deps():
            return self._record_degraded(chunks)

        embedder = get_embedder()
        if not embedder.available:
            return self._record_degraded(chunks)

        if not chunks:
            self.chunks = []
            self.index = None
            self.stats["chunks"] = 0
            self.stats["files"] = 0
            self.stats["languages"] = {}
            self.stats["last_indexed"] = time.time()
            return self.stats

        # Track languages
        languages: dict[str, int] = {}
        for c in chunks:
            lang = c.get("language", "unknown")
            languages[lang] = languages.get(lang, 0) + 1

        # Stable ordering by file_path then start_line. sorted() returns a new
        # list so the caller's chunks are not mutated as a side effect.
        chunks = sorted(chunks, key=lambda c: (c["file_path"], c.get("start_line", 0)))
        self.chunks = chunks

        # Embed all chunk contents
        texts = [c.get("content", "") for c in chunks]
        embeddings = embedder.embed(texts)

        if embeddings is None:
            return self._record_degraded(chunks)

        # Build FAISS index
        dim = len(embeddings[0])
        idx = self._faiss.IndexFlatIP(dim)  # Inner product = cosine sim for normalized vectors
        vectors = self._np.array(embeddings, dtype=self._np.float32)

        # Normalize vectors for cosine similarity
        self._faiss.normalize_L2(vectors)
        idx.add(vectors)

        self.index = idx
        self.stats = {
            "chunks": len(chunks),
            "files": len({c["file_path"] for c in chunks}),
            "languages": languages,
            "last_indexed": time.time(),
            "dimension": dim,
            "index_size_bytes": 0,
        }

        return self.stats

    def search(
        self, query: str, k: int = 10
    ) -> list[dict[str, Any]]:
        """Search the index by embedding the query.

        Args:
            query: Natural language search query.
            k: Number of results to return.

        Returns:
            List of chunk dicts with a 'score' key added.
        """
        from .embedder import get_embedder

        if not self._check_deps() or self.index is None or self.index.ntotal == 0:
            return []

        embedder = get_embedder()
        if not embedder.available:
            return []

        query_vec = embedder.embed_query(query)
        if query_vec is None:
            return []

        # Normalize query vector so inner product equals cosine similarity.
        q = self._np.array([query_vec], dtype=self._np.float32)
        self._faiss.normalize_L2(q)

        k_actual = min(k, self.index.ntotal)
        if k_actual == 0:
            return []

        scores, indices = self.index.search(q, k_actual)

        results = []
        for score, pos in zip(scores[0], indices[0]):
            # FAISS pads missing results with -1; also guard against a stale
            # index that is larger than the loaded chunk list.
            if pos < 0 or pos >= len(self.chunks):
                continue
            chunk = dict(self.chunks[pos])
            chunk["score"] = float(score)
            results.append(chunk)

        return results

    def save(self) -> bool:
        """Save the index, chunks, and mtimes to disk."""
        BRAIN_DIR.mkdir(parents=True, exist_ok=True)

        try:
            # Save FAISS index
            if self._faiss_available and self.index is not None:
                self._faiss.write_index(self.index, str(INDEX_FILE))
                self.stats["index_size_bytes"] = INDEX_FILE.stat().st_size

            # Save chunks with content
            CHUNKS_FILE.write_text(
                json.dumps(self.chunks, indent=2), encoding="utf-8"
            )

            # Save mtimes
            MTIMES_FILE.write_text(
                json.dumps(self.file_mtimes), encoding="utf-8"
            )

            # Save stats
            STATS_FILE.write_text(
                json.dumps(self.stats), encoding="utf-8"
            )

            return True
        except Exception as exc:
            # Best-effort persistence: log and report failure to the caller.
            get_logger().warning("Failed to save vector index: %s", exc)
            return False

    def load(self) -> bool:
        """Load the index and metadata from disk.

        Returns:
            True if index was loaded successfully.
        """
        if not INDEX_FILE.exists() or not CHUNKS_FILE.exists():
            return False

        try:
            self._check_deps()

            # Load chunks
            self.chunks = json.loads(CHUNKS_FILE.read_text(encoding="utf-8"))
            if not self.chunks:
                return False

            # Load FAISS index (only possible when faiss imported cleanly)
            if self._faiss_available and INDEX_FILE.exists():
                self.index = self._faiss.read_index(str(INDEX_FILE))

            # Load mtimes
            if MTIMES_FILE.exists():
                self.file_mtimes = json.loads(MTIMES_FILE.read_text(encoding="utf-8")) or {}

            # Load stats
            if STATS_FILE.exists():
                self.stats = json.loads(STATS_FILE.read_text(encoding="utf-8")) or {}

            return True

        except Exception as exc:
            get_logger().warning("Failed to load vector index: %s", exc)
            return False

    def stats_text(self) -> str:
        """Return human-readable statistics."""
        lines = [
            f"Chunks indexed: {self.stats.get('chunks', 0)}",
            f"Files: {self.stats.get('files', 0)}",
        ]

        languages = self.stats.get("languages", {})
        if languages:
            lines.append(f"Languages: {', '.join(f'{k}={v}' for k, v in sorted(languages.items()))}")

        dim = self.stats.get("dimension", 0)
        if dim:
            lines.append(f"Vector dimension: {dim}")

        size = self.stats.get("index_size_bytes", 0)
        if size:
            if size < 1024:
                size_str = f"{size} B"
            elif size < 1024 * 1024:
                size_str = f"{size / 1024:.1f} KB"
            else:
                size_str = f"{size / 1024 / 1024:.1f} MB"
            lines.append(f"Index size: {size_str}")

        last = self.stats.get("last_indexed", 0)
        if last:
            last_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(last))
            lines.append(f"Last indexed: {last_str}")

        if not self._faiss_available:
            lines.append("FAISS not available (install faiss-cpu for vector search)")

        return "\n".join(lines)

    def get_changed_files(self, project_root: str) -> list[str]:
        """Check which files have changed since last index.

        Args:
            project_root: Root directory to scan.

        Returns:
            List of file paths that have changed or are new.
            NOTE(review): deleted files are not reported; stale entries
            remain in file_mtimes until the next full rebuild.
        """
        changed: list[str] = []
        root = Path(project_root).resolve()

        for dirpath, dirnames, filenames in os.walk(root):
            # Prune skip-listed and hidden directories in place so os.walk
            # never descends into them.
            dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS and not d.startswith(".")]

            for fname in filenames:
                suffix = Path(fname).suffix.lower()
                if suffix not in LANGUAGE_MAP:
                    continue

                fpath = Path(dirpath) / fname
                try:
                    st = fpath.stat()
                    mtime = st.st_mtime
                    size = st.st_size
                except OSError:
                    # File vanished between listing and stat; skip it.
                    continue

                fpath_str = str(fpath)
                if fpath_str in self.file_mtimes:
                    old_mtime, old_size = self.file_mtimes[fpath_str]
                    if old_mtime == mtime and old_size == size:
                        continue

                changed.append(fpath_str)

        return changed

    @property
    def is_available(self) -> bool:
        """Whether the index is loaded and ready."""
        return self._faiss_available and self.index is not None and self.index.ntotal > 0