hanuscode 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. hanus/__init__.py +5 -0
  2. hanus/__main__.py +10 -0
  3. hanus/action_handlers.py +76 -0
  4. hanus/action_parser.py +82 -0
  5. hanus/agent_runner.py +1445 -0
  6. hanus/analysis/__init__.py +5 -0
  7. hanus/analysis/debt.py +702 -0
  8. hanus/analysis/dependencies.py +475 -0
  9. hanus/cache/__init__.py +5 -0
  10. hanus/cache/response_cache.py +560 -0
  11. hanus/config.py +401 -0
  12. hanus/connectors/__init__.py +19 -0
  13. hanus/connectors/base.py +114 -0
  14. hanus/connectors/claude_connector.py +146 -0
  15. hanus/connectors/gemini_connector.py +141 -0
  16. hanus/connectors/glm_connector.py +160 -0
  17. hanus/connectors/ollama_connector.py +174 -0
  18. hanus/connectors/openai_connector.py +122 -0
  19. hanus/connectors/registry.py +26 -0
  20. hanus/context/__init__.py +7 -0
  21. hanus/context/manager.py +837 -0
  22. hanus/context/selective.py +626 -0
  23. hanus/error_recovery/__init__.py +5 -0
  24. hanus/error_recovery/auto_fix.py +605 -0
  25. hanus/hooks/__init__.py +5 -0
  26. hanus/hooks/manager.py +247 -0
  27. hanus/instincts/__init__.py +44 -0
  28. hanus/instincts/cli.py +372 -0
  29. hanus/instincts/detector.py +281 -0
  30. hanus/instincts/evolver.py +361 -0
  31. hanus/instincts/manager.py +343 -0
  32. hanus/instincts/types.py +253 -0
  33. hanus/logger.py +81 -0
  34. hanus/memory/__init__.py +8 -0
  35. hanus/memory/manager.py +265 -0
  36. hanus/memory/types.py +119 -0
  37. hanus/monitor.py +341 -0
  38. hanus/parallel/__init__.py +5 -0
  39. hanus/parallel/executor.py +300 -0
  40. hanus/permissions.py +182 -0
  41. hanus/plan/__init__.py +8 -0
  42. hanus/plan/mode.py +267 -0
  43. hanus/plan/models.py +152 -0
  44. hanus/plugin_manager.py +754 -0
  45. hanus/plugin_registry.py +391 -0
  46. hanus/plugins/__init__.py +1 -0
  47. hanus/plugins/arena.py +630 -0
  48. hanus/plugins/code_review.py +123 -0
  49. hanus/plugins/cortex.py +1750 -0
  50. hanus/plugins/deps_check.py +27 -0
  51. hanus/plugins/git_ops.py +33 -0
  52. hanus/plugins/metasploit.py +530 -0
  53. hanus/plugins/notes.py +583 -0
  54. hanus/plugins/search_code.py +59 -0
  55. hanus/plugins/searchsploit.py +495 -0
  56. hanus/plugins/strategist.py +175 -0
  57. hanus/plugins/webui.py +5200 -0
  58. hanus/profiles.py +479 -0
  59. hanus/profiles_builtin/__init__.py +0 -0
  60. hanus/profiles_builtin/architect/profile.yaml +12 -0
  61. hanus/profiles_builtin/architect/system_prompt.txt +71 -0
  62. hanus/profiles_builtin/deep/profile.yaml +12 -0
  63. hanus/profiles_builtin/deep/system_prompt.txt +66 -0
  64. hanus/profiles_builtin/developer/__init__.py +0 -0
  65. hanus/profiles_builtin/developer/profile.yaml +9 -0
  66. hanus/profiles_builtin/developer/system_prompt.txt +176 -0
  67. hanus/profiles_builtin/speed/profile.yaml +12 -0
  68. hanus/profiles_builtin/speed/system_prompt.txt +51 -0
  69. hanus/project_tools.py +177 -0
  70. hanus/query_engine.py +1594 -0
  71. hanus/rules/__init__.py +237 -0
  72. hanus/search/__init__.py +5 -0
  73. hanus/search/semantic.py +596 -0
  74. hanus/session_manager.py +547 -0
  75. hanus/skill_manager.py +702 -0
  76. hanus/skills/__init__.py +4 -0
  77. hanus/subagent/__init__.py +8 -0
  78. hanus/subagent/agents/__init__.py +253 -0
  79. hanus/subagent/manager.py +309 -0
  80. hanus/subagent/types.py +266 -0
  81. hanus/suggestions/__init__.py +5 -0
  82. hanus/suggestions/proactive.py +451 -0
  83. hanus/tasks/__init__.py +8 -0
  84. hanus/tasks/manager.py +330 -0
  85. hanus/tasks/models.py +106 -0
  86. hanus/terminal_prompt.py +166 -0
  87. hanus/tools.py +1849 -0
  88. hanus/ui.py +939 -0
  89. hanuscode-1.0.0.dist-info/METADATA +1151 -0
  90. hanuscode-1.0.0.dist-info/RECORD +93 -0
  91. hanuscode-1.0.0.dist-info/WHEEL +5 -0
  92. hanuscode-1.0.0.dist-info/entry_points.txt +2 -0
  93. hanuscode-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,596 @@
1
+ # hanus/search/semantic.py
2
+ """
3
+ Búsqueda semántica de código usando embeddings.
4
+
5
+ Permite buscar código por concepto, no solo por texto exacto.
6
+ """
7
+ from __future__ import annotations
8
+ import json
9
+ import re
10
+ import hashlib
11
+ import time
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+ from typing import Dict, List, Optional, Any, Tuple
15
+ from collections import defaultdict
16
+ import os
17
+
18
+
19
@dataclass
class CodeChunk:
    """An indexable fragment of source code."""

    id: str
    file_path: str
    content: str
    start_line: int
    end_line: int
    chunk_type: str  # function, class, module, etc.
    name: Optional[str] = None
    embedding: Optional[List[float]] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Return a plain-dict summary of this chunk.

        The content is cut to its first 500 characters and the embedding
        is not part of the result.
        """
        preview = self.content[:500]  # truncated for serialization
        return dict(
            id=self.id,
            file_path=self.file_path,
            content=preview,
            start_line=self.start_line,
            end_line=self.end_line,
            chunk_type=self.chunk_type,
            name=self.name,
            metadata=self.metadata,
        )
43
+
44
+
45
@dataclass
class SearchResult:
    """A single search hit: the matched chunk plus its score."""

    chunk: CodeChunk
    score: float
    match_type: str  # "semantic", "keyword", "exact"
    highlights: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Return a plain-dict view of the hit.

        The chunk content is cut to its first 300 characters.
        """
        hit = self.chunk
        return dict(
            file_path=hit.file_path,
            name=hit.name,
            content=hit.content[:300],
            start_line=hit.start_line,
            end_line=hit.end_line,
            chunk_type=hit.chunk_type,
            score=self.score,
            match_type=self.match_type,
            highlights=self.highlights,
        )
65
+
66
+
67
class SemanticSearch:
    """
    Semantic code search.

    Features:
    - Concept-based search using embeddings
    - Keyword/prefix search that also works when no embedding model is loaded
    - A code index persisted under ``index_dir``
    - An in-memory cache of computed embeddings
    """

    # Supported file extensions
    CODE_EXTENSIONS = {'.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.go', '.rs', '.c', '.cpp', '.h', '.hpp', '.rb', '.php'}
80
+
81
def __init__(
    self,
    project_root: Path,
    use_embeddings: bool = True,
    index_dir: Optional[Path] = None
):
    """Set up empty indexes, load any saved index and (optionally) the model.

    Args:
        project_root: Root directory of the project to index.
        use_embeddings: When true, try to load the embedding model.
        index_dir: Directory holding the saved index; defaults to
            ``~/.hanus/search_index``. It is created if missing.
    """
    self.project_root = project_root
    self.use_embeddings = use_embeddings

    # Resolve and create the index directory.
    if index_dir:
        self.index_dir = index_dir
    else:
        self.index_dir = Path.home() / ".hanus" / "search_index"
    self.index_dir.mkdir(parents=True, exist_ok=True)

    # Chunk index: chunk id -> chunk, and file -> chunk ids.
    self._chunks: Dict[str, CodeChunk] = {}
    self._file_chunks: Dict[str, List[str]] = defaultdict(list)

    # Embedding model (set by _init_embedding_model when available).
    self._model = None
    self._embedding_dim = 384  # default for sentence-transformers

    # Inverted index for keyword search: word -> chunk ids.
    self._keyword_index: Dict[str, List[str]] = defaultdict(list)

    # Embedding cache keyed by text hash.
    self._embedding_cache: Dict[str, List[float]] = {}

    # Load the saved index first, then try to bring up the model.
    self._load_index()
    if use_embeddings:
        self._init_embedding_model()
112
+
113
def index_project(self, force_reindex: bool = False) -> Dict[str, int]:
    """
    Index every supported source file under the project root.

    Args:
        force_reindex: When true, discard the in-memory index before
            indexing so no stale chunks survive.

    Returns:
        Counters with the keys ``files_indexed``, ``chunks_created`` and
        ``errors``.
    """
    # Directory names that are never indexed. They are compared against
    # whole path components *below* the project root: a substring test on
    # the full path would also drop files such as ``environment.py`` and
    # every file of a project that merely lives under e.g. ``~/build/``.
    skip_dirs = {
        'node_modules', '__pycache__', '.git',
        'venv', '.venv', 'env', 'dist', 'build',
    }
    stats = {"files_indexed": 0, "chunks_created": 0, "errors": 0}

    if force_reindex:
        self._chunks.clear()
        self._file_chunks.clear()
        self._keyword_index.clear()

    for ext in self.CODE_EXTENSIONS:
        for file_path in self.project_root.rglob(f"*{ext}"):
            # A directory may carry a code-like suffix; only index files.
            if not file_path.is_file():
                continue

            parent_dirs = file_path.relative_to(self.project_root).parts[:-1]
            if any(part in skip_dirs for part in parent_dirs):
                continue

            try:
                chunks = self._index_file(file_path)
            except Exception:
                # Best effort: one bad file must not abort the whole run.
                stats["errors"] += 1
            else:
                stats["files_indexed"] += 1
                stats["chunks_created"] += len(chunks)

    # Persist the index
    self._save_index()

    return stats
142
+
143
def search(
    self,
    query: str,
    limit: int = 10,
    min_score: float = 0.3,
    file_filter: Optional[str] = None
) -> List[SearchResult]:
    """
    Search the index by concept and by keyword.

    Args:
        query: Search query.
        limit: Maximum number of results.
        min_score: Minimum score a result needs to be returned.
        file_filter: Optional pattern that result file paths must match.

    Returns:
        Results ordered by descending score, at most ``limit`` of them.
    """
    if limit <= 0:
        # A negative limit would otherwise slice off results from the end.
        return []

    candidates = []

    # 1. Semantic search, only when an embedding model is loaded
    if self.use_embeddings and self._model:
        candidates.extend(self._semantic_search(query, limit * 2))

    # 2. Keyword/prefix search
    candidates.extend(self._keyword_search(query, limit * 2))

    # 3. Merge both lists, keeping the best-scoring hit per chunk. Keeping
    #    merely the first hit would let a weak semantic score hide a strong
    #    keyword match, which the min_score filter would then discard.
    best = {}
    for result in candidates:
        current = best.get(result.chunk.id)
        if current is None or result.score > current.score:
            best[result.chunk.id] = result
    combined = list(best.values())

    # 4. Filter by file
    if file_filter:
        combined = [
            r for r in combined
            if self._matches_filter(r.chunk.file_path, file_filter)
        ]

    # 5. Drop low scores, order by score and cut to the limit
    combined = [r for r in combined if r.score >= min_score]
    combined.sort(key=lambda r: -r.score)

    return combined[:limit]
190
+
191
def search_similar(
    self,
    chunk_id: str,
    limit: int = 5
) -> List[SearchResult]:
    """
    Find chunks whose embedding is close to that of a given chunk.

    Args:
        chunk_id: Id of the reference chunk.
        limit: Maximum number of results.

    Returns:
        Hits with cosine similarity above 0.5, best first. Empty when the
        chunk is unknown or has no embedding.
    """
    reference = self._chunks.get(chunk_id)
    if reference is None or not reference.embedding:
        return []

    # Score every other chunk that carries an embedding.
    matches = []
    for other_id, other in self._chunks.items():
        if other_id == chunk_id or not other.embedding:
            continue
        closeness = self._cosine_similarity(reference.embedding, other.embedding)
        if closeness > 0.5:
            matches.append(
                SearchResult(chunk=other, score=closeness, match_type="semantic")
            )

    matches.sort(key=lambda hit: -hit.score)
    return matches[:limit]
231
+
232
def get_stats(self) -> Dict[str, Any]:
    """Return counters describing the current in-memory index."""
    embedded = [c for c in self._chunks.values() if c.embedding is not None]

    if self._model:
        model_label = "sentence-transformers"
    else:
        model_label = "none"

    stats: Dict[str, Any] = {}
    stats["total_chunks"] = len(self._chunks)
    stats["files_indexed"] = len(self._file_chunks)
    stats["chunks_with_embeddings"] = len(embedded)
    stats["embedding_model"] = model_label
    stats["keyword_index_size"] = len(self._keyword_index)
    return stats
243
+
244
+ # ══════════════════════════════════════════════════════════════════════════
245
+ # MÉTODOS PRIVADOS
246
+ # ══════════════════════════════════════════════════════════════════════════
247
+
248
def _init_embedding_model(self):
    """Try to load the sentence-transformers model.

    On any failure a message is printed and ``self._model`` is set to
    ``None``.
    """
    loaded = None
    try:
        from sentence_transformers import SentenceTransformer
        loaded = SentenceTransformer('all-MiniLM-L6-v2')
    except ImportError:
        print("[SemanticSearch] sentence-transformers not available, using keyword search only")
    except Exception as exc:
        print(f"[SemanticSearch] Error loading model: {exc}")
    else:
        self._embedding_dim = 384
    self._model = loaded
260
+
261
def _index_file(self, file_path: Path) -> List[CodeChunk]:
    """Index one file and return the chunks created for it.

    Chunks previously recorded for the same file are dropped first, so
    re-indexing does not leave stale or duplicated entries behind.

    Args:
        file_path: Path of the file to index.

    Returns:
        The chunks created; empty when the file cannot be read.
    """
    chunks: List[CodeChunk] = []

    try:
        content = file_path.read_text(encoding="utf-8", errors="replace")
    except Exception:
        # Best effort: an unreadable file simply contributes no chunks.
        return chunks

    # Use the project-relative path as the key everywhere. This is the
    # value stored on the chunk and the key used when a saved index is
    # loaded; keying by absolute path here listed the same file twice.
    try:
        rel_path = str(file_path.relative_to(self.project_root))
    except ValueError:
        # File lies outside the project root; fall back to the path as is.
        rel_path = str(file_path)

    # Forget what was indexed for this file before. Ids left in the
    # keyword index are harmless: keyword search ignores unknown chunk ids.
    for stale_id in self._file_chunks.pop(rel_path, []):
        self._chunks.pop(stale_id, None)

    # Split into chunks by function/class
    for chunk_data in self._split_into_chunks(content, str(file_path)):
        chunk_id = self._generate_chunk_id(file_path, chunk_data)

        # Compute an embedding only when a model is available
        embedding = None
        if self._model:
            embedding = self._get_embedding(chunk_data["content"])

        chunk = CodeChunk(
            id=chunk_id,
            file_path=rel_path,
            content=chunk_data["content"],
            start_line=chunk_data["start_line"],
            end_line=chunk_data["end_line"],
            chunk_type=chunk_data["type"],
            name=chunk_data.get("name"),
            embedding=embedding,
        )

        self._chunks[chunk_id] = chunk
        file_ids = self._file_chunks.setdefault(rel_path, [])
        if chunk_id not in file_ids:
            file_ids.append(chunk_id)
        chunks.append(chunk)

        # Keep the keyword index in step
        self._update_keyword_index(chunk)

    return chunks
300
+
301
def _split_into_chunks(self, content: str, file_path: str) -> List[Dict]:
    """Split source text into indexable chunks, choosing a splitter by extension."""
    lines = content.split('\n')
    suffix = Path(file_path).suffix.lower()

    if suffix == '.py':
        return self._split_python(content, lines)
    if suffix in ('.js', '.ts', '.jsx', '.tsx'):
        return self._split_javascript(content, lines)
    # Every other language is chunked by fixed-size line windows
    return self._split_by_lines(content, lines)
316
+
317
def _split_python(self, content: str, lines: List[str]) -> List[Dict]:
    """Split Python source into function, class and method chunks.

    Top-level functions and classes each become a chunk; the methods
    directly inside a class additionally get their own chunk named
    ``Class.method``. Source that cannot be parsed is chunked by lines, and
    a file without any function or class yields one ``module`` chunk.

    Args:
        content: Full source text.
        lines: ``content`` split on newlines.

    Returns:
        Chunk dicts with ``content``, ``start_line``, ``end_line``,
        ``type`` and (except for ``module`` chunks) ``name``.
    """
    import ast

    def make_chunk(node, kind: str, name: str) -> Dict:
        # AST line numbers are 1-based and inclusive.
        return {
            "content": "\n".join(lines[node.lineno - 1:node.end_lineno]),
            "start_line": node.lineno,
            "end_line": node.end_lineno,
            "type": kind,
            "name": name,
        }

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError is what older interpreters raise for source containing
        # null bytes; treat it like any other unparsable file and fall back
        # to line-based chunks.
        return self._split_by_lines(content, lines)

    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    chunks = []

    for node in ast.iter_child_nodes(tree):
        if isinstance(node, function_types):
            chunks.append(make_chunk(node, "function", node.name))
        elif isinstance(node, ast.ClassDef):
            # The whole class ...
            chunks.append(make_chunk(node, "class", node.name))
            # ... plus each of its methods individually
            for item in node.body:
                if isinstance(item, function_types):
                    chunks.append(
                        make_chunk(item, "method", f"{node.name}.{item.name}")
                    )

    # Nothing found: index the whole file as a single chunk
    if not chunks:
        chunks.append({
            "content": content,
            "start_line": 1,
            "end_line": len(lines),
            "type": "module",
        })

    return chunks
370
+
371
def _split_javascript(self, content: str, lines: List[str]) -> List[Dict]:
    """Split JavaScript/TypeScript source into function and class chunks.

    Declarations are located with regular expressions. A chunk starts at
    the declaration line and is capped by a fixed line window (50 lines
    past the start for functions, 100 for classes) or by the end of the
    file. Without any match the file is chunked by lines.
    """
    func_pattern = r'(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\([^)]*\)\s*\{'
    class_pattern = r'(?:export\s+)?class\s+(\w+)'
    total_lines = len(lines)

    # Functions first, then classes.
    passes = (
        (func_pattern, "function", 50),
        (class_pattern, "class", 100),
    )

    found = []
    for pattern, kind, window in passes:
        for hit in re.finditer(pattern, content):
            # 1-based line on which the declaration starts
            first = content.count('\n', 0, hit.start()) + 1
            # Simplified extent: a fixed window instead of brace matching
            last = min(first + window, total_lines)
            found.append({
                "content": "\n".join(lines[first - 1:last]),
                "start_line": first,
                "end_line": last,
                "type": kind,
                "name": hit.group(1),
            })

    if found:
        return found
    return self._split_by_lines(content, lines)
408
+
409
def _split_by_lines(self, content: str, lines: List[str], chunk_size: int = 50) -> List[Dict]:
    """Split ``lines`` into consecutive ``block`` chunks of ``chunk_size`` lines."""
    total = len(lines)
    return [
        {
            "content": "\n".join(lines[offset:offset + chunk_size]),
            "start_line": offset + 1,
            "end_line": min(offset + chunk_size, total),
            "type": "block",
        }
        for offset in range(0, total, chunk_size)
    ]
421
+
422
def _generate_chunk_id(self, file_path: Path, chunk_data: Dict) -> str:
    """Build a chunk id of the form ``<file stem>:<name or L<start>>:<hash>``.

    The hash is the first 8 hex digits of the MD5 of the chunk content.
    """
    line_label = f"L{chunk_data['start_line']}"  # used when the chunk has no name
    label = chunk_data.get("name", line_label)
    digest = hashlib.md5(chunk_data["content"].encode()).hexdigest()
    return ":".join((file_path.stem, f"{label}", digest[:8]))
427
+
428
def _get_embedding(self, text: str) -> Optional[List[float]]:
    """Return the embedding of ``text``, or ``None`` when unavailable.

    Results are cached under the MD5 of the full text, while only the first
    500 characters are passed to the model. ``None`` is returned when no
    model is loaded or when encoding fails.
    """
    if not self._model:
        return None

    cache_key = hashlib.md5(text.encode()).hexdigest()

    # Serve from the cache when possible
    try:
        return self._embedding_cache[cache_key]
    except KeyError:
        pass

    try:
        vector = self._model.encode(text[:500]).tolist()  # cap the input text
    except Exception:
        return None

    self._embedding_cache[cache_key] = vector
    return vector
444
+
445
def _update_keyword_index(self, chunk: CodeChunk):
    """Register the chunk under every word of its content in the inverted index.

    Words are lower-cased and words of one or two characters are ignored.
    """
    tokens = re.findall(r'\b\w+\b', chunk.content.lower())
    # Very short words are ignored
    long_tokens = [token for token in tokens if len(token) > 2]

    for token in set(long_tokens):
        postings = self._keyword_index[token]
        if chunk.id not in postings:
            postings.append(chunk.id)
454
+
455
def _semantic_search(self, query: str, limit: int) -> List[SearchResult]:
    """Rank chunks by cosine similarity between their embedding and the query's.

    Only chunks with similarity above 0.2 are kept. Returns an empty list
    when no model is loaded or the query cannot be embedded.
    """
    if not self._model:
        return []

    query_vector = self._get_embedding(query)
    if not query_vector:
        return []

    hits = []
    for candidate in self._chunks.values():
        if not candidate.embedding:
            continue
        closeness = self._cosine_similarity(query_vector, candidate.embedding)
        if closeness > 0.2:
            hits.append(
                SearchResult(chunk=candidate, score=closeness, match_type="semantic")
            )

    hits.sort(key=lambda hit: -hit.score)
    return hits[:limit]
477
+
478
def _keyword_search(self, query: str, limit: int) -> List[SearchResult]:
    """Rank chunks by keyword overlap with the query, including prefix matches.

    An indexed word equal to a query word adds 1.0 to each of its chunks;
    an indexed word that is a prefix of a query word, or vice versa, adds
    0.5. Scores are divided by the highest score and chunks at or below
    0.1 are dropped.
    """
    terms = set(re.findall(r'\b\w+\b', query.lower()))
    terms = {term for term in terms if len(term) > 2}
    if not terms:
        return []

    # Accumulate the raw score of every chunk
    raw_scores: Dict[str, float] = defaultdict(float)
    for term in terms:
        # Exact match
        if term in self._keyword_index:
            for cid in self._keyword_index[term]:
                raw_scores[cid] += 1.0

        # Fuzzy match (prefix in either direction)
        for candidate, cids in self._keyword_index.items():
            if candidate.startswith(term) or term.startswith(candidate):
                for cid in cids:
                    raw_scores[cid] += 0.5

    # Normalize against the best score
    peak = max(raw_scores.values()) if raw_scores else 1

    hits = []
    for cid, raw in raw_scores.items():
        normalized = raw / peak
        if cid in self._chunks and normalized > 0.1:
            hits.append(
                SearchResult(chunk=self._chunks[cid], score=normalized, match_type="keyword")
            )

    hits.sort(key=lambda hit: -hit.score)
    return hits[:limit]
517
+
518
def _cosine_similarity(self, a: List[float], b: List[float]) -> float:
    """Return the cosine similarity of two vectors.

    Yields 0.0 when the vectors differ in length or either has zero norm.
    """
    if len(a) != len(b):
        return 0.0

    norm_a = sum(x * x for x in a) ** 0.5
    norm_b = sum(y * y for y in b) ** 0.5
    if norm_a == 0 or norm_b == 0:
        return 0.0

    dot = sum(x * y for x, y in zip(a, b))
    return dot / (norm_a * norm_b)
531
+
532
def _matches_filter(self, file_path: str, pattern: str) -> bool:
    """Return whether ``file_path`` matches the shell-style ``pattern``."""
    import fnmatch

    return fnmatch.fnmatch(file_path, pattern)
536
+
537
def _load_index(self):
    """Load the saved index from ``index_dir`` into memory.

    Does nothing when no index file exists or when the file was saved for a
    different project root. The keyword index is rebuilt from the loaded
    chunks so keyword search works without a re-index. Any error is printed
    and otherwise ignored (best effort).
    """
    index_file = self.index_dir / "semantic_index.json"

    if not index_file.exists():
        return

    try:
        data = json.loads(index_file.read_text(encoding="utf-8"))

        # The index file records the project it was built for; chunks of
        # another project must not leak into this one's results.
        stored_root = data.get("project_root")
        if stored_root is not None and stored_root != str(self.project_root):
            return

        for chunk_data in data.get("chunks", []):
            chunk = CodeChunk(
                id=chunk_data["id"],
                file_path=chunk_data["file_path"],
                content=chunk_data["content"],
                start_line=chunk_data["start_line"],
                end_line=chunk_data["end_line"],
                chunk_type=chunk_data["chunk_type"],
                name=chunk_data.get("name"),
                embedding=chunk_data.get("embedding"),
                metadata=chunk_data.get("metadata") or {},
            )
            self._chunks[chunk.id] = chunk

            file_ids = self._file_chunks.setdefault(chunk.file_path, [])
            if chunk.id not in file_ids:
                file_ids.append(chunk.id)

            # The inverted index is not persisted; rebuild it here.
            self._update_keyword_index(chunk)

    except Exception as e:
        print(f"[SemanticSearch] Error loading index: {e}")
563
+
564
def _save_index(self):
    """Write the in-memory index to ``index_dir`` as JSON.

    Each chunk is stored with its full content and its embedding.
    ``to_dict()`` alone truncates the content and omits the embedding,
    which left a reloaded index without anything to search semantically.
    Any error is printed and otherwise ignored (best effort).
    """
    index_file = self.index_dir / "semantic_index.json"

    try:
        serialized = []
        for chunk in self._chunks.values():
            entry = chunk.to_dict()
            entry["content"] = chunk.content
            entry["embedding"] = chunk.embedding
            serialized.append(entry)

        data = {
            "chunks": serialized,
            "project_root": str(self.project_root),
            "timestamp": time.time(),
        }

        # Write to a sibling file and swap it in, so an interrupted write
        # cannot leave a truncated index behind.
        tmp_file = index_file.with_name(index_file.name + ".tmp")
        tmp_file.write_text(
            json.dumps(data, indent=2, ensure_ascii=False),
            encoding="utf-8"
        )
        tmp_file.replace(index_file)

    except Exception as e:
        print(f"[SemanticSearch] Error saving index: {e}")
582
+
583
+
584
+ # ══════════════════════════════════════════════════════════════════════════════
585
+ # INSTANCIA GLOBAL
586
+ # ══════════════════════════════════════════════════════════════════════════════
587
+
588
_search_instance: Optional[SemanticSearch] = None


def get_semantic_search(project_root: Path = None) -> SemanticSearch:
    """Return the shared SemanticSearch instance, creating it when needed.

    A new instance is built when none exists yet or when ``project_root``
    is given and differs from the current instance's root. Without a
    ``project_root`` a new instance uses the current working directory.
    """
    global _search_instance

    current = _search_instance
    root_changed = bool(
        current is not None
        and project_root
        and current.project_root != project_root
    )
    if current is None or root_changed:
        _search_instance = SemanticSearch(project_root or Path.cwd())

    return _search_instance