codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,211 @@
1
+ """Tests for the daemon/watcher subsystem."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from pathlib import Path
7
+
8
+ import pytest
9
+
10
+ from semantic_code_intelligence.daemon.watcher import (
11
+ AsyncIndexer,
12
+ FileChangeEvent,
13
+ FileWatcher,
14
+ IndexingDaemon,
15
+ )
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # FileChangeEvent
20
+ # ---------------------------------------------------------------------------
21
+
22
class TestFileChangeEvent:
    """Construction and serialisation checks for ``FileChangeEvent``."""

    def test_creation(self):
        """Constructor keyword arguments are readable back as attributes."""
        init_kwargs = {
            "path": Path("/tmp/test.py"),
            "relative_path": "test.py",
            "change_type": "created",
            "timestamp": 1000.0,
        }
        created = FileChangeEvent(**init_kwargs)

        assert created.change_type == "created"
        assert created.relative_path == "test.py"

    def test_to_dict(self):
        """``to_dict`` exposes change type, relative path and timestamp."""
        payload = FileChangeEvent(
            path=Path("/tmp/test.py"),
            relative_path="test.py",
            change_type="modified",
            timestamp=123.0,
        ).to_dict()

        # Checked in a fixed order so the first mismatch is reported first.
        expected = {
            "change_type": "modified",
            "relative_path": "test.py",
            "timestamp": 123.0,
        }
        for key, value in expected.items():
            assert payload[key] == value
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # FileWatcher
48
+ # ---------------------------------------------------------------------------
49
+
50
class TestFileWatcher:
    """Exercise ``FileWatcher`` against a temporary project directory."""

    @staticmethod
    def _write_config(root):
        """Create ``<root>/.codexa/config.json`` holding an empty JSON object."""
        settings_dir = root / ".codexa"
        settings_dir.mkdir()
        (settings_dir / "config.json").write_text("{}", encoding="utf-8")

    @staticmethod
    def _kinds(changes):
        """Return the ``change_type`` of every event, in order."""
        return [change.change_type for change in changes]

    def test_init(self, tmp_path):
        """A freshly constructed watcher is not running."""
        # Create minimal codexa config
        self._write_config(tmp_path)

        fresh = FileWatcher(tmp_path, poll_interval=0.1)
        assert not fresh.is_running

    def test_callback_registration(self, tmp_path):
        """``on_change`` stores the callback on the watcher."""
        self._write_config(tmp_path)

        subject = FileWatcher(tmp_path, poll_interval=0.1)
        received = []
        subject.on_change(received.append)
        assert len(subject._callbacks) == 1

    def test_scan_once_baseline(self, tmp_path):
        """The very first scan only records state and reports nothing."""
        self._write_config(tmp_path)
        (tmp_path / "file.py").write_text("x = 1", encoding="utf-8")

        subject = FileWatcher(tmp_path, poll_interval=0.1)
        # First scan is baseline
        assert subject.scan_once() == []

    def test_scan_once_detects_new_file(self, tmp_path):
        """A file added after the baseline yields a ``created`` event."""
        self._write_config(tmp_path)
        (tmp_path / "file.py").write_text("x = 1", encoding="utf-8")

        subject = FileWatcher(tmp_path, poll_interval=0.1)
        subject.scan_once()  # baseline

        # Add a new file
        (tmp_path / "file2.py").write_text("y = 2", encoding="utf-8")
        assert "created" in self._kinds(subject.scan_once())

    def test_scan_once_detects_modification(self, tmp_path):
        """Rewriting a known file yields a ``modified`` event."""
        self._write_config(tmp_path)
        target = tmp_path / "file.py"
        target.write_text("x = 1", encoding="utf-8")

        subject = FileWatcher(tmp_path, poll_interval=0.1)
        subject.scan_once()  # baseline

        target.write_text("x = 2", encoding="utf-8")
        assert "modified" in self._kinds(subject.scan_once())

    def test_scan_once_detects_deletion(self, tmp_path):
        """Removing a known file yields a ``deleted`` event."""
        self._write_config(tmp_path)
        target = tmp_path / "file.py"
        target.write_text("x = 1", encoding="utf-8")

        subject = FileWatcher(tmp_path, poll_interval=0.1)
        subject.scan_once()  # baseline

        target.unlink()
        assert "deleted" in self._kinds(subject.scan_once())

    def test_start_stop(self, tmp_path):
        """``start``/``stop`` toggle ``is_running``."""
        self._write_config(tmp_path)

        subject = FileWatcher(tmp_path, poll_interval=0.1)
        subject.start()
        assert subject.is_running
        # Let the watcher run for a few poll intervals before stopping it.
        time.sleep(0.3)
        subject.stop()
        assert not subject.is_running
134
+
135
+
136
+ # ---------------------------------------------------------------------------
137
+ # AsyncIndexer
138
+ # ---------------------------------------------------------------------------
139
+
140
class TestAsyncIndexer:
    """Queue bookkeeping and callback wiring of ``AsyncIndexer``."""

    def test_init(self, tmp_path):
        """A new indexer has nothing pending and nothing processed."""
        subject = AsyncIndexer(tmp_path)
        assert subject.pending_count == 0
        assert subject.tasks_processed == 0

    def test_enqueue(self, tmp_path):
        """One ``enqueue`` call counts as one pending task, whatever its size."""
        subject = AsyncIndexer(tmp_path)
        batch = ["file1.py", "file2.py"]
        subject.enqueue(batch)
        assert subject.pending_count == 1

    def test_enqueue_multiple(self, tmp_path):
        """Each ``enqueue`` call adds its own pending task."""
        subject = AsyncIndexer(tmp_path)
        for name in ("f1.py", "f2.py"):
            subject.enqueue([name])
        assert subject.pending_count == 2

    def test_callbacks(self, tmp_path):
        """``set_callbacks`` stores both handlers on the indexer."""
        subject = AsyncIndexer(tmp_path)
        finished = []
        failures = []
        subject.set_callbacks(
            on_complete=finished.append,
            on_error=failures.append,
        )
        assert subject._on_complete is not None
        assert subject._on_error is not None
167
+
168
+
169
+ # ---------------------------------------------------------------------------
170
+ # IndexingDaemon
171
+ # ---------------------------------------------------------------------------
172
+
173
class TestIndexingDaemon:
    """State and lifecycle checks for ``IndexingDaemon``."""

    @staticmethod
    def _new_daemon(root):
        """Write a minimal ``.codexa/config.json`` under *root* and build a daemon."""
        settings_dir = root / ".codexa"
        settings_dir.mkdir()
        (settings_dir / "config.json").write_text("{}", encoding="utf-8")
        return IndexingDaemon(root)

    def test_init(self, tmp_path):
        """A freshly constructed daemon is not running."""
        subject = self._new_daemon(tmp_path)
        assert not subject.is_running

    def test_get_status(self, tmp_path):
        """The status mapping reports ``running`` and ``events_recorded``."""
        snapshot = self._new_daemon(tmp_path).get_status()
        assert "running" in snapshot
        assert snapshot["running"] is False
        assert "events_recorded" in snapshot

    def test_event_log(self, tmp_path):
        """The event log starts out empty."""
        subject = self._new_daemon(tmp_path)
        assert subject.event_log == []

    def test_start_stop(self, tmp_path):
        """``start``/``stop`` toggle ``is_running``."""
        subject = self._new_daemon(tmp_path)
        subject.start()
        assert subject.is_running
        # Give the daemon a moment to run before shutting it down.
        time.sleep(0.3)
        subject.stop()
        assert not subject.is_running
@@ -0,0 +1,442 @@
1
+ """AI tool interaction layer — structured protocol for LLM agents.
2
+
3
+ Provides a tool-calling interface that LLMs can use to interact with
4
+ the CodexA intelligence engine: search, explain, summarize, navigate.
5
+ Each tool returns structured JSON suitable for LLM consumption.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from semantic_code_intelligence.analysis.ai_features import (
16
+ CodeExplanation,
17
+ explain_file,
18
+ explain_symbol,
19
+ generate_ai_context,
20
+ summarize_repository,
21
+ )
22
+ from semantic_code_intelligence.context.engine import (
23
+ CallGraph,
24
+ ContextBuilder,
25
+ DependencyMap,
26
+ )
27
+ from semantic_code_intelligence.services.search_service import SearchResult, search_codebase
28
+ from semantic_code_intelligence.utils.logging import get_logger
29
+
30
+ logger = get_logger("tools")
31
+
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Tool Result Protocol
35
+ # ---------------------------------------------------------------------------
36
+
37
@dataclass
class ToolResult:
    """Outcome of a single tool invocation, serialisable to a dict or JSON."""

    tool_name: str  # name of the tool that produced this result
    success: bool  # selects whether ``data`` or ``error`` is serialised
    data: dict[str, Any] = field(default_factory=dict)
    error: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dict.

        The dict always holds ``tool`` and ``success``. A successful result
        adds ``data``; a failed one adds ``error``, falling back to
        ``"Unknown error"`` when no message was set.
        """
        if self.success:
            return {
                "tool": self.tool_name,
                "success": self.success,
                "data": self.data,
            }
        return {
            "tool": self.tool_name,
            "success": self.success,
            "error": self.error or "Unknown error",
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialise ``to_dict()`` to a JSON string using *indent*."""
        payload = self.to_dict()
        return json.dumps(payload, indent=indent)
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Tool Definitions (for schema / manifest)
63
+ # ---------------------------------------------------------------------------
64
+
65
# Manifest of the tools offered to LLM agents. Each entry carries the tool
# "name", a human-readable "description" and a "parameters" mapping from
# parameter name to its type / required flag / optional default / description.
TOOL_DEFINITIONS: list[dict[str, Any]] = [
    # --- search & explanation -------------------------------------------
    {
        "name": "semantic_search",
        "description": "Search the codebase using natural language. Returns relevant code snippets ranked by similarity.",
        "parameters": {
            "query": {
                "type": "string",
                "required": True,
                "description": "Natural language search query",
            },
            "top_k": {
                "type": "integer",
                "required": False,
                "default": 10,
                "description": "Max results",
            },
            "threshold": {
                "type": "float",
                "required": False,
                "default": 0.3,
                "description": "Min similarity",
            },
        },
    },
    {
        "name": "explain_symbol",
        "description": "Get a structural explanation of a code symbol (function, class, method).",
        "parameters": {
            "symbol_name": {
                "type": "string",
                "required": True,
                "description": "Name of the symbol",
            },
            "file_path": {
                "type": "string",
                "required": False,
                "description": "File containing the symbol",
            },
        },
    },
    {
        "name": "explain_file",
        "description": "Get explanations of all symbols in a source file.",
        "parameters": {
            "file_path": {
                "type": "string",
                "required": True,
                "description": "Path to the source file",
            },
        },
    },
    {
        "name": "summarize_repo",
        "description": "Get a structured summary of the entire repository.",
        "parameters": {},
    },
    # --- navigation ------------------------------------------------------
    {
        "name": "find_references",
        "description": "Find all references to a symbol across the codebase.",
        "parameters": {
            "symbol_name": {
                "type": "string",
                "required": True,
                "description": "Name to search for",
            },
        },
    },
    {
        "name": "get_dependencies",
        "description": "Get the dependency map (imports) for a specific file.",
        "parameters": {
            "file_path": {
                "type": "string",
                "required": True,
                "description": "Source file path",
            },
        },
    },
    {
        "name": "get_call_graph",
        "description": "Get the call graph for a symbol, showing callers and callees.",
        "parameters": {
            "symbol_name": {
                "type": "string",
                "required": True,
                "description": "Symbol to analyze",
            },
        },
    },
    {
        "name": "get_context",
        "description": "Build a rich context window around a symbol for AI-assisted tasks.",
        "parameters": {
            "symbol_name": {
                "type": "string",
                "required": True,
                "description": "Focal symbol name",
            },
        },
    },
    # --- quality ---------------------------------------------------------
    {
        "name": "get_quality_score",
        "description": "Run code quality analysis: complexity, dead code, duplicates, and safety issues.",
        "parameters": {
            "file_path": {
                "type": "string",
                "required": False,
                "description": "Specific file to analyze (omit for full project)",
            },
        },
    },
    {
        "name": "find_duplicates",
        "description": "Detect duplicate or near-duplicate code blocks across the codebase.",
        "parameters": {
            "threshold": {
                "type": "float",
                "required": False,
                "default": 0.75,
                "description": "Similarity threshold (0-1)",
            },
        },
    },
    # --- raw text search -------------------------------------------------
    {
        "name": "grep_files",
        "description": "Search raw files using regex — no index required. Uses ripgrep when available.",
        "parameters": {
            "pattern": {
                "type": "string",
                "required": True,
                "description": "Regex pattern to search for",
            },
            "file_glob": {
                "type": "string",
                "required": False,
                "description": "Glob to filter files (e.g. '*.py')",
            },
            "max_results": {
                "type": "integer",
                "required": False,
                "default": 50,
                "description": "Max matches",
            },
        },
    },
]
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # Tool Registry & Executor
151
+ # ---------------------------------------------------------------------------
152
+
153
class ToolRegistry:
    """Central registry that maps tool names to their implementations.

    Provides a unified interface for LLM agents to discover and invoke tools.
    A tool named ``X`` is implemented by the method ``_tool_X``; ``invoke``
    resolves handlers through that naming convention.
    """

    def __init__(self, project_root: Path) -> None:
        """Bind the registry to *project_root* (stored in resolved form)."""
        self._root = project_root.resolve()
        # Created on first use by ``_ensure_builder``.
        self._builder: ContextBuilder | None = None
        # Paths already handed to the builder, so each is indexed only once.
        self._indexed_files: set[str] = set()

    @property
    def tool_definitions(self) -> list[dict[str, Any]]:
        """Return schema of all available tools."""
        return TOOL_DEFINITIONS

    def _ensure_builder(self) -> ContextBuilder:
        """Return the shared ``ContextBuilder``, creating it on first use.

        The builder starts empty; files are added through ``index_file`` or
        ``index_directory``.
        """
        if self._builder is None:
            self._builder = ContextBuilder()
        return self._builder

    def index_file(self, file_path: str, content: str | None = None) -> None:
        """Index a file for tools that need parsed symbol data.

        A path that was already indexed through this registry is skipped,
        even when new *content* is supplied.
        """
        builder = self._ensure_builder()
        if file_path not in self._indexed_files:
            builder.index_file(file_path, content)
            self._indexed_files.add(file_path)

    def index_directory(self, directory: Path | None = None) -> int:
        """Index all supported files in a directory.

        Args:
            directory: Directory to scan; defaults to the project root.

        Returns:
            The number of files newly indexed by this call. Files that were
            already indexed, or that failed to index, are not counted.
        """
        from semantic_code_intelligence.config.settings import load_config
        from semantic_code_intelligence.indexing.scanner import scan_repository

        target = directory or self._root
        # Configuration is always loaded from the project root, even when a
        # different directory is being scanned.
        config = load_config(self._root)
        scanned = scan_repository(target, config.index)

        builder = self._ensure_builder()
        count = 0
        for sf in scanned:
            full_path = str(target / sf.relative_path)
            if full_path in self._indexed_files:
                continue
            try:
                builder.index_file(full_path)
            except Exception:
                # Best effort: one bad file must not abort the whole scan,
                # but keep the traceback so the failure can be diagnosed.
                logger.debug("Failed to index %s", full_path, exc_info=True)
            else:
                self._indexed_files.add(full_path)
                count += 1
        return count

    def invoke(self, tool_name: str, **kwargs: Any) -> ToolResult:
        """Invoke a tool by name with keyword arguments.

        Never raises for tool failures: an unknown tool name or an exception
        raised by the handler (including a bad argument list) is reported as
        a ``ToolResult`` with ``success=False``.
        """
        handler = getattr(self, f"_tool_{tool_name}", None)
        if handler is None:
            return ToolResult(
                tool_name=tool_name,
                success=False,
                error=f"Unknown tool: {tool_name}",
            )
        try:
            result: ToolResult = handler(**kwargs)
            return result
        except Exception as e:
            logger.exception("Tool %s failed", tool_name)
            return ToolResult(
                tool_name=tool_name,
                success=False,
                error=str(e),
            )

    # --- Tool implementations ---

    def _tool_semantic_search(
        self, query: str, top_k: int = 10, threshold: float = 0.3
    ) -> ToolResult:
        """Run ``search_codebase`` over the project root and wrap the hits."""
        results = search_codebase(
            query, self._root, top_k=top_k, threshold=threshold
        )
        return ToolResult(
            tool_name="semantic_search",
            success=True,
            data={
                "query": query,
                "result_count": len(results),
                "results": [r.to_dict() for r in results],
            },
        )

    def _tool_explain_symbol(
        self, symbol_name: str, file_path: str | None = None
    ) -> ToolResult:
        """Explain every symbol named *symbol_name*.

        With *file_path* the file is indexed first and only its symbols are
        considered; otherwise the builder is searched by name.
        """
        builder = self._ensure_builder()

        if file_path:
            self.index_file(file_path)
            symbols = builder.get_symbols(file_path)
            match = [s for s in symbols if s.name == symbol_name]
        else:
            match = builder.find_symbol(symbol_name)

        if not match:
            return ToolResult(
                tool_name="explain_symbol",
                success=False,
                error=f"Symbol '{symbol_name}' not found.",
            )

        explanations = [explain_symbol(s, builder) for s in match]
        return ToolResult(
            tool_name="explain_symbol",
            success=True,
            data={
                "symbol_name": symbol_name,
                "explanations": [e.to_dict() for e in explanations],
            },
        )

    def _tool_explain_file(self, file_path: str) -> ToolResult:
        """Index *file_path* and return explanations from ``explain_file``."""
        self.index_file(file_path)
        explanations = explain_file(file_path)
        return ToolResult(
            tool_name="explain_file",
            success=True,
            data={
                "file_path": file_path,
                "symbols": [e.to_dict() for e in explanations],
            },
        )

    def _tool_summarize_repo(self) -> ToolResult:
        """Summarise whatever the builder currently holds."""
        builder = self._ensure_builder()
        summary = summarize_repository(builder)
        return ToolResult(
            tool_name="summarize_repo",
            success=True,
            data=summary.to_dict(),
        )

    def _tool_find_references(self, symbol_name: str) -> ToolResult:
        """List definitions of, and symbols whose body mentions, *symbol_name*.

        A symbol with exactly that name is reported via its own ``to_dict()``;
        any other symbol whose body contains the name is reported as a
        referencing symbol with its location.
        """
        builder = self._ensure_builder()
        all_syms = builder.get_all_symbols()

        references: list[dict[str, Any]] = []
        for sym in all_syms:
            if sym.name == symbol_name:
                references.append(sym.to_dict())
            # NOTE(review): plain substring test, so "get" also matches
            # "get_all"; an empty name matches every body.
            elif symbol_name in sym.body:
                references.append({
                    "referencing_symbol": sym.name,
                    "kind": sym.kind,
                    "file_path": sym.file_path,
                    "start_line": sym.start_line,
                    "end_line": sym.end_line,
                })

        return ToolResult(
            tool_name="find_references",
            success=True,
            data={
                "symbol_name": symbol_name,
                "reference_count": len(references),
                "references": references,
            },
        )

    def _tool_get_dependencies(self, file_path: str) -> ToolResult:
        """Build a ``DependencyMap`` for *file_path* alone.

        The map stays empty when the builder holds no content for the path.
        """
        self.index_file(file_path)
        builder = self._ensure_builder()
        dep_map = DependencyMap()

        # NOTE(review): reads the builder's private ``_file_contents``.
        if file_path in builder._file_contents:
            dep_map.add_file(file_path, builder._file_contents[file_path])

        return ToolResult(
            tool_name="get_dependencies",
            success=True,
            data={
                "file_path": file_path,
                "dependencies": dep_map.to_dict(),
            },
        )

    def _tool_get_call_graph(self, symbol_name: str) -> ToolResult:
        """Report callers and callees of *symbol_name* from a fresh call graph."""
        builder = self._ensure_builder()
        all_syms = builder.get_all_symbols()
        graph = CallGraph()
        graph.build(all_syms)

        callers = [e.to_dict() for e in graph.callers_of(symbol_name)]

        # callees_of needs "file:name" key; collect from all matching
        all_callees: list[dict[str, Any]] = [
            edge.to_dict()
            for edge in graph.edges
            if edge.caller.endswith(f":{symbol_name}")
        ]

        return ToolResult(
            tool_name="get_call_graph",
            success=True,
            data={
                "symbol_name": symbol_name,
                "callers": callers,
                "callees": all_callees,
            },
        )

    def _tool_get_context(self, symbol_name: str) -> ToolResult:
        """Return the builder's context windows for *symbol_name*.

        Fails with a "not found" error when the builder yields none.
        """
        builder = self._ensure_builder()
        contexts = builder.build_context_for_name(symbol_name)

        if not contexts:
            return ToolResult(
                tool_name="get_context",
                success=False,
                error=f"Symbol '{symbol_name}' not found.",
            )

        return ToolResult(
            tool_name="get_context",
            success=True,
            data={
                "symbol_name": symbol_name,
                "contexts": [c.to_dict() for c in contexts],
            },
        )

    def _tool_get_quality_score(self, file_path: str | None = None) -> ToolResult:
        """Run ``analyze_project`` and return issue counts.

        With *file_path* only that file is passed for analysis; otherwise
        ``file_paths=None`` is passed. At most the first ten complexity
        issues are listed under ``high_complexity``.
        """
        from semantic_code_intelligence.ci.quality import analyze_project

        file_paths = [file_path] if file_path else None
        report = analyze_project(self._root, file_paths=file_paths)

        return ToolResult(
            tool_name="get_quality_score",
            success=True,
            data={
                "complexity_issues": len(report.complexity_issues),
                "dead_code": len(report.dead_code),
                "duplicates": len(report.duplicates),
                "safety_issues": len(report.bandit_issues),
                "maintainability_index": report.maintainability_index,
                "high_complexity": [
                    {"symbol": c.symbol_name, "file": c.file_path,
                     "complexity": c.complexity}
                    for c in report.complexity_issues[:10]
                ],
            },
        )

    def _tool_find_duplicates(self, threshold: float = 0.75) -> ToolResult:
        """Run ``detect_duplicates`` over all symbols held by the builder.

        ``duplicate_count`` is the full count; the ``duplicates`` list is
        capped at the first twenty entries.
        """
        from semantic_code_intelligence.ci.quality import detect_duplicates

        builder = self._ensure_builder()
        all_syms = builder.get_all_symbols()
        duplicates = detect_duplicates(all_syms, threshold=threshold)

        return ToolResult(
            tool_name="find_duplicates",
            success=True,
            data={
                "duplicate_count": len(duplicates),
                "duplicates": [
                    {
                        "symbol_a": d.symbol_a,
                        "symbol_b": d.symbol_b,
                        "similarity": round(d.similarity, 3),
                        "file_a": d.file_a,
                        "file_b": d.file_b,
                    }
                    for d in duplicates[:20]
                ],
            },
        )

    def _tool_grep_files(
        self, pattern: str, file_glob: str | None = None, max_results: int = 50
    ) -> ToolResult:
        """Run ``grep_search`` for *pattern* under the project root."""
        from semantic_code_intelligence.search.grep import grep_search

        result = grep_search(
            pattern, self._root,
            max_results=max_results, file_glob=file_glob,
        )

        return ToolResult(
            tool_name="grep_files",
            success=True,
            data=result.to_dict(),
        )