codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,727 @@
1
+ """Tests for Priority 1-5 features: hybrid search, keyword search, model registry,
2
+ chunk hash store, section expander, parallel indexing, codexaignore, AST call graphs,
3
+ cross-repo search modes, TUI, MCP, and streaming.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import os
10
+ import re
11
+ import textwrap
12
+ from pathlib import Path
13
+ from unittest.mock import MagicMock, patch
14
+
15
+ import numpy as np
16
+ import pytest
17
+
18
+ from semantic_code_intelligence.config.settings import AppConfig, init_project
19
+ from semantic_code_intelligence.embeddings.generator import generate_embeddings
20
+ from semantic_code_intelligence.storage.vector_store import ChunkMetadata, VectorStore
21
+
22
+
23
+ # ===========================================================================
24
+ # Fixtures
25
+ # ===========================================================================
26
+
27
@pytest.fixture
def indexed_project(tmp_path: Path) -> Path:
    """Build a throwaway project whose saved index holds five code snippets.

    The project is initialised under ``tmp_path``; every snippet is embedded
    and registered as chunk 0 of ``src/module_<i>.py``; the populated vector
    store is then saved into the project's index directory.

    Returns:
        The project root (``tmp_path``).
    """
    # Only the side effect of initialisation matters here; the returned
    # values are not used by the fixture.
    _config, _ = init_project(tmp_path)

    snippets = [
        "def authenticate_user(username, password):\n return check_credentials(username, password)\n",
        "def connect_to_database(host, port):\n return Database(host=host, port=port)\n",
        "def handle_http_request(request):\n response = process(request)\n return response\n",
        "def verify_jwt_token(token):\n payload = jwt.decode(token, SECRET_KEY)\n return payload\n",
        "def calculate_statistics(data):\n mean = sum(data) / len(data)\n return mean\n",
    ]
    vectors = generate_embeddings(snippets)

    # One metadata record per snippet, in the same order as the vectors.
    chunk_records = []
    for position, source in enumerate(snippets):
        chunk_records.append(
            ChunkMetadata(
                file_path=f"src/module_{position}.py",
                start_line=1,
                end_line=3,
                chunk_index=0,
                language="python",
                content=source,
                content_hash=f"hash_{position}",
            )
        )

    # The store dimension is taken from the generated embeddings.
    populated = VectorStore(vectors.shape[1])
    populated.add(vectors, chunk_records)
    populated.save(AppConfig.index_dir(tmp_path))
    return tmp_path
59
+
60
+
61
@pytest.fixture
def vector_store(indexed_project: Path) -> VectorStore:
    """Reload the vector store saved in the indexed project's index directory."""
    return VectorStore.load(AppConfig.index_dir(indexed_project))
66
+
67
+
68
+ # ===========================================================================
69
+ # P1: Keyword Search (BM25)
70
+ # ===========================================================================
71
+
72
class TestKeywordSearch:
    """Tests for the BM25 keyword search engine."""

    @pytest.fixture(autouse=True)
    def _fresh_bm25_cache(self, vector_store: VectorStore):
        """Empty the module-level BM25 cache before each test in this class.

        Previously only the first test cleared the cache, so the remaining
        tests could run against an index cached by an earlier test depending
        on execution order. Requesting ``vector_store`` makes the reset happen
        after the store has been built, i.e. right before the test body runs.
        """
        from semantic_code_intelligence.search.keyword_search import _bm25_cache

        _bm25_cache.clear()

    def test_keyword_search_returns_results(self, indexed_project: Path, vector_store: VectorStore):
        """A term present in the indexed snippets yields a hit containing it."""
        from semantic_code_intelligence.search.keyword_search import keyword_search

        index_dir = AppConfig.index_dir(indexed_project)
        results = keyword_search("authenticate", vector_store, index_dir, top_k=3)
        assert len(results) > 0
        assert any("authenticate" in r.content.lower() for r in results)

    def test_keyword_search_empty_query(self, indexed_project: Path, vector_store: VectorStore):
        """An empty query returns a list."""
        from semantic_code_intelligence.search.keyword_search import keyword_search

        index_dir = AppConfig.index_dir(indexed_project)
        results = keyword_search("", vector_store, index_dir, top_k=3)
        assert isinstance(results, list)

    def test_keyword_search_no_match(self, indexed_project: Path, vector_store: VectorStore):
        """A term that appears in no snippet yields zero results."""
        from semantic_code_intelligence.search.keyword_search import keyword_search

        index_dir = AppConfig.index_dir(indexed_project)
        results = keyword_search("xyznonexistent999", vector_store, index_dir, top_k=3)
        assert len(results) == 0
98
+
99
+
100
class TestRegexSearch:
    """Tests for the regex search engine."""

    @staticmethod
    def _search(pattern, store, **options):
        """Run ``regex_search`` with the ``top_k=5`` limit shared by every test here."""
        from semantic_code_intelligence.search.keyword_search import regex_search

        return regex_search(pattern, store, top_k=5, **options)

    def test_regex_search_finds_pattern(self, vector_store: VectorStore):
        """A ``def <name>_user`` pattern matches at least one indexed chunk."""
        hits = self._search(r"def \w+_user", vector_store)
        assert len(hits) > 0

    def test_regex_search_case_insensitive(self, vector_store: VectorStore):
        """An upper-case pattern matches when case is ignored."""
        hits = self._search("DATABASE", vector_store, case_insensitive=True)
        assert len(hits) > 0

    def test_regex_search_case_sensitive(self, vector_store: VectorStore):
        """An upper-case pattern matches nothing when case is respected."""
        hits = self._search("DATABASE", vector_store, case_insensitive=False)
        # No indexed snippet contains the upper-case word, so nothing matches.
        assert len(hits) == 0

    def test_regex_search_invalid_pattern(self, vector_store: VectorStore):
        """A malformed pattern is handled without raising."""
        # The unterminated character class must not crash the search; the
        # test only requires that a list comes back.
        hits = self._search("[invalid", vector_store)
        assert isinstance(hits, list)
128
+
129
+
130
+ # ===========================================================================
131
+ # P1: Hybrid Search (RRF)
132
+ # ===========================================================================
133
+
134
class TestHybridSearch:
    """Tests for Reciprocal Rank Fusion hybrid search."""

    def test_hybrid_search_returns_results(self, indexed_project: Path, vector_store: VectorStore):
        """A query related to an indexed snippet yields at least one result."""
        from semantic_code_intelligence.search.hybrid_search import hybrid_search

        found = hybrid_search(
            "authenticate user",
            vector_store,
            AppConfig.index_dir(indexed_project),
            top_k=3,
        )
        assert len(found) > 0

    def test_hybrid_search_rrf_formula(self):
        """An index ranked by both inputs scores at least as high as one ranked by one."""
        from semantic_code_intelligence.search.hybrid_search import reciprocal_rank_fusion

        # Both inputs are lists of (chunk_index, score) tuples; index 1 is the
        # only one that appears in both rankings.
        semantic_ranking = [(0, 1.0), (1, 0.5), (2, 0.3)]
        keyword_ranking = [(1, 1.0), (3, 0.8)]
        fused = reciprocal_rank_fusion(semantic_ranking, keyword_ranking, k=60)

        # Each fused entry is (index, fused_score, sem_score, kw_score).
        fused_indices = set()
        for entry in fused:
            fused_indices.add(entry[0])
        assert 1 in fused_indices

        fused_score_of = {}
        for entry in fused:
            fused_score_of[entry[0]] = entry[1]
        # The shared index must not score lower than the semantic-only index 0.
        assert fused_score_of[1] >= fused_score_of.get(0, 0)

    def test_hybrid_search_empty_query(self, indexed_project: Path, vector_store: VectorStore):
        """An empty query returns a list."""
        from semantic_code_intelligence.search.hybrid_search import hybrid_search

        found = hybrid_search("", vector_store, AppConfig.index_dir(indexed_project), top_k=3)
        assert isinstance(found, list)
169
+
170
+
171
+ # ===========================================================================
172
+ # P1: Section Expander
173
+ # ===========================================================================
174
+
175
class TestSectionExpander:
    """Tests for full-section expansion."""

    def test_expand_returns_results(self, indexed_project: Path):
        """Expanding one search result returns at least one result."""
        from semantic_code_intelligence.search.section_expander import expand_to_full_section
        from semantic_code_intelligence.services.search_service import SearchResult

        seed = SearchResult(
            file_path="src/module_0.py",
            start_line=1,
            end_line=3,
            language="python",
            content="def authenticate_user():\n pass\n",
            score=0.9,
            chunk_index=0,
        )
        expanded = expand_to_full_section(
            [seed],
            indexed_project,
            AppConfig.index_dir(indexed_project),
        )
        # Expansion must never drop the result it was given.
        assert len(expanded) >= 1
197
+
198
+
199
+ # ===========================================================================
200
+ # P1: Auto-index on Search
201
+ # ===========================================================================
202
+
203
class TestAutoIndex:
    """Tests for auto-indexing when searching without an existing index."""

    def test_search_auto_indexes(self, tmp_path: Path):
        """Searching an initialised but unindexed project with ``auto_index=True`` returns a list."""
        _config, _ = init_project(tmp_path)

        # Give the indexer a single source file to pick up.
        (tmp_path / "hello.py").write_text("def greet(name):\n return f'Hello {name}'\n")

        from semantic_code_intelligence.services.search_service import search_codebase

        found = search_codebase("greet", tmp_path, auto_index=True)
        # Only checks that the search completed and produced a list.
        assert isinstance(found, list)
218
+
219
+
220
+ # ===========================================================================
221
+ # P2: Chunk Hash Store
222
+ # ===========================================================================
223
+
224
class TestChunkHashStore:
    """Tests for chunk-level content hashing."""

    def test_store_and_check(self, tmp_path: Path):
        """A stored hash is returned by ``get``; an unknown key gives ``None``."""
        from semantic_code_intelligence.storage.chunk_hash_store import ChunkHashStore

        hashes = ChunkHashStore()
        hashes.set("file.py:1:10", "abc123")

        assert hashes.get("file.py:1:10") == "abc123"
        assert hashes.get("nonexistent") is None

    def test_has_changed(self, tmp_path: Path):
        """``has_changed`` is false only for a known key with an identical hash."""
        from semantic_code_intelligence.storage.chunk_hash_store import ChunkHashStore

        hashes = ChunkHashStore()
        hashes.set("file.py:1:10", "abc123")

        same_hash = hashes.has_changed("file.py:1:10", "abc123")
        assert not same_hash
        different_hash = hashes.has_changed("file.py:1:10", "def456")
        assert different_hash
        unknown_key = hashes.has_changed("new_key", "anything")
        assert unknown_key

    def test_remove_by_file(self, tmp_path: Path):
        """Removing by file drops that file's entries and reports how many went."""
        from semantic_code_intelligence.storage.chunk_hash_store import ChunkHashStore

        hashes = ChunkHashStore()
        for key, digest in (("a.py:1:10", "h1"), ("a.py:11:20", "h2"), ("b.py:1:5", "h3")):
            hashes.set(key, digest)

        dropped = hashes.remove_by_file("a.py")

        assert dropped == 2
        assert hashes.get("a.py:1:10") is None
        # Entries belonging to other files are left in place.
        assert hashes.get("b.py:1:5") == "h3"

    def test_save_and_load(self, tmp_path: Path):
        """A saved store can be loaded back with its entries intact."""
        from semantic_code_intelligence.storage.chunk_hash_store import ChunkHashStore

        original = ChunkHashStore()
        original.set("file.py:1:10", "abc123")
        original.save(tmp_path)

        restored = ChunkHashStore.load(tmp_path)
        assert restored.get("file.py:1:10") == "abc123"

    def test_keys_for_file(self):
        """``keys_for_file`` lists only the keys of the requested file."""
        from semantic_code_intelligence.storage.chunk_hash_store import ChunkHashStore

        hashes = ChunkHashStore()
        for key, digest in (("a.py:1:10", "h1"), ("a.py:11:20", "h2"), ("b.py:1:5", "h3")):
            hashes.set(key, digest)

        file_keys = hashes.keys_for_file("a.py")
        assert len(file_keys) == 2
        assert "a.py:1:10" in file_keys
278
+
279
+
280
+ # ===========================================================================
281
+ # P2: Model Registry
282
+ # ===========================================================================
283
+
284
class TestModelRegistry:
    """Tests for the embedding model registry."""

    def test_resolve_alias(self):
        """Short aliases resolve to their full model names."""
        from semantic_code_intelligence.embeddings.model_registry import resolve_model_name

        expected_by_alias = (
            ("minilm", "all-MiniLM-L6-v2"),
            ("bge-small", "BAAI/bge-small-en-v1.5"),
        )
        for alias, full_name in expected_by_alias:
            assert resolve_model_name(alias) == full_name

    def test_resolve_full_name(self):
        """A full model name resolves to itself."""
        from semantic_code_intelligence.embeddings.model_registry import resolve_model_name

        resolved = resolve_model_name("all-MiniLM-L6-v2")
        assert resolved == "all-MiniLM-L6-v2"

    def test_resolve_unknown(self):
        """An unregistered name is passed through unchanged."""
        from semantic_code_intelligence.embeddings.model_registry import resolve_model_name

        # Pass-through is what lets callers use custom models.
        resolved = resolve_model_name("my-custom-model")
        assert resolved == "my-custom-model"

    def test_get_model_info(self):
        """The MiniLM entry exists and reports a dimension of 384."""
        from semantic_code_intelligence.embeddings.model_registry import get_model_info

        entry = get_model_info("all-MiniLM-L6-v2")
        assert entry is not None
        assert entry.dimension == 384

    def test_list_models(self):
        """At least five models are listed, MiniLM among them."""
        from semantic_code_intelligence.embeddings.model_registry import list_models

        registered = list_models()
        assert len(registered) >= 5

        registered_names = []
        for model in registered:
            registered_names.append(model.name)
        assert "all-MiniLM-L6-v2" in registered_names
318
+
319
+
320
+ # ===========================================================================
321
+ # P2: ONNX Backend Detection
322
+ # ===========================================================================
323
+
324
class TestONNXBackend:
    """Tests for ONNX backend detection in generator."""

    def test_onnx_available_detection(self):
        """The availability probe runs without raising and yields a bool."""
        from semantic_code_intelligence.embeddings.generator import _onnx_available

        # Either answer is acceptable; only the type is checked.
        assert isinstance(_onnx_available(), bool)
333
+
334
+
335
+ # ===========================================================================
336
+ # P3: Parallel Indexing
337
+ # ===========================================================================
338
+
339
class TestParallelIndexing:
    """Tests for parallel file chunking and hash scanning."""

    def test_parallel_chunk_files(self, tmp_path: Path):
        """Chunking five small files yields at least five entries."""
        from semantic_code_intelligence.indexing.parallel import parallel_chunk_files
        from semantic_code_intelligence.indexing.scanner import ScannedFile

        def write_and_describe(number):
            # Write one tiny source file, then wrap it as a ScannedFile.
            source_path = tmp_path / f"file_{number}.py"
            source_path.write_text(f"def func_{number}():\n return {number}\n")
            return ScannedFile(
                path=source_path,
                relative_path=f"file_{number}.py",
                extension=".py",
                size_bytes=source_path.stat().st_size,
                content_hash=f"hash_{number}",
            )

        scanned_files = [write_and_describe(number) for number in range(5)]

        produced = parallel_chunk_files(scanned_files, chunk_size=200, chunk_overlap=0)
        # Every input file should contribute at least one entry.
        assert len(produced) >= 5

    def test_parallel_scan_hashes(self, tmp_path: Path):
        """Hashing three files returns three 64-character digests."""
        from semantic_code_intelligence.indexing.parallel import parallel_scan_hashes

        targets = [tmp_path / f"f{number}.py" for number in range(3)]
        for number, target in enumerate(targets):
            target.write_text(f"# file {number}\n")

        digests = parallel_scan_hashes(targets)
        assert len(digests) == 3
        # 64 characters is the length of a SHA-256 hex digest.
        for digest in digests.values():
            assert len(digest) == 64
372
+
373
+
374
+ # ===========================================================================
375
+ # P4: .codexaignore
376
+ # ===========================================================================
377
+
378
class TestCodexaIgnore:
    """Tests for .codexaignore file support in the scanner."""

    def test_codexaignore_excludes_files(self, tmp_path: Path):
        """Paths matched by ``.codexaignore`` are absent from the scan results."""
        from semantic_code_intelligence.indexing.scanner import scan_repository
        from semantic_code_intelligence.config.settings import IndexConfig

        # Lay out one file to keep, one to ignore by name, and one inside an
        # ignored directory.
        vendor_dir = tmp_path / "vendor"
        vendor_dir.mkdir()
        layout = (
            (tmp_path / "keep.py", "x = 1\n"),
            (tmp_path / "secret.py", "password = 'abc'\n"),
            (vendor_dir / "lib.py", "y = 2\n"),
        )
        for target, text in layout:
            target.write_text(text)

        ignore_file = tmp_path / ".codexaignore"
        ignore_file.write_text("secret.py\nvendor/*\n")

        scanned = scan_repository(tmp_path, IndexConfig(extensions={".py"}))
        seen_paths = [entry.relative_path for entry in scanned]

        assert any("keep.py" in path for path in seen_paths)
        assert all("secret.py" not in path for path in seen_paths)
        assert all("vendor" not in path for path in seen_paths)

    def test_codexaignore_comments_ignored(self, tmp_path: Path):
        """Comment lines and whitespace-only lines are not returned as patterns."""
        from semantic_code_intelligence.indexing.scanner import _load_ignore_patterns

        ignore_file = tmp_path / ".codexaignore"
        ignore_file.write_text("# comment\npattern\n \n")

        assert _load_ignore_patterns(tmp_path) == ["pattern"]

    def test_no_codexaignore_file(self, tmp_path: Path):
        """A directory without ``.codexaignore`` yields an empty pattern list."""
        from semantic_code_intelligence.indexing.scanner import _load_ignore_patterns

        assert _load_ignore_patterns(tmp_path) == []
415
+
416
+
417
+ # ===========================================================================
418
+ # P4: TUI
419
+ # ===========================================================================
420
+
421
class TestTUI:
    """Tests for the TUI module."""

    def test_tui_import(self):
        """The TUI package can be imported and exposes a callable ``run_tui``."""
        from semantic_code_intelligence.tui import run_tui

        entry_point = run_tui
        assert callable(entry_point)
427
+
428
+
429
+ # ===========================================================================
430
+ # P4: MCP Server
431
+ # ===========================================================================
432
+
433
class TestMCPServer:
    """Tests for the MCP server (official SDK)."""

    @pytest.fixture(autouse=True)
    def _require_mcp(self):
        """Skip every test in this class when the ``mcp`` package is missing."""
        pytest.importorskip("mcp", reason="mcp SDK not installed")

    def test_mcp_import(self):
        """The server entry point is callable and at least eight tools are declared."""
        from semantic_code_intelligence.mcp import run_mcp_server, MCP_TOOLS

        assert callable(run_mcp_server)
        assert len(MCP_TOOLS) >= 8

    def test_mcp_tool_definitions(self):
        """The core search, explain, and health tools are all declared."""
        from semantic_code_intelligence.mcp import MCP_TOOLS

        declared = {tool.name for tool in MCP_TOOLS}
        required_tools = (
            "semantic_search",
            "keyword_search",
            "hybrid_search",
            "regex_search",
            "explain_symbol",
            "health_check",
        )
        for tool_name in required_tools:
            assert tool_name in declared

    def test_mcp_dispatch_health_check(self):
        """Dispatching ``health_check`` reports status ``ok``."""
        from semantic_code_intelligence.mcp import _dispatch_tool

        reply = _dispatch_tool("health_check", {}, Path("."))
        assert reply["status"] == "ok"

    def test_mcp_dispatch_unknown_tool(self):
        """Dispatching an unknown tool name yields a reply carrying ``error``."""
        from semantic_code_intelligence.mcp import _dispatch_tool

        reply = _dispatch_tool("nonexistent_tool", {}, Path("."))
        assert "error" in reply

    def test_mcp_create_server(self):
        """Creating a server for the current directory returns an object."""
        from semantic_code_intelligence.mcp import _create_server

        assert _create_server(Path(".")) is not None
473
+
474
+
475
+ # ===========================================================================
476
+ # P5: AST-based Call Graphs
477
+ # ===========================================================================
478
+
479
class TestASTCallGraph:
    """Tests for the AST-based call graph."""

    @staticmethod
    def _graph_from(*specs):
        """Build a ``CallGraph`` from symbol specs.

        Each spec is a ``(name, kind, file_path, start_line, end_line, body)``
        tuple; every symbol is created with ``start_col=0`` and ``end_col=0``.
        """
        from semantic_code_intelligence.context.engine import CallGraph
        from semantic_code_intelligence.parsing.parser import Symbol

        symbols = [
            Symbol(
                name=name,
                kind=kind,
                file_path=file_path,
                start_line=first_line,
                end_line=last_line,
                start_col=0,
                end_col=0,
                body=body,
            )
            for name, kind, file_path, first_line, last_line, body in specs
        ]
        graph = CallGraph()
        graph.build(symbols)
        return graph

    def test_ast_call_graph_detects_calls(self):
        """A plain call from one function to another is recorded as an edge."""
        graph = self._graph_from(
            (
                "caller_func",
                "function",
                "test.py",
                1,
                3,
                "def caller_func():\n result = callee_func()\n return result\n",
            ),
            (
                "callee_func",
                "function",
                "test.py",
                5,
                6,
                "def callee_func():\n return 42\n",
            ),
        )

        incoming = graph.callers_of("callee_func")
        assert len(incoming) >= 1
        assert any("caller_func" in edge.caller for edge in incoming)

    def test_ast_call_graph_no_self_reference(self):
        """A function that only calls itself produces no edges."""
        graph = self._graph_from(
            (
                "recursive",
                "function",
                "test.py",
                1,
                3,
                "def recursive():\n return recursive()\n",
            ),
        )

        assert len(graph.edges) == 0

    def test_ast_call_graph_method_call(self):
        """An attribute-style call (``obj.helper()``) is linked to the ``helper`` symbol."""
        graph = self._graph_from(
            (
                "main",
                "function",
                "test.py",
                1,
                3,
                "def main():\n obj.helper()\n return\n",
            ),
            (
                "helper",
                "method",
                "test.py",
                5,
                6,
                "def helper(self):\n pass\n",
            ),
        )

        incoming = graph.callers_of("helper")
        assert len(incoming) >= 1

    def test_call_graph_to_dict(self):
        """``to_dict`` exposes edges plus node and edge counts."""
        graph = self._graph_from(
            ("a", "function", "t.py", 1, 2, "def a():\n b()\n"),
            ("b", "function", "t.py", 3, 4, "def b():\n pass\n"),
        )

        exported = graph.to_dict()
        for expected_key in ("edges", "node_count", "edge_count"):
            assert expected_key in exported
        # ``a`` calls ``b``, so at least one edge must be counted.
        assert exported["edge_count"] >= 1
593
+
594
+
595
+ # ===========================================================================
596
+ # P5: Cross-repo Search Modes
597
+ # ===========================================================================
598
+
599
class TestCrossRepoSearchModes:
    """Tests for multi-mode cross-repo workspace search."""

    @staticmethod
    def _indexed_workspace(base_dir: Path):
        """Create a workspace under *base_dir* with one repo, then save and index it."""
        from semantic_code_intelligence.workspace import Workspace

        workspace_dir = base_dir / "workspace"
        workspace_dir.mkdir()
        repo_dir = workspace_dir / "repo_a"
        repo_dir.mkdir()
        source_file = repo_dir / "hello.py"
        source_file.write_text("def greet():\n print('hello')\n")

        workspace = Workspace.load_or_create(workspace_dir)
        workspace.add_repo("repo_a", repo_dir)
        workspace.save()
        workspace.index_all()
        return workspace

    def test_workspace_search_keyword_mode(self, tmp_path: Path):
        """Keyword-mode search over the workspace returns a list."""
        workspace = self._indexed_workspace(tmp_path)

        hits = workspace.search("greet", top_k=5, mode="keyword")

        assert isinstance(hits, list)

    def test_workspace_search_regex_mode(self, tmp_path: Path):
        """Regex-mode search over the workspace returns a list."""
        workspace = self._indexed_workspace(tmp_path)

        hits = workspace.search(r"def \w+", top_k=5, mode="regex")

        assert isinstance(hits, list)
636
+
637
+
638
+ # ===========================================================================
639
+ # P5: Streaming
640
+ # ===========================================================================
641
+
642
class TestStreaming:
    """Tests for streaming chat and investigation responses."""

    def test_stream_chat_mock(self):
        """Streaming through the mock provider emits start, token and done events."""
        from semantic_code_intelligence.llm.mock_provider import MockProvider
        from semantic_code_intelligence.llm.provider import LLMMessage, MessageRole
        from semantic_code_intelligence.llm.streaming import StreamEvent, stream_chat

        mock_llm = MockProvider()
        conversation = [LLMMessage(role=MessageRole.USER, content="Hello")]

        # Drain the generator completely before inspecting what it produced.
        collected: list[StreamEvent] = list(stream_chat(mock_llm, conversation))

        # A list (not a set) keeps the membership test equality-based.
        seen_kinds = [item.kind for item in collected]
        for expected_kind in ("start", "token", "done"):
            assert expected_kind in seen_kinds

    def test_chat_cmd_stream_flag_exists(self):
        """Verify the --stream option is registered on chat_cmd."""
        from semantic_code_intelligence.cli.commands.chat_cmd import chat_cmd

        registered = [option.name for option in chat_cmd.params]
        assert "stream" in registered

    def test_investigate_cmd_stream_flag_exists(self):
        """Verify the --stream option is registered on investigate_cmd."""
        from semantic_code_intelligence.cli.commands.investigate_cmd import investigate_cmd

        registered = [option.name for option in investigate_cmd.params]
        assert "stream" in registered
676
+
677
+
678
+ # ===========================================================================
679
+ # Router Registration
680
+ # ===========================================================================
681
+
682
class TestRouterRegistration:
    """Tests that new commands are registered in the CLI router."""

    def test_tui_and_mcp_registered(self):
        """Register all commands on an empty group and expect ``tui`` and ``mcp``."""
        # A regular import replaces the ``__import__("click")`` call; it binds
        # the same top-level module without invoking the dunder directly.
        import click

        from semantic_code_intelligence.cli.router import register_commands

        group = click.Group()
        register_commands(group)

        # ``group.commands`` maps command name -> command, so membership can be
        # tested on it directly without copying the keys into a list.
        for expected in ("tui", "mcp"):
            assert expected in group.commands, (
                f"{expected!r} missing from registered commands: "
                f"{sorted(group.commands)}"
            )
693
+
694
+
695
+ # ===========================================================================
696
+ # Search Service Multi-Mode
697
+ # ===========================================================================
698
+
699
class TestSearchServiceModes:
    """Tests for the search service's multi-mode dispatch."""

    @staticmethod
    def _search(query: str, project_root: Path, mode: str):
        """Call ``search_codebase`` for *query* against *project_root* in *mode*."""
        from semantic_code_intelligence.services.search_service import search_codebase

        return search_codebase(query, project_root, mode=mode)

    def test_semantic_mode(self, indexed_project: Path):
        """Semantic mode returns at least one hit for ``authenticate``."""
        hits = self._search("authenticate", indexed_project, "semantic")
        assert len(hits) > 0

    def test_keyword_mode(self, indexed_project: Path):
        """Keyword mode runs without error and hands back a list."""
        # The BM25 tokenizer splits camelCase, so query with a token that can
        # match directly.
        hits = self._search("authenticate", indexed_project, "keyword")
        # Only the return type is checked: zero hits is valid BM25 behaviour
        # when tokenization does not line up exactly with the query.
        assert isinstance(hits, list)

    def test_regex_mode(self, indexed_project: Path):
        """Regex mode returns at least one hit for a ``def ..._user`` pattern."""
        hits = self._search(r"def \w+_user", indexed_project, "regex")
        assert len(hits) > 0

    def test_hybrid_mode(self, indexed_project: Path):
        """Hybrid mode returns at least one hit for a multi-word query."""
        hits = self._search("authenticate user database", indexed_project, "hybrid")
        assert len(hits) > 0