codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,783 @@
1
+ """Phase 23 — Persistent Intelligence Index.
2
+
3
+ Tests verify:
4
+ 1. IndexManifest — serialisation, persistence, compatibility checking
5
+ 2. SymbolRegistry — add/remove/find/search, persistence, summaries
6
+ 3. IndexStats — coverage tracking, staleness, persistence
7
+ 4. QueryHistory — record/recent/popular, FIFO eviction, persistence
8
+ 5. Indexing integration — manifest/registry/stats populated after indexing
9
+ 6. Search integration — query history recorded after search
10
+ 7. Module imports and version
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import time
17
+ from pathlib import Path
18
+ from unittest.mock import MagicMock, patch
19
+
20
+ import numpy as np
21
+ import pytest
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Imports under test
25
+ # ---------------------------------------------------------------------------
26
+
27
+ from semantic_code_intelligence.storage.index_manifest import (
28
+ MANIFEST_FILE,
29
+ SCHEMA_VERSION,
30
+ IndexManifest,
31
+ )
32
+ from semantic_code_intelligence.storage.index_stats import (
33
+ STATS_FILE,
34
+ IndexStats,
35
+ LanguageCoverage,
36
+ )
37
+ from semantic_code_intelligence.storage.query_history import (
38
+ HISTORY_FILE,
39
+ MAX_HISTORY,
40
+ QueryHistory,
41
+ QueryRecord,
42
+ )
43
+ from semantic_code_intelligence.storage.symbol_registry import (
44
+ REGISTRY_FILE,
45
+ SymbolEntry,
46
+ SymbolRegistry,
47
+ )
48
+
49
+ _PROJECT_ROOT = Path(__file__).resolve().parents[2]
50
+ _SRC = _PROJECT_ROOT / "semantic_code_intelligence"
51
+
52
+
53
+ # ═══════════════════════════════════════════════════════════════════════════
54
+ # 1 — IndexManifest
55
+ # ═══════════════════════════════════════════════════════════════════════════
56
+
57
+
58
class TestIndexManifest:
    """Checks for IndexManifest: defaults, dict round-trip, timestamps, persistence."""

    def test_defaults(self):
        """A manifest built with no arguments exposes the expected default values."""
        manifest = IndexManifest()
        expected_defaults = {
            "schema_version": SCHEMA_VERSION,
            "embedding_model": "all-MiniLM-L6-v2",
            "embedding_dimension": 384,
            "created_at": 0.0,
            "updated_at": 0.0,
            "total_files": 0,
            "total_chunks": 0,
            "total_symbols": 0,
            "languages": [],
            "project_root": "",
        }
        for attr, expected in expected_defaults.items():
            # The attribute name is the assertion message so a failure
            # identifies which default is wrong.
            assert getattr(manifest, attr) == expected, attr

    def test_to_dict_and_from_dict(self):
        """Values passed to the constructor survive to_dict() -> from_dict()."""
        original = IndexManifest(
            total_files=10,
            total_chunks=50,
            total_symbols=30,
            languages=["python", "javascript"],
            project_root="/repo",
        )
        restored = IndexManifest.from_dict(original.to_dict())
        assert restored.total_files == 10
        assert restored.total_chunks == 50
        assert restored.languages == ["python", "javascript"]
        assert restored.project_root == "/repo"

    def test_from_dict_ignores_unknown_keys(self):
        """from_dict() accepts a payload containing a key it does not know."""
        payload = {"total_files": 3, "unknown_field": "ignored"}
        manifest = IndexManifest.from_dict(payload)
        assert manifest.total_files == 3

    def test_touch_sets_timestamps(self):
        """touch() fills both timestamps; a later touch() keeps created_at."""
        manifest = IndexManifest()
        assert manifest.created_at == 0.0

        manifest.touch()
        assert manifest.created_at > 0.0
        assert manifest.updated_at > 0.0

        created_before = manifest.created_at
        # Short pause so the second touch() is expected to see a later clock.
        time.sleep(0.01)
        manifest.touch()
        assert manifest.created_at == created_before  # created_at unchanged
        assert manifest.updated_at > created_before

    def test_is_compatible(self):
        """is_compatible() is True only when both model and dimension match."""
        manifest = IndexManifest(
            embedding_model="all-MiniLM-L6-v2", embedding_dimension=384
        )
        cases = (
            ("all-MiniLM-L6-v2", 384, True),
            ("other-model", 384, False),
            ("all-MiniLM-L6-v2", 768, False),
        )
        for model_name, dimension, expected in cases:
            assert manifest.is_compatible(model_name, dimension) is expected

    def test_save_and_load(self, tmp_path: Path):
        """save() writes MANIFEST_FILE and load() reads the same values back."""
        saved = IndexManifest(total_files=5, total_chunks=20, project_root="/repo")
        saved.touch()
        saved.save(tmp_path)

        manifest_path = tmp_path / MANIFEST_FILE
        assert manifest_path.exists()

        restored = IndexManifest.load(tmp_path)
        assert restored is not None
        assert restored.total_files == 5
        assert restored.total_chunks == 20
        assert restored.project_root == "/repo"
        assert restored.created_at > 0.0

    def test_load_returns_none_when_missing(self, tmp_path: Path):
        """load() on a directory without a manifest file yields None."""
        assert IndexManifest.load(tmp_path) is None

    def test_load_returns_none_on_corrupt_json(self, tmp_path: Path):
        """load() yields None when the manifest file is not valid JSON."""
        corrupt_file = tmp_path / MANIFEST_FILE
        corrupt_file.write_text("not json", encoding="utf-8")
        assert IndexManifest.load(tmp_path) is None

    def test_save_creates_directory(self, tmp_path: Path):
        """save() succeeds when the target directory does not exist yet."""
        nested_dir = tmp_path / "a" / "b" / "c"
        IndexManifest().save(nested_dir)
        assert (nested_dir / MANIFEST_FILE).exists()
137
+
138
+
139
+ # ═══════════════════════════════════════════════════════════════════════════
140
+ # 2 — SymbolRegistry
141
+ # ═══════════════════════════════════════════════════════════════════════════
142
+
143
+
144
def _make_entry(**kwargs) -> SymbolEntry:
    """Build a SymbolEntry from baseline field values.

    Any keyword argument overrides the matching baseline value or adds an
    extra field that is passed straight to the SymbolEntry constructor.
    """
    fields = {
        "name": "foo",
        "kind": "function",
        "file_path": "src/main.py",
        "start_line": 1,
        "end_line": 10,
        "language": "python",
        # Caller-supplied values come last so they win over the baseline.
        **kwargs,
    }
    return SymbolEntry(**fields)
155
+
156
+
157
class TestSymbolEntry:
    """Checks for the SymbolEntry dataclass."""

    def test_qualified_name_no_parent(self):
        """Without a parent, qualified_name is just the symbol name."""
        entry = _make_entry(name="bar", parent=None)
        assert entry.qualified_name == "bar"

    def test_qualified_name_with_parent(self):
        """With a parent, qualified_name is '<parent>.<name>'."""
        entry = _make_entry(name="method", parent="MyClass")
        assert entry.qualified_name == "MyClass.method"

    def test_to_dict_and_from_dict(self):
        """Name, parameters and decorators survive to_dict() -> from_dict()."""
        original = _make_entry(
            name="hello",
            parameters=["a", "b"],
            decorators=["@staticmethod"],
        )
        restored = SymbolEntry.from_dict(original.to_dict())
        assert restored.name == "hello"
        assert restored.parameters == ["a", "b"]
        assert restored.decorators == ["@staticmethod"]

    def test_from_dict_ignores_unknown_keys(self):
        """from_dict() accepts a payload containing a key it does not know."""
        payload = {
            "name": "x",
            "kind": "class",
            "file_path": "a.py",
            "start_line": 1,
            "end_line": 2,
            "extra": True,
        }
        entry = SymbolEntry.from_dict(payload)
        assert entry.name == "x"
180
+
181
+
182
class TestSymbolRegistry:
    """Checks for SymbolRegistry: mutation, lookup, search, summaries, persistence."""

    def test_add_and_size(self):
        """size starts at zero and grows by one per add()."""
        registry = SymbolRegistry()
        assert registry.size == 0
        for symbol_name in ("a", "b"):
            registry.add(_make_entry(name=symbol_name))
        assert registry.size == 2

    def test_add_many(self):
        """add_many() registers every entry in the list."""
        registry = SymbolRegistry()
        entries = [_make_entry(name=symbol_name) for symbol_name in ("x", "y", "z")]
        registry.add_many(entries)
        assert registry.size == 3

    def test_remove_file(self):
        """remove_file() drops all symbols of that file and reports the count."""
        registry = SymbolRegistry()
        for symbol_name, path in (("a", "f1.py"), ("b", "f1.py"), ("c", "f2.py")):
            registry.add(_make_entry(name=symbol_name, file_path=path))

        removed_count = registry.remove_file("f1.py")

        assert removed_count == 2
        assert registry.size == 1
        assert registry.find_by_file("f1.py") == []

    def test_remove_file_nonexistent(self):
        """Removing an unknown file reports zero removals."""
        registry = SymbolRegistry()
        assert registry.remove_file("no.py") == 0

    def test_clear(self):
        """clear() empties the registry."""
        registry = SymbolRegistry()
        registry.add_many([_make_entry(), _make_entry()])
        registry.clear()
        assert registry.size == 0

    def test_files(self):
        """files lists each file path that has at least one symbol."""
        registry = SymbolRegistry()
        for path in ("a.py", "b.py"):
            registry.add(_make_entry(file_path=path))
        assert sorted(registry.files) == ["a.py", "b.py"]

    def test_find_by_name(self):
        """find_by_name() returns every entry with that exact name."""
        registry = SymbolRegistry()
        registry.add(_make_entry(name="foo"))
        registry.add(_make_entry(name="bar"))
        registry.add(_make_entry(name="foo", file_path="other.py"))

        expected_hits = {"foo": 2, "bar": 1, "baz": 0}
        for symbol_name, hit_count in expected_hits.items():
            assert len(registry.find_by_name(symbol_name)) == hit_count

    def test_find_by_kind(self):
        """find_by_kind() returns every entry of that kind."""
        registry = SymbolRegistry()
        for symbol_kind in ("function", "class", "function"):
            registry.add(_make_entry(kind=symbol_kind))
        assert len(registry.find_by_kind("function")) == 2
        assert len(registry.find_by_kind("class")) == 1

    def test_find_by_file(self):
        """find_by_file() returns every entry recorded for that path."""
        registry = SymbolRegistry()
        for path in ("a.py", "b.py", "a.py"):
            registry.add(_make_entry(file_path=path))
        assert len(registry.find_by_file("a.py")) == 2

    def test_find_multi_criteria(self):
        """find() combines the given criteria; with none it returns everything."""
        registry = SymbolRegistry()
        specs = (
            ("f", "function", "python"),
            ("f", "method", "python"),
            ("g", "function", "javascript"),
        )
        for symbol_name, symbol_kind, lang in specs:
            registry.add(_make_entry(name=symbol_name, kind=symbol_kind, language=lang))

        # name + kind
        assert len(registry.find(name="f", kind="function")) == 1
        # language only
        assert len(registry.find(language="python")) == 2
        # no criteria -> all
        assert len(registry.find()) == 3

    def test_find_with_parent(self):
        """find(parent=...) filters on the entry's parent."""
        registry = SymbolRegistry()
        for method_name, owner in (("method1", "ClassA"), ("method2", "ClassB")):
            registry.add(_make_entry(name=method_name, parent=owner))
        assert len(registry.find(parent="ClassA")) == 1

    def test_search_name(self):
        """search_name() matches on a substring of the symbol name."""
        registry = SymbolRegistry()
        for symbol_name in ("calculate_total", "get_customer", "recalculate"):
            registry.add(_make_entry(name=symbol_name))
        matches = registry.search_name("calc")
        assert len(matches) == 2  # calculate_total and recalculate

    def test_search_name_case_insensitive(self):
        """search_name() ignores letter case."""
        registry = SymbolRegistry()
        registry.add(_make_entry(name="MyClass"))
        matches = registry.search_name("myclass")
        assert len(matches) == 1

    def test_language_summary(self):
        """language_summary() maps each language to its symbol count."""
        registry = SymbolRegistry()
        for lang in ("python", "python", "javascript"):
            registry.add(_make_entry(language=lang))
        counts = registry.language_summary()
        assert counts["python"] == 2
        assert counts["javascript"] == 1

    def test_kind_summary(self):
        """kind_summary() maps each kind to its symbol count."""
        registry = SymbolRegistry()
        for symbol_kind in ("function", "class", "function"):
            registry.add(_make_entry(kind=symbol_kind))
        counts = registry.kind_summary()
        assert counts["function"] == 2
        assert counts["class"] == 1

    def test_save_and_load(self, tmp_path: Path):
        """save() writes REGISTRY_FILE and load() restores the entries."""
        registry = SymbolRegistry()
        registry.add(_make_entry(name="func1", kind="function", language="python"))
        registry.add(_make_entry(name="Cls1", kind="class", language="python"))
        registry.save(tmp_path)

        registry_path = tmp_path / REGISTRY_FILE
        assert registry_path.exists()

        restored = SymbolRegistry.load(tmp_path)
        assert restored.size == 2
        assert len(restored.find_by_name("func1")) == 1
        assert restored.find_by_name("Cls1")[0].kind == "class"

    def test_load_returns_empty_when_missing(self, tmp_path: Path):
        """load() on a directory without a registry file yields an empty registry."""
        restored = SymbolRegistry.load(tmp_path)
        assert restored.size == 0

    def test_load_handles_corrupt_json(self, tmp_path: Path):
        """load() yields an empty registry when the file is not valid JSON."""
        corrupt_file = tmp_path / REGISTRY_FILE
        corrupt_file.write_text("not json", encoding="utf-8")
        restored = SymbolRegistry.load(tmp_path)
        assert restored.size == 0
318
+
319
+
320
+ # ═══════════════════════════════════════════════════════════════════════════
321
+ # 3 — IndexStats
322
+ # ═══════════════════════════════════════════════════════════════════════════
323
+
324
+
325
class TestLanguageCoverage:
    """Checks for the LanguageCoverage dataclass."""

    def test_defaults(self):
        """A coverage record built with no arguments is empty / zeroed."""
        coverage = LanguageCoverage()
        assert coverage.language == ""
        for counter in ("files", "chunks", "symbols", "total_lines"):
            # The attribute name is the assertion message so a failure
            # identifies which counter is wrong.
            assert getattr(coverage, counter) == 0, counter

    def test_to_dict_and_from_dict(self):
        """Field values survive to_dict() -> from_dict()."""
        original = LanguageCoverage(
            language="python",
            files=5,
            chunks=20,
            symbols=15,
            total_lines=300,
        )
        restored = LanguageCoverage.from_dict(original.to_dict())
        assert restored.language == "python"
        assert restored.files == 5
        assert restored.total_lines == 300
343
+
344
+
345
class TestIndexStats:
    """Checks for IndexStats: defaults, staleness, language coverage, persistence."""

    def test_defaults(self):
        """Stats built with no arguments have zero totals and no coverage."""
        stats = IndexStats()
        for counter in ("total_files", "total_chunks", "total_symbols", "total_vectors"):
            assert getattr(stats, counter) == 0, counter
        assert stats.language_coverage == []

    def test_staleness_seconds_zero_when_not_indexed(self):
        """staleness_seconds is 0.0 when no index time has been set."""
        stats = IndexStats()
        assert stats.staleness_seconds == 0.0

    def test_staleness_seconds_positive(self):
        """staleness_seconds reflects time elapsed since last_indexed_at."""
        indexed_at = time.time() - 100
        stats = IndexStats(last_indexed_at=indexed_at)
        assert stats.staleness_seconds >= 99.0

    def test_languages_property(self):
        """languages lists coverage language names in stored order."""
        coverage = [
            LanguageCoverage(language="python"),
            LanguageCoverage(language="javascript"),
        ]
        stats = IndexStats(language_coverage=coverage)
        assert stats.languages == ["python", "javascript"]

    def test_get_language(self):
        """get_language() returns the matching record, or None if absent."""
        stats = IndexStats(
            language_coverage=[LanguageCoverage(language="python", files=3)]
        )
        assert stats.get_language("python") is not None
        assert stats.get_language("python").files == 3
        assert stats.get_language("rust") is None

    def test_set_language_add_new(self):
        """set_language() appends a record for a language not yet tracked."""
        stats = IndexStats()
        stats.set_language(LanguageCoverage(language="go", files=2))
        assert len(stats.language_coverage) == 1
        assert stats.get_language("go").files == 2

    def test_set_language_replace_existing(self):
        """set_language() replaces the record of an already tracked language."""
        stats = IndexStats(
            language_coverage=[LanguageCoverage(language="go", files=1)]
        )
        stats.set_language(LanguageCoverage(language="go", files=5))
        assert len(stats.language_coverage) == 1
        assert stats.get_language("go").files == 5

    def test_to_dict_and_from_dict(self):
        """Totals, model name and coverage survive to_dict() -> from_dict()."""
        original = IndexStats(
            total_files=10,
            total_chunks=50,
            total_symbols=30,
            embedding_model="test-model",
            language_coverage=[
                LanguageCoverage(language="python", files=7, chunks=35),
            ],
        )
        restored = IndexStats.from_dict(original.to_dict())
        assert restored.total_files == 10
        assert restored.embedding_model == "test-model"
        assert len(restored.language_coverage) == 1
        assert restored.language_coverage[0].language == "python"

    def test_save_and_load(self, tmp_path: Path):
        """save() writes STATS_FILE and load() restores the stats."""
        saved = IndexStats(
            total_files=8,
            total_chunks=40,
            last_indexed_at=time.time(),
            language_coverage=[
                LanguageCoverage(language="python", files=8, chunks=40),
            ],
        )
        saved.save(tmp_path)
        stats_path = tmp_path / STATS_FILE
        assert stats_path.exists()

        restored = IndexStats.load(tmp_path)
        assert restored is not None
        assert restored.total_files == 8
        assert len(restored.language_coverage) == 1

    def test_load_returns_none_when_missing(self, tmp_path: Path):
        """load() on a directory without a stats file yields None."""
        assert IndexStats.load(tmp_path) is None

    def test_load_returns_none_on_corrupt_json(self, tmp_path: Path):
        """load() yields None when the stats file is not valid JSON."""
        corrupt_file = tmp_path / STATS_FILE
        corrupt_file.write_text("broken", encoding="utf-8")
        assert IndexStats.load(tmp_path) is None
429
+
430
+
431
+ # ═══════════════════════════════════════════════════════════════════════════
432
+ # 4 — QueryHistory
433
+ # ═══════════════════════════════════════════════════════════════════════════
434
+
435
+
436
class TestQueryRecord:
    """Checks for the QueryRecord dataclass."""

    def test_defaults(self):
        """Only the query is required; every other field has an empty default."""
        record = QueryRecord(query="test")
        expected_fields = {
            "query": "test",
            "timestamp": 0.0,
            "result_count": 0,
            "languages": [],
            "top_files": [],
        }
        for attr, expected in expected_fields.items():
            # The attribute name is the assertion message so a failure
            # identifies which field is wrong.
            assert getattr(record, attr) == expected, attr

    def test_to_dict_and_from_dict(self):
        """Query, result count and top score survive to_dict() -> from_dict()."""
        original = QueryRecord(
            query="hello",
            result_count=5,
            top_score=0.95,
            languages=["python"],
        )
        restored = QueryRecord.from_dict(original.to_dict())
        assert restored.query == "hello"
        assert restored.result_count == 5
        assert restored.top_score == 0.95
454
+
455
+
456
class TestQueryHistory:
    """Checks for QueryHistory: recording, rankings, eviction, persistence."""

    def test_record_and_size(self):
        """size starts at zero and grows by one per record()."""
        history = QueryHistory()
        assert history.size == 0
        for text, hits in (("query1", 3), ("query2", 5)):
            history.record(text, result_count=hits)
        assert history.size == 2

    def test_record_returns_query_record(self):
        """record() hands back the stored QueryRecord with a timestamp set."""
        history = QueryHistory()
        stored = history.record(
            "test", result_count=2, top_score=0.8, languages=["python"]
        )
        assert isinstance(stored, QueryRecord)
        assert stored.query == "test"
        assert stored.result_count == 2
        assert stored.timestamp > 0

    def test_recent(self):
        """recent(n) returns the last n records, oldest of them first."""
        history = QueryHistory()
        for i in range(20):
            history.record(f"q{i}")
        latest = history.recent(5)
        assert len(latest) == 5
        assert latest[-1].query == "q19"
        assert latest[0].query == "q15"

    def test_popular_queries(self):
        """popular_queries() ranks query strings by how often they were recorded."""
        history = QueryHistory()
        for text in ("foo", "bar", "foo", "foo", "bar"):
            history.record(text)
        ranking = history.popular_queries(2)
        assert ranking[0] == ("foo", 3)
        assert ranking[1] == ("bar", 2)

    def test_popular_files(self):
        """popular_files() ranks files by how often they appear in top_files."""
        history = QueryHistory()
        recorded = (
            ("q1", ["a.py", "b.py"]),
            ("q2", ["a.py", "c.py"]),
            ("q3", ["a.py"]),
        )
        for text, files in recorded:
            history.record(text, top_files=files)
        ranking = history.popular_files(2)
        assert ranking[0] == ("a.py", 3)

    def test_avg_result_count(self):
        """avg_result_count() is the mean of the recorded result counts."""
        history = QueryHistory()
        for text, hits in (("q1", 10), ("q2", 20)):
            history.record(text, result_count=hits)
        assert history.avg_result_count() == 15.0

    def test_avg_result_count_empty(self):
        """avg_result_count() is 0.0 when nothing has been recorded."""
        history = QueryHistory()
        assert history.avg_result_count() == 0.0

    def test_fifo_eviction(self):
        """Once max_entries is reached, the oldest record is dropped first."""
        history = QueryHistory(max_entries=3)
        for text in ("a", "b", "c", "d"):  # recording "d" evicts "a"
            history.record(text)
        assert history.size == 3
        kept = [entry.query for entry in history.records]
        assert "a" not in kept
        assert "d" in kept

    def test_clear(self):
        """clear() empties the history."""
        history = QueryHistory()
        for text in ("x", "y"):
            history.record(text)
        history.clear()
        assert history.size == 0

    def test_save_and_load(self, tmp_path: Path):
        """save() writes HISTORY_FILE and load() restores records in order."""
        history = QueryHistory()
        history.record("search1", result_count=3, top_score=0.9, languages=["python"])
        history.record("search2", result_count=5, top_files=["main.py"])
        history.save(tmp_path)

        history_path = tmp_path / HISTORY_FILE
        assert history_path.exists()

        restored = QueryHistory.load(tmp_path)
        assert restored.size == 2
        assert restored.records[0].query == "search1"
        assert restored.records[1].top_files == ["main.py"]

    def test_load_returns_empty_when_missing(self, tmp_path: Path):
        """load() on a directory without a history file yields an empty history."""
        restored = QueryHistory.load(tmp_path)
        assert restored.size == 0

    def test_load_handles_corrupt_json(self, tmp_path: Path):
        """load() yields an empty history when the file is not valid JSON."""
        corrupt_file = tmp_path / HISTORY_FILE
        corrupt_file.write_text("not json", encoding="utf-8")
        restored = QueryHistory.load(tmp_path)
        assert restored.size == 0
551
+
552
+
553
+ # ═══════════════════════════════════════════════════════════════════════════
554
+ # 5 — Indexing integration (manifest, registry, stats populated)
555
+ # ═══════════════════════════════════════════════════════════════════════════
556
+
557
+
558
class TestIndexingIntegration:
    """Verify that run_indexing populates manifest, registry, and stats.

    Every test patches ``scan_repository`` and ``generate_embeddings`` as
    looked up inside ``indexing_service``. The patched scan always reports
    the single file ``src/hello.py`` and the patched embedder returns one
    random 384-wide float32 row.
    """

    @pytest.fixture()
    def project(self, tmp_path: Path):
        """Create a minimal Python project for indexing.

        Returns:
            The project root (``tmp_path``), containing ``src/hello.py``,
            ``src/utils.py`` and a ``.codexa.yaml`` config file.
        """
        src = tmp_path / "src"
        src.mkdir()
        # hello.py must be syntactically valid Python: the method body has to
        # be indented deeper than its ``def`` line.
        (src / "hello.py").write_text(
            'def greet(name):\n'
            '    return f"Hello, {name}!"\n'
            '\n'
            'class Greeter:\n'
            '    def say_hi(self):\n'
            '        pass\n',
            encoding="utf-8",
        )
        (src / "utils.py").write_text(
            "def add(a, b):\n return a + b\n",
            encoding="utf-8",
        )
        # Config file
        (tmp_path / ".codexa.yaml").write_text(
            "index:\n ignore_dirs: []\n extensions: ['.py']\n",
            encoding="utf-8",
        )
        return tmp_path

    @staticmethod
    def _scanned_hello(project: Path):
        """Build the ``ScannedFile`` entry for ``src/hello.py`` used by the mocked scan."""
        from semantic_code_intelligence.indexing.scanner import ScannedFile

        return ScannedFile(
            path=project / "src" / "hello.py",
            relative_path="src/hello.py",
            extension=".py",
            size_bytes=80,
            content_hash="abc123",
        )

    def _index(self, mock_scan, mock_embed, project: Path):
        """Configure both mocks, run indexing and return ``(result, index_dir)``."""
        from semantic_code_intelligence.services.indexing_service import run_indexing

        mock_scan.return_value = [self._scanned_hello(project)]
        mock_embed.return_value = np.random.rand(1, 384).astype(np.float32)
        result = run_indexing(project)
        return result, project / ".codexa" / "index"

    @patch("semantic_code_intelligence.services.indexing_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.indexing_service.scan_repository")
    def test_indexing_populates_manifest(self, mock_scan, mock_embed, project, tmp_path):
        """After indexing, a manifest with counts and timestamps can be loaded."""
        _, index_dir = self._index(mock_scan, mock_embed, project)
        manifest = IndexManifest.load(index_dir)

        assert manifest is not None
        assert manifest.total_files >= 1
        assert manifest.total_chunks >= 1
        assert manifest.created_at > 0.0
        assert manifest.updated_at > 0.0

    @patch("semantic_code_intelligence.services.indexing_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.indexing_service.scan_repository")
    def test_indexing_populates_symbol_registry(self, mock_scan, mock_embed, project):
        """After indexing, the saved registry and the result both report symbols."""
        result, index_dir = self._index(mock_scan, mock_embed, project)
        reg = SymbolRegistry.load(index_dir)

        assert reg.size > 0
        assert result.symbols_extracted > 0

    @patch("semantic_code_intelligence.services.indexing_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.indexing_service.scan_repository")
    def test_indexing_populates_stats(self, mock_scan, mock_embed, project):
        """After indexing, loadable stats carry counts, timing and a model name."""
        _, index_dir = self._index(mock_scan, mock_embed, project)
        stats = IndexStats.load(index_dir)

        assert stats is not None
        assert stats.total_files >= 1
        assert stats.total_chunks >= 1
        assert stats.last_indexed_at > 0.0
        assert stats.indexing_duration_seconds >= 0.0
        assert stats.embedding_model != ""

    @patch("semantic_code_intelligence.services.indexing_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.indexing_service.scan_repository")
    def test_indexing_result_includes_symbols(self, mock_scan, mock_embed, project):
        """The indexing result exposes a symbol count and shows it in its repr."""
        result, _ = self._index(mock_scan, mock_embed, project)

        # ``>= 0`` would hold for any count; with the same inputs the registry
        # test above already expects at least one extracted symbol.
        assert result.symbols_extracted > 0
        assert "symbols=" in repr(result)
680
+
681
+
682
+ # ═══════════════════════════════════════════════════════════════════════════
683
+ # 6 — Search integration (query history recorded)
684
+ # ═══════════════════════════════════════════════════════════════════════════
685
+
686
+
687
class TestSearchIntegration:
    """Verify that search_codebase records query history.

    ``VectorStore.load`` and ``generate_embeddings`` are patched as looked up
    inside ``search_service``, so each test runs against a stubbed store and a
    random 384-wide float32 query embedding.
    """

    @staticmethod
    def _prepare_index_dir(root: Path) -> Path:
        """Write an empty ``.codexa.yaml`` and create ``.codexa/index``; return the index dir."""
        (root / ".codexa.yaml").write_text("", encoding="utf-8")
        index_dir = root / ".codexa" / "index"
        index_dir.mkdir(parents=True, exist_ok=True)
        return index_dir

    @staticmethod
    def _stub_store(hits):
        """Return a mock vector store of size 1 whose ``search`` yields *hits*."""
        store = MagicMock()
        store.search.return_value = hits
        store.size = 1
        return store

    @patch("semantic_code_intelligence.services.search_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.search_service.VectorStore.load")
    def test_search_records_query_history(self, mock_load, mock_embed, tmp_path):
        """A search with one hit leaves one history record for that query."""
        from semantic_code_intelligence.services.search_service import search_codebase

        # Set up mock vector store with a single matching chunk
        meta = MagicMock()
        meta.file_path = "src/main.py"
        meta.start_line = 1
        meta.end_line = 10
        meta.language = "python"
        meta.content = "def hello(): pass"
        meta.chunk_index = 0
        mock_load.return_value = self._stub_store([(meta, 0.95)])
        mock_embed.return_value = np.random.rand(1, 384).astype(np.float32)

        # Create required config and index dir
        index_dir = self._prepare_index_dir(tmp_path)

        # Only the recorded history is checked; the return value is not needed.
        search_codebase("hello world", tmp_path, top_k=5, threshold=0.1)

        # Verify history was recorded
        history = QueryHistory.load(index_dir)
        assert history.size == 1
        assert history.records[0].query == "hello world"
        assert history.records[0].result_count == 1

    @patch("semantic_code_intelligence.services.search_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.search_service.VectorStore.load")
    def test_search_records_empty_results(self, mock_load, mock_embed, tmp_path):
        """A search with no hits is still recorded, with zero count and score."""
        from semantic_code_intelligence.services.search_service import search_codebase

        mock_load.return_value = self._stub_store([])
        mock_embed.return_value = np.random.rand(1, 384).astype(np.float32)

        index_dir = self._prepare_index_dir(tmp_path)

        # Only the recorded history is checked; the return value is not needed.
        search_codebase("nonexistent query", tmp_path, top_k=5, threshold=0.1)

        history = QueryHistory.load(index_dir)
        assert history.size == 1
        assert history.records[0].result_count == 0
        assert history.records[0].top_score == 0.0
745
+
746
+
747
+ # ═══════════════════════════════════════════════════════════════════════════
748
+ # 7 — Module imports and version
749
+ # ═══════════════════════════════════════════════════════════════════════════
750
+
751
+
752
class TestModuleImports:
    """Smoke tests: the storage modules import and expose their public names."""

    def test_import_index_manifest(self):
        """``index_manifest`` provides ``IndexManifest`` and the manifest filename."""
        from semantic_code_intelligence.storage.index_manifest import (
            MANIFEST_FILE,
            IndexManifest,
        )

        assert IndexManifest is not None
        assert MANIFEST_FILE == "index_manifest.json"

    def test_import_symbol_registry(self):
        """``symbol_registry`` provides ``SymbolEntry`` and ``SymbolRegistry``."""
        from semantic_code_intelligence.storage.symbol_registry import (
            SymbolEntry,
            SymbolRegistry,
        )

        assert SymbolEntry is not None
        assert SymbolRegistry is not None

    def test_import_index_stats(self):
        """``index_stats`` provides ``IndexStats`` and ``LanguageCoverage``."""
        from semantic_code_intelligence.storage.index_stats import (
            IndexStats,
            LanguageCoverage,
        )

        assert IndexStats is not None
        assert LanguageCoverage is not None

    def test_import_query_history(self):
        """``query_history`` provides ``QueryHistory`` and ``QueryRecord``."""
        from semantic_code_intelligence.storage.query_history import (
            QueryHistory,
            QueryRecord,
        )

        assert QueryHistory is not None
        assert QueryRecord is not None

    def test_version(self):
        """The package reports version ``0.4.0``."""
        from semantic_code_intelligence import __version__

        assert __version__ == "0.4.0"

    def test_indexing_result_has_symbols_field(self):
        """A default ``IndexingResult`` has ``symbols_extracted`` equal to zero."""
        from semantic_code_intelligence.services.indexing_service import IndexingResult

        default_result = IndexingResult()
        assert hasattr(default_result, "symbols_extracted")
        assert default_result.symbols_extracted == 0