codexa-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,783 @@
|
|
|
1
|
+
"""Phase 23 — Persistent Intelligence Index.
|
|
2
|
+
|
|
3
|
+
Tests verify:
|
|
4
|
+
1. IndexManifest — serialisation, persistence, compatibility checking
|
|
5
|
+
2. SymbolRegistry — add/remove/find/search, persistence, summaries
|
|
6
|
+
3. IndexStats — coverage tracking, staleness, persistence
|
|
7
|
+
4. QueryHistory — record/recent/popular, FIFO eviction, persistence
|
|
8
|
+
5. Indexing integration — manifest/registry/stats populated after indexing
|
|
9
|
+
6. Search integration — query history recorded after search
|
|
10
|
+
7. Module imports and version
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import time
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from unittest.mock import MagicMock, patch
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pytest
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Imports under test
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
from semantic_code_intelligence.storage.index_manifest import (
|
|
28
|
+
MANIFEST_FILE,
|
|
29
|
+
SCHEMA_VERSION,
|
|
30
|
+
IndexManifest,
|
|
31
|
+
)
|
|
32
|
+
from semantic_code_intelligence.storage.index_stats import (
|
|
33
|
+
STATS_FILE,
|
|
34
|
+
IndexStats,
|
|
35
|
+
LanguageCoverage,
|
|
36
|
+
)
|
|
37
|
+
from semantic_code_intelligence.storage.query_history import (
|
|
38
|
+
HISTORY_FILE,
|
|
39
|
+
MAX_HISTORY,
|
|
40
|
+
QueryHistory,
|
|
41
|
+
QueryRecord,
|
|
42
|
+
)
|
|
43
|
+
from semantic_code_intelligence.storage.symbol_registry import (
|
|
44
|
+
REGISTRY_FILE,
|
|
45
|
+
SymbolEntry,
|
|
46
|
+
SymbolRegistry,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Directory two levels above the one containing this file (``parents[2]`` of
# the resolved path); for ``<root>/semantic_code_intelligence/tests/<file>``
# that is ``<root>``.
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
# The ``semantic_code_intelligence`` directory directly under that root.
_SRC = _PROJECT_ROOT.joinpath("semantic_code_intelligence")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
54
|
+
# 1 — IndexManifest
|
|
55
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class TestIndexManifest:
    """Exercise IndexManifest: defaults, dict round-trip, timestamps, disk I/O."""

    def test_defaults(self):
        """A manifest built with no arguments exposes the expected field values."""
        manifest = IndexManifest()
        expected_fields = {
            "schema_version": SCHEMA_VERSION,
            "embedding_model": "all-MiniLM-L6-v2",
            "embedding_dimension": 384,
            "created_at": 0.0,
            "updated_at": 0.0,
            "total_files": 0,
            "total_chunks": 0,
            "total_symbols": 0,
            "languages": [],
            "project_root": "",
        }
        for attr, value in expected_fields.items():
            assert getattr(manifest, attr) == value, attr

    def test_to_dict_and_from_dict(self):
        """Values survive a to_dict() / from_dict() round trip."""
        original = IndexManifest(
            total_files=10,
            total_chunks=50,
            total_symbols=30,
            languages=["python", "javascript"],
            project_root="/repo",
        )
        payload = original.to_dict()
        restored = IndexManifest.from_dict(payload)
        assert restored.total_files == 10
        assert restored.total_chunks == 50
        assert restored.languages == ["python", "javascript"]
        assert restored.project_root == "/repo"

    def test_from_dict_ignores_unknown_keys(self):
        """from_dict() accepts a payload containing a key it does not know."""
        payload = {"total_files": 3, "unknown_field": "ignored"}
        manifest = IndexManifest.from_dict(payload)
        assert manifest.total_files == 3

    def test_touch_sets_timestamps(self):
        """touch() fills both timestamps; a second call leaves created_at alone."""
        manifest = IndexManifest()
        assert manifest.created_at == 0.0

        manifest.touch()
        assert manifest.created_at > 0.0
        assert manifest.updated_at > 0.0

        created_after_first_touch = manifest.created_at
        time.sleep(0.01)  # give the clock a chance to advance before touching again
        manifest.touch()
        assert manifest.created_at == created_after_first_touch  # created_at unchanged
        assert manifest.updated_at > created_after_first_touch

    def test_is_compatible(self):
        """is_compatible() is True only when both model and dimension match."""
        manifest = IndexManifest(embedding_model="all-MiniLM-L6-v2", embedding_dimension=384)
        cases = (
            ("all-MiniLM-L6-v2", 384, True),
            ("other-model", 384, False),
            ("all-MiniLM-L6-v2", 768, False),
        )
        for model, dimension, expected in cases:
            assert manifest.is_compatible(model, dimension) is expected

    def test_save_and_load(self, tmp_path: Path):
        """save() writes MANIFEST_FILE and load() reads the same values back."""
        manifest = IndexManifest(total_files=5, total_chunks=20, project_root="/repo")
        manifest.touch()
        manifest.save(tmp_path)

        manifest_path = tmp_path / MANIFEST_FILE
        assert manifest_path.exists()

        reloaded = IndexManifest.load(tmp_path)
        assert reloaded is not None
        assert reloaded.total_files == 5
        assert reloaded.total_chunks == 20
        assert reloaded.project_root == "/repo"
        assert reloaded.created_at > 0.0

    def test_load_returns_none_when_missing(self, tmp_path: Path):
        """load() yields None for a directory with no manifest file."""
        result = IndexManifest.load(tmp_path)
        assert result is None

    def test_load_returns_none_on_corrupt_json(self, tmp_path: Path):
        """load() yields None when the manifest file is not valid JSON."""
        manifest_path = tmp_path / MANIFEST_FILE
        manifest_path.write_text("not json", encoding="utf-8")
        assert IndexManifest.load(tmp_path) is None

    def test_save_creates_directory(self, tmp_path: Path):
        """save() succeeds for a nested target directory that does not exist yet."""
        nested_dir = tmp_path / "a" / "b" / "c"
        IndexManifest().save(nested_dir)
        assert (nested_dir / MANIFEST_FILE).exists()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
140
|
+
# 2 — SymbolRegistry
|
|
141
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _make_entry(**kwargs) -> SymbolEntry:
    """Build a SymbolEntry from baseline field values, overridden by *kwargs*."""
    # Later keys win, so anything passed by the caller replaces the baseline.
    fields = {
        "name": "foo",
        "kind": "function",
        "file_path": "src/main.py",
        "start_line": 1,
        "end_line": 10,
        "language": "python",
        **kwargs,
    }
    return SymbolEntry(**fields)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class TestSymbolEntry:
    """Exercise SymbolEntry: qualified names and dict round-tripping."""

    def test_qualified_name_no_parent(self):
        """Without a parent, qualified_name is just the symbol name."""
        entry = _make_entry(name="bar", parent=None)
        assert entry.qualified_name == "bar"

    def test_qualified_name_with_parent(self):
        """With a parent, qualified_name is '<parent>.<name>'."""
        entry = _make_entry(name="method", parent="MyClass")
        assert entry.qualified_name == "MyClass.method"

    def test_to_dict_and_from_dict(self):
        """Name, parameters and decorators survive a to_dict()/from_dict() round trip."""
        original = _make_entry(name="hello", parameters=["a", "b"], decorators=["@staticmethod"])
        payload = original.to_dict()
        restored = SymbolEntry.from_dict(payload)
        assert restored.name == "hello"
        assert restored.parameters == ["a", "b"]
        assert restored.decorators == ["@staticmethod"]

    def test_from_dict_ignores_unknown_keys(self):
        """from_dict() accepts a payload containing a key it does not know."""
        payload = {
            "name": "x",
            "kind": "class",
            "file_path": "a.py",
            "start_line": 1,
            "end_line": 2,
            "extra": True,
        }
        entry = SymbolEntry.from_dict(payload)
        assert entry.name == "x"
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class TestSymbolRegistry:
    """Exercise SymbolRegistry: mutation, lookups, summaries and disk I/O."""

    def test_add_and_size(self):
        """size starts at zero and grows by one per add()."""
        registry = SymbolRegistry()
        assert registry.size == 0
        for symbol_name in ("a", "b"):
            registry.add(_make_entry(name=symbol_name))
        assert registry.size == 2

    def test_add_many(self):
        """add_many() registers every entry in the given list."""
        registry = SymbolRegistry()
        entries = [_make_entry(name=symbol_name) for symbol_name in ("x", "y", "z")]
        registry.add_many(entries)
        assert registry.size == 3

    def test_remove_file(self):
        """remove_file() drops that file's symbols and reports how many it removed."""
        registry = SymbolRegistry()
        for symbol_name, path in (("a", "f1.py"), ("b", "f1.py"), ("c", "f2.py")):
            registry.add(_make_entry(name=symbol_name, file_path=path))
        removed_count = registry.remove_file("f1.py")
        assert removed_count == 2
        assert registry.size == 1
        assert registry.find_by_file("f1.py") == []

    def test_remove_file_nonexistent(self):
        """remove_file() on an unknown path reports zero removals."""
        registry = SymbolRegistry()
        assert registry.remove_file("no.py") == 0

    def test_clear(self):
        """clear() empties the registry."""
        registry = SymbolRegistry()
        registry.add_many([_make_entry() for _ in range(2)])
        registry.clear()
        assert registry.size == 0

    def test_files(self):
        """files lists each file path that has a registered symbol."""
        registry = SymbolRegistry()
        for path in ("a.py", "b.py"):
            registry.add(_make_entry(file_path=path))
        assert sorted(registry.files) == ["a.py", "b.py"]

    def test_find_by_name(self):
        """find_by_name() returns every entry with that exact name, across files."""
        registry = SymbolRegistry()
        registry.add(_make_entry(name="foo"))
        registry.add(_make_entry(name="bar"))
        registry.add(_make_entry(name="foo", file_path="other.py"))
        for symbol_name, expected_hits in (("foo", 2), ("bar", 1), ("baz", 0)):
            assert len(registry.find_by_name(symbol_name)) == expected_hits

    def test_find_by_kind(self):
        """find_by_kind() returns every entry of the requested kind."""
        registry = SymbolRegistry()
        for symbol_kind in ("function", "class", "function"):
            registry.add(_make_entry(kind=symbol_kind))
        assert len(registry.find_by_kind("function")) == 2
        assert len(registry.find_by_kind("class")) == 1

    def test_find_by_file(self):
        """find_by_file() returns every entry recorded for the given path."""
        registry = SymbolRegistry()
        for path in ("a.py", "b.py", "a.py"):
            registry.add(_make_entry(file_path=path))
        assert len(registry.find_by_file("a.py")) == 2

    def test_find_multi_criteria(self):
        """find() combines its filters; with no filters it returns everything."""
        registry = SymbolRegistry()
        seeds = (
            ("f", "function", "python"),
            ("f", "method", "python"),
            ("g", "function", "javascript"),
        )
        for symbol_name, symbol_kind, lang in seeds:
            registry.add(_make_entry(name=symbol_name, kind=symbol_kind, language=lang))
        # name + kind
        assert len(registry.find(name="f", kind="function")) == 1
        # language only
        assert len(registry.find(language="python")) == 2
        # no criteria → all
        assert len(registry.find()) == 3

    def test_find_with_parent(self):
        """find(parent=...) filters on the entry's parent."""
        registry = SymbolRegistry()
        for symbol_name, owner in (("method1", "ClassA"), ("method2", "ClassB")):
            registry.add(_make_entry(name=symbol_name, parent=owner))
        assert len(registry.find(parent="ClassA")) == 1

    def test_search_name(self):
        """search_name() matches on a substring of the symbol name."""
        registry = SymbolRegistry()
        for symbol_name in ("calculate_total", "get_customer", "recalculate"):
            registry.add(_make_entry(name=symbol_name))
        matches = registry.search_name("calc")
        assert len(matches) == 2  # calculate_total and recalculate

    def test_search_name_case_insensitive(self):
        """search_name() finds 'MyClass' when queried with 'myclass'."""
        registry = SymbolRegistry()
        registry.add(_make_entry(name="MyClass"))
        matches = registry.search_name("myclass")
        assert len(matches) == 1

    def test_language_summary(self):
        """language_summary() counts entries per language."""
        registry = SymbolRegistry()
        for lang in ("python", "python", "javascript"):
            registry.add(_make_entry(language=lang))
        counts = registry.language_summary()
        assert counts["python"] == 2
        assert counts["javascript"] == 1

    def test_kind_summary(self):
        """kind_summary() counts entries per kind."""
        registry = SymbolRegistry()
        for symbol_kind in ("function", "class", "function"):
            registry.add(_make_entry(kind=symbol_kind))
        counts = registry.kind_summary()
        assert counts["function"] == 2
        assert counts["class"] == 1

    def test_save_and_load(self, tmp_path: Path):
        """save() writes REGISTRY_FILE and load() restores the saved entries."""
        registry = SymbolRegistry()
        registry.add(_make_entry(name="func1", kind="function", language="python"))
        registry.add(_make_entry(name="Cls1", kind="class", language="python"))
        registry.save(tmp_path)

        registry_path = tmp_path / REGISTRY_FILE
        assert registry_path.exists()

        reloaded = SymbolRegistry.load(tmp_path)
        assert reloaded.size == 2
        assert len(reloaded.find_by_name("func1")) == 1
        assert reloaded.find_by_name("Cls1")[0].kind == "class"

    def test_load_returns_empty_when_missing(self, tmp_path: Path):
        """load() yields an empty registry for a directory with no registry file."""
        registry = SymbolRegistry.load(tmp_path)
        assert registry.size == 0

    def test_load_handles_corrupt_json(self, tmp_path: Path):
        """load() yields an empty registry when the file is not valid JSON."""
        registry_path = tmp_path / REGISTRY_FILE
        registry_path.write_text("not json", encoding="utf-8")
        registry = SymbolRegistry.load(tmp_path)
        assert registry.size == 0
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
321
|
+
# 3 — IndexStats
|
|
322
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
class TestLanguageCoverage:
    """Exercise LanguageCoverage: defaults and dict round-tripping."""

    def test_defaults(self):
        """A coverage record built with no arguments has an empty name and zero counts."""
        coverage = LanguageCoverage()
        assert coverage.language == ""
        for counter in ("files", "chunks", "symbols", "total_lines"):
            assert getattr(coverage, counter) == 0, counter

    def test_to_dict_and_from_dict(self):
        """Values survive a to_dict() / from_dict() round trip."""
        original = LanguageCoverage(
            language="python",
            files=5,
            chunks=20,
            symbols=15,
            total_lines=300,
        )
        payload = original.to_dict()
        restored = LanguageCoverage.from_dict(payload)
        assert restored.language == "python"
        assert restored.files == 5
        assert restored.total_lines == 300
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
class TestIndexStats:
    """Exercise IndexStats: defaults, staleness, language coverage and disk I/O."""

    def test_defaults(self):
        """Stats built with no arguments have zero totals and no coverage records."""
        stats = IndexStats()
        for counter in ("total_files", "total_chunks", "total_symbols", "total_vectors"):
            assert getattr(stats, counter) == 0, counter
        assert stats.language_coverage == []

    def test_staleness_seconds_zero_when_not_indexed(self):
        """staleness_seconds is 0.0 when no last_indexed_at was supplied."""
        stats = IndexStats()
        assert stats.staleness_seconds == 0.0

    def test_staleness_seconds_positive(self):
        """staleness_seconds reflects the time elapsed since last_indexed_at."""
        indexed_at = time.time() - 100
        stats = IndexStats(last_indexed_at=indexed_at)
        assert stats.staleness_seconds >= 99.0

    def test_languages_property(self):
        """languages lists the coverage records' language names in order."""
        coverage = [LanguageCoverage(language=lang) for lang in ("python", "javascript")]
        stats = IndexStats(language_coverage=coverage)
        assert stats.languages == ["python", "javascript"]

    def test_get_language(self):
        """get_language() returns the matching record, or None if there is none."""
        coverage = [LanguageCoverage(language="python", files=3)]
        stats = IndexStats(language_coverage=coverage)
        assert stats.get_language("python") is not None
        assert stats.get_language("python").files == 3
        assert stats.get_language("rust") is None

    def test_set_language_add_new(self):
        """set_language() appends a record for a language not yet present."""
        stats = IndexStats()
        stats.set_language(LanguageCoverage(language="go", files=2))
        assert len(stats.language_coverage) == 1
        assert stats.get_language("go").files == 2

    def test_set_language_replace_existing(self):
        """set_language() replaces the record for a language already present."""
        stats = IndexStats(language_coverage=[LanguageCoverage(language="go", files=1)])
        stats.set_language(LanguageCoverage(language="go", files=5))
        assert len(stats.language_coverage) == 1
        assert stats.get_language("go").files == 5

    def test_to_dict_and_from_dict(self):
        """Totals, model name and coverage survive a to_dict()/from_dict() round trip."""
        coverage = [LanguageCoverage(language="python", files=7, chunks=35)]
        original = IndexStats(
            total_files=10,
            total_chunks=50,
            total_symbols=30,
            embedding_model="test-model",
            language_coverage=coverage,
        )
        payload = original.to_dict()
        restored = IndexStats.from_dict(payload)
        assert restored.total_files == 10
        assert restored.embedding_model == "test-model"
        assert len(restored.language_coverage) == 1
        assert restored.language_coverage[0].language == "python"

    def test_save_and_load(self, tmp_path: Path):
        """save() writes STATS_FILE and load() reads the same values back."""
        stats = IndexStats(
            total_files=8,
            total_chunks=40,
            last_indexed_at=time.time(),
            language_coverage=[LanguageCoverage(language="python", files=8, chunks=40)],
        )
        stats.save(tmp_path)
        stats_path = tmp_path / STATS_FILE
        assert stats_path.exists()

        reloaded = IndexStats.load(tmp_path)
        assert reloaded is not None
        assert reloaded.total_files == 8
        assert len(reloaded.language_coverage) == 1

    def test_load_returns_none_when_missing(self, tmp_path: Path):
        """load() yields None for a directory with no stats file."""
        result = IndexStats.load(tmp_path)
        assert result is None

    def test_load_returns_none_on_corrupt_json(self, tmp_path: Path):
        """load() yields None when the stats file is not valid JSON."""
        stats_path = tmp_path / STATS_FILE
        stats_path.write_text("broken", encoding="utf-8")
        assert IndexStats.load(tmp_path) is None
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
432
|
+
# 4 — QueryHistory
|
|
433
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
class TestQueryRecord:
    """Exercise QueryRecord: defaults and dict round-tripping."""

    def test_defaults(self):
        """Only the query text is set; every other field keeps its default."""
        record = QueryRecord(query="test")
        expected_fields = {
            "query": "test",
            "timestamp": 0.0,
            "result_count": 0,
            "languages": [],
            "top_files": [],
        }
        for attr, value in expected_fields.items():
            assert getattr(record, attr) == value, attr

    def test_to_dict_and_from_dict(self):
        """Query, result count and top score survive a to_dict()/from_dict() round trip."""
        original = QueryRecord(
            query="hello",
            result_count=5,
            top_score=0.95,
            languages=["python"],
        )
        payload = original.to_dict()
        restored = QueryRecord.from_dict(payload)
        assert restored.query == "hello"
        assert restored.result_count == 5
        assert restored.top_score == 0.95
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
class TestQueryHistory:
|
|
457
|
+
"""Tests for QueryHistory."""
|
|
458
|
+
|
|
459
|
+
def test_record_and_size(self):
|
|
460
|
+
h = QueryHistory()
|
|
461
|
+
assert h.size == 0
|
|
462
|
+
h.record("query1", result_count=3)
|
|
463
|
+
h.record("query2", result_count=5)
|
|
464
|
+
assert h.size == 2
|
|
465
|
+
|
|
466
|
+
def test_record_returns_query_record(self):
|
|
467
|
+
h = QueryHistory()
|
|
468
|
+
r = h.record("test", result_count=2, top_score=0.8, languages=["python"])
|
|
469
|
+
assert isinstance(r, QueryRecord)
|
|
470
|
+
assert r.query == "test"
|
|
471
|
+
assert r.result_count == 2
|
|
472
|
+
assert r.timestamp > 0
|
|
473
|
+
|
|
474
|
+
def test_recent(self):
|
|
475
|
+
h = QueryHistory()
|
|
476
|
+
for i in range(20):
|
|
477
|
+
h.record(f"q{i}")
|
|
478
|
+
recent = h.recent(5)
|
|
479
|
+
assert len(recent) == 5
|
|
480
|
+
assert recent[-1].query == "q19"
|
|
481
|
+
assert recent[0].query == "q15"
|
|
482
|
+
|
|
483
|
+
def test_popular_queries(self):
|
|
484
|
+
h = QueryHistory()
|
|
485
|
+
h.record("foo")
|
|
486
|
+
h.record("bar")
|
|
487
|
+
h.record("foo")
|
|
488
|
+
h.record("foo")
|
|
489
|
+
h.record("bar")
|
|
490
|
+
popular = h.popular_queries(2)
|
|
491
|
+
assert popular[0] == ("foo", 3)
|
|
492
|
+
assert popular[1] == ("bar", 2)
|
|
493
|
+
|
|
494
|
+
def test_popular_files(self):
|
|
495
|
+
h = QueryHistory()
|
|
496
|
+
h.record("q1", top_files=["a.py", "b.py"])
|
|
497
|
+
h.record("q2", top_files=["a.py", "c.py"])
|
|
498
|
+
h.record("q3", top_files=["a.py"])
|
|
499
|
+
popular = h.popular_files(2)
|
|
500
|
+
assert popular[0] == ("a.py", 3)
|
|
501
|
+
|
|
502
|
+
def test_avg_result_count(self):
|
|
503
|
+
h = QueryHistory()
|
|
504
|
+
h.record("q1", result_count=10)
|
|
505
|
+
h.record("q2", result_count=20)
|
|
506
|
+
assert h.avg_result_count() == 15.0
|
|
507
|
+
|
|
508
|
+
def test_avg_result_count_empty(self):
|
|
509
|
+
h = QueryHistory()
|
|
510
|
+
assert h.avg_result_count() == 0.0
|
|
511
|
+
|
|
512
|
+
def test_fifo_eviction(self):
    """Recording past ``max_entries`` drops the oldest record."""
    history = QueryHistory(max_entries=3)
    for text in ("a", "b", "c", "d"):  # the fourth record, "d", evicts "a"
        history.record(text)

    assert history.size == 3

    kept = [entry.query for entry in history.records]
    assert "a" not in kept
    assert "d" in kept
|
|
522
|
+
|
|
523
|
+
def test_clear(self):
    """``clear`` brings the history size back to zero."""
    history = QueryHistory()
    for text in ("x", "y"):
        history.record(text)

    history.clear()

    assert history.size == 0
|
|
529
|
+
|
|
530
|
+
def test_save_and_load(self, tmp_path: Path):
    """Saved history is written to ``HISTORY_FILE`` and loads back intact."""
    original = QueryHistory()
    original.record(
        "search1",
        result_count=3,
        top_score=0.9,
        languages=["python"],
    )
    original.record("search2", result_count=5, top_files=["main.py"])
    original.save(tmp_path)

    history_path = tmp_path / HISTORY_FILE
    assert history_path.exists()

    restored = QueryHistory.load(tmp_path)
    assert restored.size == 2
    first, second = restored.records[0], restored.records[1]
    assert first.query == "search1"
    assert second.top_files == ["main.py"]
|
|
542
|
+
|
|
543
|
+
def test_load_returns_empty_when_missing(self, tmp_path: Path):
    """Loading from a directory with no history file gives an empty history."""
    loaded = QueryHistory.load(tmp_path)
    assert loaded.size == 0
|
|
546
|
+
|
|
547
|
+
def test_load_handles_corrupt_json(self, tmp_path: Path):
    """A history file that is not valid JSON loads as an empty history."""
    corrupt_file = tmp_path / HISTORY_FILE
    corrupt_file.write_text("not json", encoding="utf-8")

    loaded = QueryHistory.load(tmp_path)

    assert loaded.size == 0
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
554
|
+
# 5 — Indexing integration (manifest, registry, stats populated)
|
|
555
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
class TestIndexingIntegration:
    """Verify that run_indexing populates manifest, registry, and stats.

    Every test patches ``scan_repository`` and ``generate_embeddings`` as
    they are looked up in ``indexing_service``; the scan mock reports
    ``src/hello.py`` as the only file and the embedding mock returns one
    random 384-dimensional float32 vector.
    """

    @pytest.fixture()
    def project(self, tmp_path: Path):
        """Create a minimal Python project for indexing.

        Writes ``src/hello.py``, ``src/utils.py`` and a ``.codexa.yaml``
        config under ``tmp_path`` and returns ``tmp_path`` as the project
        root.
        """
        src = tmp_path / "src"
        src.mkdir()
        # The literal must itself be valid Python: ``pass`` has to be
        # indented deeper than ``def say_hi``.
        (src / "hello.py").write_text(
            "def greet(name):\n"
            '    return f"Hello, {name}!"\n'
            "\n"
            "class Greeter:\n"
            "    def say_hi(self):\n"
            "        pass\n",
            encoding="utf-8",
        )
        (src / "utils.py").write_text(
            "def add(a, b):\n return a + b\n",
            encoding="utf-8",
        )
        # Config file
        (tmp_path / ".codexa.yaml").write_text(
            "index:\n ignore_dirs: []\n extensions: ['.py']\n",
            encoding="utf-8",
        )
        return tmp_path

    @staticmethod
    def _stub_scan_and_embeddings(mock_scan, mock_embed, project):
        """Configure the patched scanner and embedder shared by all tests.

        Args:
            mock_scan: Mock standing in for ``scan_repository``.
            mock_embed: Mock standing in for ``generate_embeddings``.
            project: Project root returned by the ``project`` fixture.
        """
        from semantic_code_intelligence.indexing.scanner import ScannedFile

        mock_scan.return_value = [
            ScannedFile(
                path=project / "src" / "hello.py",
                relative_path="src/hello.py",
                extension=".py",
                size_bytes=80,
                content_hash="abc123",
            ),
        ]
        mock_embed.return_value = np.random.rand(1, 384).astype(np.float32)

    # NOTE: stacked ``@patch`` decorators inject mocks bottom-up, so the
    # ``scan_repository`` mock is the first extra argument in every test.

    @patch("semantic_code_intelligence.services.indexing_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.indexing_service.scan_repository")
    def test_indexing_populates_manifest(self, mock_scan, mock_embed, project):
        """Indexing writes a manifest with file/chunk counts and timestamps."""
        from semantic_code_intelligence.services.indexing_service import run_indexing

        self._stub_scan_and_embeddings(mock_scan, mock_embed, project)

        run_indexing(project)
        index_dir = project / ".codexa" / "index"
        manifest = IndexManifest.load(index_dir)

        assert manifest is not None
        assert manifest.total_files >= 1
        assert manifest.total_chunks >= 1
        assert manifest.created_at > 0.0
        assert manifest.updated_at > 0.0

    @patch("semantic_code_intelligence.services.indexing_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.indexing_service.scan_repository")
    def test_indexing_populates_symbol_registry(self, mock_scan, mock_embed, project):
        """Indexing fills the symbol registry and reports extracted symbols."""
        from semantic_code_intelligence.services.indexing_service import run_indexing

        self._stub_scan_and_embeddings(mock_scan, mock_embed, project)

        result = run_indexing(project)
        index_dir = project / ".codexa" / "index"
        reg = SymbolRegistry.load(index_dir)

        assert reg.size > 0
        assert result.symbols_extracted > 0

    @patch("semantic_code_intelligence.services.indexing_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.indexing_service.scan_repository")
    def test_indexing_populates_stats(self, mock_scan, mock_embed, project):
        """Indexing writes stats with counts, timing and the embedding model."""
        from semantic_code_intelligence.services.indexing_service import run_indexing

        self._stub_scan_and_embeddings(mock_scan, mock_embed, project)

        run_indexing(project)
        index_dir = project / ".codexa" / "index"
        stats = IndexStats.load(index_dir)

        assert stats is not None
        assert stats.total_files >= 1
        assert stats.total_chunks >= 1
        assert stats.last_indexed_at > 0.0
        assert stats.indexing_duration_seconds >= 0.0
        assert stats.embedding_model != ""

    @patch("semantic_code_intelligence.services.indexing_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.indexing_service.scan_repository")
    def test_indexing_result_includes_symbols(self, mock_scan, mock_embed, project):
        """The indexing result exposes a symbol count, also shown in its repr."""
        from semantic_code_intelligence.services.indexing_service import run_indexing

        self._stub_scan_and_embeddings(mock_scan, mock_embed, project)

        result = run_indexing(project)
        # ``>= 0`` could never fail; the registry test above already expects
        # a positive count for this exact setup, so require the same here.
        assert result.symbols_extracted > 0
        assert "symbols=" in repr(result)
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
683
|
+
# 6 — Search integration (query history recorded)
|
|
684
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
class TestSearchIntegration:
    """Verify that search_codebase records query history.

    ``VectorStore.load`` and ``generate_embeddings`` are patched as they
    are looked up in ``search_service``, so each test controls exactly
    what the vector store returns.
    """

    @staticmethod
    def _make_store(hits):
        """Return a mock vector store of size 1 whose ``search`` yields *hits*."""
        store = MagicMock()
        store.search.return_value = hits
        store.size = 1
        return store

    @staticmethod
    def _prepare_project(root):
        """Create an empty config file and the index directory under *root*.

        Returns:
            The index directory, where the tests later load the history from.
        """
        (root / ".codexa.yaml").write_text("", encoding="utf-8")
        index_dir = root / ".codexa" / "index"
        index_dir.mkdir(parents=True, exist_ok=True)
        return index_dir

    # NOTE: stacked ``@patch`` decorators inject mocks bottom-up, so the
    # ``VectorStore.load`` mock is the first extra argument in each test.

    @patch("semantic_code_intelligence.services.search_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.search_service.VectorStore.load")
    def test_search_records_query_history(self, mock_load, mock_embed, tmp_path):
        """A search with one hit is recorded with its query and result count."""
        from semantic_code_intelligence.services.search_service import search_codebase

        # Set up mock vector store
        meta = MagicMock()
        meta.file_path = "src/main.py"
        meta.start_line = 1
        meta.end_line = 10
        meta.language = "python"
        meta.content = "def hello(): pass"
        meta.chunk_index = 0
        mock_load.return_value = self._make_store([(meta, 0.95)])

        mock_embed.return_value = np.random.rand(1, 384).astype(np.float32)

        # Create required config and index dir
        index_dir = self._prepare_project(tmp_path)

        # Only the recorded history is checked, so the return value is unused.
        search_codebase("hello world", tmp_path, top_k=5, threshold=0.1)

        # Verify history was recorded
        history = QueryHistory.load(index_dir)
        assert history.size == 1
        assert history.records[0].query == "hello world"
        assert history.records[0].result_count == 1

    @patch("semantic_code_intelligence.services.search_service.generate_embeddings")
    @patch("semantic_code_intelligence.services.search_service.VectorStore.load")
    def test_search_records_empty_results(self, mock_load, mock_embed, tmp_path):
        """A search with no hits is still recorded, with zero count and score."""
        from semantic_code_intelligence.services.search_service import search_codebase

        mock_load.return_value = self._make_store([])

        mock_embed.return_value = np.random.rand(1, 384).astype(np.float32)

        index_dir = self._prepare_project(tmp_path)

        # Only the recorded history is checked, so the return value is unused.
        search_codebase("nonexistent query", tmp_path, top_k=5, threshold=0.1)

        history = QueryHistory.load(index_dir)
        assert history.size == 1
        assert history.records[0].result_count == 0
        assert history.records[0].top_score == 0.0
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
748
|
+
# 7 — Module imports and version
|
|
749
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
class TestModuleImports:
    """Verify modules import cleanly."""

    def test_import_index_manifest(self):
        """The manifest module provides ``IndexManifest`` and its file name."""
        from semantic_code_intelligence.storage.index_manifest import (
            MANIFEST_FILE,
            IndexManifest,
        )

        assert IndexManifest is not None
        assert MANIFEST_FILE == "index_manifest.json"

    def test_import_symbol_registry(self):
        """The registry module provides ``SymbolEntry`` and ``SymbolRegistry``."""
        from semantic_code_intelligence.storage.symbol_registry import (
            SymbolEntry,
            SymbolRegistry,
        )

        assert SymbolEntry is not None
        assert SymbolRegistry is not None

    def test_import_index_stats(self):
        """The stats module provides ``IndexStats`` and ``LanguageCoverage``."""
        from semantic_code_intelligence.storage.index_stats import (
            IndexStats,
            LanguageCoverage,
        )

        assert IndexStats is not None
        assert LanguageCoverage is not None

    def test_import_query_history(self):
        """The history module provides ``QueryHistory`` and ``QueryRecord``."""
        from semantic_code_intelligence.storage.query_history import (
            QueryHistory,
            QueryRecord,
        )

        assert QueryHistory is not None
        assert QueryRecord is not None

    def test_version(self):
        """The package reports version 0.4.0."""
        from semantic_code_intelligence import __version__

        assert __version__ == "0.4.0"

    def test_indexing_result_has_symbols_field(self):
        """A default ``IndexingResult`` has ``symbols_extracted`` set to 0."""
        from semantic_code_intelligence.services.indexing_service import (
            IndexingResult,
        )

        default_result = IndexingResult()
        assert hasattr(default_result, "symbols_extracted")
        assert default_result.symbols_extracted == 0
|