codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""Tests for the AST-aware semantic chunker."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from semantic_code_intelligence.indexing.semantic_chunker import (
|
|
10
|
+
SemanticChunk,
|
|
11
|
+
_build_semantic_label,
|
|
12
|
+
_extract_uncovered_blocks,
|
|
13
|
+
_symbols_to_chunks,
|
|
14
|
+
semantic_chunk_code,
|
|
15
|
+
semantic_chunk_file,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
SAMPLE_PYTHON = """\
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
|
|
23
|
+
def hello(name):
|
|
24
|
+
\"\"\"Say hello.\"\"\"
|
|
25
|
+
return f"Hello, {name}!"
|
|
26
|
+
|
|
27
|
+
class Greeter:
|
|
28
|
+
def __init__(self, prefix):
|
|
29
|
+
self.prefix = prefix
|
|
30
|
+
|
|
31
|
+
def greet(self, name):
|
|
32
|
+
return f"{self.prefix} {name}"
|
|
33
|
+
|
|
34
|
+
x = 42
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
SAMPLE_JS = """\
|
|
38
|
+
const fs = require('fs');
|
|
39
|
+
|
|
40
|
+
function add(a, b) {
|
|
41
|
+
return a + b;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
class Calculator {
|
|
45
|
+
constructor() {
|
|
46
|
+
this.result = 0;
|
|
47
|
+
}
|
|
48
|
+
add(value) {
|
|
49
|
+
this.result += value;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# SemanticChunk dataclass
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
class TestSemanticChunk:
    """Construction, dict export and default values of ``SemanticChunk``."""

    def test_creation(self):
        """Symbol fields given to the constructor are readable as attributes."""
        fields = dict(
            file_path="test.py",
            content="def foo(): pass",
            start_line=1,
            end_line=1,
            chunk_index=0,
            language="python",
            symbol_name="foo",
            symbol_kind="function",
        )
        chunk = SemanticChunk(**fields)
        assert chunk.symbol_name == "foo"
        assert chunk.symbol_kind == "function"

    def test_to_dict(self):
        """``to_dict`` exposes the symbol name, symbol kind and parameter list."""
        exported = SemanticChunk(
            file_path="test.py",
            content="class Bar: pass",
            start_line=1,
            end_line=1,
            chunk_index=0,
            language="python",
            symbol_name="Bar",
            symbol_kind="class",
            parameters=["x", "y"],
        ).to_dict()
        assert exported["symbol_name"] == "Bar"
        assert exported["symbol_kind"] == "class"
        assert exported["parameters"] == ["x", "y"]

    def test_defaults(self):
        """Optional symbol fields are empty when they are not supplied."""
        chunk = SemanticChunk(
            file_path="t.py",
            content="x=1",
            start_line=1,
            end_line=1,
            chunk_index=0,
            language="python",
        )
        assert chunk.symbol_name == ""
        assert chunk.parent_symbol == ""
        assert chunk.parameters == []
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
# Semantic label builder
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
class TestBuildSemanticLabel:
    """Checks on the text produced by ``_build_semantic_label``."""

    def test_function(self):
        """The label names the language, kind, symbol and parameter list."""
        chunk = SemanticChunk(
            file_path="t.py",
            content="",
            start_line=1,
            end_line=1,
            chunk_index=0,
            language="python",
            symbol_name="foo",
            symbol_kind="function",
            parameters=["x"],
        )
        text = _build_semantic_label(chunk)
        for fragment in ("[python]", "function", "foo", "(x)"):
            assert fragment in text

    def test_method_with_parent(self):
        """A method label contains the dotted ``Parent.method`` name."""
        chunk = SemanticChunk(
            file_path="t.py",
            content="",
            start_line=1,
            end_line=1,
            chunk_index=0,
            language="python",
            symbol_name="greet",
            symbol_kind="method",
            parent_symbol="Greeter",
        )
        assert "Greeter.greet" in _build_semantic_label(chunk)

    def test_empty_kind(self):
        """A chunk without symbol information still carries the language tag."""
        chunk = SemanticChunk(
            file_path="t.py",
            content="x=1",
            start_line=1,
            end_line=1,
            chunk_index=0,
            language="python",
        )
        assert "[python]" in _build_semantic_label(chunk)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# Uncovered blocks extraction
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
class TestExtractUncoveredBlocks:
    """Checks on ``_extract_uncovered_blocks`` for various coverage sets."""

    def test_all_covered(self):
        """No blocks are reported when every line number is covered."""
        source_lines = ["a\n", "b\n", "c\n"]
        assert _extract_uncovered_blocks(source_lines, {1, 2, 3}) == []

    def test_none_covered(self):
        """With nothing covered, one block spans lines 1 through 2."""
        found = _extract_uncovered_blocks(["a\n", "b\n"], set())
        assert len(found) == 1
        only = found[0]
        assert only[0] == 1  # start_line
        assert only[1] == 2  # end_line

    def test_gap_in_middle(self):
        """Uncovered lines 3-4 between covered lines form a single block."""
        source_lines = ["a\n", "b\n", "c\n", "d\n", "e\n"]
        found = _extract_uncovered_blocks(source_lines, {1, 2, 5})
        assert len(found) == 1
        only = found[0]
        assert only[0] == 3
        assert only[1] == 4

    def test_multiple_gaps(self):
        """Covering lines 2 and 5 of seven leaves three separate blocks."""
        source_lines = []
        for number in range(1, 8):
            source_lines.append(f"line{number}\n")
        found = _extract_uncovered_blocks(source_lines, {2, 5})
        # Expected gaps: line 1, lines 3-4, lines 6-7.
        assert len(found) == 3
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# ---------------------------------------------------------------------------
|
|
170
|
+
# semantic_chunk_code
|
|
171
|
+
# ---------------------------------------------------------------------------
|
|
172
|
+
|
|
173
|
+
class TestSemanticChunkCode:
    """Checks on ``semantic_chunk_code`` for Python, JavaScript and unknown input."""

    def test_empty_content(self):
        """An empty source string produces an empty result."""
        assert semantic_chunk_code("", "test.py") == []

    def test_whitespace_only(self):
        """A whitespace-only source string produces an empty result."""
        assert semantic_chunk_code(" \n ", "test.py") == []

    def test_python_produces_chunks(self):
        """The Python sample yields a non-empty list of ``SemanticChunk``."""
        produced = semantic_chunk_code(SAMPLE_PYTHON, "test.py")
        assert len(produced) > 0
        for item in produced:
            assert isinstance(item, SemanticChunk)

    def test_python_has_function_chunks(self):
        """A ``function`` chunk named ``hello`` is among the results."""
        produced = semantic_chunk_code(SAMPLE_PYTHON, "test.py")
        function_names = [
            item.symbol_name
            for item in produced
            if item.symbol_kind == "function"
        ]
        assert len(function_names) >= 1
        assert "hello" in function_names

    def test_python_has_class_chunks(self):
        """A ``class`` chunk named ``Greeter`` is among the results."""
        produced = semantic_chunk_code(SAMPLE_PYTHON, "test.py")
        class_names = [
            item.symbol_name
            for item in produced
            if item.symbol_kind == "class"
        ]
        assert len(class_names) >= 1
        assert "Greeter" in class_names

    def test_javascript_produces_chunks(self):
        """The JavaScript sample yields at least one chunk."""
        assert len(semantic_chunk_code(SAMPLE_JS, "test.js")) > 0

    def test_unsupported_language_fallback(self):
        """An unrecognised extension yields only chunks of kind ``block``."""
        produced = semantic_chunk_code(
            "some random code\nanother line\n", "test.xyz"
        )
        assert len(produced) >= 1
        # Every fallback chunk is expected to be of the "block" kind.
        for item in produced:
            assert item.symbol_kind == "block"

    def test_chunk_indices_sequential(self):
        """``chunk_index`` values count up from zero in result order."""
        produced = semantic_chunk_code(SAMPLE_PYTHON, "test.py")
        expected = list(range(len(produced)))
        assert [item.chunk_index for item in produced] == expected

    def test_chunks_have_content(self):
        """No produced chunk has blank content."""
        for item in semantic_chunk_code(SAMPLE_PYTHON, "test.py"):
            assert item.content.strip() != ""

    def test_large_function_gets_split(self):
        """A 100-statement function with ``chunk_size=200`` spans several chunks."""
        statements = [f" x{i} = {i}" for i in range(100)]
        source = "def big_func():\n" + "\n".join(statements) + "\n"
        produced = semantic_chunk_code(source, "big.py", chunk_size=200)
        pieces = [item for item in produced if item.symbol_name == "big_func"]
        assert len(pieces) > 1

    def test_semantic_labels_populated(self):
        """At least one chunk carries a non-empty ``semantic_label``."""
        produced = semantic_chunk_code(SAMPLE_PYTHON, "test.py")
        with_label = [item for item in produced if item.semantic_label]
        assert len(with_label) > 0
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# ---------------------------------------------------------------------------
|
|
236
|
+
# semantic_chunk_file
|
|
237
|
+
# ---------------------------------------------------------------------------
|
|
238
|
+
|
|
239
|
+
class TestSemanticChunkFile:
    """Checks on ``semantic_chunk_file`` using files under pytest's ``tmp_path``."""

    def test_existing_file(self, tmp_path):
        """A file holding the Python sample yields at least one chunk."""
        target = tmp_path / "sample.py"
        target.write_text(SAMPLE_PYTHON, encoding="utf-8")
        assert len(semantic_chunk_file(target)) > 0

    def test_nonexistent_file(self, tmp_path):
        """A path that was never created yields an empty list."""
        missing = tmp_path / "nope.py"
        assert semantic_chunk_file(missing) == []

    def test_empty_file(self, tmp_path):
        """A zero-length file yields an empty list."""
        target = tmp_path / "empty.py"
        target.write_text("", encoding="utf-8")
        assert semantic_chunk_file(target) == []
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Tests for the AI tool interaction layer."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from semantic_code_intelligence.tools import (
|
|
10
|
+
TOOL_DEFINITIONS,
|
|
11
|
+
ToolRegistry,
|
|
12
|
+
ToolResult,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
SAMPLE_PYTHON = """\
|
|
17
|
+
import os
|
|
18
|
+
|
|
19
|
+
def greet(name):
|
|
20
|
+
return f"Hello, {name}!"
|
|
21
|
+
|
|
22
|
+
class Service:
|
|
23
|
+
def __init__(self, url):
|
|
24
|
+
self.url = url
|
|
25
|
+
|
|
26
|
+
def call(self):
|
|
27
|
+
return os.getenv("API_KEY")
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# ToolResult
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
class TestToolResult:
    """Checks on the dict and JSON output of ``ToolResult``."""

    def test_success(self):
        """A successful result exports its flag and data payload."""
        exported = ToolResult(
            tool_name="test", success=True, data={"x": 1}
        ).to_dict()
        assert exported["success"] is True
        assert exported["data"]["x"] == 1

    def test_failure(self):
        """A failed result exports its flag and error message."""
        exported = ToolResult(
            tool_name="test", success=False, error="boom"
        ).to_dict()
        assert exported["success"] is False
        assert exported["error"] == "boom"

    def test_to_json(self):
        """The JSON text contains the tool name and a lowercase ``true``."""
        text = ToolResult(tool_name="test", success=True, data={}).to_json()
        for fragment in ('"tool": "test"', '"success": true'):
            assert fragment in text
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Tool Definitions
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
class TestToolDefinitions:
    """Checks on the module-level ``TOOL_DEFINITIONS`` collection."""

    def test_definitions_exist(self):
        """At least one tool definition is present."""
        assert len(TOOL_DEFINITIONS) > 0

    def test_all_have_names(self):
        """Every definition has both a ``name`` and a ``description`` key."""
        for definition in TOOL_DEFINITIONS:
            for required_key in ("name", "description"):
                assert required_key in definition

    def test_known_tools(self):
        """Four named tools are among the definitions."""
        available = {definition["name"] for definition in TOOL_DEFINITIONS}
        for tool_name in (
            "semantic_search",
            "explain_symbol",
            "summarize_repo",
            "get_context",
        ):
            assert tool_name in available
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
# ToolRegistry
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
class TestToolRegistry:
    """Checks on ``ToolRegistry`` rooted at a pytest temporary directory."""

    @staticmethod
    def _write_sample(root):
        """Write ``SAMPLE_PYTHON`` to ``service.py`` under *root*; return the path."""
        sample = root / "service.py"
        sample.write_text(SAMPLE_PYTHON, encoding="utf-8")
        return sample

    def test_init(self, tmp_path):
        """A new registry exposes ``TOOL_DEFINITIONS`` as its definitions."""
        assert ToolRegistry(tmp_path).tool_definitions == TOOL_DEFINITIONS

    def test_unknown_tool(self, tmp_path):
        """Invoking an unregistered name fails with an "Unknown tool" error."""
        outcome = ToolRegistry(tmp_path).invoke("nonexistent_tool")
        assert outcome.success is False
        assert "Unknown tool" in outcome.error

    def test_explain_symbol_not_found(self, tmp_path):
        """Explaining a symbol that was never indexed fails."""
        outcome = ToolRegistry(tmp_path).invoke(
            "explain_symbol", symbol_name="NoSuchSymbol"
        )
        assert outcome.success is False

    def test_index_and_explain_symbol(self, tmp_path):
        """After indexing the sample, ``greet`` can be explained."""
        sample = self._write_sample(tmp_path)

        tools = ToolRegistry(tmp_path)
        tools.index_file(str(sample))

        outcome = tools.invoke("explain_symbol", symbol_name="greet")
        assert outcome.success is True
        payload = outcome.data
        assert payload["symbol_name"] == "greet"
        assert len(payload["explanations"]) >= 1

    def test_explain_file(self, tmp_path):
        """Explaining the sample file by path reports at least one symbol."""
        sample = self._write_sample(tmp_path)

        outcome = ToolRegistry(tmp_path).invoke(
            "explain_file", file_path=str(sample)
        )
        assert outcome.success is True
        assert len(outcome.data["symbols"]) >= 1

    def test_summarize_repo(self, tmp_path):
        """The repo summary succeeds and includes a ``total_files`` entry."""
        sample = self._write_sample(tmp_path)

        tools = ToolRegistry(tmp_path)
        tools.index_file(str(sample))

        outcome = tools.invoke("summarize_repo")
        assert outcome.success is True
        assert "total_files" in outcome.data

    def test_find_references(self, tmp_path):
        """At least one reference to ``greet`` is found after indexing."""
        sample = self._write_sample(tmp_path)

        tools = ToolRegistry(tmp_path)
        tools.index_file(str(sample))

        outcome = tools.invoke("find_references", symbol_name="greet")
        assert outcome.success is True
        assert outcome.data["reference_count"] >= 1

    def test_get_dependencies(self, tmp_path):
        """Requesting dependencies of the sample file by path succeeds."""
        sample = self._write_sample(tmp_path)

        outcome = ToolRegistry(tmp_path).invoke(
            "get_dependencies", file_path=str(sample)
        )
        assert outcome.success is True

    def test_get_call_graph(self, tmp_path):
        """The call graph for ``greet`` has ``callers`` and ``callees`` keys."""
        sample = self._write_sample(tmp_path)

        tools = ToolRegistry(tmp_path)
        tools.index_file(str(sample))

        outcome = tools.invoke("get_call_graph", symbol_name="greet")
        assert outcome.success is True
        for expected_key in ("callers", "callees"):
            assert expected_key in outcome.data

    def test_get_context_found(self, tmp_path):
        """Context lookup succeeds for the indexed ``Service`` class."""
        sample = self._write_sample(tmp_path)

        tools = ToolRegistry(tmp_path)
        tools.index_file(str(sample))

        outcome = tools.invoke("get_context", symbol_name="Service")
        assert outcome.success is True

    def test_get_context_not_found(self, tmp_path):
        """Context lookup fails for a symbol that was never indexed."""
        outcome = ToolRegistry(tmp_path).invoke(
            "get_context", symbol_name="Missing"
        )
        assert outcome.success is False

    def test_index_directory(self, tmp_path):
        """Indexing a directory with two Python files reports at least two."""
        config_dir = tmp_path / ".codexa"
        config_dir.mkdir()
        (config_dir / "config.json").write_text("{}", encoding="utf-8")
        for file_name, body in (("a.py", "x = 1\n"), ("b.py", "y = 2\n")):
            (tmp_path / file_name).write_text(body, encoding="utf-8")

        indexed = ToolRegistry(tmp_path).index_directory()
        assert indexed >= 2
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""Tests for the vector store."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
from semantic_code_intelligence.storage.vector_store import (
|
|
11
|
+
ChunkMetadata,
|
|
12
|
+
VectorStore,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _make_metadata(n: int) -> list[ChunkMetadata]:
    """Build ``n`` placeholder ``ChunkMetadata`` records numbered from zero.

    Record ``i`` uses ``file_{i}.py`` as its path, ``content_{i}`` as its
    content and ``i`` as its chunk index; every record spans lines 1-10 and
    is tagged as Python.
    """
    entries: list[ChunkMetadata] = []
    for index in range(n):
        entry = ChunkMetadata(
            file_path=f"file_{index}.py",
            start_line=1,
            end_line=10,
            chunk_index=index,
            language="python",
            content=f"content_{index}",
        )
        entries.append(entry)
    return entries
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _random_embeddings(n: int, dim: int = 128) -> np.ndarray:
|
|
32
|
+
"""Create n random normalized embeddings."""
|
|
33
|
+
vecs = np.random.randn(n, dim).astype(np.float32)
|
|
34
|
+
norms = np.linalg.norm(vecs, axis=1, keepdims=True)
|
|
35
|
+
return vecs / norms
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TestVectorStore:
    """In-memory add and search behaviour of ``VectorStore``."""

    @staticmethod
    def _filled_store(count):
        """Return a 128-wide store holding *count* random vectors, plus the vectors."""
        store = VectorStore(128)
        vectors = _random_embeddings(count)
        records = _make_metadata(count)
        store.add(vectors, records)
        return store, vectors

    def test_create_empty(self):
        """A new store is empty and reports the dimension it was given."""
        fresh = VectorStore(128)
        assert fresh.size == 0
        assert fresh.dimension == 128

    def test_add_and_size(self):
        """Adding five vectors makes the size five."""
        store, _ = self._filled_store(5)
        assert store.size == 5

    def test_add_mismatched_raises(self):
        """Three vectors with five metadata records raise ``ValueError``."""
        store = VectorStore(128)
        vectors = _random_embeddings(3)
        records = _make_metadata(5)
        with pytest.raises(ValueError):
            store.add(vectors, records)

    def test_add_empty(self):
        """Adding a zero-row array with no metadata leaves the store empty."""
        store = VectorStore(128)
        no_vectors = np.array([], dtype=np.float32).reshape(0, 128)
        store.add(no_vectors, [])
        assert store.size == 0

    def test_search_returns_results(self):
        """Querying with a stored vector returns that vector's record first."""
        store, vectors = self._filled_store(10)

        hits = store.search(vectors[0], top_k=3)
        assert len(hits) == 3
        # The query is itself stored, so it should rank first with a score
        # close to 1.0.
        best = hits[0]
        assert best[0].file_path == "file_0.py"
        assert best[1] > 0.99

    def test_search_empty_store(self):
        """Searching an empty store returns an empty list."""
        probe = _random_embeddings(1)[0]
        assert VectorStore(128).search(probe, top_k=5) == []

    def test_search_top_k_larger_than_store(self):
        """``top_k`` above the store size returns every stored item."""
        store, vectors = self._filled_store(3)
        hits = store.search(vectors[0], top_k=100)
        assert len(hits) == 3

    def test_search_scores_descending(self):
        """Result scores are ordered from highest to lowest."""
        store, vectors = self._filled_store(20)

        hits = store.search(vectors[0], top_k=10)
        scores = [hit[1] for hit in hits]
        assert scores == sorted(scores, reverse=True)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class TestVectorStorePersistence:
    """Save, load and clear behaviour of ``VectorStore``."""

    @staticmethod
    def _filled_store(count):
        """Return a 128-wide store holding *count* random vectors, plus the vectors."""
        store = VectorStore(128)
        vectors = _random_embeddings(count)
        records = _make_metadata(count)
        store.add(vectors, records)
        return store, vectors

    def test_save_creates_files(self, tmp_path: Path):
        """Saving writes ``vectors.faiss`` and ``metadata.json`` to the target."""
        store, _ = self._filled_store(5)
        index_dir = tmp_path / "index"
        store.save(index_dir)

        for file_name in ("vectors.faiss", "metadata.json"):
            assert (index_dir / file_name).exists()

    def test_load_roundtrip(self, tmp_path: Path):
        """A saved store loads back with the same size, dimension and metadata."""
        store, _ = self._filled_store(5)
        index_dir = tmp_path / "index"
        store.save(index_dir)

        restored = VectorStore.load(index_dir)
        assert restored.size == 5
        assert restored.dimension == 128
        assert len(restored.metadata) == 5
        assert restored.metadata[0].file_path == "file_0.py"

    def test_load_nonexistent_raises(self, tmp_path: Path):
        """Loading from a directory that does not exist raises ``FileNotFoundError``."""
        absent = tmp_path / "nonexistent"
        with pytest.raises(FileNotFoundError):
            VectorStore.load(absent)

    def test_search_after_load(self, tmp_path: Path):
        """A reloaded store still ranks a stored vector's own record first."""
        store, vectors = self._filled_store(10)
        index_dir = tmp_path / "index"
        store.save(index_dir)

        restored = VectorStore.load(index_dir)
        hits = restored.search(vectors[0], top_k=3)
        assert len(hits) == 3
        assert hits[0][0].file_path == "file_0.py"

    def test_clear(self):
        """Clearing a populated store empties both its vectors and metadata."""
        store, _ = self._filled_store(5)
        store.clear()
        assert store.size == 0
        assert store.metadata == []
|