codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Tests for the command router."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from semantic_code_intelligence.cli.router import register_commands
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestRouter:
    """Checks that ``register_commands`` attaches commands to a click group."""

    def test_register_commands_adds_all(self):
        """A sample of expected command names is present after registration."""
        cli_group = click.Group(name="test")
        register_commands(cli_group)

        registered = list(cli_group.commands.keys())
        for expected in ("init", "index", "search", "grep", "benchmark"):
            assert expected in registered

    def test_register_commands_count(self):
        """Registration yields exactly the number of commands pinned here."""
        cli_group = click.Group(name="test")
        register_commands(cli_group)
        total = len(cli_group.commands)
        assert total == 39

    def test_registered_commands_are_click_commands(self):
        """Every registered entry is a ``click.Command`` instance."""
        cli_group = click.Group(name="test")
        register_commands(cli_group)

        for name in cli_group.commands:
            entry = cli_group.commands[name]
            assert isinstance(entry, click.Command), f"{name} is not a click.Command"

    def test_register_to_empty_group(self):
        """A group that starts with no commands gains some after registration."""
        cli_group = click.Group(name="empty")
        before = len(cli_group.commands)
        assert before == 0
        register_commands(cli_group)
        after = len(cli_group.commands)
        assert after > 0
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Tests for the scalability utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from semantic_code_intelligence.scalability import (
|
|
8
|
+
BatchProcessor,
|
|
9
|
+
BatchStats,
|
|
10
|
+
ParallelScanner,
|
|
11
|
+
)
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
# BatchStats
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
class TestBatchStats:
    """Checks ``BatchStats`` default values and its ``to_dict`` output."""

    def test_defaults(self):
        """A stats object built without arguments starts with zeroed counters."""
        fresh = BatchStats()
        assert fresh.total_items == 0
        assert fresh.batches_processed == 0

    def test_to_dict(self):
        """``to_dict`` exposes the item total and an ``items_per_second`` entry."""
        fields = {
            "total_items": 100,
            "batches_processed": 10,
            "items_succeeded": 95,
            "items_failed": 5,
            "elapsed_seconds": 2.0,
        }
        payload = BatchStats(**fields).to_dict()
        assert payload["total_items"] == 100
        # NOTE(review): 47.5 presumably equals items_succeeded / elapsed_seconds
        # (95 / 2.0) -- confirm against BatchStats.to_dict.
        assert payload["items_per_second"] == 47.5

    def test_to_dict_zero_elapsed(self):
        """With zero elapsed time the reported rate is 0."""
        payload = BatchStats(elapsed_seconds=0).to_dict()
        assert payload["items_per_second"] == 0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# BatchProcessor
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
class TestBatchProcessor:
    """Checks ``BatchProcessor.process`` for batching, callbacks and failures."""

    def test_empty_items(self):
        """An empty input list gives no results and zeroed counters."""
        processor = BatchProcessor(batch_size=10)
        output, summary = processor.process([], lambda batch: batch)
        assert output == []
        assert summary.total_items == 0
        assert summary.batches_processed == 0

    def test_single_batch(self):
        """Five items with a batch size of ten are handled as one batch."""

        def double_all(batch):
            return [x * 2 for x in batch]

        processor = BatchProcessor(batch_size=10)
        output, summary = processor.process(list(range(5)), double_all)
        assert output == [0, 2, 4, 6, 8]
        assert summary.total_items == 5
        assert summary.batches_processed == 1
        assert summary.items_succeeded == 5

    def test_multiple_batches(self):
        """Ten items with a batch size of three keep their order across batches."""
        numbers = list(range(10))
        processor = BatchProcessor(batch_size=3)
        output, summary = processor.process(numbers, lambda batch: batch)
        assert output == list(range(10))
        assert summary.batches_processed == 4  # ceil(10/3)

    def test_batch_callback(self):
        """``on_batch`` receives (current, total) once per batch."""
        progress = []

        def record(cur, total):
            progress.append((cur, total))

        BatchProcessor(batch_size=2).process(
            list(range(6)),
            lambda batch: batch,
            on_batch=record,
        )
        assert progress == [(1, 3), (2, 3), (3, 3)]

    def test_batch_size_minimum(self):
        """A requested batch size of zero is reported as one."""
        processor = BatchProcessor(batch_size=0)
        assert processor.batch_size == 1

    def test_failing_batch(self):
        """A batch whose processor raises is counted as failed, others succeed."""

        def bad_processor(batch):
            # Only the batch whose first element is 2 raises.
            if batch[0] != 2:
                return batch
            raise ValueError("fail")

        processor = BatchProcessor(batch_size=2)
        _, summary = processor.process(list(range(6)), bad_processor)
        assert summary.items_failed == 2
        assert summary.items_succeeded == 4
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# ParallelScanner
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
class TestParallelScanner:
    """Checks ``ParallelScanner.scan_and_process`` and its worker-count floor."""

    def test_scan_empty(self):
        """No paths in means no results and no errors out."""
        outcome = ParallelScanner(max_workers=2).scan_and_process([], lambda p: p)
        results, errors = outcome
        assert results == []
        assert errors == []

    def test_scan_files(self, tmp_path):
        """Five readable files produce five results and no errors."""
        for i in range(5):
            target = tmp_path / f"file{i}.txt"
            target.write_text(f"content {i}", encoding="utf-8")

        text_files = list(tmp_path.glob("*.txt"))
        worker = ParallelScanner(max_workers=2)
        results, errors = worker.scan_and_process(
            text_files,
            lambda p: p.read_text(encoding="utf-8"),
        )
        assert len(results) == 5
        assert len(errors) == 0

    def test_scan_with_errors(self, tmp_path):
        """A path that was never written yields one error next to one result."""
        present = tmp_path / "exists.txt"
        absent = tmp_path / "missing.txt"
        present.write_text("ok", encoding="utf-8")

        worker = ParallelScanner(max_workers=2)
        results, errors = worker.scan_and_process(
            [present, absent],
            lambda p: p.read_text(encoding="utf-8"),
        )
        assert len(results) == 1
        assert len(errors) == 1

    def test_max_workers_minimum(self):
        """A requested worker count of zero is stored as one."""
        worker = ParallelScanner(max_workers=0)
        assert worker._max_workers == 1
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Tests for the repository scanner."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from semantic_code_intelligence.indexing.scanner import (
|
|
10
|
+
ScannedFile,
|
|
11
|
+
compute_file_hash,
|
|
12
|
+
scan_repository,
|
|
13
|
+
should_ignore,
|
|
14
|
+
)
|
|
15
|
+
from semantic_code_intelligence.config.settings import IndexConfig
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestComputeFileHash:
    """Checks the file hashing helper ``compute_file_hash``."""

    def test_hash_returns_hex_string(self, tmp_path: Path):
        """The hash is a string of 64 characters."""
        source = tmp_path / "test.py"
        source.write_text("hello world", encoding="utf-8")
        digest = compute_file_hash(source)
        assert isinstance(digest, str)
        assert len(digest) == 64  # SHA-256 hex digest

    def test_same_content_same_hash(self, tmp_path: Path):
        """Two files with identical text hash to the same value."""
        first = tmp_path / "a.py"
        second = tmp_path / "b.py"
        for target in (first, second):
            target.write_text("same content", encoding="utf-8")
        assert compute_file_hash(first) == compute_file_hash(second)

    def test_different_content_different_hash(self, tmp_path: Path):
        """Two files with different text hash to different values."""
        first = tmp_path / "a.py"
        second = tmp_path / "b.py"
        first.write_text("content A", encoding="utf-8")
        second.write_text("content B", encoding="utf-8")
        assert compute_file_hash(first) != compute_file_hash(second)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TestShouldIgnore:
    """Checks ``should_ignore`` against sets of ignored directory names."""

    def test_ignore_git_dir(self, tmp_path: Path):
        """A path under ``.git`` is ignored when ``.git`` is in the ignore set."""
        candidate = tmp_path / ".git" / "config"
        verdict = should_ignore(candidate, tmp_path, {".git"})
        assert verdict is True

    def test_ignore_node_modules(self, tmp_path: Path):
        """A path nested below ``node_modules`` is ignored."""
        candidate = tmp_path / "node_modules" / "pkg" / "index.js"
        verdict = should_ignore(candidate, tmp_path, {"node_modules"})
        assert verdict is True

    def test_allow_normal_file(self, tmp_path: Path):
        """A path with no ignored component is not ignored."""
        candidate = tmp_path / "src" / "main.py"
        verdict = should_ignore(candidate, tmp_path, {".git"})
        assert verdict is False

    def test_nested_ignored_dir(self, tmp_path: Path):
        """An ignored directory below a normal one still triggers ignoring."""
        candidate = tmp_path / "src" / "__pycache__" / "mod.cpython-312.pyc"
        verdict = should_ignore(candidate, tmp_path, {"__pycache__"})
        assert verdict is True
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class TestScanRepository:
    """Checks ``scan_repository`` on small temporary directory trees."""

    def test_empty_directory(self, tmp_path: Path):
        """An empty directory scans to an empty list."""
        assert scan_repository(tmp_path) == []

    def test_finds_python_files(self, tmp_path: Path):
        """Two Python files in the root are both reported."""
        for name, body in (("main.py", "print('hi')"), ("utils.py", "x = 1")):
            (tmp_path / name).write_text(body, encoding="utf-8")
        found = scan_repository(tmp_path)
        assert len(found) == 2

    def test_ignores_non_code_files(self, tmp_path: Path):
        """Markdown and CSV files are left out; only the ``.py`` file remains."""
        fixtures = (
            ("main.py", "x = 1"),
            ("readme.md", "# Readme"),
            ("data.csv", "a,b,c"),
        )
        for name, body in fixtures:
            (tmp_path / name).write_text(body, encoding="utf-8")
        found = scan_repository(tmp_path)
        assert len(found) == 1
        assert found[0].extension == ".py"

    def test_ignores_excluded_dirs(self, tmp_path: Path):
        """A file inside a ``venv`` directory is not reported."""
        (tmp_path / "main.py").write_text("x = 1", encoding="utf-8")
        excluded = tmp_path / "venv"
        excluded.mkdir()
        (excluded / "lib.py").write_text("y = 2", encoding="utf-8")
        found = scan_repository(tmp_path)
        assert len(found) == 1

    def test_scanned_file_metadata(self, tmp_path: Path):
        """A scanned entry carries extension, relative path, size and hash."""
        content = "def hello(): pass"
        (tmp_path / "test.py").write_text(content, encoding="utf-8")
        found = scan_repository(tmp_path)
        assert len(found) == 1
        entry = found[0]
        assert entry.extension == ".py"
        assert entry.relative_path == "test.py"
        assert entry.size_bytes > 0
        assert len(entry.content_hash) == 64

    def test_finds_multiple_languages(self, tmp_path: Path):
        """Python, JavaScript and Java files are all picked up."""
        fixtures = (
            ("main.py", "x = 1"),
            ("app.js", "let x = 1;"),
            ("Main.java", "class Main {}"),
        )
        for name, body in fixtures:
            (tmp_path / name).write_text(body, encoding="utf-8")
        found = scan_repository(tmp_path)
        seen = {entry.extension for entry in found}
        assert seen == {".py", ".js", ".java"}

    def test_custom_config(self, tmp_path: Path):
        """An ``IndexConfig`` limited to ``.py`` drops the JavaScript file."""
        for name, body in (("main.py", "x = 1"), ("app.js", "let x = 1;")):
            (tmp_path / name).write_text(body, encoding="utf-8")
        py_only = IndexConfig(extensions={".py"}, ignore_dirs=set())
        found = scan_repository(tmp_path, py_only)
        assert len(found) == 1
        assert found[0].extension == ".py"

    def test_results_sorted(self, tmp_path: Path):
        """Results come back ordered by relative path."""
        # Written in non-alphabetical order on purpose.
        for stem in ("z", "a", "m"):
            (tmp_path / f"{stem}.py").write_text(stem, encoding="utf-8")
        found = scan_repository(tmp_path)
        ordered = [entry.relative_path for entry in found]
        assert ordered == sorted(ordered)
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Tests for the search service and formatter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pytest
|
|
10
|
+
|
|
11
|
+
from semantic_code_intelligence.config.settings import init_project, save_config, AppConfig
|
|
12
|
+
from semantic_code_intelligence.embeddings.generator import generate_embeddings
|
|
13
|
+
from semantic_code_intelligence.services.search_service import SearchResult, search_codebase
|
|
14
|
+
from semantic_code_intelligence.search.formatter import format_results_json, format_results_rich
|
|
15
|
+
from semantic_code_intelligence.storage.vector_store import ChunkMetadata, VectorStore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@pytest.fixture
def indexed_project(tmp_path: Path) -> Path:
    """Build a temporary project whose index holds five embedded code snippets.

    Initialises a project in ``tmp_path``, embeds the snippets, stores them
    with per-snippet metadata in a ``VectorStore`` and saves that store to the
    project's index directory. Returns ``tmp_path``.
    """
    _config, _ = init_project(tmp_path)
    target_dir = AppConfig.index_dir(tmp_path)

    # Snippets to embed; one metadata record is created per entry.
    code_snippets = [
        "def authenticate_user(username, password):\n return check_credentials(username, password)\n",
        "def connect_to_database(host, port):\n return Database(host=host, port=port)\n",
        "def handle_http_request(request):\n response = process(request)\n return response\n",
        "def verify_jwt_token(token):\n payload = jwt.decode(token, SECRET_KEY)\n return payload\n",
        "def calculate_statistics(data):\n mean = sum(data) / len(data)\n return mean\n",
    ]

    vectors = generate_embeddings(code_snippets)

    records = []
    for i, snippet in enumerate(code_snippets):
        records.append(
            ChunkMetadata(
                file_path=f"src/module_{i}.py",
                start_line=1,
                end_line=3,
                chunk_index=0,
                language="python",
                content=snippet,
                content_hash=f"hash_{i}",
            )
        )

    # The store is sized from the second axis of the embedding array.
    index = VectorStore(vectors.shape[1])
    index.add(vectors, records)
    index.save(target_dir)

    return tmp_path
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestSearchCodebase:
    """Checks ``search_codebase`` against the ``indexed_project`` fixture."""

    def test_search_returns_results(self, indexed_project: Path):
        """A query against the indexed project returns at least one hit."""
        hits = search_codebase("authentication", indexed_project)
        assert len(hits) > 0

    def test_search_result_type(self, indexed_project: Path):
        """Every hit is a ``SearchResult``."""
        hits = search_codebase("database connection", indexed_project)
        for hit in hits:
            assert isinstance(hit, SearchResult)

    def test_search_results_have_scores(self, indexed_project: Path):
        """Every hit has a positive float score."""
        hits = search_codebase("jwt token verification", indexed_project)
        for hit in hits:
            assert isinstance(hit.score, float)
            assert hit.score > 0

    def test_search_results_sorted_by_score(self, indexed_project: Path):
        """Hits are ordered from highest to lowest score."""
        hits = search_codebase("authenticate user", indexed_project)
        observed = [hit.score for hit in hits]
        expected = sorted(observed, reverse=True)
        assert observed == expected

    def test_search_respects_top_k(self, indexed_project: Path):
        """No more than ``top_k`` hits are returned."""
        hits = search_codebase("code", indexed_project, top_k=2)
        assert len(hits) <= 2

    def test_search_relevance(self, indexed_project: Path):
        """The single best hit for a JWT query mentions jwt or token."""
        hits = search_codebase("jwt token", indexed_project, top_k=1)
        assert len(hits) == 1
        lowered = hits[0].content.lower()
        assert "jwt" in lowered or "token" in lowered

    def test_search_no_index_raises(self, tmp_path: Path):
        """Searching an initialised project with no saved index raises."""
        init_project(tmp_path)
        with pytest.raises(FileNotFoundError):
            search_codebase("test", tmp_path)

    def test_search_result_metadata(self, indexed_project: Path):
        """The top hit carries the path, line range, language and content."""
        hits = search_codebase("database", indexed_project, top_k=1)
        top = hits[0]
        assert top.file_path.startswith("src/")
        assert top.start_line > 0
        assert top.end_line >= top.start_line
        assert top.language == "python"
        assert len(top.content) > 0
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class TestSearchResult:
    """Checks the ``SearchResult`` data class and its ``to_dict`` output."""

    def test_to_dict(self):
        """``to_dict`` carries the path, score, start line and language through."""
        hit = SearchResult(
            file_path="test.py",
            start_line=1,
            end_line=5,
            language="python",
            content="def foo(): pass",
            score=0.9534,
            chunk_index=0,
        )
        payload = hit.to_dict()
        assert payload["file_path"] == "test.py"
        assert payload["score"] == 0.9534
        assert payload["start_line"] == 1
        assert payload["language"] == "python"

    def test_to_dict_score_rounding(self):
        """A score of 0.12345678 is reported as 0.1235."""
        hit = SearchResult(
            file_path="x.py",
            start_line=1,
            end_line=1,
            language="python",
            content="x",
            score=0.12345678,
            chunk_index=0,
        )
        reported = hit.to_dict()["score"]
        assert reported == 0.1235
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class TestFormatResultsJson:
    """Checks that ``format_results_json`` emits parseable JSON with expected keys."""

    def test_valid_json(self):
        """Two hits round-trip through JSON with query, top_k and count fields."""
        first = SearchResult("a.py", 1, 5, "python", "code", 0.95, 0)
        second = SearchResult("b.py", 10, 20, "python", "more code", 0.80, 1)
        rendered = format_results_json("test query", [first, second], 10)
        parsed = json.loads(rendered)
        assert parsed["query"] == "test query"
        assert parsed["top_k"] == 10
        assert parsed["result_count"] == 2
        assert len(parsed["results"]) == 2

    def test_empty_results(self):
        """No hits gives a zero count and an empty results list."""
        rendered = format_results_json("nope", [], 5)
        parsed = json.loads(rendered)
        assert parsed["result_count"] == 0
        assert parsed["results"] == []
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class TestFormatResultsRich:
    """Smoke tests for the rich formatter: each call only has to not raise."""

    def test_no_crash_with_results(self):
        """Formatting a single hit completes without an exception."""
        only_hit = SearchResult("test.py", 1, 3, "python", "def hello(): pass", 0.9, 0)
        # Passing means no exception was raised; nothing is asserted on output.
        format_results_rich("hello", [only_hit])

    def test_no_crash_empty_results(self):
        """Formatting an empty result list completes without an exception."""
        no_hits = []
        format_results_rich("nothing", no_hits)
|