codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189):
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,41 @@
1
+ """Tests for the command router."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import click
6
+ import pytest
7
+
8
+ from semantic_code_intelligence.cli.router import register_commands
9
+
10
+
11
class TestRouter:
    """Checks that ``register_commands`` populates a click group."""

    def test_register_commands_adds_all(self):
        """A sample of core command names is present after registration."""
        cli_group = click.Group(name="test")
        register_commands(cli_group)

        registered = list(cli_group.commands.keys())
        for expected in ("init", "index", "search", "grep", "benchmark"):
            assert expected in registered

    def test_register_commands_count(self):
        """The number of registered commands matches the pinned total."""
        cli_group = click.Group(name="test")
        register_commands(cli_group)
        total = len(cli_group.commands)
        assert total == 39

    def test_registered_commands_are_click_commands(self):
        """Every registered entry is a ``click.Command`` instance."""
        cli_group = click.Group(name="test")
        register_commands(cli_group)

        for name in cli_group.commands:
            cmd = cli_group.commands[name]
            assert isinstance(cmd, click.Command), f"{name} is not a click.Command"

    def test_register_to_empty_group(self):
        """Registration turns an empty group into a non-empty one."""
        cli_group = click.Group(name="empty")
        # The group must start out with no commands at all.
        assert len(cli_group.commands) == 0
        register_commands(cli_group)
        assert len(cli_group.commands) > 0
@@ -0,0 +1,138 @@
1
+ """Tests for the scalability utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+ from semantic_code_intelligence.scalability import (
8
+ BatchProcessor,
9
+ BatchStats,
10
+ ParallelScanner,
11
+ )
12
+ from pathlib import Path
13
+
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # BatchStats
17
+ # ---------------------------------------------------------------------------
18
+
19
class TestBatchStats:
    """Tests for ``BatchStats`` defaults and its ``to_dict`` export."""

    def test_defaults(self):
        """A freshly built instance reports zero items and zero batches."""
        fresh = BatchStats()
        assert fresh.total_items == 0
        assert fresh.batches_processed == 0

    def test_to_dict(self):
        """``to_dict`` exposes the totals and a throughput figure."""
        field_values = {
            "total_items": 100,
            "batches_processed": 10,
            "items_succeeded": 95,
            "items_failed": 5,
            "elapsed_seconds": 2.0,
        }
        exported = BatchStats(**field_values).to_dict()
        assert exported["total_items"] == 100
        # 47.5 equals 95 / 2.0 -- presumably succeeded items per elapsed
        # second; confirm against BatchStats.to_dict.
        assert exported["items_per_second"] == 47.5

    def test_to_dict_zero_elapsed(self):
        """With zero elapsed time the throughput is reported as 0."""
        exported = BatchStats(elapsed_seconds=0).to_dict()
        assert exported["items_per_second"] == 0
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # BatchProcessor
45
+ # ---------------------------------------------------------------------------
46
+
47
class TestBatchProcessor:
    """Tests for ``BatchProcessor.process`` and its batch-size handling."""

    def test_empty_items(self):
        """Processing nothing yields no results and zeroed stats."""
        processor = BatchProcessor(batch_size=10)

        def passthrough(batch):
            return batch

        output, stats = processor.process([], passthrough)
        assert output == []
        assert stats.total_items == 0
        assert stats.batches_processed == 0

    def test_single_batch(self):
        """Five items with batch size 10 are handled in one batch."""
        processor = BatchProcessor(batch_size=10)

        def double_each(batch):
            return [x * 2 for x in batch]

        output, stats = processor.process(list(range(5)), double_each)
        assert output == [0, 2, 4, 6, 8]
        assert stats.total_items == 5
        assert stats.batches_processed == 1
        assert stats.items_succeeded == 5

    def test_multiple_batches(self):
        """Ten items with batch size 3 come back in order, in four batches."""
        processor = BatchProcessor(batch_size=3)

        def passthrough(batch):
            return batch

        output, stats = processor.process(list(range(10)), passthrough)
        assert output == list(range(10))
        assert stats.batches_processed == 4  # ceil(10/3)

    def test_batch_callback(self):
        """``on_batch`` receives (current, total) once per batch."""
        processor = BatchProcessor(batch_size=2)
        seen = []

        def passthrough(batch):
            return batch

        def record(cur, total):
            seen.append((cur, total))

        processor.process(list(range(6)), passthrough, on_batch=record)
        assert seen == [(1, 3), (2, 3), (3, 3)]

    def test_batch_size_minimum(self):
        """A batch size of 0 is raised to 1."""
        processor = BatchProcessor(batch_size=0)
        assert processor.batch_size == 1

    def test_failing_batch(self):
        """A batch whose processor raises is counted as failed items."""
        processor = BatchProcessor(batch_size=2)

        def bad_processor(batch):
            # Only the batch that starts with 2 (i.e. [2, 3]) blows up.
            if batch[0] == 2:
                raise ValueError("fail")
            return batch

        _output, stats = processor.process(list(range(6)), bad_processor)
        assert stats.items_failed == 2
        assert stats.items_succeeded == 4
98
+
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # ParallelScanner
102
+ # ---------------------------------------------------------------------------
103
+
104
class TestParallelScanner:
    """Tests for ``ParallelScanner.scan_and_process`` and worker limits."""

    def test_scan_empty(self):
        """No paths in means no results and no errors out."""
        scanner = ParallelScanner(max_workers=2)

        def passthrough(p):
            return p

        found, failures = scanner.scan_and_process([], passthrough)
        assert found == []
        assert failures == []

    def test_scan_files(self, tmp_path):
        """Five readable files produce five results and no errors."""
        for idx in range(5):
            target = tmp_path / f"file{idx}.txt"
            target.write_text(f"content {idx}", encoding="utf-8")

        def read_utf8(p):
            return p.read_text(encoding="utf-8")

        txt_paths = list(tmp_path.glob("*.txt"))
        scanner = ParallelScanner(max_workers=2)
        found, failures = scanner.scan_and_process(txt_paths, read_utf8)
        assert len(found) == 5
        assert len(failures) == 0

    def test_scan_with_errors(self, tmp_path):
        """A missing file is reported as one error beside one result."""
        present = tmp_path / "exists.txt"
        absent = tmp_path / "missing.txt"
        present.write_text("ok", encoding="utf-8")

        def read_utf8(p):
            return p.read_text(encoding="utf-8")

        scanner = ParallelScanner(max_workers=2)
        found, failures = scanner.scan_and_process([present, absent], read_utf8)
        assert len(found) == 1
        assert len(failures) == 1

    def test_max_workers_minimum(self):
        """A worker count of 0 is raised to 1."""
        scanner = ParallelScanner(max_workers=0)
        assert scanner._max_workers == 1
@@ -0,0 +1,125 @@
1
+ """Tests for the repository scanner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import pytest
8
+
9
+ from semantic_code_intelligence.indexing.scanner import (
10
+ ScannedFile,
11
+ compute_file_hash,
12
+ scan_repository,
13
+ should_ignore,
14
+ )
15
+ from semantic_code_intelligence.config.settings import IndexConfig
16
+
17
+
18
class TestComputeFileHash:
    """Tests for ``compute_file_hash``."""

    def test_hash_returns_hex_string(self, tmp_path: Path):
        """The digest is a 64-character string."""
        source = tmp_path / "test.py"
        source.write_text("hello world", encoding="utf-8")
        digest = compute_file_hash(source)
        assert isinstance(digest, str)
        assert len(digest) == 64  # SHA-256 hex digest

    def test_same_content_same_hash(self, tmp_path: Path):
        """Two files with identical text hash to the same value."""
        first = tmp_path / "a.py"
        second = tmp_path / "b.py"
        for path in (first, second):
            path.write_text("same content", encoding="utf-8")
        assert compute_file_hash(first) == compute_file_hash(second)

    def test_different_content_different_hash(self, tmp_path: Path):
        """Two files with different text hash to different values."""
        first = tmp_path / "a.py"
        second = tmp_path / "b.py"
        first.write_text("content A", encoding="utf-8")
        second.write_text("content B", encoding="utf-8")
        assert compute_file_hash(first) != compute_file_hash(second)
41
+
42
+
43
class TestShouldIgnore:
    """Tests for ``should_ignore`` against a set of ignored directory names."""

    def test_ignore_git_dir(self, tmp_path: Path):
        """A path under ``.git`` is ignored."""
        candidate = tmp_path / ".git" / "config"
        verdict = should_ignore(candidate, tmp_path, {".git"})
        assert verdict is True

    def test_ignore_node_modules(self, tmp_path: Path):
        """A path nested under ``node_modules`` is ignored."""
        candidate = tmp_path / "node_modules" / "pkg" / "index.js"
        verdict = should_ignore(candidate, tmp_path, {"node_modules"})
        assert verdict is True

    def test_allow_normal_file(self, tmp_path: Path):
        """A path outside every ignored directory is kept."""
        candidate = tmp_path / "src" / "main.py"
        verdict = should_ignore(candidate, tmp_path, {".git"})
        assert verdict is False

    def test_nested_ignored_dir(self, tmp_path: Path):
        """An ignored directory below the root still triggers a match."""
        candidate = tmp_path / "src" / "__pycache__" / "mod.cpython-312.pyc"
        verdict = should_ignore(candidate, tmp_path, {"__pycache__"})
        assert verdict is True
61
+
62
+
63
class TestScanRepository:
    """Tests for ``scan_repository`` over temporary directory trees."""

    def test_empty_directory(self, tmp_path: Path):
        """An empty directory yields an empty list."""
        assert scan_repository(tmp_path) == []

    def test_finds_python_files(self, tmp_path: Path):
        """Both Python files in the root are reported."""
        sources = {"main.py": "print('hi')", "utils.py": "x = 1"}
        for filename, text in sources.items():
            (tmp_path / filename).write_text(text, encoding="utf-8")
        scanned = scan_repository(tmp_path)
        assert len(scanned) == 2

    def test_ignores_non_code_files(self, tmp_path: Path):
        """Markdown and CSV files are skipped; only the ``.py`` file remains."""
        sources = {"main.py": "x = 1", "readme.md": "# Readme", "data.csv": "a,b,c"}
        for filename, text in sources.items():
            (tmp_path / filename).write_text(text, encoding="utf-8")
        scanned = scan_repository(tmp_path)
        assert len(scanned) == 1
        assert scanned[0].extension == ".py"

    def test_ignores_excluded_dirs(self, tmp_path: Path):
        """A file under ``venv`` is not reported when no config is passed."""
        (tmp_path / "main.py").write_text("x = 1", encoding="utf-8")
        venv_dir = tmp_path / "venv"
        venv_dir.mkdir()
        (venv_dir / "lib.py").write_text("y = 2", encoding="utf-8")
        scanned = scan_repository(tmp_path)
        assert len(scanned) == 1

    def test_scanned_file_metadata(self, tmp_path: Path):
        """The scanned entry carries extension, relative path, size and hash."""
        (tmp_path / "test.py").write_text("def hello(): pass", encoding="utf-8")
        scanned = scan_repository(tmp_path)
        assert len(scanned) == 1
        entry = scanned[0]
        assert entry.extension == ".py"
        assert entry.relative_path == "test.py"
        assert entry.size_bytes > 0
        assert len(entry.content_hash) == 64

    def test_finds_multiple_languages(self, tmp_path: Path):
        """Python, JavaScript and Java files are all picked up."""
        sources = {
            "main.py": "x = 1",
            "app.js": "let x = 1;",
            "Main.java": "class Main {}",
        }
        for filename, text in sources.items():
            (tmp_path / filename).write_text(text, encoding="utf-8")
        found_extensions = set()
        for entry in scan_repository(tmp_path):
            found_extensions.add(entry.extension)
        assert found_extensions == {".py", ".js", ".java"}

    def test_custom_config(self, tmp_path: Path):
        """An ``IndexConfig`` limited to ``.py`` filters out the ``.js`` file."""
        (tmp_path / "main.py").write_text("x = 1", encoding="utf-8")
        (tmp_path / "app.js").write_text("let x = 1;", encoding="utf-8")
        py_only = IndexConfig(extensions={".py"}, ignore_dirs=set())
        scanned = scan_repository(tmp_path, py_only)
        assert len(scanned) == 1
        assert scanned[0].extension == ".py"

    def test_results_sorted(self, tmp_path: Path):
        """Relative paths come back in sorted order."""
        # Files are created out of alphabetical order on purpose.
        for stem in ("z", "a", "m"):
            (tmp_path / f"{stem}.py").write_text(stem, encoding="utf-8")
        relative_paths = [entry.relative_path for entry in scan_repository(tmp_path)]
        assert relative_paths == sorted(relative_paths)
@@ -0,0 +1,160 @@
1
+ """Tests for the search service and formatter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+
8
+ import numpy as np
9
+ import pytest
10
+
11
+ from semantic_code_intelligence.config.settings import init_project, save_config, AppConfig
12
+ from semantic_code_intelligence.embeddings.generator import generate_embeddings
13
+ from semantic_code_intelligence.services.search_service import SearchResult, search_codebase
14
+ from semantic_code_intelligence.search.formatter import format_results_json, format_results_rich
15
+ from semantic_code_intelligence.storage.vector_store import ChunkMetadata, VectorStore
16
+
17
+
18
@pytest.fixture
def indexed_project(tmp_path: Path) -> Path:
    """Build a temporary project whose index holds five embedded snippets.

    The project is initialised under ``tmp_path``, five code snippets are
    embedded, and the resulting vector store is saved to the project's index
    directory. Returns ``tmp_path``.
    """
    _config, _ = init_project(tmp_path)
    target_dir = AppConfig.index_dir(tmp_path)

    # NOTE(review): the single space after each "\n" is reproduced exactly as
    # this rendering shows it; the whitespace may have been collapsed --
    # confirm against the packaged file.
    snippets = [
        "def authenticate_user(username, password):\n return check_credentials(username, password)\n",
        "def connect_to_database(host, port):\n return Database(host=host, port=port)\n",
        "def handle_http_request(request):\n response = process(request)\n return response\n",
        "def verify_jwt_token(token):\n payload = jwt.decode(token, SECRET_KEY)\n return payload\n",
        "def calculate_statistics(data):\n mean = sum(data) / len(data)\n return mean\n",
    ]
    vectors = generate_embeddings(snippets)

    # One metadata record per snippet, each attributed to its own module file.
    chunk_meta = []
    for idx, text in enumerate(snippets):
        chunk_meta.append(
            ChunkMetadata(
                file_path=f"src/module_{idx}.py",
                start_line=1,
                end_line=3,
                chunk_index=0,
                language="python",
                content=text,
                content_hash=f"hash_{idx}",
            )
        )

    # The store is sized from the second dimension of the embeddings array.
    store = VectorStore(vectors.shape[1])
    store.add(vectors, chunk_meta)
    store.save(target_dir)

    return tmp_path
52
+
53
+
54
class TestSearchCodebase:
    """Tests for ``search_codebase`` against the ``indexed_project`` fixture."""

    def test_search_returns_results(self, indexed_project: Path):
        """A query against the indexed project returns at least one hit."""
        hits = search_codebase("authentication", indexed_project)
        assert len(hits) > 0

    def test_search_result_type(self, indexed_project: Path):
        """Every hit is a ``SearchResult``."""
        hits = search_codebase("database connection", indexed_project)
        assert all(isinstance(hit, SearchResult) for hit in hits)

    def test_search_results_have_scores(self, indexed_project: Path):
        """Every hit carries a positive float score."""
        hits = search_codebase("jwt token verification", indexed_project)
        for hit in hits:
            assert isinstance(hit.score, float)
            assert hit.score > 0

    def test_search_results_sorted_by_score(self, indexed_project: Path):
        """Hits are ordered from highest to lowest score."""
        hits = search_codebase("authenticate user", indexed_project)
        observed = [hit.score for hit in hits]
        assert observed == sorted(observed, reverse=True)

    def test_search_respects_top_k(self, indexed_project: Path):
        """No more than ``top_k`` hits are returned."""
        hits = search_codebase("code", indexed_project, top_k=2)
        assert len(hits) <= 2

    def test_search_relevance(self, indexed_project: Path):
        """The single best hit for "jwt token" mentions jwt or token."""
        hits = search_codebase("jwt token", indexed_project, top_k=1)
        assert len(hits) == 1
        lowered = hits[0].content.lower()
        assert "jwt" in lowered or "token" in lowered

    def test_search_no_index_raises(self, tmp_path: Path):
        """Searching an initialised but unindexed project raises."""
        init_project(tmp_path)
        with pytest.raises(FileNotFoundError):
            search_codebase("test", tmp_path)

    def test_search_result_metadata(self, indexed_project: Path):
        """The top hit exposes path, line range, language and content."""
        hits = search_codebase("database", indexed_project, top_k=1)
        top = hits[0]
        assert top.file_path.startswith("src/")
        assert top.start_line > 0
        assert top.end_line >= top.start_line
        assert top.language == "python"
        assert len(top.content) > 0
98
+
99
+
100
class TestSearchResult:
    """Tests for ``SearchResult.to_dict``."""

    def test_to_dict(self):
        """The exported dict carries the fields passed to the constructor."""
        hit = SearchResult(
            file_path="test.py",
            start_line=1,
            end_line=5,
            language="python",
            content="def foo(): pass",
            score=0.9534,
            chunk_index=0,
        )
        exported = hit.to_dict()
        assert exported["file_path"] == "test.py"
        assert exported["score"] == 0.9534
        assert exported["start_line"] == 1
        assert exported["language"] == "python"

    def test_to_dict_score_rounding(self):
        """A score of 0.12345678 is exported as 0.1235."""
        hit = SearchResult(
            file_path="x.py",
            start_line=1,
            end_line=1,
            language="python",
            content="x",
            score=0.12345678,
            chunk_index=0,
        )
        exported_score = hit.to_dict()["score"]
        assert exported_score == 0.1235
125
+
126
+
127
class TestFormatResultsJson:
    """Tests for ``format_results_json``."""

    def test_valid_json(self):
        """The output parses as JSON and echoes query, top_k and the hits."""
        first = SearchResult("a.py", 1, 5, "python", "code", 0.95, 0)
        second = SearchResult("b.py", 10, 20, "python", "more code", 0.80, 1)
        rendered = format_results_json("test query", [first, second], 10)
        payload = json.loads(rendered)
        assert payload["query"] == "test query"
        assert payload["top_k"] == 10
        assert payload["result_count"] == 2
        assert len(payload["results"]) == 2

    def test_empty_results(self):
        """With no hits the count is 0 and the results list is empty."""
        rendered = format_results_json("nope", [], 5)
        payload = json.loads(rendered)
        assert payload["result_count"] == 0
        assert payload["results"] == []
147
+
148
+
149
class TestFormatResultsRich:
    """Smoke tests for ``format_results_rich``; they only check it does not raise."""

    def test_no_crash_with_results(self):
        """Formatting a single hit completes without raising."""
        only_hit = SearchResult("test.py", 1, 3, "python", "def hello(): pass", 0.9, 0)
        # Should not raise
        format_results_rich("hello", [only_hit])

    def test_no_crash_empty_results(self):
        """Formatting an empty result list completes without raising."""
        no_hits = []
        format_results_rich("nothing", no_hits)