codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189):
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,1142 @@
1
+ """Comprehensive end-to-end tests — simulates a real end user working with CodexA.
2
+
3
+ Tests the entire user journey:
4
+ codexa --version → codexa init → codexa index → codexa search (all modes/flags)
5
+ → codexa models (list/info/switch) → TUI helpers → VS Code extension
6
+ → config lifecycle → vector store → formatter → build script → doctor
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import os
13
+ import textwrap
14
+ from pathlib import Path
15
+
16
+ import numpy as np
17
+ import pytest
18
+ from click.testing import CliRunner
19
+
20
+ from semantic_code_intelligence import __version__
21
+ from semantic_code_intelligence.cli.main import cli
22
+
23
+
24
+ def _extract_json(text: str) -> dict | list:
25
+ """Extract the first valid JSON object/array from mixed CLI output.
26
+
27
+ Rich console logging can contaminate stdout, so we scan for the first
28
+ '{' or '[' that successfully parses.
29
+ """
30
+ for i, ch in enumerate(text):
31
+ if ch in "{[":
32
+ try:
33
+ return json.loads(text[i:])
34
+ except json.JSONDecodeError:
35
+ continue
36
+ raise ValueError(f"No valid JSON found in output: {text[:200]!r}")
37
+
38
+
39
+ # ── Reusable project root for tmp_path fixtures ──────────────────────────
40
+ SAMPLE_PY = textwrap.dedent("""\
41
+ \"\"\"Sample module for end-to-end testing.\"\"\"
42
+
43
+ def greet(name: str) -> str:
44
+ \"\"\"Return a greeting string.\"\"\"
45
+ return f"Hello, {name}!"
46
+
47
+ def add(a: int, b: int) -> int:
48
+ \"\"\"Add two numbers.\"\"\"
49
+ return a + b
50
+
51
+ class Calculator:
52
+ \"\"\"A simple calculator.\"\"\"
53
+
54
+ def multiply(self, x: int, y: int) -> int:
55
+ return x * y
56
+
57
+ def divide(self, x: float, y: float) -> float:
58
+ if y == 0:
59
+ raise ZeroDivisionError("Cannot divide by zero")
60
+ return x / y
61
+ """)
62
+
63
+ SAMPLE_JS = textwrap.dedent("""\
64
+ // sample.js — small JS file for testing
65
+ function fibonacci(n) {
66
+ if (n <= 1) return n;
67
+ return fibonacci(n - 1) + fibonacci(n - 2);
68
+ }
69
+
70
+ module.exports = { fibonacci };
71
+ """)
72
+
73
+
74
@pytest.fixture()
def project(tmp_path: Path):
    """Populate ``tmp_path`` with a small ``src/`` tree and hand it back.

    Three UTF-8 files are written under ``src``: ``sample.py``,
    ``math_utils.py`` and ``app.js``.  The value returned is the project
    root (``tmp_path`` itself), not the ``src`` directory.
    """
    math_utils_source = textwrap.dedent("""\
        \"\"\"Math utilities.\"\"\"

        import math

        def circle_area(radius: float) -> float:
            return math.pi * radius ** 2

        def factorial(n: int) -> int:
            if n <= 1:
                return 1
            return n * factorial(n - 1)
    """)

    source_dir = tmp_path / "src"
    source_dir.mkdir()

    # Insertion order mirrors the order in which the files are created.
    file_contents = {
        "sample.py": SAMPLE_PY,
        "math_utils.py": math_utils_source,
        "app.js": SAMPLE_JS,
    }
    for filename, content in file_contents.items():
        (source_dir / filename).write_text(content, encoding="utf-8")

    return tmp_path
98
+
99
+
100
+ # =========================================================================
101
+ # 1. Version & basic CLI
102
+ # =========================================================================
103
+
104
class TestCLIBasics:
    """Smoke checks for the top-level command group — what a user hits first."""

    def test_version_flag(self):
        """``--version`` exits cleanly and prints the package version."""
        outcome = CliRunner().invoke(cli, ["--version"])
        assert outcome.exit_code == 0
        assert __version__ in outcome.output

    def test_help_flag(self):
        """``--help`` exits cleanly and mentions the tool name."""
        outcome = CliRunner().invoke(cli, ["--help"])
        assert outcome.exit_code == 0
        assert "codexa" in outcome.output.lower()

    def test_command_count(self):
        """The group exposes exactly 39 commands."""
        registered = cli.commands
        assert len(registered) == 39

    def test_all_35_commands_registered(self):
        """The registered command names match the expected set exactly."""
        # NOTE(review): the method name says 35, but 39 names are listed
        # below (and test_command_count asserts 39).
        names = (
            "init", "index", "search", "explain", "summary", "watch",
            "deps", "ask", "review", "refactor", "suggest", "serve",
            "context", "workspace", "docs", "doctor", "plugin", "web",
            "viz", "quality", "pr-summary", "ci-gen", "chat",
            "investigate", "cross-refactor", "metrics", "gate",
            "hotspots", "impact", "trace", "tool", "evolve", "tui",
            "mcp", "lsp", "models", "benchmark", "grep", "languages",
        )
        assert set(cli.commands) == set(names)

    def test_every_command_has_help(self):
        """Every registered command must produce valid --help output."""
        runner = CliRunner()
        for command_name in cli.commands:
            outcome = runner.invoke(cli, [command_name, "--help"])
            failure = f"{command_name} --help failed: {outcome.output}"
            assert outcome.exit_code == 0, failure

    def test_verbose_flag_accepted(self):
        """The global ``--verbose`` flag parses without error."""
        outcome = CliRunner().invoke(cli, ["--verbose", "--help"])
        assert outcome.exit_code == 0

    def test_pipe_flag_accepted(self):
        """The global ``--pipe`` flag parses without error."""
        outcome = CliRunner().invoke(cli, ["--pipe", "--help"])
        assert outcome.exit_code == 0
150
+
151
+
152
+ # =========================================================================
153
+ # 2. Project init lifecycle
154
+ # =========================================================================
155
+
156
class TestInitLifecycle:
    """Exercise ``codexa init`` — the first thing an end user does."""

    def test_init_creates_codex_dir(self, project: Path):
        """init succeeds and leaves a ``.codexa`` directory behind."""
        outcome = CliRunner().invoke(cli, ["init", str(project)])
        assert outcome.exit_code == 0
        codexa_dir = project / ".codexa"
        assert codexa_dir.is_dir()

    def test_init_creates_config_json(self, project: Path):
        """init writes a config.json holding embedding and search sections."""
        CliRunner().invoke(cli, ["init", str(project)])
        config_file = project / ".codexa" / "config.json"
        assert config_file.exists()
        settings = json.loads(config_file.read_text(encoding="utf-8"))
        for section in ("embedding", "search"):
            assert section in settings

    def test_init_creates_index_dir(self, project: Path):
        """init creates the ``.codexa/index`` directory."""
        CliRunner().invoke(cli, ["init", str(project)])
        index_dir = project / ".codexa" / "index"
        assert index_dir.is_dir()

    def test_init_idempotent(self, project: Path):
        """Running init twice should not error."""
        runner = CliRunner()
        init_args = ["init", str(project)]
        runner.invoke(cli, init_args)
        second = runner.invoke(cli, init_args)
        assert second.exit_code == 0
        assert "already initialized" in second.output.lower()

    def test_init_config_roundtrip(self, project: Path):
        """init → load_config → save_config → reload — data must survive."""
        from semantic_code_intelligence.config.settings import (
            init_project, load_config, save_config,
        )

        # A freshly loaded config matches the one init_project returned.
        created, _ = init_project(project)
        first_load = load_config(project)
        assert first_load.embedding.model_name == created.embedding.model_name

        # A modified value persists through save and reload.
        first_load.embedding.model_name = "custom-model"
        save_config(first_load, project)
        second_load = load_config(project)
        assert second_load.embedding.model_name == "custom-model"
200
+
201
+
202
+ # =========================================================================
203
+ # 3. Indexing
204
+ # =========================================================================
205
+
206
class TestIndexing:
    """Test codexa index — second step in the user journey."""

    def test_index_requires_init(self, project: Path):
        """index on an un-initialized dir should fail cleanly."""
        runner = CliRunner()
        result = runner.invoke(cli, ["index", str(project)])
        # pytest names the temp directory after this test
        # ("test_index_requires_init0"), so a message that merely echoes the
        # path would contain "init" on its own.  Remove the path before
        # checking that the user is told to run init first.  (If the console
        # wraps the path across lines the replace is a no-op and the check is
        # as strict as it was before.)
        output = result.output.replace(str(project), "")
        assert "init" in output.lower()

    def test_index_after_init(self, project: Path):
        """index succeeds once the project has been initialised."""
        runner = CliRunner()
        runner.invoke(cli, ["init", str(project)])
        result = runner.invoke(cli, ["index", str(project)])
        assert result.exit_code == 0
        # Should report some files indexed
        output = result.output.lower()
        assert "indexed" in output or "no indexable" in output

    def test_index_force_flag(self, project: Path):
        """``--force`` is accepted on a project that is already indexed."""
        runner = CliRunner()
        runner.invoke(cli, ["init", str(project)])
        runner.invoke(cli, ["index", str(project)])
        result = runner.invoke(cli, ["index", "--force", str(project)])
        assert result.exit_code == 0

    def test_index_creates_vectors(self, project: Path):
        """A written vector file is always accompanied by its metadata."""
        runner = CliRunner()
        runner.invoke(cli, ["init", str(project)])
        runner.invoke(cli, ["index", str(project)])
        index_dir = project / ".codexa" / "index"
        # Either vectors.faiss exists or no indexable files were found
        faiss_file = index_dir / "vectors.faiss"
        metadata_file = index_dir / "metadata.json"
        if faiss_file.exists():
            assert metadata_file.exists()
241
+
242
+
243
+ # =========================================================================
244
+ # 4. Search — all modes
245
+ # =========================================================================
246
+
247
class TestSearchModes:
    """Test codexa search across all four modes on an indexed project."""

    @pytest.fixture(autouse=True)
    def _indexed_project(self, project: Path):
        """Run init and index on the sample project before every test."""
        self.project = project
        self.runner = CliRunner()
        self.runner.invoke(cli, ["init", str(project)])
        self.runner.invoke(cli, ["index", str(project)])

    def test_search_semantic(self):
        """Semantic mode exits cleanly."""
        result = self.runner.invoke(cli, [
            "search", "greeting function", "-p", str(self.project),
            "--mode", "semantic", "--no-auto-index",
        ])
        assert result.exit_code == 0

    def test_search_keyword(self):
        """Keyword mode exits cleanly."""
        result = self.runner.invoke(cli, [
            "search", "greet", "-p", str(self.project),
            "--mode", "keyword", "--no-auto-index",
        ])
        assert result.exit_code == 0

    def test_search_regex(self):
        """Regex mode exits cleanly."""
        result = self.runner.invoke(cli, [
            "search", r"def\s+greet", "-p", str(self.project),
            "--mode", "regex", "--no-auto-index",
        ])
        assert result.exit_code == 0

    def test_search_hybrid(self):
        """Hybrid mode exits cleanly."""
        result = self.runner.invoke(cli, [
            "search", "calculator", "-p", str(self.project),
            "--mode", "hybrid", "--no-auto-index",
        ])
        assert result.exit_code == 0

    def test_search_no_init_fails(self, tmp_path: Path):
        """search on an un-initialized dir should point the user at init.

        ``tmp_path`` cannot serve as the un-initialized directory: the
        ``project`` fixture returns that same ``tmp_path``, and the autouse
        fixture above has already run init and index on it.  A separate,
        untouched temporary directory is used instead.
        """
        import tempfile

        with tempfile.TemporaryDirectory(prefix="codexa-bare-") as bare_dir:
            result = self.runner.invoke(cli, [
                "search", "anything", "-p", bare_dir,
            ])
        # Ignore the directory name itself so only the message text counts.
        output = result.output.replace(bare_dir, "")
        assert "init" in output.lower()
290
+
291
+
292
+ # =========================================================================
293
+ # 5. Search — JSON / JSONL output
294
+ # =========================================================================
295
+
296
class TestSearchOutputFormats:
    """Check the machine-readable search outputs (--json, --jsonl)."""

    @pytest.fixture(autouse=True)
    def _indexed(self, project: Path):
        """Run init and index on the sample project before every test."""
        self.project = project
        self.runner = CliRunner()
        for command in ("init", "index"):
            self.runner.invoke(cli, [command, str(project)])

    def _search(self, query, *flags):
        """Invoke ``search`` on the fixture project with auto-index disabled."""
        argv = ["search", query, "-p", str(self.project)]
        argv.extend(flags)
        argv.append("--no-auto-index")
        return self.runner.invoke(cli, argv)

    def test_json_output_valid(self):
        """--json yields a payload with a query and a list of results."""
        outcome = self._search("greet", "--json")
        assert outcome.exit_code == 0
        payload = _extract_json(outcome.output)
        assert "query" in payload
        assert "results" in payload
        assert isinstance(payload["results"], list)

    def test_json_result_structure(self):
        """The first hit, when there is one, carries every expected field."""
        outcome = self._search("greet", "--json")
        payload = _extract_json(outcome.output)
        hits = payload["results"]
        if not hits:
            return
        first_hit = hits[0]
        for field in ("file_path", "start_line", "end_line",
                      "language", "content", "score"):
            assert field in first_hit

    def test_jsonl_output(self):
        """Every JSON-looking line of --jsonl output parses and has a path."""
        outcome = self._search("add", "--jsonl")
        assert outcome.exit_code == 0
        # Each non-empty line starting with '{' must be valid JSONL
        for raw_line in outcome.output.strip().splitlines():
            candidate = raw_line.strip()
            if not candidate.startswith("{"):
                continue
            record = json.loads(candidate)
            assert "file_path" in record

    def test_json_empty_query(self):
        """A query with no expected hits still produces a valid payload."""
        outcome = self._search("xyznonexistent_zzz", "--json")
        assert outcome.exit_code == 0
        payload = _extract_json(outcome.output)
        assert payload["result_count"] == 0 or isinstance(payload["results"], list)

    def test_top_k_flag(self):
        """-k caps the number of returned results."""
        outcome = self._search("def", "--json", "-k", "2")
        payload = _extract_json(outcome.output)
        assert len(payload["results"]) <= 2
361
+
362
+
363
+ # =========================================================================
364
+ # 6. Search — grep flags (-l, -L, -n, -C, -s)
365
+ # =========================================================================
366
+
367
class TestSearchGrepFlags:
    """Check that the grep-style switches of ``search`` are accepted."""

    @pytest.fixture(autouse=True)
    def _indexed(self, project: Path):
        """Run init and index on the sample project before every test."""
        self.project = project
        self.runner = CliRunner()
        for command in ("init", "index"):
            self.runner.invoke(cli, [command, str(project)])

    def _search(self, query, *flags):
        """Invoke ``search`` on the fixture project with auto-index disabled."""
        argv = ["search", query, "-p", str(self.project)]
        argv.extend(flags)
        argv.append("--no-auto-index")
        return self.runner.invoke(cli, argv)

    def test_files_only_flag(self):
        """``-l`` exits cleanly."""
        assert self._search("greet", "-l").exit_code == 0

    def test_files_without_match_flag(self):
        """``-L`` exits cleanly."""
        assert self._search("greet", "-L").exit_code == 0

    def test_line_numbers_flag(self):
        """``-n`` exits cleanly."""
        assert self._search("greet", "-n").exit_code == 0

    def test_context_lines_flag(self):
        """``-C 3`` exits cleanly."""
        assert self._search("greet", "-C", "3").exit_code == 0

    def test_case_sensitive_flag(self):
        """``-s`` combined with regex mode exits cleanly."""
        outcome = self._search("Greet", "--mode", "regex", "-s")
        assert outcome.exit_code == 0

    def test_search_help_shows_all_grep_flags(self):
        """``search --help`` lists the long form of every grep-style flag."""
        help_text = self.runner.invoke(cli, ["search", "--help"]).output
        long_flags = (
            "--files-only",
            "--files-without-match",
            "--line-numbers",
            "--context-lines",
            "--case-sensitive",
            "--jsonl",
        )
        for flag in long_flags:
            assert flag in help_text, f"Missing {flag} in search --help"
417
+
418
+
419
+ # =========================================================================
420
+ # 7. Models CLI — the end user manages embedding models
421
+ # =========================================================================
422
+
423
class TestModelsCLI:
    """Exercise the ``models`` subcommand group end to end."""

    def test_models_help(self):
        """The group help names each of its subcommands."""
        outcome = CliRunner().invoke(cli, ["models", "--help"])
        assert outcome.exit_code == 0
        for subcommand in ("list", "info", "switch", "download"):
            assert subcommand in outcome.output

    def test_models_list(self):
        """The plain listing mentions the MiniLM model."""
        outcome = CliRunner().invoke(cli, ["models", "list"])
        assert outcome.exit_code == 0
        assert "MiniLM" in outcome.output

    def test_models_list_json_structure(self):
        """--json returns a list of at least five fully described models."""
        outcome = CliRunner().invoke(cli, ["models", "list", "--json"])
        assert outcome.exit_code == 0
        models = _extract_json(outcome.output)
        assert isinstance(models, list)
        assert len(models) >= 5
        # Each model has the expected keys
        required = {"name", "dimension", "description", "is_default"}
        for entry in models:
            assert required.issubset(entry.keys())

    def test_models_list_json_has_default(self):
        """Exactly one model is flagged default, and it is MiniLM."""
        outcome = CliRunner().invoke(cli, ["models", "list", "--json"])
        models = _extract_json(outcome.output)
        flagged = [entry for entry in models if entry["is_default"]]
        assert len(flagged) == 1
        (only_default,) = flagged
        assert only_default["name"] == "all-MiniLM-L6-v2"

    def test_models_info_valid(self):
        """info on the minilm alias reports its 384 dimension."""
        outcome = CliRunner().invoke(cli, ["models", "info", "minilm"])
        assert outcome.exit_code == 0
        assert "384" in outcome.output  # dimension

    def test_models_info_alias(self):
        """info resolves the bge-small alias."""
        outcome = CliRunner().invoke(cli, ["models", "info", "bge-small"])
        assert outcome.exit_code == 0
        assert "BGE" in outcome.output

    def test_models_info_unknown_fails(self):
        """info on an unknown model exits non-zero."""
        outcome = CliRunner().invoke(cli, ["models", "info", "no-such-model"])
        assert outcome.exit_code != 0

    def test_models_switch_requires_init(self, tmp_path: Path):
        """switch refuses to run in a directory that was never initialised."""
        argv = ["models", "switch", "minilm", "-p", str(tmp_path)]
        outcome = CliRunner().invoke(cli, argv)
        assert outcome.exit_code != 0
        assert "init" in outcome.output.lower()

    def test_models_switch_updates_config(self, project: Path):
        """switch persists the resolved model name into the project config."""
        from semantic_code_intelligence.config.settings import load_config

        runner = CliRunner()
        runner.invoke(cli, ["init", str(project)])
        argv = ["models", "switch", "bge-small", "-p", str(project)]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        # Verify config was actually updated
        stored = load_config(project)
        assert stored.embedding.model_name == "BAAI/bge-small-en-v1.5"
492
+
493
+
494
+ # =========================================================================
495
+ # 8. Model registry (direct API)
496
+ # =========================================================================
497
+
498
class TestModelRegistryAPI:
    """Use ``embeddings.model_registry`` directly, as a library consumer would."""

    def test_resolve_known_aliases(self):
        """Each short alias resolves to its full model identifier."""
        from semantic_code_intelligence.embeddings.model_registry import resolve_model_name

        alias_to_full_name = {
            "minilm": "all-MiniLM-L6-v2",
            "bge-small": "BAAI/bge-small-en-v1.5",
            "nomic": "nomic-ai/nomic-embed-text-v1.5",
            "jina-code": "jinaai/jina-embeddings-v2-base-code",
            "mxbai-xsmall": "mixedbread-ai/mxbai-embed-xsmall-v1",
        }
        for alias, full_name in alias_to_full_name.items():
            assert resolve_model_name(alias) == full_name

    def test_resolve_full_name_passthrough(self):
        """A full registry name resolves to itself."""
        from semantic_code_intelligence.embeddings.model_registry import resolve_model_name

        full_name = "all-MiniLM-L6-v2"
        assert resolve_model_name(full_name) == full_name

    def test_resolve_custom_model_passthrough(self):
        """A name that is not in the registry is returned unchanged."""
        from semantic_code_intelligence.embeddings.model_registry import resolve_model_name

        custom_name = "my-org/my-model"
        assert resolve_model_name(custom_name) == custom_name

    def test_list_models_count(self):
        """The registry lists exactly five models."""
        from semantic_code_intelligence.embeddings.model_registry import list_models

        available = list_models()
        assert len(available) == 5

    def test_model_info_dimensions(self):
        """Model info reports the expected embedding dimension per alias."""
        from semantic_code_intelligence.embeddings.model_registry import get_model_info

        expected_dimensions = (
            ("minilm", 384),
            ("nomic", 768),
            ("jina-code", 768),
            ("mxbai-xsmall", 384),
        )
        for alias, dimension in expected_dimensions:
            assert get_model_info(alias).dimension == dimension

    def test_model_info_none_for_unknown(self):
        """Looking up an unknown model yields ``None``."""
        from semantic_code_intelligence.embeddings.model_registry import get_model_info

        info = get_model_info("nonexistent-xxx")
        assert info is None

    def test_default_model_constant(self):
        """The module-level default is the MiniLM model."""
        from semantic_code_intelligence.embeddings.model_registry import DEFAULT_MODEL

        assert DEFAULT_MODEL == "all-MiniLM-L6-v2"
535
+
536
+
537
+ # =========================================================================
538
+ # 9. Config settings (API level)
539
+ # =========================================================================
540
+
541
class TestConfigAPI:
    """Exercise the configuration layer from a library consumer's viewpoint."""

    def test_appconfig_defaults(self):
        """A freshly constructed ``AppConfig`` carries the expected defaults."""
        from semantic_code_intelligence.config.settings import AppConfig

        defaults = AppConfig()
        assert defaults.embedding.model_name == "all-MiniLM-L6-v2"
        assert defaults.search.top_k == 10
        assert defaults.llm.provider == "mock"

    def test_config_dir_paths(self, tmp_path: Path):
        """Config, config-file and index paths all live under ``.codexa``."""
        from semantic_code_intelligence.config.settings import AppConfig

        dot_dir = tmp_path / ".codexa"
        assert AppConfig.config_dir(tmp_path) == dot_dir
        assert AppConfig.config_path(tmp_path) == dot_dir / "config.json"
        assert AppConfig.index_dir(tmp_path) == dot_dir / "index"

    def test_load_config_default_when_missing(self, tmp_path: Path):
        """Loading from a directory without a config falls back to defaults."""
        from semantic_code_intelligence.config.settings import load_config

        fallback = load_config(tmp_path)
        assert fallback.embedding.model_name == "all-MiniLM-L6-v2"

    def test_save_and_load_roundtrip(self, tmp_path: Path):
        """Values changed before ``save_config`` come back from ``load_config``."""
        from semantic_code_intelligence.config.settings import (
            AppConfig,
            load_config,
            save_config,
        )

        original = AppConfig(project_root=str(tmp_path))
        original.search.top_k = 42
        original.embedding.chunk_size = 256
        save_config(original, tmp_path)

        restored = load_config(tmp_path)
        assert restored.search.top_k == 42
        assert restored.embedding.chunk_size == 256

    def test_init_project_creates_everything(self, tmp_path: Path):
        """``init_project`` creates the config file plus config and index dirs."""
        from semantic_code_intelligence.config.settings import AppConfig, init_project

        created_config, created_path = init_project(tmp_path)
        assert created_path.exists()
        assert AppConfig.config_dir(tmp_path).is_dir()
        assert AppConfig.index_dir(tmp_path).is_dir()
        assert isinstance(created_config, AppConfig)
582
+
583
+
584
+ # =========================================================================
585
+ # 10. Vector store — the core data engine
586
+ # =========================================================================
587
+
588
class TestVectorStoreE2E:
    """End-to-end vector store operations."""

    def _make_vectors(self, n: int, dim: int, seed: int = 0):
        """Return ``n`` L2-normalised float32 vectors of length ``dim``.

        The generator is seeded so every run sees the same fixture data; an
        unseeded RNG makes a failure impossible to reproduce.
        """
        rng = np.random.default_rng(seed)
        vecs = rng.standard_normal((n, dim)).astype(np.float32)
        # Normalise each row so inner product and cosine similarity agree.
        return vecs / np.linalg.norm(vecs, axis=1, keepdims=True)

    def _make_metadata(self, n: int):
        """Return ``n`` ``ChunkMetadata`` records, one per synthetic file."""
        from semantic_code_intelligence.storage.vector_store import ChunkMetadata

        return [
            ChunkMetadata(
                file_path=f"file_{i}.py",
                start_line=i * 10 + 1,
                end_line=i * 10 + 10,
                chunk_index=i,
                language="python",
                content=f"content chunk {i}",
                content_hash=f"hash{i}",
            )
            for i in range(n)
        ]

    def test_add_and_search(self):
        """Searching with a stored vector returns that vector's chunk first."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(64)
        vecs = self._make_vectors(20, 64)
        meta = self._make_metadata(20)
        store.add(vecs, meta)
        assert store.size == 20

        results = store.search(vecs[0], top_k=5)
        assert len(results) == 5
        # First result should be the query vector itself (highest similarity)
        assert results[0][0].chunk_index == 0
        assert results[0][1] > 0.9  # near-perfect cosine sim

    def test_save_and_load(self, tmp_path: Path):
        """A saved store writes both artifacts and reloads with its contents."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(32)
        vecs = self._make_vectors(10, 32)
        meta = self._make_metadata(10)
        store.add(vecs, meta)

        target = tmp_path / "vs"
        store.save(target)
        assert (target / "vectors.faiss").exists()
        assert (target / "metadata.json").exists()

        loaded = VectorStore.load(target)
        assert loaded.size == 10
        assert loaded.metadata[0].file_path == "file_0.py"

    def test_remove_by_file(self):
        """Removing a file drops exactly its chunks and reports the count."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(16)
        vecs = self._make_vectors(10, 16)
        meta = self._make_metadata(10)
        store.add(vecs, meta)

        removed = store.remove_by_file("file_3.py")
        assert removed == 1
        assert store.size == 9
        assert all(m.file_path != "file_3.py" for m in store.metadata)

    def test_remove_nonexistent_file(self):
        """Removing an unknown file removes nothing and reports zero."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(16)
        vecs = self._make_vectors(5, 16)
        meta = self._make_metadata(5)
        store.add(vecs, meta)
        assert store.remove_by_file("no_such_file.py") == 0
        assert store.size == 5

    def test_clear(self):
        """``clear`` empties both the vectors and the metadata."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(16)
        vecs = self._make_vectors(5, 16)
        meta = self._make_metadata(5)
        store.add(vecs, meta)
        store.clear()
        assert store.size == 0
        assert len(store.metadata) == 0

    def test_add_empty_noop(self):
        """Adding an empty batch leaves the store empty."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(16)
        store.add(np.empty((0, 16), dtype=np.float32), [])
        assert store.size == 0

    def test_add_mismatched_raises(self):
        """A vector/metadata count mismatch raises ``ValueError``."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(16)
        vecs = self._make_vectors(3, 16)
        meta = self._make_metadata(2)
        with pytest.raises(ValueError, match="metadata count"):
            store.add(vecs, meta)

    def test_search_empty_store(self):
        """Searching an empty store returns an empty list."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(16)
        results = store.search(np.zeros(16, dtype=np.float32), top_k=5)
        assert results == []

    def test_ivf_constructor(self):
        """Requesting IVF at construction is recorded on the store."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(64, use_ivf=True)
        assert store._use_ivf is True

    def test_ivf_fallback_small_batch(self):
        """IVF mode falls back to flat when the batch is too small to train."""
        from semantic_code_intelligence.storage.vector_store import VectorStore

        store = VectorStore(8, use_ivf=True)
        vecs = self._make_vectors(10, 8)
        meta = self._make_metadata(10)
        store.add(vecs, meta)
        assert store.size == 10
        # Should have silently fallen back to flat
        assert store._use_ivf is False

    def test_ivf_constants(self):
        """The IVF tuning constants keep their published values."""
        from semantic_code_intelligence.storage.vector_store import (
            IVF_NLIST,
            IVF_NPROBE,
            IVF_THRESHOLD,
        )

        assert IVF_THRESHOLD == 50_000
        assert IVF_NLIST == 100
        assert IVF_NPROBE == 10
713
+
714
+
715
+ # =========================================================================
716
+ # 11. Formatter — JSON / JSONL / Rich / Context expansion
717
+ # =========================================================================
718
+
719
class TestFormatterAPI:
    """Exercise the search-result formatter from a library consumer's viewpoint."""

    def _make_results(self, n: int = 3):
        """Build ``n`` synthetic ``SearchResult`` objects with descending scores."""
        from semantic_code_intelligence.services.search_service import SearchResult

        fixtures = []
        for i in range(n):
            fixtures.append(
                SearchResult(
                    file_path=f"file_{i}.py",
                    start_line=i * 10 + 1,
                    end_line=i * 10 + 10,
                    language="python",
                    content=f"def func_{i}():\n pass\n",
                    score=0.9 - i * 0.1,
                    chunk_index=i,
                )
            )
        return fixtures

    def test_format_json(self):
        """JSON output echoes the query, ``top_k`` and every result."""
        from semantic_code_intelligence.search.formatter import format_results_json

        rendered = format_results_json("test query", self._make_results(2), 10)
        payload = json.loads(rendered)
        assert payload["query"] == "test query"
        assert payload["top_k"] == 10
        assert payload["result_count"] == 2
        assert len(payload["results"]) == 2

    def test_format_json_empty(self):
        """JSON output for no results reports a zero count and an empty list."""
        from semantic_code_intelligence.search.formatter import format_results_json

        payload = json.loads(format_results_json("empty", [], 5))
        assert payload["result_count"] == 0
        assert payload["results"] == []

    def test_format_jsonl(self):
        """JSONL output has one parseable object per result."""
        from semantic_code_intelligence.search.formatter import format_results_jsonl

        rendered = format_results_jsonl(self._make_results(3))
        rows = rendered.strip().split("\n")
        assert len(rows) == 3
        for row in rows:
            record = json.loads(row)
            assert "file_path" in record
            assert "score" in record

    def test_format_jsonl_empty(self):
        """JSONL output for no results is the empty string."""
        from semantic_code_intelligence.search.formatter import format_results_jsonl

        rendered = format_results_jsonl([])
        assert rendered == ""

    def test_expand_context_missing_file(self):
        """With an unreadable source file the stored content is returned as-is."""
        from semantic_code_intelligence.search.formatter import _expand_context
        from semantic_code_intelligence.services.search_service import SearchResult

        hit = SearchResult("nonexistent.py", 5, 10, "python", "hello", 0.5, 0)
        expanded, first_line = _expand_context(hit, 3)
        assert expanded == "hello"
        assert first_line == 5

    def test_expand_context_real_file(self, tmp_path: Path):
        """With a readable source file the window grows on both sides."""
        from semantic_code_intelligence.search.formatter import _expand_context
        from semantic_code_intelligence.services.search_service import SearchResult

        # Twenty numbered lines give room to expand around lines 10-12.
        source_file = tmp_path / "test.py"
        body = "".join([f"line {i}\n" for i in range(1, 21)])
        source_file.write_text(body, encoding="utf-8")

        hit = SearchResult(str(source_file), 10, 12, "python", "line 10\n", 0.8, 0)
        expanded, first_line = _expand_context(hit, 2)
        assert first_line == 8  # 10 - 2
        assert "line 8" in expanded
        assert "line 14" in expanded
789
+
790
+
791
+ # =========================================================================
792
+ # 12. TUI helpers (fallback REPL utilities)
793
+ # =========================================================================
794
+
795
class TestTUIHelpers:
    """Exercise the TUI helper functions without starting the full TUI."""

    def test_textual_available_returns_bool(self):
        """The availability probe answers with a plain ``bool``."""
        from semantic_code_intelligence.tui import _textual_available

        probe = _textual_available()
        assert isinstance(probe, bool)

    def test_format_result_line(self):
        """A formatted line shows the file name, 3-decimal score and line span."""
        from semantic_code_intelligence.services.search_service import SearchResult
        from semantic_code_intelligence.tui import _format_result_line

        hit = SearchResult("src/main.py", 10, 20, "python", "code", 0.85, 0)
        rendered = _format_result_line(1, hit)
        for fragment in ("main.py", "0.850", "L10-20"):
            assert fragment in rendered

    def test_print_results_no_results(self, capsys):
        """Printing an empty result list mentions that nothing was found."""
        from semantic_code_intelligence.tui import _print_results

        _print_results([], "test query")
        stdout = capsys.readouterr().out
        assert "no results" in stdout.lower()

    def test_print_results_with_results(self, capsys):
        """Printing results reports the count and names the files."""
        from semantic_code_intelligence.services.search_service import SearchResult
        from semantic_code_intelligence.tui import _print_results

        first = SearchResult("a.py", 1, 5, "python", "code", 0.9, 0)
        second = SearchResult("b.py", 10, 20, "python", "more", 0.8, 1)
        _print_results([first, second], "test")
        stdout = capsys.readouterr().out
        assert "2 results" in stdout
        assert "a.py" in stdout

    def test_show_detail_valid_index(self, capsys):
        """Detail for an in-range index prints the path and the content."""
        from semantic_code_intelligence.services.search_service import SearchResult
        from semantic_code_intelligence.tui import _show_detail

        only_hit = SearchResult("a.py", 1, 3, "python", "line1\nline2\nline3", 0.9, 0)
        _show_detail([only_hit], 1)
        stdout = capsys.readouterr().out
        assert "a.py" in stdout
        assert "line1" in stdout

    def test_show_detail_invalid_index(self, capsys):
        """Detail for an out-of-range index prints an 'invalid' message."""
        from semantic_code_intelligence.tui import _show_detail

        _show_detail([], 5)
        stdout = capsys.readouterr().out
        assert "invalid" in stdout.lower()

    def test_run_tui_function_exists(self):
        """``run_tui`` is importable and callable."""
        from semantic_code_intelligence.tui import run_tui

        assert callable(run_tui)
849
+
850
+
851
+ # =========================================================================
852
+ # 13. VS Code extension — validate all artifacts
853
+ # =========================================================================
854
+
855
class TestVSCodeExtension:
    """Validate the VS Code extension artifacts as an end user or developer would."""

    # Two levels above this test file's directory, then ``vscode-extension``.
    VSCODE_DIR = Path(__file__).resolve().parents[2] / "vscode-extension"

    # --- shared readers (not collected by pytest: no ``test`` prefix) ---

    def _manifest(self):
        """Parse and return ``package.json`` from the extension directory."""
        manifest_path = self.VSCODE_DIR / "package.json"
        return json.loads(manifest_path.read_text("utf-8"))

    def _extension_source(self):
        """Return the text of ``src/extension.ts``."""
        entry_point = self.VSCODE_DIR / "src" / "extension.ts"
        return entry_point.read_text("utf-8")

    def test_extension_directory_exists(self):
        """The extension directory is present in the repository."""
        assert self.VSCODE_DIR.is_dir()

    # --- package.json ---

    def test_package_json_exists(self):
        """``package.json`` is present."""
        manifest_path = self.VSCODE_DIR / "package.json"
        assert manifest_path.exists()

    def test_package_json_valid_json(self):
        """``package.json`` parses to a JSON object."""
        assert isinstance(self._manifest(), dict)

    def test_package_json_name(self):
        """The extension is named ``codexa``."""
        assert self._manifest()["name"] == "codexa"

    def test_package_json_version(self):
        """The manifest declares a version."""
        assert "version" in self._manifest()

    def test_package_json_engine(self):
        """The VS Code engine constraint is a caret range."""
        engine_range = self._manifest()["engines"]["vscode"]
        assert engine_range.startswith("^")

    def test_package_json_main_entry(self):
        """The manifest points at the compiled extension entry."""
        assert self._manifest()["main"] == "./out/extension.js"

    def test_package_json_4_commands(self):
        """The manifest contributes exactly the eight expected commands.

        The method name still says "4"; the assertions pin eight.
        """
        contributed = self._manifest()["contributes"]["commands"]
        assert len(contributed) == 8
        contributed_ids = {entry["command"] for entry in contributed}
        assert contributed_ids == {
            "codexa.search", "codexa.askCodexA",
            "codexa.callGraph", "codexa.models",
            "codexa.quality", "codexa.explainSymbol",
            "codexa.doctor", "codexa.index",
        }

    def test_package_json_activation_events(self):
        """Search command and sidebar view both activate the extension."""
        activation = self._manifest()["activationEvents"]
        assert "onCommand:codexa.search" in activation
        assert "onView:codexaSearchView" in activation

    def test_package_json_sidebar_webview(self):
        """The ``codexa`` view container exposes the search view."""
        contributed_views = self._manifest()["contributes"]["views"]
        assert "codexa" in contributed_views
        sidebar_ids = [view["id"] for view in contributed_views["codexa"]]
        assert "codexaSearchView" in sidebar_ids

    def test_package_json_keybinding(self):
        """The first keybinding maps ctrl+shift+f5 to the search command."""
        bindings = self._manifest()["contributes"]["keybindings"]
        assert len(bindings) >= 1
        primary = bindings[0]
        assert primary["command"] == "codexa.search"
        assert "ctrl+shift+f5" in primary.get("key", "")

    def test_package_json_activity_bar(self):
        """An activity-bar container with id ``codexa`` is contributed."""
        bar_entries = self._manifest()["contributes"]["viewsContainers"]["activitybar"]
        assert any(entry["id"] == "codexa" for entry in bar_entries)

    # --- extension.ts ---

    def test_extension_ts_exists(self):
        """``src/extension.ts`` is present."""
        entry_point = self.VSCODE_DIR / "src" / "extension.ts"
        assert entry_point.exists()

    def test_extension_ts_exports_activate(self):
        """The source exports an ``activate`` function."""
        assert "export function activate" in self._extension_source()

    def test_extension_ts_exports_deactivate(self):
        """The source exports a ``deactivate`` function."""
        assert "export function deactivate" in self._extension_source()

    def test_extension_ts_search_view_provider(self):
        """The source declares the ``SearchViewProvider`` class."""
        assert "class SearchViewProvider" in self._extension_source()

    def test_extension_ts_codex_bin_helper(self):
        """The source declares the ``codexBin`` helper."""
        assert "function codexBin" in self._extension_source()

    def test_extension_ts_run_codex_helper(self):
        """The source declares the async ``runCodex`` helper."""
        assert "async function runCodex" in self._extension_source()

    def test_extension_ts_registers_4_commands(self):
        """Four core command ids appear in the extension source."""
        ts_source = self._extension_source()
        core_commands = ["codexa.search", "codexa.askCodexA", "codexa.callGraph", "codexa.models"]
        for cmd in core_commands:
            assert cmd in ts_source, f"Command {cmd} not registered in extension.ts"

    def test_extension_ts_webview_html(self):
        """The source embeds webview HTML that acquires the VS Code API."""
        ts_source = self._extension_source()
        assert "<!DOCTYPE html>" in ts_source
        assert "acquireVsCodeApi" in ts_source

    def test_extension_ts_escape_html(self):
        """Extension must escape HTML in search results to prevent XSS."""
        assert "escapeHtml" in self._extension_source()

    # --- tsconfig.json ---

    def test_tsconfig_json_exists(self):
        """``tsconfig.json`` is present."""
        tsconfig_path = self.VSCODE_DIR / "tsconfig.json"
        assert tsconfig_path.exists()

    def test_tsconfig_json_valid(self):
        """``tsconfig.json`` parses as strict JSON and has compiler options."""
        tsconfig_text = (self.VSCODE_DIR / "tsconfig.json").read_text("utf-8")
        tsconfig = json.loads(tsconfig_text)
        assert "compilerOptions" in tsconfig

    # --- README ---

    def test_readme_exists(self):
        """``README.md`` is present."""
        readme_path = self.VSCODE_DIR / "README.md"
        assert readme_path.exists()

    def test_readme_not_empty(self):
        """``README.md`` holds more than fifty characters."""
        readme_text = (self.VSCODE_DIR / "README.md").read_text("utf-8")
        assert len(readme_text) > 50
984
+
985
+
986
+ # =========================================================================
987
+ # 14. Build script (PyInstaller)
988
+ # =========================================================================
989
+
990
class TestBuildScript:
    """Verify the PyInstaller build script is usable."""

    # Two levels above this test file's directory, then ``scripts/build_binary.py``.
    BUILD_PY = Path(__file__).resolve().parents[2] / "scripts" / "build_binary.py"

    def test_build_py_exists(self):
        """The build script is present in the repository."""
        assert self.BUILD_PY.exists()

    def test_build_py_importable(self):
        """The build script has a usable import spec and parses as Python.

        The module is deliberately not executed, because running it would try
        to build. Creating a module object alone proves nothing about the file
        (it never yields ``None``), so the source is also parsed to catch
        syntax errors that would break a real import.
        """
        import ast
        import importlib.util

        spec = importlib.util.spec_from_file_location("build_binary", str(self.BUILD_PY))
        assert spec is not None
        assert spec.loader is not None
        mod = importlib.util.module_from_spec(spec)
        assert mod is not None

        # Raises SyntaxError (failing the test) if the script cannot be parsed.
        ast.parse(self.BUILD_PY.read_text("utf-8"), filename=str(self.BUILD_PY))

    def test_build_py_has_build_function(self):
        """The script source contains a ``build`` function definition."""
        src = self.BUILD_PY.read_text("utf-8")
        assert "def build" in src

    def test_build_py_supports_onefile(self):
        """The script source mentions one-file mode (case-insensitive)."""
        src = self.BUILD_PY.read_text("utf-8")
        assert "onefile" in src.lower()
1013
+
1014
+
1015
+ # =========================================================================
1016
+ # 15. Doctor command
1017
+ # =========================================================================
1018
+
1019
class TestDoctorCommand:
    """Quick health check of the ``doctor`` subcommand."""

    def test_doctor_help(self):
        """``doctor --help`` exits successfully."""
        outcome = CliRunner().invoke(cli, ["doctor", "--help"])
        assert outcome.exit_code == 0

    def test_doctor_runs(self):
        """``doctor`` exits successfully and its output mentions Python."""
        outcome = CliRunner().invoke(cli, ["doctor"])
        assert outcome.exit_code == 0
        report = outcome.output.lower()
        assert "python" in report
1032
+
1033
+
1034
+ # =========================================================================
1035
+ # 16. Full user journey — init → index → search → json → switch → re-search
1036
+ # =========================================================================
1037
+
1038
class TestFullUserJourney:
    """Simulate a complete end-user workflow from start to finish."""

    def test_full_pipeline(self, project: Path):
        """Drive init, index, search, model switch, re-index and search again.

        Each exit-code assertion carries the CLI output as its message, so a
        failing step in this long sequence can be read from the pytest report.
        """
        runner = CliRunner()

        # 1. Init
        r = runner.invoke(cli, ["init", str(project)])
        assert r.exit_code == 0, r.output
        assert (project / ".codexa").is_dir()

        # 2. Index
        r = runner.invoke(cli, ["index", str(project)])
        assert r.exit_code == 0, r.output

        # 3. Semantic search → JSON
        r = runner.invoke(cli, [
            "search", "greeting", "-p", str(project),
            "--json", "--no-auto-index",
        ])
        assert r.exit_code == 0, r.output
        data = _extract_json(r.output)
        assert "results" in data

        # 4. Regex search → files-only
        r = runner.invoke(cli, [
            "search", r"def\s+\w+", "-p", str(project),
            "--mode", "regex", "-l", "--no-auto-index",
        ])
        assert r.exit_code == 0, r.output

        # 5. JSONL output
        r = runner.invoke(cli, [
            "search", "calculator", "-p", str(project),
            "--jsonl", "--no-auto-index",
        ])
        assert r.exit_code == 0, r.output

        # 6. Models list
        r = runner.invoke(cli, ["models", "list", "--json"])
        assert r.exit_code == 0, r.output
        models = _extract_json(r.output)
        assert len(models) >= 5

        # 7. Switch model
        r = runner.invoke(cli, ["models", "switch", "bge-small", "-p", str(project)])
        assert r.exit_code == 0, r.output

        # 8. Verify config changed
        from semantic_code_intelligence.config.settings import load_config

        cfg = load_config(project)
        assert cfg.embedding.model_name == "BAAI/bge-small-en-v1.5"

        # 9. Re-index with force (new model)
        r = runner.invoke(cli, ["index", "--force", str(project)])
        assert r.exit_code == 0, r.output

        # 10. Search again after model switch
        r = runner.invoke(cli, [
            "search", "fibonacci", "-p", str(project),
            "--json", "--no-auto-index",
        ])
        assert r.exit_code == 0, r.output
        data = _extract_json(r.output)
        assert "results" in data

    def test_doctor_in_initialized_project(self, project: Path):
        """``doctor`` succeeds after a project has been initialised.

        The ``init`` step is asserted rather than ignored, so a broken init
        cannot hide behind a passing ``doctor`` run.
        """
        runner = CliRunner()
        init_result = runner.invoke(cli, ["init", str(project)])
        assert init_result.exit_code == 0, init_result.output

        # NOTE(review): ``doctor`` is invoked without a path, so it presumably
        # inspects the current working directory rather than ``project`` —
        # confirm whether it should be pointed at the initialised project.
        result = runner.invoke(cli, ["doctor"])
        assert result.exit_code == 0, result.output

    def test_version_consistency(self):
        """The version in __init__ should match --version output."""
        runner = CliRunner()
        result = runner.invoke(cli, ["--version"])
        assert __version__ in result.output
1115
+
1116
+
1117
+ # =========================================================================
1118
+ # 17. SearchResult.to_dict()
1119
+ # =========================================================================
1120
+
1121
class TestSearchResultContract:
    """Pin the ``SearchResult`` dataclass contract exposed through ``to_dict``."""

    def test_to_dict_keys(self):
        """``to_dict`` exposes exactly the seven documented keys."""
        from semantic_code_intelligence.services.search_service import SearchResult

        expected_keys = {
            "file_path", "start_line", "end_line",
            "language", "content", "score", "chunk_index",
        }
        as_dict = SearchResult("a.py", 1, 10, "python", "code", 0.999, 0).to_dict()
        assert set(as_dict.keys()) == expected_keys

    def test_to_dict_score_rounded(self):
        """The serialised score is rounded to four decimal places."""
        from semantic_code_intelligence.services.search_service import SearchResult

        hit = SearchResult("a.py", 1, 10, "python", "code", 0.123456789, 0)
        serialised_score = hit.to_dict()["score"]
        assert serialised_score == 0.1235

    def test_to_dict_serializable(self):
        """The dictionary form can be dumped to JSON."""
        from semantic_code_intelligence.services.search_service import SearchResult

        hit = SearchResult("a.py", 1, 10, "python", "code", 0.9, 0)
        # Should not raise
        json.dumps(hit.to_dict())