codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,303 @@
1
+ """CLI command: benchmark — measure indexing speed, search latency, and memory usage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import time
8
+ from pathlib import Path
9
+
10
+ import click
11
+
12
+ from semantic_code_intelligence.config.settings import AppConfig, load_config
13
+ from semantic_code_intelligence.utils.logging import (
14
+ get_logger,
15
+ print_error,
16
+ print_info,
17
+ print_success,
18
+ console,
19
+ )
20
+
21
+ logger = get_logger("cli.benchmark")
22
+
23
+
24
+ def _get_memory_mb() -> float:
25
+ """Get current process memory usage in MB."""
26
+ try:
27
+ import resource # type: ignore[import-untyped]
28
+ return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
29
+ except ImportError:
30
+ # Windows fallback
31
+ try:
32
+ import psutil # type: ignore[import-untyped]
33
+ return psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)
34
+ except ImportError:
35
+ return 0.0
36
+
37
+
38
+ def _format_duration(seconds: float) -> str:
39
+ if seconds < 1:
40
+ return f"{seconds * 1000:.1f}ms"
41
+ return f"{seconds:.2f}s"
42
+
43
+
44
def _count_files(root: Path) -> int:
    """Count indexable files without importing heavy modules.

    Walks ``root`` recursively, skipping every directory whose name starts
    with a dot, and counts files whose suffix is in the configured
    ``config.index.extensions``.

    Args:
        root: Project root directory to scan.

    Returns:
        Number of files with a configured extension outside hidden directories.
    """
    config = load_config(root)
    # Path.suffix includes the leading dot, so the configured extensions are
    # presumably dotted too (".py") — TODO confirm against the settings schema.
    extensions = set(config.index.extensions)
    count = 0
    for _dirpath, dirnames, filenames in os.walk(root):
        # Prune hidden directories in place so os.walk never descends into
        # them (e.g. .git); the resulting count is the same as filtering
        # afterwards, without the wasted traversal.
        dirnames[:] = [name for name in dirnames if not name.startswith(".")]
        count += sum(1 for name in filenames if Path(name).suffix in extensions)
    return count
56
+
57
+
58
@click.command("benchmark")
@click.option(
    "--path",
    "-p",
    default=".",
    type=click.Path(exists=True, file_okay=False, resolve_path=True),
    help="Project root path to benchmark against.",
)
@click.option(
    "--json-output",
    "--json",
    "json_mode",
    is_flag=True,
    default=False,
    help="Output results as JSON.",
)
@click.option(
    "--rounds",
    "-r",
    default=3,
    type=int,
    help="Number of search rounds for latency averaging.",
)
@click.option(
    "--profile",
    is_flag=True,
    default=False,
    help="Run cProfile on full indexing and dump top 20 hotspots.",
)
@click.pass_context
def benchmark_cmd(
    ctx: click.Context,
    path: str,
    json_mode: bool,
    rounds: int,
    profile: bool,
) -> None:
    """Benchmark indexing speed, search latency, and memory usage.

    Measures the full indexing pipeline, incremental re-indexing, all
    four search modes (semantic, keyword, regex, hybrid), and reports
    memory consumption and cache hit rates.

    Examples:

    \b
        codexa benchmark
        codexa benchmark --json
        codexa benchmark --rounds 5
    """
    # NOTE: the docstring above is the user-facing --help text, so developer
    # notes live in comments instead.
    #
    # Flow: (1) forced full index, (2) incremental index with no changes,
    # (3) search latency per mode, (4) BM25 load-vs-build, (5) memory snapshot,
    # then either a JSON dump or a rich table of the collected results.
    from rich.table import Table

    root = Path(path).resolve()
    config_dir = AppConfig.config_dir(root)

    if not config_dir.exists():
        print_error(f"Project not initialized at {root}. Run 'codexa init' first.")
        ctx.exit(1)
        return

    index_dir = AppConfig.index_dir(root)
    results: dict[str, object] = {
        "project_root": str(root),
        "rounds": rounds,
    }

    file_count = _count_files(root)
    results["file_count"] = file_count
    print_info(f"Benchmarking {root} ({file_count} indexable files)")

    # --- 1. Full indexing benchmark ---
    print_info("1/5: Full indexing...")
    # Import everything needed *before* taking the baseline so that module
    # import cost is not attributed to the indexing run itself.
    from semantic_code_intelligence.services.indexing_service import run_indexing

    profiler = None
    if profile:
        import cProfile
        import io
        import pstats

        profiler = cProfile.Profile()

    mem_before = _get_memory_mb()
    t0 = time.perf_counter()
    if profiler is not None:
        profiler.enable()
    idx_result = run_indexing(root, force=True)
    if profiler is not None:
        profiler.disable()
    full_index_time = time.perf_counter() - t0
    mem_after = _get_memory_mb()

    if profiler is not None:
        # Print profiling results
        stream = io.StringIO()
        profile_stats = pstats.Stats(profiler, stream=stream)
        profile_stats.sort_stats("cumulative")
        profile_stats.print_stats(20)
        print_info("cProfile top 20 hotspots (by cumulative time):")
        click.echo(stream.getvalue())

    full_index = {
        "duration_s": round(full_index_time, 3),
        "files_indexed": idx_result.files_indexed,
        "chunks_created": idx_result.chunks_created,
        "total_vectors": idx_result.total_vectors,
        "symbols_extracted": idx_result.symbols_extracted,
        "files_per_second": (
            round(idx_result.files_indexed / full_index_time, 1)
            if full_index_time > 0
            else 0
        ),
        "memory_delta_mb": round(mem_after - mem_before, 1),
    }
    results["full_index"] = full_index
    print_success(f" Full index: {_format_duration(full_index_time)} "
                  f"({idx_result.files_indexed} files, {idx_result.chunks_created} chunks)")

    # --- 2. Incremental indexing benchmark (no changes → should be fast) ---
    print_info("2/5: Incremental indexing (no changes)...")
    t0 = time.perf_counter()
    inc_result = run_indexing(root, force=False)
    inc_time = time.perf_counter() - t0

    incremental_index = {
        "duration_s": round(inc_time, 3),
        "files_skipped": inc_result.files_skipped,
        "files_indexed": inc_result.files_indexed,
        "chunks_reused": inc_result.chunks_reused,
        # With nothing scanned there was nothing to miss, hence 100%.
        "cache_hit_rate": (
            round(100 * inc_result.files_skipped / inc_result.files_scanned, 1)
            if inc_result.files_scanned > 0
            else 100.0
        ),
    }
    results["incremental_index"] = incremental_index
    print_success(f" Incremental: {_format_duration(inc_time)} "
                  f"(cache hit {incremental_index['cache_hit_rate']}%)")

    # --- 3. Search latency benchmarks ---
    print_info("3/5: Search latency ({} rounds)...".format(rounds))
    test_queries = [
        "authentication middleware",
        "error handling",
        "database connection",
        "parse configuration",
        "search codebase",
    ]
    from semantic_code_intelligence.services.search_service import search_codebase

    search_results: dict[str, dict[str, float]] = {}
    for mode in ["semantic", "keyword", "regex", "hybrid"]:
        times: list[float] = []
        failures = 0
        for _r in range(rounds):
            for query in test_queries:
                # Regex mode needs an actual pattern rather than free text.
                q = query if mode != "regex" else r"def\s+\w+"
                t0 = time.perf_counter()
                try:
                    search_codebase(
                        query=q,
                        project_root=root,
                        top_k=10,
                        mode=mode,
                        auto_index=False,
                    )
                except Exception:
                    # Best-effort: keep benchmarking, but do not hide the
                    # failure — a mode that always raises would otherwise
                    # report deceptively good latencies.
                    failures += 1
                    logger.debug(
                        "Benchmark query failed (mode=%s, query=%r)",
                        mode,
                        q,
                        exc_info=True,
                    )
                times.append(time.perf_counter() - t0)

        ordered = sorted(times)
        avg_ms = (sum(times) / len(times)) * 1000 if times else 0
        p50_ms = ordered[len(ordered) // 2] * 1000 if ordered else 0
        p99_ms = ordered[int(len(ordered) * 0.99)] * 1000 if ordered else 0
        search_results[mode] = {
            "avg_ms": round(avg_ms, 2),
            "p50_ms": round(p50_ms, 2),
            "p99_ms": round(p99_ms, 2),
            "queries_per_second": round(1000 / avg_ms, 1) if avg_ms > 0 else 0,
            "failed_queries": failures,
        }
        print_success(f" {mode:>8}: avg={avg_ms:.1f}ms p50={p50_ms:.1f}ms p99={p99_ms:.1f}ms")
        if failures:
            print_info(f" {mode:>8}: {failures} of {len(times)} queries raised an error")

    results["search_latency"] = search_results

    # --- 4. BM25 index load benchmark ---
    print_info("4/5: BM25 index persistence...")
    from semantic_code_intelligence.search.keyword_search import BM25Index, _bm25_cache
    from semantic_code_intelligence.storage.vector_store import VectorStore

    _bm25_cache.clear()  # force disk load
    store = VectorStore.load(index_dir)

    t0 = time.perf_counter()
    bm25_loaded = BM25Index.load(index_dir, store.metadata)
    bm25_load_time = time.perf_counter() - t0

    t0 = time.perf_counter()
    # Keep a reference so the object's teardown is not part of the timing.
    bm25_fresh = BM25Index(store.metadata)  # noqa: F841
    bm25_build_time = time.perf_counter() - t0

    results["bm25"] = {
        "load_from_disk_ms": round(bm25_load_time * 1000, 2),
        "build_from_scratch_ms": round(bm25_build_time * 1000, 2),
        "speedup": round(bm25_build_time / bm25_load_time, 1) if bm25_load_time > 0 else 0,
        "loaded_from_cache": bm25_loaded is not None,
    }
    print_success(f" BM25 load: {bm25_load_time*1000:.1f}ms (vs build: {bm25_build_time*1000:.1f}ms)")

    # --- 5. Memory snapshot ---
    print_info("5/5: Memory usage...")
    peak_mem = _get_memory_mb()
    # Only regular files directly inside the index directory are summed.
    index_size_mb = (
        round(
            sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (1024 * 1024),
            2,
        )
        if index_dir.exists()
        else 0
    )
    memory = {
        "peak_mb": round(peak_mem, 1),
        "index_size_mb": index_size_mb,
    }
    results["memory"] = memory
    print_success(f" Peak memory: {peak_mem:.0f}MB, Index size: {index_size_mb:.1f}MB")

    # --- Output ---
    if json_mode:
        click.echo(json.dumps(results, indent=2))
    else:
        table = Table(title="CodexA Benchmark Results", show_header=True)
        table.add_column("Metric", style="cyan", min_width=30)
        table.add_column("Value", style="green", min_width=20)

        table.add_row("Project", str(root))
        table.add_row("Indexable files", str(file_count))
        table.add_row("", "")

        fi = full_index
        table.add_row("Full index time", _format_duration(fi["duration_s"]))
        table.add_row("Files/second", f"{fi['files_per_second']}")
        table.add_row("Total chunks", str(fi["chunks_created"]))
        table.add_row("Total vectors", str(fi["total_vectors"]))
        table.add_row("Symbols extracted", str(fi["symbols_extracted"]))
        table.add_row("", "")

        ii = incremental_index
        table.add_row("Incremental index time", _format_duration(ii["duration_s"]))
        table.add_row("Cache hit rate", f"{ii['cache_hit_rate']}%")
        table.add_row("", "")

        for mode, stats in search_results.items():
            table.add_row(f"Search ({mode}) avg", f"{stats['avg_ms']:.1f}ms")
            table.add_row(f"Search ({mode}) QPS", f"{stats['queries_per_second']}")

        table.add_row("", "")
        bm25 = results["bm25"]
        table.add_row("BM25 load (disk)", f"{bm25['load_from_disk_ms']:.1f}ms")
        table.add_row("BM25 build (fresh)", f"{bm25['build_from_scratch_ms']:.1f}ms")
        table.add_row("BM25 speedup", f"{bm25['speedup']}x")
        table.add_row("", "")
        table.add_row("Peak memory", f"{memory['peak_mb']:.0f}MB")
        table.add_row("Index size on disk", f"{memory['index_size_mb']:.1f}MB")

        console.print(table)
@@ -0,0 +1,252 @@
1
+ """CLI command: chat — multi-turn conversation with session persistence."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json as json_mod
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ import click
10
+
11
+ from semantic_code_intelligence.utils.logging import (
12
+ console,
13
+ get_logger,
14
+ print_error,
15
+ print_info,
16
+ )
17
+
18
+ if TYPE_CHECKING:
19
+ from semantic_code_intelligence.llm.provider import LLMProvider
20
+
21
+ logger = get_logger("cli.chat")
22
+
23
+
24
def _wrap_provider(provider: LLMProvider, llm: Any, config: Any) -> LLMProvider:
    """Optionally decorate *provider* with response caching and rate limiting.

    Args:
        provider: The concrete LLM provider to wrap.
        llm: LLM settings object; read via ``getattr`` for ``cache_enabled``,
            ``cache_ttl_hours``, ``cache_max_entries``, ``rate_limit_rpm`` and
            ``rate_limit_tpm`` (missing attributes fall back to defaults).
        config: App configuration; used to locate the cache directory when it
            exposes ``config_dir``.

    Returns:
        A ``CachedProvider`` when caching and/or rate limiting is configured,
        otherwise *provider* itself.
    """
    from semantic_code_intelligence.llm.cache import LLMCache
    from semantic_code_intelligence.llm.cached_provider import CachedProvider
    from semantic_code_intelligence.llm.rate_limiter import RateLimiter

    response_cache = None
    if getattr(llm, "cache_enabled", False):
        if hasattr(config, "config_dir"):
            cache_location = str(config.config_dir(config.project_root))
        else:
            cache_location = None
        response_cache = LLMCache(
            cache_dir=cache_location,
            ttl_hours=getattr(llm, "cache_ttl_hours", 24),
            max_entries=getattr(llm, "cache_max_entries", 1000),
        )

    rpm_limit = getattr(llm, "rate_limit_rpm", 0)
    tpm_limit = getattr(llm, "rate_limit_tpm", 0)
    # A limiter is only built when at least one of the two limits is positive.
    limiter = RateLimiter(rpm=rpm_limit, tpm=tpm_limit) if (rpm_limit > 0 or tpm_limit > 0) else None

    if response_cache is None and limiter is None:
        return provider
    return CachedProvider(provider, cache=response_cache, rate_limiter=limiter)
48
+
49
+
50
def _get_provider(config: Any) -> LLMProvider:
    """Instantiate the LLM provider selected by ``config.llm.provider``.

    ``"openai"`` and ``"ollama"`` map to their respective providers; any other
    value yields the mock provider. The result is passed through
    ``_wrap_provider`` before being returned.
    """
    from semantic_code_intelligence.config.settings import LLMConfig

    llm_settings: LLMConfig = config.llm

    if llm_settings.provider == "openai":
        from semantic_code_intelligence.llm.openai_provider import OpenAIProvider

        backend: LLMProvider = OpenAIProvider(
            api_key=llm_settings.api_key,
            model=llm_settings.model,
            # An empty base URL is passed on as None.
            base_url=llm_settings.base_url or None,
            temperature=llm_settings.temperature,
            max_tokens=llm_settings.max_tokens,
        )
        return _wrap_provider(backend, llm_settings, config)

    if llm_settings.provider == "ollama":
        from semantic_code_intelligence.llm.ollama_provider import OllamaProvider

        backend = OllamaProvider(
            model=llm_settings.model,
            # Fall back to the local default endpoint when none is configured.
            base_url=llm_settings.base_url or "http://localhost:11434",
            temperature=llm_settings.temperature,
            max_tokens=llm_settings.max_tokens,
        )
        return _wrap_provider(backend, llm_settings, config)

    from semantic_code_intelligence.llm.mock_provider import MockProvider

    return _wrap_provider(MockProvider(), llm_settings, config)
80
+
81
+
82
@click.command("chat")
@click.argument("message", type=str, required=False)
@click.option(
    "--session", "-s",
    default=None,
    type=str,
    help="Session ID to resume. Creates a new session if not given.",
)
@click.option(
    "--list-sessions", "list_sessions",
    is_flag=True,
    default=False,
    help="List all stored chat sessions and exit.",
)
@click.option(
    "--json-output", "--json", "json_mode",
    is_flag=True,
    default=False,
    help="Output in JSON format.",
)
@click.option(
    "--max-turns", "-t",
    default=20,
    type=int,
    help="Maximum conversation turns to send to LLM.",
)
@click.option(
    "--path", "-p",
    default=".",
    type=click.Path(exists=True, file_okay=False, resolve_path=True),
    help="Project root path.",
)
@click.option(
    "--stream",
    is_flag=True,
    default=False,
    help="Stream tokens incrementally as they arrive.",
)
@click.option("--pipe", is_flag=True, default=False, hidden=True)
@click.pass_context
def chat_cmd(
    ctx: click.Context,
    message: str | None,
    session: str | None,
    list_sessions: bool,
    json_mode: bool,
    max_turns: int,
    path: str,
    stream: bool,
    pipe: bool,
) -> None:
    """Continue or start a multi-turn conversation about the codebase.

    Each conversation is persisted to disk so you can resume later with
    --session <id>. Use --list-sessions to see saved conversations.
    """
    # NOTE: the docstring above is the user-facing --help text, so developer
    # notes live in comments instead.
    #
    # MESSAGE is optional at the click level so that `chat --list-sessions`
    # works without a dummy message; conversation mode still requires it.
    from semantic_code_intelligence.config.settings import load_config
    from semantic_code_intelligence.llm.conversation import SessionStore
    from semantic_code_intelligence.llm.reasoning import ReasoningEngine

    if message is None and not list_sessions:
        raise click.UsageError("Missing argument 'MESSAGE'.")

    root = Path(path).resolve()
    # ctx.obj is None when the command runs without the parent group having
    # populated it; treat that as "no global pipe flag".
    pipe = pipe or (ctx.obj or {}).get("pipe", False)

    store = SessionStore(root)

    # --- list sessions mode ---
    if list_sessions:
        sessions = store.list_sessions()
        if json_mode:
            click.echo(json_mod.dumps(sessions, indent=2))
        elif pipe:
            for s in sessions:
                click.echo(f"{s['session_id']} turns={s['turns']} {s['title']}")
        else:
            if not sessions:
                print_info("No stored sessions.")
            else:
                from rich.table import Table

                table = Table(title="Chat Sessions")
                table.add_column("ID")
                table.add_column("Title")
                table.add_column("Turns")
                for s in sessions:
                    table.add_row(s["session_id"], s["title"], str(s["turns"]))
                console.print(table)
        return

    # --- conversation mode ---
    config = load_config(root)
    provider = _get_provider(config)

    conv = store.get_or_create(session)

    # If this is a fresh session, set up the system prompt
    if not conv.messages:
        conv.add_system(
            "You are CodexA, an AI coding assistant. Answer questions about the "
            "user's codebase. Be concise, accurate, and cite file paths when relevant."
        )
        # The first 60 characters of the opening message become the title.
        conv.title = message[:60]

    # Add user message
    conv.add_user(message)

    # Get context-enriched messages
    messages = conv.get_messages_for_llm(max_turns=max_turns)

    # Also inject search context into the user's message
    engine = ReasoningEngine(provider, root)
    try:
        snippets = engine._search_context(message, top_k=3)
        if snippets:
            ctx_text = "\n".join(
                f"[{s.get('file_path', '?')}] {s.get('content', '')[:200]}"
                for s in snippets[:3]
            )
            # Replace only the outgoing copy of the last message; the stored
            # conversation keeps the user's original text.
            messages[-1] = type(messages[-1])(
                role=messages[-1].role,
                content=f"Relevant code:\n{ctx_text}\n\nUser: {message}",
            )
    except Exception:
        # Best-effort enrichment: never block the chat, but keep the traceback
        # available at debug level.
        logger.debug(
            "Context injection failed; continuing without code context",
            exc_info=True,
        )

    # Call LLM (streaming or batch)
    if stream and not json_mode:
        from semantic_code_intelligence.llm.streaming import stream_chat

        gen = stream_chat(provider, messages)
        token_parts: list[str] = []
        if not pipe:
            console.print(f"[bold cyan]CodexA [{conv.session_id}][/]", end="")
            click.echo("")
        for event in gen:
            # Only "token" events carry text to show and store.
            if event.kind == "token":
                token_parts.append(event.content)
                click.echo(event.content, nl=False)
        click.echo("")  # trailing newline
        conv.add_assistant("".join(token_parts))
        store.save(conv)
        if not pipe:
            print_info(f"Session: {conv.session_id} (use --session {conv.session_id} to continue)")
        return

    resp = provider.chat(messages)
    conv.add_assistant(resp.content)

    # Persist session
    store.save(conv)

    # Output
    if json_mode:
        click.echo(json_mod.dumps({
            "session_id": conv.session_id,
            "answer": resp.content,
            "turns": conv.turn_count,
            "usage": resp.usage,
        }, indent=2))
    elif pipe:
        click.echo(resp.content)
    else:
        from rich.markdown import Markdown
        from rich.panel import Panel

        console.print(Panel(
            Markdown(resp.content),
            title=f"CodexA [{conv.session_id}]",
            subtitle=f"Turn {conv.turn_count // 2}",
        ))
        print_info(f"Session: {conv.session_id} (use --session {conv.session_id} to continue)")
@@ -0,0 +1,74 @@
1
+ """CLI command: ci-gen — generate CI workflow templates."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import click
8
+
9
+ from semantic_code_intelligence.utils.logging import (
10
+ console,
11
+ get_logger,
12
+ print_info,
13
+ print_success,
14
+ )
15
+
16
+ logger = get_logger("cli.ci_gen")
17
+
18
+
19
@click.command("ci-gen")
@click.argument(
    "template",
    type=click.Choice(["analysis", "safety", "precommit"], case_sensitive=False),
)
@click.option(
    "--output",
    "-o",
    default=None,
    help="Write output to a file instead of stdout.",
)
@click.option(
    "--python-version",
    default="3.12",
    help="Python version for workflow (default: 3.12).",
)
@click.pass_context
def ci_gen_cmd(
    ctx: click.Context,
    template: str,
    output: str | None,
    python_version: str,
) -> None:
    """Generate CI/CD workflow templates for CodexA integration.

    Available templates:

    - analysis — Full analysis workflow (quality + PR summary)

    - safety — Lightweight safety-only workflow

    - precommit — Pre-commit hook configuration

    Examples:

        codexa ci-gen analysis

        codexa ci-gen safety -o .github/workflows/codexa-safety.yml

        codexa ci-gen precommit -o .pre-commit-config.yaml
    """
    # NOTE: the docstring above is the user-facing --help text, so developer
    # notes live in comments instead.
    from semantic_code_intelligence.ci.templates import get_template

    # The precommit template is rendered without a python_version argument.
    template_args = {} if template == "precommit" else {"python_version": python_version}
    rendered = get_template(template, **template_args)

    if not output:
        click.echo(rendered)
        return

    # Create missing parent directories so nested output paths work.
    destination = Path(output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered, encoding="utf-8")
    print_success(f"Written to {output}")