codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""CLI command: benchmark — measure indexing speed, search latency, and memory usage."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import time
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
|
|
12
|
+
from semantic_code_intelligence.config.settings import AppConfig, load_config
|
|
13
|
+
from semantic_code_intelligence.utils.logging import (
|
|
14
|
+
get_logger,
|
|
15
|
+
print_error,
|
|
16
|
+
print_info,
|
|
17
|
+
print_success,
|
|
18
|
+
console,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Module-level logger for the benchmark command, obtained from the project's
# get_logger helper under the name "cli.benchmark".
logger = get_logger("cli.benchmark")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _get_memory_mb() -> float:
|
|
25
|
+
"""Get current process memory usage in MB."""
|
|
26
|
+
try:
|
|
27
|
+
import resource # type: ignore[import-untyped]
|
|
28
|
+
return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
|
|
29
|
+
except ImportError:
|
|
30
|
+
# Windows fallback
|
|
31
|
+
try:
|
|
32
|
+
import psutil # type: ignore[import-untyped]
|
|
33
|
+
return psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)
|
|
34
|
+
except ImportError:
|
|
35
|
+
return 0.0
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _format_duration(seconds: float) -> str:
|
|
39
|
+
if seconds < 1:
|
|
40
|
+
return f"{seconds * 1000:.1f}ms"
|
|
41
|
+
return f"{seconds:.2f}s"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _count_files(root: Path) -> int:
    """Count indexable files without importing heavy modules.

    A file is counted when its suffix is one of ``config.index.extensions``
    from the configuration loaded for *root*.  Directories whose name starts
    with ``"."`` are excluded together with everything beneath them.

    Args:
        root: Directory to walk.

    Returns:
        Number of files with a matching suffix outside hidden directories.
    """
    config = load_config(root)
    extensions = set(config.index.extensions)
    count = 0
    for _dirpath, dirnames, filenames in os.walk(root):
        # Prune hidden directories in place so os.walk never descends into
        # them (e.g. .git), instead of visiting and discarding each one.
        dirnames[:] = [name for name in dirnames if not name.startswith(".")]
        count += sum(1 for name in filenames if Path(name).suffix in extensions)
    return count
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@click.command("benchmark")
@click.option(
    "--path",
    "-p",
    default=".",
    type=click.Path(exists=True, file_okay=False, resolve_path=True),
    help="Project root path to benchmark against.",
)
@click.option(
    "--json-output",
    "--json",
    "json_mode",
    is_flag=True,
    default=False,
    help="Output results as JSON.",
)
@click.option(
    "--rounds",
    "-r",
    default=3,
    type=int,
    help="Number of search rounds for latency averaging.",
)
@click.option(
    "--profile",
    is_flag=True,
    default=False,
    help="Run cProfile on full indexing and dump top 20 hotspots.",
)
@click.pass_context
def benchmark_cmd(
    ctx: click.Context,
    path: str,
    json_mode: bool,
    rounds: int,
    profile: bool,
) -> None:
    """Benchmark indexing speed, search latency, and memory usage.

    Measures the full indexing pipeline, incremental re-indexing, all
    four search modes (semantic, keyword, regex, hybrid), and reports
    memory consumption and cache hit rates.

    Examples:

    \b
        codexa benchmark
        codexa benchmark --json
        codexa benchmark --rounds 5
    """
    # Steps, in order: (1) forced full index, (2) incremental index with no
    # changes, (3) search latency per mode, (4) BM25 load-vs-build timing,
    # (5) memory / index-size snapshot.  Results are collected in ``results``
    # and emitted either as JSON or as a Rich table.
    from rich.table import Table

    root = Path(path).resolve()
    config_dir = AppConfig.config_dir(root)

    if not config_dir.exists():
        print_error(f"Project not initialized at {root}. Run 'codexa init' first.")
        ctx.exit(1)
        return  # ctx.exit() raises; kept as a guard if the context is stubbed

    index_dir = AppConfig.index_dir(root)
    results: dict[str, object] = {
        "project_root": str(root),
        "rounds": rounds,
    }

    file_count = _count_files(root)
    results["file_count"] = file_count
    # NOTE(review): progress messages are emitted in JSON mode too; confirm
    # print_info/print_success do not write to stdout, or gate them on json_mode.
    print_info(f"Benchmarking {root} ({file_count} indexable files)")

    # --- 1. Full indexing benchmark ---
    print_info("1/5: Full indexing...")
    mem_before = _get_memory_mb()
    from semantic_code_intelligence.services.indexing_service import run_indexing

    profiler = None
    if profile:
        import cProfile
        import io
        import pstats

        profiler = cProfile.Profile()

    # Start the clock only after the lazy imports above so that module import
    # cost is not reported as indexing time.
    t0 = time.perf_counter()
    if profiler is not None:
        profiler.enable()
        try:
            idx_result = run_indexing(root, force=True)
        finally:
            profiler.disable()
    else:
        idx_result = run_indexing(root, force=True)
    full_index_time = time.perf_counter() - t0

    if profiler is not None:
        # Print profiling results
        stream = io.StringIO()
        profile_stats = pstats.Stats(profiler, stream=stream)
        profile_stats.sort_stats("cumulative")
        profile_stats.print_stats(20)
        print_info("cProfile top 20 hotspots (by cumulative time):")
        # With --json, stdout must contain only the JSON document, so the
        # profile dump is redirected to stderr in that mode.
        click.echo(stream.getvalue(), err=json_mode)

    mem_after = _get_memory_mb()

    full_index = {
        "duration_s": round(full_index_time, 3),
        "files_indexed": idx_result.files_indexed,
        "chunks_created": idx_result.chunks_created,
        "total_vectors": idx_result.total_vectors,
        "symbols_extracted": idx_result.symbols_extracted,
        "files_per_second": round(idx_result.files_indexed / full_index_time, 1) if full_index_time > 0 else 0,
        "memory_delta_mb": round(mem_after - mem_before, 1),
    }
    results["full_index"] = full_index
    print_success(f" Full index: {_format_duration(full_index_time)} "
                  f"({idx_result.files_indexed} files, {idx_result.chunks_created} chunks)")

    # --- 2. Incremental indexing benchmark (no changes → should be fast) ---
    print_info("2/5: Incremental indexing (no changes)...")
    t0 = time.perf_counter()
    inc_result = run_indexing(root, force=False)
    inc_time = time.perf_counter() - t0

    incremental_index = {
        "duration_s": round(inc_time, 3),
        "files_skipped": inc_result.files_skipped,
        "files_indexed": inc_result.files_indexed,
        "chunks_reused": inc_result.chunks_reused,
        "cache_hit_rate": round(
            100 * inc_result.files_skipped / inc_result.files_scanned, 1
        ) if inc_result.files_scanned > 0 else 100.0,
    }
    results["incremental_index"] = incremental_index
    print_success(f" Incremental: {_format_duration(inc_time)} "
                  f"(cache hit {incremental_index['cache_hit_rate']}%)")

    # --- 3. Search latency benchmarks ---
    print_info("3/5: Search latency ({} rounds)...".format(rounds))
    test_queries = [
        "authentication middleware",
        "error handling",
        "database connection",
        "parse configuration",
        "search codebase",
    ]
    from semantic_code_intelligence.services.search_service import search_codebase

    search_results: dict[str, dict[str, float]] = {}
    for mode in ["semantic", "keyword", "regex", "hybrid"]:
        times: list[float] = []
        errors = 0
        for _r in range(rounds):
            for query in test_queries:
                # Regex mode needs a pattern rather than a natural-language query.
                q = query if mode != "regex" else r"def\s+\w+"
                t0 = time.perf_counter()
                try:
                    search_codebase(
                        query=q,
                        project_root=root,
                        top_k=10,
                        mode=mode,
                        auto_index=False,
                    )
                except Exception:
                    # Best effort: keep benchmarking, but do not let a query
                    # that failed count as a (typically very fast) sample.
                    errors += 1
                    logger.debug(f"Benchmark query {q!r} failed in {mode} mode", exc_info=True)
                    continue
                times.append(time.perf_counter() - t0)

        ordered = sorted(times)
        avg_ms = (sum(times) / len(times)) * 1000 if times else 0
        p50_ms = ordered[len(ordered) // 2] * 1000 if ordered else 0
        p99_ms = ordered[int(len(ordered) * 0.99)] * 1000 if ordered else 0
        search_results[mode] = {
            "avg_ms": round(avg_ms, 2),
            "p50_ms": round(p50_ms, 2),
            "p99_ms": round(p99_ms, 2),
            "queries_per_second": round(1000 / avg_ms, 1) if avg_ms > 0 else 0,
            "errors": errors,
        }
        print_success(f" {mode:>8}: avg={avg_ms:.1f}ms p50={p50_ms:.1f}ms p99={p99_ms:.1f}ms")
        if errors:
            print_error(f" {mode:>8}: {errors} queries failed (excluded from latency)")

    results["search_latency"] = search_results

    # --- 4. BM25 index load benchmark ---
    print_info("4/5: BM25 index persistence...")
    from semantic_code_intelligence.search.keyword_search import BM25Index, _bm25_cache
    from semantic_code_intelligence.storage.vector_store import VectorStore

    _bm25_cache.clear()  # force disk load
    store = VectorStore.load(index_dir)

    t0 = time.perf_counter()
    bm25_loaded = BM25Index.load(index_dir, store.metadata)
    bm25_load_time = time.perf_counter() - t0

    t0 = time.perf_counter()
    BM25Index(store.metadata)  # constructed only to time a from-scratch build
    bm25_build_time = time.perf_counter() - t0

    bm25 = {
        "load_from_disk_ms": round(bm25_load_time * 1000, 2),
        "build_from_scratch_ms": round(bm25_build_time * 1000, 2),
        "speedup": round(bm25_build_time / bm25_load_time, 1) if bm25_load_time > 0 else 0,
        "loaded_from_cache": bm25_loaded is not None,
    }
    results["bm25"] = bm25
    print_success(f" BM25 load: {bm25_load_time*1000:.1f}ms (vs build: {bm25_build_time*1000:.1f}ms)")

    # --- 5. Memory snapshot ---
    print_info("5/5: Memory usage...")
    peak_mem = _get_memory_mb()
    # Only regular files directly inside the index directory are summed.
    index_size_mb = round(
        sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (1024 * 1024), 2
    ) if index_dir.exists() else 0
    memory = {
        "peak_mb": round(peak_mem, 1),
        "index_size_mb": index_size_mb,
    }
    results["memory"] = memory
    print_success(f" Peak memory: {peak_mem:.0f}MB, Index size: {index_size_mb:.1f}MB")

    # --- Output ---
    if json_mode:
        click.echo(json.dumps(results, indent=2))
    else:
        table = Table(title="CodexA Benchmark Results", show_header=True)
        table.add_column("Metric", style="cyan", min_width=30)
        table.add_column("Value", style="green", min_width=20)

        table.add_row("Project", str(root))
        table.add_row("Indexable files", str(file_count))
        table.add_row("", "")

        table.add_row("Full index time", _format_duration(full_index["duration_s"]))
        table.add_row("Files/second", f"{full_index['files_per_second']}")
        table.add_row("Total chunks", str(full_index["chunks_created"]))
        table.add_row("Total vectors", str(full_index["total_vectors"]))
        table.add_row("Symbols extracted", str(full_index["symbols_extracted"]))
        table.add_row("", "")

        table.add_row("Incremental index time", _format_duration(incremental_index["duration_s"]))
        table.add_row("Cache hit rate", f"{incremental_index['cache_hit_rate']}%")
        table.add_row("", "")

        for mode, mode_stats in search_results.items():
            table.add_row(f"Search ({mode}) avg", f"{mode_stats['avg_ms']:.1f}ms")
            table.add_row(f"Search ({mode}) QPS", f"{mode_stats['queries_per_second']}")

        table.add_row("", "")
        table.add_row("BM25 load (disk)", f"{bm25['load_from_disk_ms']:.1f}ms")
        table.add_row("BM25 build (fresh)", f"{bm25['build_from_scratch_ms']:.1f}ms")
        table.add_row("BM25 speedup", f"{bm25['speedup']}x")
        table.add_row("", "")
        table.add_row("Peak memory", f"{memory['peak_mb']:.0f}MB")
        table.add_row("Index size on disk", f"{memory['index_size_mb']:.1f}MB")

        console.print(table)
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""CLI command: chat — multi-turn conversation with session persistence."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json as json_mod
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
|
|
11
|
+
from semantic_code_intelligence.utils.logging import (
|
|
12
|
+
console,
|
|
13
|
+
get_logger,
|
|
14
|
+
print_error,
|
|
15
|
+
print_info,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from semantic_code_intelligence.llm.provider import LLMProvider
|
|
20
|
+
|
|
21
|
+
# Module-level logger for the chat command, obtained from the project's
# get_logger helper under the name "cli.chat".
logger = get_logger("cli.chat")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _wrap_provider(provider: LLMProvider, llm: Any, config: Any) -> LLMProvider:
    """Optionally decorate *provider* with response caching and rate limiting.

    Args:
        provider: The concrete LLM provider to wrap.
        llm: LLM settings object; ``cache_enabled``, ``cache_ttl_hours``,
            ``cache_max_entries``, ``rate_limit_rpm`` and ``rate_limit_tpm``
            are read with ``getattr`` defaults.
        config: Application config; if it exposes ``config_dir`` the cache is
            placed under ``config.config_dir(config.project_root)``.

    Returns:
        *provider* itself when neither a cache nor a rate limiter is
        configured, otherwise a ``CachedProvider`` wrapping it.
    """
    from semantic_code_intelligence.llm.cache import LLMCache
    from semantic_code_intelligence.llm.cached_provider import CachedProvider
    from semantic_code_intelligence.llm.rate_limiter import RateLimiter

    response_cache = None
    if getattr(llm, "cache_enabled", False):
        if hasattr(config, "config_dir"):
            cache_location = str(config.config_dir(config.project_root))
        else:
            cache_location = None
        response_cache = LLMCache(
            cache_dir=cache_location,
            ttl_hours=getattr(llm, "cache_ttl_hours", 24),
            max_entries=getattr(llm, "cache_max_entries", 1000),
        )

    requests_per_minute = getattr(llm, "rate_limit_rpm", 0)
    tokens_per_minute = getattr(llm, "rate_limit_tpm", 0)
    # A limiter is created as soon as either budget is positive.
    limiter = (
        RateLimiter(rpm=requests_per_minute, tpm=tokens_per_minute)
        if requests_per_minute > 0 or tokens_per_minute > 0
        else None
    )

    if response_cache is None and limiter is None:
        return provider
    return CachedProvider(provider, cache=response_cache, rate_limiter=limiter)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _get_provider(config: Any) -> LLMProvider:
    """Construct the LLM provider selected by ``config.llm.provider``.

    ``"openai"`` yields an ``OpenAIProvider``, ``"ollama"`` an
    ``OllamaProvider`` (base URL falling back to ``http://localhost:11434``),
    and any other value a ``MockProvider``.  The result is passed through
    ``_wrap_provider`` before being returned.
    """
    from semantic_code_intelligence.config.settings import LLMConfig

    settings: LLMConfig = config.llm

    if settings.provider == "openai":
        from semantic_code_intelligence.llm.openai_provider import OpenAIProvider

        openai_backend: LLMProvider = OpenAIProvider(
            api_key=settings.api_key,
            model=settings.model,
            base_url=settings.base_url or None,
            temperature=settings.temperature,
            max_tokens=settings.max_tokens,
        )
        return _wrap_provider(openai_backend, settings, config)

    if settings.provider == "ollama":
        from semantic_code_intelligence.llm.ollama_provider import OllamaProvider

        ollama_backend = OllamaProvider(
            model=settings.model,
            base_url=settings.base_url or "http://localhost:11434",
            temperature=settings.temperature,
            max_tokens=settings.max_tokens,
        )
        return _wrap_provider(ollama_backend, settings, config)

    # Any other provider name falls through to the mock implementation.
    from semantic_code_intelligence.llm.mock_provider import MockProvider

    return _wrap_provider(MockProvider(), settings, config)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@click.command("chat")
@click.argument("message", type=str, required=False)
@click.option(
    "--session", "-s",
    default=None,
    type=str,
    help="Session ID to resume. Creates a new session if not given.",
)
@click.option(
    "--list-sessions", "list_sessions",
    is_flag=True,
    default=False,
    help="List all stored chat sessions and exit.",
)
@click.option(
    "--json-output", "--json", "json_mode",
    is_flag=True,
    default=False,
    help="Output in JSON format.",
)
@click.option(
    "--max-turns", "-t",
    default=20,
    type=int,
    help="Maximum conversation turns to send to LLM.",
)
@click.option(
    "--path", "-p",
    default=".",
    type=click.Path(exists=True, file_okay=False, resolve_path=True),
    help="Project root path.",
)
@click.option(
    "--stream",
    is_flag=True,
    default=False,
    help="Stream tokens incrementally as they arrive.",
)
@click.option("--pipe", is_flag=True, default=False, hidden=True)
@click.pass_context
def chat_cmd(
    ctx: click.Context,
    message: str | None,
    session: str | None,
    list_sessions: bool,
    json_mode: bool,
    max_turns: int,
    path: str,
    stream: bool,
    pipe: bool,
) -> None:
    """Continue or start a multi-turn conversation about the codebase.

    Each conversation is persisted to disk so you can resume later with
    --session <id>. Use --list-sessions to see saved conversations.
    """
    from semantic_code_intelligence.config.settings import load_config
    from semantic_code_intelligence.llm.conversation import SessionStore
    from semantic_code_intelligence.llm.reasoning import ReasoningEngine

    # MESSAGE is optional at the click level so that --list-sessions works on
    # its own; a conversation turn still requires it.
    if message is None and not list_sessions:
        raise click.UsageError("Missing argument 'MESSAGE'.", ctx=ctx)

    root = Path(path).resolve()
    # ctx.obj is None when no parent group has populated it.
    pipe = pipe or (ctx.obj or {}).get("pipe", False)

    store = SessionStore(root)

    # --- list sessions mode ---
    if list_sessions:
        sessions = store.list_sessions()
        if json_mode:
            click.echo(json_mod.dumps(sessions, indent=2))
        elif pipe:
            for s in sessions:
                click.echo(f"{s['session_id']} turns={s['turns']} {s['title']}")
        else:
            if not sessions:
                print_info("No stored sessions.")
            else:
                from rich.table import Table

                table = Table(title="Chat Sessions")
                table.add_column("ID")
                table.add_column("Title")
                table.add_column("Turns")
                for s in sessions:
                    table.add_row(s["session_id"], s["title"], str(s["turns"]))
                console.print(table)
        return

    # --- conversation mode ---
    config = load_config(root)
    provider = _get_provider(config)

    conv = store.get_or_create(session)

    # If this is a fresh session, set up the system prompt
    if not conv.messages:
        conv.add_system(
            "You are CodexA, an AI coding assistant. Answer questions about the "
            "user's codebase. Be concise, accurate, and cite file paths when relevant."
        )
        conv.title = message[:60]

    # Add user message
    conv.add_user(message)

    # Get context-enriched messages
    messages = conv.get_messages_for_llm(max_turns=max_turns)

    # Also inject search context into the user's message
    engine = ReasoningEngine(provider, root)
    try:
        snippets = engine._search_context(message, top_k=3)
        if snippets:
            ctx_text = "\n".join(
                f"[{s.get('file_path', '?')}] {s.get('content', '')[:200]}"
                for s in snippets[:3]
            )
            # Replace the last message sent to the LLM with a copy that has
            # the retrieved snippets prepended; the stored conversation keeps
            # the user's original text.
            messages[-1] = type(messages[-1])(
                role=messages[-1].role,
                content=f"Relevant code:\n{ctx_text}\n\nUser: {message}",
            )
    except Exception:
        # Best effort: code context is optional, but keep the traceback for debugging.
        logger.debug(
            "Context injection failed; continuing without code context",
            exc_info=True,
        )

    # Call LLM (streaming or batch)
    if stream and not json_mode:
        from semantic_code_intelligence.llm.streaming import stream_chat

        gen = stream_chat(provider, messages)
        accumulated = ""
        if not pipe:
            from rich.markup import escape

            # Escape the bracketed id so Rich does not parse it as a style tag.
            session_tag = escape(f"[{conv.session_id}]")
            console.print(f"[bold cyan]CodexA {session_tag}[/]", end="")
            click.echo("")
        for event in gen:
            if event.kind == "token":
                accumulated += event.content
                click.echo(event.content, nl=False)
        click.echo("")  # trailing newline
        conv.add_assistant(accumulated)
        store.save(conv)
        if not pipe:
            print_info(f"Session: {conv.session_id} (use --session {conv.session_id} to continue)")
        return

    resp = provider.chat(messages)
    conv.add_assistant(resp.content)

    # Persist session
    store.save(conv)

    # Output
    if json_mode:
        click.echo(json_mod.dumps({
            "session_id": conv.session_id,
            "answer": resp.content,
            "turns": conv.turn_count,
            "usage": resp.usage,
        }, indent=2))
    elif pipe:
        click.echo(resp.content)
    else:
        from rich.markdown import Markdown
        from rich.markup import escape
        from rich.panel import Panel

        # String panel titles are parsed as Rich markup, so the bracketed id
        # must be escaped to be shown literally.
        console.print(Panel(
            Markdown(resp.content),
            title=f"CodexA {escape(f'[{conv.session_id}]')}",
            subtitle=f"Turn {conv.turn_count // 2}",
        ))
        print_info(f"Session: {conv.session_id} (use --session {conv.session_id} to continue)")
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""CLI command: ci-gen — generate CI workflow templates."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from semantic_code_intelligence.utils.logging import (
|
|
10
|
+
console,
|
|
11
|
+
get_logger,
|
|
12
|
+
print_info,
|
|
13
|
+
print_success,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Module-level logger for the ci-gen command, obtained from the project's
# get_logger helper under the name "cli.ci_gen".
logger = get_logger("cli.ci_gen")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@click.command("ci-gen")
@click.argument(
    "template",
    type=click.Choice(["analysis", "safety", "precommit"], case_sensitive=False),
)
@click.option("--output", "-o", default=None, help="Write output to a file instead of stdout.")
@click.option(
    "--python-version",
    default="3.12",
    help="Python version for workflow (default: 3.12).",
)
@click.pass_context
def ci_gen_cmd(
    ctx: click.Context,
    template: str,
    output: str | None,
    python_version: str,
) -> None:
    """Generate CI/CD workflow templates for CodexA integration.

    Available templates:

    - analysis — Full analysis workflow (quality + PR summary)

    - safety — Lightweight safety-only workflow

    - precommit — Pre-commit hook configuration

    Examples:

        codexa ci-gen analysis

        codexa ci-gen safety -o .github/workflows/codexa-safety.yml

        codexa ci-gen precommit -o .pre-commit-config.yaml
    """
    from semantic_code_intelligence.ci.templates import get_template

    # The python version is forwarded for every template except "precommit".
    template_options = {} if template == "precommit" else {"python_version": python_version}
    rendered = get_template(template, **template_options)

    if not output:
        # No destination given: emit the template on stdout.
        click.echo(rendered)
        return

    destination = Path(output)
    # Create missing parent directories (e.g. .github/workflows) before writing.
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered, encoding="utf-8")
    print_success(f"Written to {output}")
|