PyPI - codexa - Versions diffs - 0.4.0__py3-none-any.whl - Mend

codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (189) hide show

codexa-0.4.0.dist-info/METADATA +650 -0
codexa-0.4.0.dist-info/RECORD +189 -0
codexa-0.4.0.dist-info/WHEEL +5 -0
codexa-0.4.0.dist-info/entry_points.txt +2 -0
codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
codexa-0.4.0.dist-info/top_level.txt +1 -0
semantic_code_intelligence/__init__.py +5 -0
semantic_code_intelligence/analysis/__init__.py +21 -0
semantic_code_intelligence/analysis/ai_features.py +351 -0
semantic_code_intelligence/bridge/__init__.py +28 -0
semantic_code_intelligence/bridge/context_provider.py +245 -0
semantic_code_intelligence/bridge/protocol.py +167 -0
semantic_code_intelligence/bridge/server.py +348 -0
semantic_code_intelligence/bridge/vscode.py +271 -0
semantic_code_intelligence/ci/__init__.py +13 -0
semantic_code_intelligence/ci/hooks.py +98 -0
semantic_code_intelligence/ci/hotspots.py +272 -0
semantic_code_intelligence/ci/impact.py +246 -0
semantic_code_intelligence/ci/metrics.py +591 -0
semantic_code_intelligence/ci/pr.py +412 -0
semantic_code_intelligence/ci/quality.py +557 -0
semantic_code_intelligence/ci/templates.py +164 -0
semantic_code_intelligence/ci/trace.py +224 -0
semantic_code_intelligence/cli/__init__.py +0 -0
semantic_code_intelligence/cli/commands/__init__.py +0 -0
semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
semantic_code_intelligence/cli/main.py +65 -0
semantic_code_intelligence/cli/router.py +92 -0
semantic_code_intelligence/config/__init__.py +0 -0
semantic_code_intelligence/config/settings.py +260 -0
semantic_code_intelligence/context/__init__.py +19 -0
semantic_code_intelligence/context/engine.py +429 -0
semantic_code_intelligence/context/memory.py +253 -0
semantic_code_intelligence/daemon/__init__.py +1 -0
semantic_code_intelligence/daemon/watcher.py +515 -0
semantic_code_intelligence/docs/__init__.py +1080 -0
semantic_code_intelligence/embeddings/__init__.py +0 -0
semantic_code_intelligence/embeddings/enhanced.py +131 -0
semantic_code_intelligence/embeddings/generator.py +149 -0
semantic_code_intelligence/embeddings/model_registry.py +100 -0
semantic_code_intelligence/evolution/__init__.py +1 -0
semantic_code_intelligence/evolution/budget_guard.py +111 -0
semantic_code_intelligence/evolution/commit_manager.py +88 -0
semantic_code_intelligence/evolution/context_builder.py +131 -0
semantic_code_intelligence/evolution/engine.py +249 -0
semantic_code_intelligence/evolution/patch_generator.py +229 -0
semantic_code_intelligence/evolution/task_selector.py +214 -0
semantic_code_intelligence/evolution/test_runner.py +111 -0
semantic_code_intelligence/indexing/__init__.py +0 -0
semantic_code_intelligence/indexing/chunker.py +174 -0
semantic_code_intelligence/indexing/parallel.py +86 -0
semantic_code_intelligence/indexing/scanner.py +146 -0
semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
semantic_code_intelligence/llm/__init__.py +62 -0
semantic_code_intelligence/llm/cache.py +219 -0
semantic_code_intelligence/llm/cached_provider.py +145 -0
semantic_code_intelligence/llm/conversation.py +190 -0
semantic_code_intelligence/llm/cross_refactor.py +272 -0
semantic_code_intelligence/llm/investigation.py +274 -0
semantic_code_intelligence/llm/mock_provider.py +77 -0
semantic_code_intelligence/llm/ollama_provider.py +122 -0
semantic_code_intelligence/llm/openai_provider.py +100 -0
semantic_code_intelligence/llm/provider.py +92 -0
semantic_code_intelligence/llm/rate_limiter.py +164 -0
semantic_code_intelligence/llm/reasoning.py +438 -0
semantic_code_intelligence/llm/safety.py +110 -0
semantic_code_intelligence/llm/streaming.py +251 -0
semantic_code_intelligence/lsp/__init__.py +609 -0
semantic_code_intelligence/mcp/__init__.py +393 -0
semantic_code_intelligence/parsing/__init__.py +19 -0
semantic_code_intelligence/parsing/parser.py +375 -0
semantic_code_intelligence/plugins/__init__.py +255 -0
semantic_code_intelligence/plugins/examples/__init__.py +1 -0
semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
semantic_code_intelligence/scalability/__init__.py +205 -0
semantic_code_intelligence/search/__init__.py +0 -0
semantic_code_intelligence/search/formatter.py +123 -0
semantic_code_intelligence/search/grep.py +361 -0
semantic_code_intelligence/search/hybrid_search.py +170 -0
semantic_code_intelligence/search/keyword_search.py +311 -0
semantic_code_intelligence/search/section_expander.py +103 -0
semantic_code_intelligence/services/__init__.py +0 -0
semantic_code_intelligence/services/indexing_service.py +630 -0
semantic_code_intelligence/services/search_service.py +269 -0
semantic_code_intelligence/storage/__init__.py +0 -0
semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
semantic_code_intelligence/storage/hash_store.py +66 -0
semantic_code_intelligence/storage/index_manifest.py +85 -0
semantic_code_intelligence/storage/index_stats.py +138 -0
semantic_code_intelligence/storage/query_history.py +160 -0
semantic_code_intelligence/storage/symbol_registry.py +209 -0
semantic_code_intelligence/storage/vector_store.py +297 -0
semantic_code_intelligence/tests/__init__.py +0 -0
semantic_code_intelligence/tests/test_ai_features.py +351 -0
semantic_code_intelligence/tests/test_chunker.py +119 -0
semantic_code_intelligence/tests/test_cli.py +188 -0
semantic_code_intelligence/tests/test_config.py +154 -0
semantic_code_intelligence/tests/test_context.py +381 -0
semantic_code_intelligence/tests/test_embeddings.py +73 -0
semantic_code_intelligence/tests/test_endtoend.py +1142 -0
semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
semantic_code_intelligence/tests/test_hash_store.py +79 -0
semantic_code_intelligence/tests/test_logging.py +55 -0
semantic_code_intelligence/tests/test_new_cli.py +138 -0
semantic_code_intelligence/tests/test_parser.py +495 -0
semantic_code_intelligence/tests/test_phase10.py +355 -0
semantic_code_intelligence/tests/test_phase11.py +593 -0
semantic_code_intelligence/tests/test_phase12.py +375 -0
semantic_code_intelligence/tests/test_phase13.py +663 -0
semantic_code_intelligence/tests/test_phase14.py +568 -0
semantic_code_intelligence/tests/test_phase15.py +814 -0
semantic_code_intelligence/tests/test_phase16.py +792 -0
semantic_code_intelligence/tests/test_phase17.py +815 -0
semantic_code_intelligence/tests/test_phase18.py +934 -0
semantic_code_intelligence/tests/test_phase19.py +986 -0
semantic_code_intelligence/tests/test_phase20.py +2753 -0
semantic_code_intelligence/tests/test_phase20b.py +2058 -0
semantic_code_intelligence/tests/test_phase20c.py +962 -0
semantic_code_intelligence/tests/test_phase21.py +428 -0
semantic_code_intelligence/tests/test_phase22.py +799 -0
semantic_code_intelligence/tests/test_phase23.py +783 -0
semantic_code_intelligence/tests/test_phase24.py +715 -0
semantic_code_intelligence/tests/test_phase25.py +496 -0
semantic_code_intelligence/tests/test_phase26.py +251 -0
semantic_code_intelligence/tests/test_phase27.py +531 -0
semantic_code_intelligence/tests/test_phase8.py +592 -0
semantic_code_intelligence/tests/test_phase9.py +643 -0
semantic_code_intelligence/tests/test_plugins.py +293 -0
semantic_code_intelligence/tests/test_priority_features.py +727 -0
semantic_code_intelligence/tests/test_router.py +41 -0
semantic_code_intelligence/tests/test_scalability.py +138 -0
semantic_code_intelligence/tests/test_scanner.py +125 -0
semantic_code_intelligence/tests/test_search.py +160 -0
semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
semantic_code_intelligence/tests/test_tools.py +182 -0
semantic_code_intelligence/tests/test_vector_store.py +151 -0
semantic_code_intelligence/tests/test_watcher.py +211 -0
semantic_code_intelligence/tools/__init__.py +442 -0
semantic_code_intelligence/tools/executor.py +232 -0
semantic_code_intelligence/tools/protocol.py +200 -0
semantic_code_intelligence/tui/__init__.py +454 -0
semantic_code_intelligence/utils/__init__.py +0 -0
semantic_code_intelligence/utils/logging.py +112 -0
semantic_code_intelligence/version.py +3 -0
semantic_code_intelligence/web/__init__.py +11 -0
semantic_code_intelligence/web/api.py +289 -0
semantic_code_intelligence/web/server.py +397 -0
semantic_code_intelligence/web/ui.py +659 -0
semantic_code_intelligence/web/visualize.py +226 -0
semantic_code_intelligence/workspace/__init__.py +427 -0

semantic_code_intelligence/cli/commands/hotspots_cmd.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""CLI command: hotspots — identify high-risk code hotspots."""
+from __future__ import annotations
+import json as json_mod
+from pathlib import Path
+import click
+from semantic_code_intelligence.utils.logging import (
+    console,
+    get_logger,
+    print_error,
+    print_success,
+)
+logger = get_logger("cli.hotspots")
+@click.command("hotspots")
+@click.option(
+    "--path", "-p",
+    default=".",
+    type=click.Path(exists=True, file_okay=False, resolve_path=True),
+    help="Project root path.",
+)
+@click.option(
+    "--json-output", "--json", "json_mode",
+    is_flag=True, default=False,
+    help="Output in JSON format.",
+)
+@click.option(
+    "--pipe",
+    is_flag=True, default=False,
+    help="Plain text output for piping / CI.",
+)
+@click.option(
+    "--top-n", "-n",
+    type=int, default=20,
+    help="Number of hotspots to report (default: 20).",
+)
+@click.option(
+    "--include-git/--no-git",
+    default=True,
+    help="Include git churn data (default: enabled).",
+)
+@click.pass_context
+def hotspots_cmd(
+    ctx: click.Context,
+    path: str,
+    json_mode: bool,
+    pipe: bool,
+    top_n: int,
+    include_git: bool,
+) -> None:
+    """Identify high-risk code hotspots via multi-factor analysis.
+    Combines complexity, duplication, fan-in/out, and git churn to
+    score symbols by maintenance risk.
+    Examples:
+        codexa hotspots
+        codexa hotspots --top-n 10 --json
+        codexa hotspots --no-git --pipe
+    """
+    from semantic_code_intelligence.ci.hotspots import analyze_hotspots
+    from semantic_code_intelligence.context.engine import CallGraph, ContextBuilder, DependencyMap
+    root = Path(path).resolve()
+    builder = ContextBuilder()
+    dep_map = DependencyMap()
+    py_files = sorted(root.rglob("*.py"))
+    py_files = [f for f in py_files if ".venv" not in f.parts and "__pycache__" not in f.parts]
+    for fp in py_files:
+        try:
+            content = fp.read_text(encoding="utf-8", errors="replace")
+            builder.index_file(str(fp), content)
+            dep_map.add_file(str(fp), content)
+        except Exception:
+            logger.debug("Failed to index %s", fp)
+            continue
+    symbols = builder.get_all_symbols()
+    call_graph = CallGraph()
+    call_graph.build(symbols)
+    try:
+        report = analyze_hotspots(
+            symbols, call_graph, dep_map, root,
+            top_n=top_n, include_git=include_git,
+        )
+    except Exception as exc:
+        logger.debug("Hotspot analysis failed", exc_info=True)
+        print_error(f"Hotspot analysis failed: {exc}")
+        ctx.exit(1)
+        return
+    if json_mode:
+        click.echo(json_mod.dumps(report.to_dict(), indent=2))
+    elif pipe:
+        click.echo(f"files={report.files_analyzed} symbols={report.symbols_analyzed} hotspots={len(report.hotspots)}")
+        for h in report.hotspots:
+            click.echo(f"  {h.risk_score:.3f}  {h.kind:<10}  {h.file_path}:{h.name}")
+    else:
+        console.print(f"\n[bold]Hotspot Analysis[/bold] — {report.files_analyzed} files, {report.symbols_analyzed} symbols\n")
+        if not report.hotspots:
+            print_success("No significant hotspots detected.")
+            return
+        for i, h in enumerate(report.hotspots, 1):
+            colour = "red" if h.risk_score >= 0.7 else "yellow" if h.risk_score >= 0.4 else "green"
+            console.print(f"  [{colour}]{i:>3}. {h.risk_score:.3f}[/{colour}]  {h.kind:<10}  [cyan]{h.file_path}[/cyan]:[bold]{h.name}[/bold]")
+            for f in h.factors:
+                console.print(f"        {f.name}: {f.raw_value:.2f} (norm={f.normalized:.2f}, w={f.weight:.2f})")
+        console.print()

semantic_code_intelligence/cli/commands/impact_cmd.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""CLI command: impact — analyse blast radius of code changes."""
+from __future__ import annotations
+import json as json_mod
+from pathlib import Path
+import click
+from semantic_code_intelligence.utils.logging import (
+    console,
+    get_logger,
+    print_error,
+    print_success,
+)
+logger = get_logger("cli.impact")
+@click.command("impact")
+@click.argument("target")
+@click.option(
+    "--path", "-p",
+    default=".",
+    type=click.Path(exists=True, file_okay=False, resolve_path=True),
+    help="Project root path.",
+)
+@click.option(
+    "--json-output", "--json", "json_mode",
+    is_flag=True, default=False,
+    help="Output in JSON format.",
+)
+@click.option(
+    "--pipe",
+    is_flag=True, default=False,
+    help="Plain text output for piping / CI.",
+)
+@click.option(
+    "--max-depth", "-d",
+    type=int, default=5,
+    help="Maximum traversal depth (default: 5).",
+)
+@click.pass_context
+def impact_cmd(
+    ctx: click.Context,
+    target: str,
+    path: str,
+    json_mode: bool,
+    pipe: bool,
+    max_depth: int,
+) -> None:
+    """Analyse the blast radius of a change to TARGET.
+    TARGET can be a symbol name (function/class) or a file path relative
+    to the project root.
+    Examples:
+        codexa impact parse_file
+        codexa impact src/parser.py --json
+        codexa impact MyClass --max-depth 3 --pipe
+    """
+    from semantic_code_intelligence.ci.impact import analyze_impact
+    from semantic_code_intelligence.context.engine import CallGraph, ContextBuilder, DependencyMap
+    root = Path(path).resolve()
+    builder = ContextBuilder()
+    dep_map = DependencyMap()
+    py_files = sorted(root.rglob("*.py"))
+    py_files = [f for f in py_files if ".venv" not in f.parts and "__pycache__" not in f.parts]
+    for fp in py_files:
+        try:
+            content = fp.read_text(encoding="utf-8", errors="replace")
+            builder.index_file(str(fp), content)
+            dep_map.add_file(str(fp), content)
+        except Exception:
+            logger.debug("Failed to index %s", fp)
+            continue
+    symbols = builder.get_all_symbols()
+    call_graph = CallGraph()
+    call_graph.build(symbols)
+    try:
+        report = analyze_impact(
+            target, symbols, call_graph, dep_map, root,
+            max_depth=max_depth,
+        )
+    except Exception as exc:
+        logger.debug("Impact analysis failed", exc_info=True)
+        print_error(f"Impact analysis failed: {exc}")
+        ctx.exit(1)
+        return
+    if json_mode:
+        click.echo(json_mod.dumps(report.to_dict(), indent=2))
+    elif pipe:
+        click.echo(f"target={report.target} kind={report.target_kind} affected={report.total_affected}")
+        for s in report.direct_symbols:
+            click.echo(f"  DIRECT  {s.relationship:<20}  {s.file_path}:{s.name}")
+        for s in report.transitive_symbols:
+            click.echo(f"  TRANS   {s.relationship:<20}  {s.file_path}:{s.name}")
+        for m in report.affected_modules:
+            click.echo(f"  MODULE  {m.relationship:<20}  {m.file_path}")
+    else:
+        console.print(f"\n[bold]Impact Analysis[/bold] — target: [cyan]{report.target}[/cyan] ({report.target_kind})\n")
+        if report.total_affected == 0:
+            print_success("No downstream impact detected.")
+            return
+        if report.direct_symbols:
+            console.print("[bold]Direct callers:[/bold]")
+            for s in report.direct_symbols:
+                console.print(f"  [yellow]{s.name}[/yellow]  ({s.kind})  [dim]{s.file_path}[/dim]")
+        if report.transitive_symbols:
+            console.print("\n[bold]Transitive callers:[/bold]")
+            for s in report.transitive_symbols:
+                console.print(f"  [yellow]{s.name}[/yellow]  depth={s.depth}  [dim]{s.file_path}[/dim]")
+        if report.affected_modules:
+            console.print("\n[bold]Affected modules:[/bold]")
+            for m in report.affected_modules:
+                console.print(f"  [cyan]{m.file_path}[/cyan]  ({m.relationship}, depth={m.depth})")
+        console.print(f"\n[bold]Total affected:[/bold] {report.total_affected}")
+        console.print()

semantic_code_intelligence/cli/commands/index_cmd.py ADDED Viewed

@@ -0,0 +1,138 @@
+"""CLI command: index - Index a codebase for semantic search."""
+from __future__ import annotations
+import time
+from pathlib import Path
+import click
+from semantic_code_intelligence.config.settings import AppConfig
+from semantic_code_intelligence.services.indexing_service import run_indexing
+from semantic_code_intelligence.utils.logging import (
+    get_logger,
+    print_error,
+    print_info,
+    print_success,
+    print_warning,
+)
+logger = get_logger("cli.index")
+def _run_watch_mode(root: Path, force: bool) -> None:
+    """Run continuous watch-mode indexing with live incremental updates."""
+    from semantic_code_intelligence.daemon.watcher import NativeFileWatcher
+    from semantic_code_intelligence.services.indexing_service import run_incremental_indexing
+    # Initial index
+    print_info("Watch mode: performing initial index...")
+    result = run_indexing(project_root=root, force=force)
+    print_success(
+        f"Initial index: {result.files_indexed} files, "
+        f"{result.chunks_created} chunks, {result.total_vectors} vectors."
+    )
+    print_info("Watching for changes... (press Ctrl+C to stop)")
+    update_count = 0
+    def _on_changes(events: list) -> None:
+        nonlocal update_count
+        changed = [str(e.path) for e in events if e.change_type in ("created", "modified")]
+        deleted = [str(e.path) for e in events if e.change_type == "deleted"]
+        if not changed and not deleted:
+            return
+        try:
+            inc = run_incremental_indexing(root, changed_files=changed, deleted_files=deleted)
+            update_count += 1
+            print_success(
+                f"[update #{update_count}] Re-indexed {inc.files_indexed} files "
+                f"({inc.chunks_created} chunks). {len(deleted)} deleted."
+            )
+        except Exception as exc:
+            logger.debug("Incremental indexing error", exc_info=True)
+            print_error(f"Incremental indexing failed: {exc}")
+    watcher = NativeFileWatcher(root)
+    watcher.on_change(_on_changes)
+    watcher.start()
+    try:
+        while True:
+            time.sleep(1)
+    except KeyboardInterrupt:
+        pass
+    finally:
+        watcher.stop()
+        print_success(f"Watch mode stopped. {update_count} incremental updates applied.")
+@click.command("index")
+@click.argument(
+    "path",
+    default=".",
+    type=click.Path(exists=True, file_okay=False, resolve_path=True),
+)
+@click.option(
+    "--force",
+    is_flag=True,
+    default=False,
+    help="Force full re-index, ignoring cache.",
+)
+@click.option(
+    "--watch",
+    "-w",
+    is_flag=True,
+    default=False,
+    help="Watch for file changes and re-index incrementally.",
+)
+@click.pass_context
+def index_cmd(ctx: click.Context, path: str, force: bool, watch: bool) -> None:
+    """Index a codebase for semantic search.
+    Scans the target directory, extracts code chunks, generates embeddings,
+    and stores them in the vector index.
+    Use --watch to enable live incremental re-indexing on file changes.
+    \b
+    Examples:
+        codexa index
+        codexa index --force
+        codexa index --watch
+    """
+    root = Path(path).resolve()
+    config_dir = AppConfig.config_dir(root)
+    if not config_dir.exists():
+        print_error(
+            f"Project not initialized at {root}. Run 'codexa init' first."
+        )
+        ctx.exit(1)
+        return
+    if watch:
+        _run_watch_mode(root, force)
+        return
+    print_info(f"Indexing codebase at: {root}")
+    if force:
+        print_info("Force mode: full re-index will be performed.")
+    try:
+        result = run_indexing(project_root=root, force=force)
+    except Exception as e:
+        print_error(f"Indexing failed: {e}")
+        logger.debug("Indexing error details:", exc_info=True)
+        ctx.exit(1)
+        return
+    if result.files_scanned == 0:
+        print_warning("No indexable files found.")
+    else:
+        print_success(
+            f"Indexed {result.files_indexed} files "
+            f"({result.chunks_created} chunks, {result.total_vectors} vectors). "
+            f"Skipped {result.files_skipped} unchanged files."
+        )

semantic_code_intelligence/cli/commands/init_cmd.py ADDED Viewed

@@ -0,0 +1,152 @@
+"""CLI command: init - Initialize a new project for semantic code intelligence."""
+from __future__ import annotations
+import json
+from pathlib import Path
+import click
+from semantic_code_intelligence.config.settings import (
+    AppConfig,
+    init_project,
+    load_config,
+)
+from semantic_code_intelligence.utils.logging import (
+    get_logger,
+    print_error,
+    print_info,
+    print_success,
+    print_warning,
+)
+logger = get_logger("cli.init")
+def _generate_vscode_mcp_config(root: Path) -> bool:
+    """Create .vscode/settings.json with MCP server config if not present."""
+    vscode_dir = root / ".vscode"
+    settings_path = vscode_dir / "settings.json"
+    mcp_block = {
+        "mcp": {
+            "servers": {
+                "codexa": {
+                    "command": "codexa",
+                    "args": ["mcp", "--path", str(root)],
+                }
+            }
+        }
+    }
+    if settings_path.exists():
+        try:
+            existing = json.loads(settings_path.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            existing = {}
+        if "mcp" in existing:
+            return False  # already configured
+        existing.update(mcp_block)
+        settings_path.write_text(
+            json.dumps(existing, indent=4) + "\n", encoding="utf-8"
+        )
+        return True
+    vscode_dir.mkdir(exist_ok=True)
+    settings_path.write_text(
+        json.dumps(mcp_block, indent=4) + "\n", encoding="utf-8"
+    )
+    return True
+@click.command("init")
+@click.argument(
+    "path",
+    default=".",
+    type=click.Path(exists=True, file_okay=False, resolve_path=True),
+)
+@click.option(
+    "--index",
+    "auto_index",
+    is_flag=True,
+    default=False,
+    help="Automatically index the project after initialization.",
+)
+@click.option(
+    "--vscode",
+    "setup_vscode",
+    is_flag=True,
+    default=False,
+    help="Generate .vscode/settings.json with MCP server config.",
+)
+@click.pass_context
+def init_cmd(ctx: click.Context, path: str, auto_index: bool, setup_vscode: bool) -> None:
+    """Initialize a project for semantic code indexing.
+    Creates a .codexa/ directory with default configuration and an empty index.
+    \b
+    Quick start:
+        codexa init                  # basic setup
+        codexa init --index          # setup + build index immediately
+        codexa init --vscode         # setup + configure VS Code MCP
+        codexa init --index --vscode # full setup in one command
+    """
+    root = Path(path).resolve()
+    # Check if already initialized
+    config_dir = AppConfig.config_dir(root)
+    if config_dir.exists():
+        print_info(f"Project already initialized at {root}")
+        print_info(f"Config directory: {config_dir}")
+        # Still allow --vscode and --index on existing projects
+        if setup_vscode:
+            if _generate_vscode_mcp_config(root):
+                print_success("VS Code MCP config written to .vscode/settings.json")
+            else:
+                print_info("VS Code MCP config already exists")
+        if auto_index:
+            _run_index(root)
+        return
+    try:
+        config, config_path = init_project(root)
+        print_success(f"Initialized project at {root}")
+        print_info(f"Config file: {config_path}")
+        print_info(f"Index directory: {AppConfig.index_dir(root)}")
+        logger.debug("Default config: %s", config.model_dump())
+    except OSError as e:
+        print_error(f"Failed to initialize project: {e}")
+        ctx.exit(1)
+        return
+    if setup_vscode:
+        if _generate_vscode_mcp_config(root):
+            print_success("VS Code MCP config written to .vscode/settings.json")
+    if auto_index:
+        _run_index(root)
+    else:
+        print_info("")
+        print_info("Next steps:")
+        print_info("  codexa index    — Build the search index")
+        print_info("  codexa search   — Search your code")
+        print_info("  codexa grep     — Raw file search (no index needed)")
+def _run_index(root: Path) -> None:
+    """Run indexing as part of init."""
+    from semantic_code_intelligence.services.indexing_service import index_project
+    print_info("Building search index...")
+    try:
+        result = index_project(root)
+        print_success(
+            f"Indexed {result.chunks_stored} chunks from "
+            f"{result.files_scanned} files"
+        )
+    except Exception as e:
+        print_warning(f"Indexing failed: {e}")
+        print_info("Run 'codexa index' manually to build the index.")

semantic_code_intelligence/cli/commands/investigate_cmd.py ADDED Viewed

@@ -0,0 +1,163 @@
+"""CLI command: investigate — autonomous multi-step code investigation."""
+from __future__ import annotations
+import json as json_mod
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+import click
+from semantic_code_intelligence.utils.logging import (
+    console,
+    get_logger,
+    print_error,
+    print_info,
+)
+if TYPE_CHECKING:
+    from semantic_code_intelligence.llm.provider import LLMProvider
+logger = get_logger("cli.investigate")
+def _wrap_provider(provider: LLMProvider, llm: Any, config: Any) -> LLMProvider:
+    """Wrap a provider with caching and rate limiting based on config."""
+    from semantic_code_intelligence.llm.cache import LLMCache
+    from semantic_code_intelligence.llm.cached_provider import CachedProvider
+    from semantic_code_intelligence.llm.rate_limiter import RateLimiter
+    cache = None
+    if getattr(llm, "cache_enabled", False):
+        cache_dir = str(config.config_dir(config.project_root)) if hasattr(config, "config_dir") else None
+        cache = LLMCache(
+            cache_dir=cache_dir,
+            ttl_hours=getattr(llm, "cache_ttl_hours", 24),
+            max_entries=getattr(llm, "cache_max_entries", 1000),
+        )
+    rate_limiter = None
+    rpm = getattr(llm, "rate_limit_rpm", 0)
+    tpm = getattr(llm, "rate_limit_tpm", 0)
+    if rpm > 0 or tpm > 0:
+        rate_limiter = RateLimiter(rpm=rpm, tpm=tpm)
+    if cache is not None or rate_limiter is not None:
+        return CachedProvider(provider, cache=cache, rate_limiter=rate_limiter)
+    return provider
+def _get_provider(config: Any) -> LLMProvider:
+    """Build an LLM provider from the app configuration."""
+    from semantic_code_intelligence.config.settings import LLMConfig
+    llm: LLMConfig = config.llm
+    if llm.provider == "openai":
+        from semantic_code_intelligence.llm.openai_provider import OpenAIProvider
+        provider: LLMProvider = OpenAIProvider(
+            api_key=llm.api_key,
+            model=llm.model,
+            base_url=llm.base_url or None,
+            temperature=llm.temperature,
+            max_tokens=llm.max_tokens,
+        )
+    elif llm.provider == "ollama":
+        from semantic_code_intelligence.llm.ollama_provider import OllamaProvider
+        provider = OllamaProvider(
+            model=llm.model,
+            base_url=llm.base_url or "http://localhost:11434",
+            temperature=llm.temperature,
+            max_tokens=llm.max_tokens,
+        )
+    else:
+        from semantic_code_intelligence.llm.mock_provider import MockProvider
+        provider = MockProvider()
+    return _wrap_provider(provider, llm, config)
+@click.command("investigate")
+@click.argument("question", type=str)
+@click.option(
+    "--max-steps", "-n",
+    default=6,
+    type=int,
+    help="Maximum investigation steps before forcing a conclusion.",
+)
+@click.option(
+    "--json-output", "--json", "json_mode",
+    is_flag=True,
+    default=False,
+    help="Output in JSON format.",
+)
+@click.option(
+    "--path", "-p",
+    default=".",
+    type=click.Path(exists=True, file_okay=False, resolve_path=True),
+    help="Project root path.",
+)
+@click.option(
+    "--stream",
+    is_flag=True,
+    default=False,
+    help="Stream the conclusion tokens incrementally.",
+)
+@click.option("--pipe", is_flag=True, default=False, hidden=True)
+@click.pass_context
+def investigate_cmd(
+    ctx: click.Context,
+    question: str,
+    max_steps: int,
+    json_mode: bool,
+    path: str,
+    stream: bool,
+    pipe: bool,
+) -> None:
+    """Run an autonomous multi-step investigation to answer a question.
+    CodexA iteratively searches, analyses symbols, and examines dependencies
+    until it can confidently answer your question.  Each step is visible
+    so you can follow the reasoning chain.
+    """
+    from semantic_code_intelligence.config.settings import load_config
+    from semantic_code_intelligence.llm.investigation import InvestigationChain
+    root = Path(path).resolve()
+    pipe = pipe or ctx.obj.get("pipe", False)
+    config = load_config(root)
+    provider = _get_provider(config)
+    chain = InvestigationChain(provider, root, max_steps=max_steps)
+    result = chain.investigate(question, stream_conclusion=stream and not json_mode)
+    if json_mode:
+        click.echo(json_mod.dumps(result.to_dict(), indent=2))
+    elif pipe:
+        for step in result.steps:
+            click.echo(f"[{step['step']}] {step['action']}: {step.get('action_input', '')}")
+        click.echo(f"\nConclusion: {result.conclusion}")
+    else:
+        from rich.panel import Panel
+        from rich.markdown import Markdown
+        for step in result.steps:
+            action = step["action"]
+            thought = step.get("thought", "")
+            output = step.get("output", "")[:300]
+            console.print(
+                f"  [bold cyan]Step {step['step']}[/] [{action}] "
+                f"[dim]{thought}[/]"
+            )
+            if output and action != "conclude":
+                console.print(f"    [dim]{output}[/dim]")
+        console.print()
+        console.print(Panel(
+            Markdown(result.conclusion),
+            title=f"Investigation ({result.total_steps} steps)",
+            border_style="green",
+        ))