codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Sample CodexA plugin — custom code validator.
|
|
2
|
+
|
|
3
|
+
Demonstrates the CUSTOM_VALIDATION hook to add project-specific
|
|
4
|
+
code quality checks. This example flags common issues like TODO
|
|
5
|
+
comments and print statements in production code.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
1. Copy this file to `.codexa/plugins/`
|
|
9
|
+
2. Custom validations will run during `codexa review`
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from semantic_code_intelligence.plugins import PluginBase, PluginHook, PluginMetadata
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CodeQualityPlugin(PluginBase):
    """Example validator that reports marker comments and print() calls."""

    def metadata(self) -> PluginMetadata:
        """Describe the plugin and the single hook it subscribes to."""
        info = PluginMetadata(
            name="code-quality",
            version="0.1.0",
            description="Custom code quality validation rules",
            author="CodexA Team",
            hooks=[PluginHook.CUSTOM_VALIDATION],
        )
        return info

    def on_hook(self, hook: PluginHook, data: dict[str, Any]) -> dict[str, Any]:
        """Append code-quality findings to ``data["issues"]``.

        Keys read from ``data`` for the CUSTOM_VALIDATION hook:
            - code: str — source text to inspect (defaults to "")
            - issues: list — findings collected so far; new ones are
              appended to this same list (defaults to a new list)

        Any other hook returns ``data`` untouched.
        """
        if hook != PluginHook.CUSTOM_VALIDATION:
            return data

        findings = data.get("issues", [])
        markers = ("TODO", "FIXME", "HACK", "XXX")

        # Both rule sets are evaluated in one pass over the text, but kept in
        # separate buckets so that every marker finding still precedes every
        # print() finding in the final list.
        marker_hits: list[dict[str, Any]] = []
        print_hits: list[dict[str, Any]] = []

        for lineno, text in enumerate(data.get("code", "").splitlines(), 1):
            # One finding per marker present on the line (substring match).
            for marker in markers:
                if marker in text:
                    marker_hits.append({
                        "line": lineno,
                        "description": f"{marker} comment found",
                        "severity": "info",
                        "source": "code-quality",
                    })

            # Lines that begin with print( suggest using logging instead,
            # except the explicitly tolerated f"DEBUG... form.
            trimmed = text.strip()
            if not trimmed.startswith("print("):
                continue
            if trimmed.startswith("print(f\"DEBUG"):
                continue
            print_hits.append({
                "line": lineno,
                "description": "Consider using logging instead of print()",
                "severity": "warning",
                "source": "code-quality",
            })

        findings.extend(marker_hits)
        findings.extend(print_hits)

        data["issues"] = findings
        return data
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def create_plugin() -> CodeQualityPlugin:
    """Return a new CodeQualityPlugin; the factory used for plugin discovery."""
    plugin = CodeQualityPlugin()
    return plugin
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Sample CodexA plugin — search result annotator.
|
|
2
|
+
|
|
3
|
+
This example plugin demonstrates how to build a CodexA plugin that
|
|
4
|
+
hooks into the search pipeline. It adds a timestamp and custom tag
|
|
5
|
+
to every search result passing through.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
1. Copy this file to `.codexa/plugins/`
|
|
9
|
+
2. Results from `codexa search` will include the annotation
|
|
10
|
+
|
|
11
|
+
This serves as a starting point for building your own plugins.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import time
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from semantic_code_intelligence.plugins import PluginBase, PluginHook, PluginMetadata
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SearchAnnotatorPlugin(PluginBase):
    """Example plugin that stamps each search result with marker fields."""

    def metadata(self) -> PluginMetadata:
        """Describe the plugin and the single hook it subscribes to."""
        info = PluginMetadata(
            name="search-annotator",
            version="0.1.0",
            description="Annotates search results with custom metadata",
            author="CodexA Team",
            hooks=[PluginHook.POST_SEARCH],
        )
        return info

    def activate(self, context: dict[str, Any]) -> None:
        """Record the wall-clock time of activation; ``context`` is not read."""
        self._activated_at = time.time()

    def on_hook(self, hook: PluginHook, data: dict[str, Any]) -> dict[str, Any]:
        """Tag every entry of ``data["results"]`` and record how many were tagged.

        Keys used for the POST_SEARCH hook:
            - results: list of search result dicts (defaults to an empty list)
            - query: the original search query (not read here)

        Any other hook returns ``data`` untouched.
        """
        if hook != PluginHook.POST_SEARCH:
            return data

        hits = data.get("results", [])
        for hit in hits:
            # Each hit gets its own timestamp, taken at annotation time.
            hit["annotated_by"] = "search-annotator"
            hit["annotated_at"] = time.time()

        data["annotation_count"] = len(hits)
        return data
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def create_plugin() -> SearchAnnotatorPlugin:
    """Return a new SearchAnnotatorPlugin; the factory used for plugin discovery."""
    plugin = SearchAnnotatorPlugin()
    return plugin
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""Scalability utilities — batch processing, memory management, performance.
|
|
2
|
+
|
|
3
|
+
Provides:
|
|
4
|
+
- BatchProcessor: processes items in configurable batches
|
|
5
|
+
- MemoryAwareEmbedder: generates embeddings with memory-safe batching
|
|
6
|
+
- ParallelScanner: concurrent file scanning
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Callable, TypeVar
|
|
15
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
16
|
+
|
|
17
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
18
|
+
|
|
19
|
+
# Module-level logger shared by the batch, embedding and scanning helpers below.
logger = get_logger("scalability")
|
|
20
|
+
|
|
21
|
+
# Element type of the input lists handed to BatchProcessor.process.
T = TypeVar("T")
|
|
22
|
+
# Result type produced by the processor callables used in this module.
R = TypeVar("R")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Batch Processor
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
@dataclass
class BatchStats:
    """Counters and timing collected over one batch-processing run."""

    # Number of items submitted to the run.
    total_items: int = 0
    # Number of batches attempted, whether they succeeded or failed.
    batches_processed: int = 0
    # Items belonging to batches that completed without raising.
    items_succeeded: int = 0
    # Items belonging to batches whose processor raised.
    items_failed: int = 0
    # Wall-clock duration of the run in seconds.
    elapsed_seconds: float = 0.0

    def to_dict(self) -> dict[str, Any]:
        """Return the counters as a plain dict with a derived throughput.

        ``elapsed_seconds`` is rounded to 3 places. ``items_per_second`` is
        succeeded items divided by elapsed time rounded to 2 places, or ``0``
        when no time has elapsed.
        """
        if self.elapsed_seconds > 0:
            throughput = round(self.items_succeeded / self.elapsed_seconds, 2)
        else:
            # Avoid dividing by zero for runs with no measurable duration.
            throughput = 0

        summary: dict[str, Any] = {}
        summary["total_items"] = self.total_items
        summary["batches_processed"] = self.batches_processed
        summary["items_succeeded"] = self.items_succeeded
        summary["items_failed"] = self.items_failed
        summary["elapsed_seconds"] = round(self.elapsed_seconds, 3)
        summary["items_per_second"] = throughput
        return summary
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class BatchProcessor:
    """Apply a batch-level callable to a list, one fixed-size slice at a time.

    Intended for chunking/embedding large sets of files without handing
    everything to the processor at once.
    """

    def __init__(self, batch_size: int = 64) -> None:
        # Sizes below one are raised to one so slicing always makes progress.
        self._batch_size = batch_size if batch_size > 1 else 1

    @property
    def batch_size(self) -> int:
        """Effective (clamped) number of items per batch."""
        return self._batch_size

    def process(
        self,
        items: list[T],
        processor: Callable[[list[T]], list[R]],
        on_batch: Callable[[int, int], None] | None = None,
    ) -> tuple[list[R], BatchStats]:
        """Run ``processor`` over ``items`` slice by slice.

        Args:
            items: Items to process.
            processor: Called with each slice; its returned results are
                appended to the overall result list.
            on_batch: Optional callback invoked as
                ``on_batch(batch_num, total_batches)`` before each slice,
                with ``batch_num`` starting at 1.

        Returns:
            Tuple of (all_results, stats). A slice whose processing raises is
            logged and counted as failed; it contributes no complete result
            set and the run continues with the next slice.
        """
        size = self._batch_size
        item_count = len(items)

        stats = BatchStats(total_items=item_count)
        collected: list[R] = []
        started_at = time.time()

        # Ceiling division: number of slices needed to cover every item.
        total_batches = -(-item_count // size)

        for batch_no, begin in enumerate(range(0, item_count, size), start=1):
            chunk = items[begin : begin + size]

            if on_batch:
                on_batch(batch_no, total_batches)

            try:
                collected.extend(processor(chunk))
                stats.items_succeeded += len(chunk)
            except Exception:
                logger.exception("Batch %d/%d failed", batch_no, total_batches)
                stats.items_failed += len(chunk)

            stats.batches_processed += 1

        stats.elapsed_seconds = time.time() - started_at
        return collected, stats
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# ---------------------------------------------------------------------------
|
|
110
|
+
# Memory-aware Embedding Generator
|
|
111
|
+
# ---------------------------------------------------------------------------
|
|
112
|
+
|
|
113
|
+
class MemoryAwareEmbedder:
    """Generates embeddings in memory-safe batches.

    Wraps the base generator so that texts are embedded ``batch_size`` at a
    time instead of all at once.
    """

    def __init__(
        self,
        model_name: str = "all-MiniLM-L6-v2",
        batch_size: int = 64,
    ) -> None:
        self._model_name = model_name
        self._processor = BatchProcessor(batch_size)
        # Mirror the size the processor actually uses (it clamps to >= 1) so
        # the two values can never disagree.
        self._batch_size = self._processor.batch_size

    def generate(
        self,
        texts: list[str],
        show_progress: bool = False,
    ) -> Any:
        """Generate embeddings in batches and stack the per-batch results.

        Args:
            texts: Texts to embed.
            show_progress: When True, log one info line per batch.

        Returns:
            The per-batch outputs of ``generate_embeddings`` stacked with
            ``numpy.vstack``. When every batch succeeds this is expected to
            have shape ``(len(texts), dimension)`` (assuming each batch yields
            one row per text — confirm against ``generate_embeddings``).
            Batches that raise are skipped, so the array then has fewer rows
            than ``texts``; a warning is logged in that case. If nothing was
            embedded, an empty ``(0, 0)`` array is returned.
        """
        import numpy as np

        from semantic_code_intelligence.embeddings.generator import generate_embeddings

        def _embed_batch(batch: list[str]) -> list[Any]:
            # Wrapped in a list so the processor collects one array per batch,
            # ready to be stacked below.
            return [generate_embeddings(batch, model_name=self._model_name)]

        def _on_batch(current: int, total: int) -> None:
            logger.info("Embedding batch %d/%d", current, total)

        raw_results, stats = self._processor.process(
            texts, _embed_batch, on_batch=_on_batch if show_progress else None
        )

        if stats.items_failed:
            # Rows for failed batches are missing, so positions in the result
            # no longer line up one-to-one with ``texts``; make that visible.
            logger.warning(
                "Embedding failed for %d of %d texts; result omits those rows",
                stats.items_failed,
                stats.total_items,
            )

        if not raw_results:
            return np.empty((0, 0))

        return np.vstack(raw_results)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
# Parallel File Scanner
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
class ParallelScanner:
    """Apply a callable to many files using a thread pool.

    Useful for I/O-bound operations like reading/hashing multiple files.
    """

    def __init__(self, max_workers: int = 4) -> None:
        # At least one worker thread is always used.
        self._max_workers = max_workers if max_workers > 1 else 1

    def scan_and_process(
        self,
        file_paths: list[Path],
        processor: Callable[[Path], R],
    ) -> tuple[list[R], list[str]]:
        """Run ``processor`` on every path concurrently.

        Args:
            file_paths: Files to process.
            processor: Function to apply to each file.

        Returns:
            Tuple of (results, error_messages). Results are collected in
            completion order, not input order. Each error message has the
            form ``"<path>: <exception>"``.
        """
        succeeded: list[R] = []
        failures: list[str] = []

        with ThreadPoolExecutor(max_workers=self._max_workers) as pool:
            # Remember which path each future belongs to for error reporting.
            pending = {}
            for path in file_paths:
                pending[pool.submit(processor, path)] = path

            for done in as_completed(pending):
                path = pending[done]
                try:
                    outcome = done.result()
                except Exception as exc:
                    failures.append(f"{path}: {exc}")
                    logger.debug("Failed to process %s: %s", path, exc)
                else:
                    succeeded.append(outcome)

        return succeeded, failures
|
|
File without changes
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Search result formatter — renders search results for CLI and JSON output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.syntax import Syntax
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
from rich.text import Text
|
|
13
|
+
|
|
14
|
+
from semantic_code_intelligence.services.search_service import SearchResult
|
|
15
|
+
from semantic_code_intelligence.utils.logging import console
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def format_results_json(query: str, results: list[SearchResult], top_k: int) -> str:
    """Serialize a search response as one pretty-printed JSON document.

    Args:
        query: The search query.
        results: Result objects; each is serialized via its ``to_dict()``.
        top_k: Number of results requested.

    Returns:
        JSON text indented by two spaces, with non-ASCII characters kept
        as-is, containing ``query``, ``top_k``, ``result_count`` and
        ``results``.
    """
    serialized = [item.to_dict() for item in results]
    document: dict[str, Any] = dict(
        query=query,
        top_k=top_k,
        result_count=len(results),
        results=serialized,
    )
    return json.dumps(document, ensure_ascii=False, indent=2)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def format_results_jsonl(results: list[SearchResult]) -> str:
    """Serialize search results as JSONL: one compact JSON object per line.

    Every line stands alone, which suits piping into ``jq``, ``fzf``, or
    streaming ingestion. Lines are joined with ``\\n`` without a trailing
    newline; an empty result list gives an empty string.
    """
    return "\n".join(
        json.dumps(item.to_dict(), ensure_ascii=False) for item in results
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def format_results_rich(
    query: str,
    results: list[SearchResult],
    *,
    line_numbers: bool = False,
    context_lines: int = 0,
) -> None:
    """Print search results as rich formatted panels to the console.

    Args:
        query: The search query.
        results: List of SearchResult objects.
        line_numbers: Accepted for CLI compatibility (grep -n). Line numbers
            are currently always rendered, so this flag has no visible effect.
        context_lines: Number of extra context lines to display around content.
    """
    # Function-scope import, in the same style as _expand_context in this module.
    from rich.markup import escape

    # The query and file paths are user/filesystem text that ends up inside
    # Rich markup; escape them so that e.g. "arr[i]" is shown literally
    # instead of being parsed as a style tag.
    safe_query = escape(query)

    if not results:
        console.print(f"\n[yellow]No results found for:[/yellow] \"{safe_query}\"\n")
        return

    console.print(f"\n[bold cyan]Search results for:[/bold cyan] \"{safe_query}\"")
    console.print(f"[dim]Found {len(results)} results[/dim]\n")

    for i, result in enumerate(results, 1):
        # Optionally expand context lines from the file on disk
        content = result.content
        start = result.start_line
        end = result.end_line
        if context_lines > 0:
            content, start = _expand_context(result, context_lines)
            # Derive the last displayed line from what was actually returned:
            # the expansion is clamped at end of file and falls back to the
            # unexpanded chunk when the file cannot be read, so simply adding
            # context_lines would overstate the range.
            last_shown = start + len(content.splitlines()) - 1
            end = min(result.end_line + context_lines, max(result.end_line, last_shown))

        # Header with file path, lines, and score
        header = (
            f"[bold]{escape(str(result.file_path))}[/bold] "
            f"[dim]L{start}-L{end}[/dim] "
            f"[green]score: {result.score:.4f}[/green]"
        )

        # Code snippet with syntax highlighting; fall back to plain text if
        # the highlighter cannot be constructed for this content/language.
        try:
            syntax: str | Syntax = Syntax(
                content.rstrip(),
                result.language if result.language != "unknown" else "text",
                line_numbers=True,
                start_line=start,
                theme="monokai",
            )
        except Exception:
            syntax = content.rstrip()

        panel = Panel(
            syntax,
            title=f"[bold]#{i}[/bold] {header}",
            title_align="left",
            border_style="cyan",
            padding=(0, 1),
        )
        console.print(panel)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _expand_context(result: SearchResult, ctx: int) -> tuple[str, int]:
    """Return the result's text widened by ``ctx`` lines on each side.

    The lines are re-read from ``result.file_path``. The window is clamped to
    the file's bounds. If the path is not a regular file or cannot be read,
    the original ``(result.content, result.start_line)`` is returned.

    Returns:
        Tuple of (text, 1-based number of the first line in that text).
    """
    from pathlib import Path

    source = Path(result.file_path)
    unexpanded = (result.content, result.start_line)

    if not source.is_file():
        return unexpanded

    try:
        text = source.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return unexpanded

    # Keep line endings so the joined window reproduces the file text exactly.
    file_lines = text.splitlines(keepends=True)

    first = max(1, result.start_line - ctx)
    last = min(len(file_lines), result.end_line + ctx)
    window = file_lines[first - 1 : last]
    return "".join(window), first
|