codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Visualization generators — Mermaid-compatible text graphs.
|
|
2
|
+
|
|
3
|
+
All functions return plain strings (Mermaid diagram markup) so they can
|
|
4
|
+
be rendered by any compatible viewer, embedded in Markdown, or displayed
|
|
5
|
+
in the terminal as-is.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def render_call_graph(
    edges: list[dict[str, Any]],
    *,
    title: str = "Call Graph",
    direction: str = "LR",
) -> str:
    """Build Mermaid flowchart markup for a set of call edges.

    Args:
        edges: Edge dicts; the ``caller`` and ``callee`` values are read
            from each one. An edge missing either value is skipped.
        title: Title written into the Mermaid front matter.
        direction: Mermaid flowchart direction (``LR``, ``TD``, ``TB``, ``RL``).

    Returns:
        Mermaid flowchart source text. When no edge is usable a single
        placeholder node is emitted instead.
    """
    header = ["---", f"title: {title}", "---", f"flowchart {direction}"]

    edge_lines: list[str] = []
    for edge in edges:
        caller_key = edge.get("caller", "")
        callee_key = edge.get("callee", "")
        if not (caller_key and callee_key):
            continue
        # Each endpoint is written as  <id>["<label>"].
        caller_node = f'{_sanitize_id(caller_key)}["{_short_label(caller_key)}"]'
        callee_node = f'{_sanitize_id(callee_key)}["{_short_label(callee_key)}"]'
        edge_lines.append(f" {caller_node} --> {callee_node}")

    if not edge_lines:
        edge_lines.append(' empty["No call edges found"]')

    return "\n".join(header + edge_lines)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def render_dependency_graph(
    deps: dict[str, Any],
    *,
    title: str = "Dependency Graph",
    direction: str = "TD",
) -> str:
    """Render file-level dependencies as a Mermaid flowchart.

    Args:
        deps: Mapping with a ``dependencies`` key. Its value is either a
            list of ``{source_file, import_text}`` dicts, or a dict whose
            list values hold such dicts (nested per file).
        title: Title written into the Mermaid front matter.
        direction: Mermaid flowchart direction.

    Returns:
        Mermaid flowchart source text. Duplicate edges are emitted once;
        when nothing is usable a single placeholder node is emitted.
    """

    def _escape(label: str) -> str:
        # Labels are emitted inside ["..."]; a raw double quote (common in
        # ``#include "x.h"`` or ``from "./mod"``) would end the label early,
        # so substitute Mermaid's entity code.
        return label.replace('"', "#quot;")

    lines: list[str] = ["---", f"title: {title}", "---", f"flowchart {direction}"]

    dep_list = deps.get("dependencies", [])
    if isinstance(dep_list, dict):
        # Some formats nest per-file: flatten the list values, ignore the rest.
        dep_list = [
            entry
            for entries in dep_list.values()
            if isinstance(entries, list)
            for entry in entries
        ]

    seen: set[str] = set()
    for entry in dep_list:
        src = entry.get("source_file", "")
        imp = entry.get("import_text", "")
        # Whitespace-only import text has no token to label the node with
        # (and previously crashed on ``imp.split()[-1]``), so skip it too.
        if not src or not imp or not imp.strip():
            continue

        src_id = _sanitize_id(src)
        imp_id = _sanitize_id(imp)
        edge_key = f"{src_id}-->{imp_id}"
        if edge_key in seen:
            continue
        seen.add(edge_key)

        src_label = Path(src).name
        # For multi-word import statements the last token is used as the label.
        imp_label = imp.split()[-1] if " " in imp else imp
        # Truncate long import labels (before escaping, so entities stay whole).
        if len(imp_label) > 40:
            imp_label = imp_label[:37] + "..."
        lines.append(
            f' {src_id}["{_escape(src_label)}"] --> {imp_id}["{_escape(imp_label)}"]'
        )

    if not seen:
        lines.append(' empty["No dependencies found"]')

    return "\n".join(lines)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def render_workspace_graph(
    repos: list[dict[str, Any]],
    *,
    title: str = "Workspace Repositories",
) -> str:
    """Draw the workspace and its repositories as a Mermaid flowchart.

    Args:
        repos: Repo dicts; ``name``, ``file_count`` and ``vector_count``
            are read from each one, with ``"unknown"`` / ``"?"`` used for
            missing values.
        title: Title written into the Mermaid front matter.

    Returns:
        Mermaid flowchart source text with one hub node linked to a node
        per repository, or to a placeholder when *repos* is empty.
    """
    hub = "workspace"
    out: list[str] = [
        "---",
        f"title: {title}",
        "---",
        "flowchart TD",
        f' {hub}(("Workspace"))',
    ]

    if repos:
        for info in repos:
            repo_name = info.get("name", "unknown")
            node_id = _sanitize_id(f"repo_{repo_name}")
            n_files = info.get("file_count", "?")
            n_vectors = info.get("vector_count", "?")
            # The label carries a literal backslash-n between its two parts.
            caption = f"{repo_name}\\n{n_files} files, {n_vectors} vectors"
            out.append(f' {hub} --> {node_id}["{caption}"]')
    else:
        out.append(f' {hub} --> none["No repositories"]')

    return "\n".join(out)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def render_symbol_map(
    symbols: list[dict[str, Any]],
    *,
    title: str = "Symbol Map",
    file_path: str = "",
) -> str:
    """Lay out a file's symbols as a Mermaid class diagram.

    Args:
        symbols: Symbol dicts; ``kind``, ``name`` and ``parent`` are read
            from each one.
        title: Title written into the Mermaid front matter.
        file_path: Source file being mapped (accepted but not used when
            building the diagram).

    Returns:
        Mermaid class diagram source text. Classes list their methods,
        free functions are grouped under a ``Functions`` class, and an
        ``Empty`` placeholder class is emitted when neither exists.
    """
    members_by_class: dict[str, list[str]] = {}  # class name -> member lines
    free_functions: list[str] = []

    for symbol in symbols:
        sym_kind = symbol.get("kind", "")
        sym_name = symbol.get("name", "")
        owner = symbol.get("parent", "")

        if sym_kind == "class":
            members_by_class.setdefault(sym_name, [])
        elif sym_kind == "method" and owner:
            members_by_class.setdefault(owner, []).append(f"+{sym_name}()")
        elif sym_kind == "function":
            free_functions.append(sym_name)
        # Imports and any other kinds are left out of the diagram.

    out: list[str] = ["---", f"title: {title}", "---", "classDiagram"]

    for class_name, member_lines in members_by_class.items():
        out.append(f" class {_sanitize_class_id(class_name)} {{")
        out.extend(f" {line}" for line in member_lines)
        out.append(" }")

    if free_functions:
        out.append(" class Functions {")
        out.extend(f" +{func}()" for func in free_functions)
        out.append(" }")

    if not (members_by_class or free_functions):
        out.extend([" class Empty {", " No symbols found", " }"])

    return "\n".join(out)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# ------------------------------------------------------------------
|
|
202
|
+
# Internal helpers
|
|
203
|
+
# ------------------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
def _sanitize_id(text: str) -> str:
|
|
206
|
+
"""Convert arbitrary text into a Mermaid-safe node ID."""
|
|
207
|
+
# Take just the filename/symbol portion
|
|
208
|
+
if ":" in text:
|
|
209
|
+
text = text.rsplit(":", 1)[-1]
|
|
210
|
+
text = Path(text).stem if "/" in text or "\\" in text else text
|
|
211
|
+
# Replace non-alphanumeric chars with underscores
|
|
212
|
+
return re.sub(r"[^a-zA-Z0-9_]", "_", text).strip("_")[:60] or "node"
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _sanitize_class_id(text: str) -> str:
|
|
216
|
+
"""Convert a class name to a Mermaid-safe class diagram ID."""
|
|
217
|
+
return re.sub(r"[^a-zA-Z0-9_]", "_", text).strip("_")[:60] or "Unknown"
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _short_label(text: str) -> str:
|
|
221
|
+
"""Extract a short display label from a caller/callee key."""
|
|
222
|
+
if ":" in text:
|
|
223
|
+
parts = text.rsplit(":", 1)
|
|
224
|
+
fname = Path(parts[0]).name
|
|
225
|
+
return f"{fname}:{parts[1]}"
|
|
226
|
+
return text
|
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
"""Multi-repository workspace — manage, index, and search across multiple repos.
|
|
2
|
+
|
|
3
|
+
A *workspace* is a collection of repositories stored in a lightweight JSON
|
|
4
|
+
manifest at ``<root>/.codexa/workspace.json``. Each repository has its own
|
|
5
|
+
vector index under ``.codexa/repos/<repo_name>/``, enabling incremental
|
|
6
|
+
per-repo indexing while supporting merged cross-repo search.
|
|
7
|
+
|
|
8
|
+
Typical usage::
|
|
9
|
+
|
|
10
|
+
ws = Workspace.load_or_create(Path("/my/workspace"))
|
|
11
|
+
ws.add_repo("backend", Path("/my/workspace/backend"))
|
|
12
|
+
ws.add_repo("frontend", Path("/my/workspace/frontend"))
|
|
13
|
+
ws.save()
|
|
14
|
+
|
|
15
|
+
ws.index_all() # index every repo
|
|
16
|
+
results = ws.search("authentication") # merged results
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
import time
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
from semantic_code_intelligence.config.settings import (
|
|
28
|
+
AppConfig,
|
|
29
|
+
IndexConfig,
|
|
30
|
+
load_config,
|
|
31
|
+
)
|
|
32
|
+
from semantic_code_intelligence.indexing.scanner import scan_repository
|
|
33
|
+
from semantic_code_intelligence.services.indexing_service import IndexingResult, run_indexing
|
|
34
|
+
from semantic_code_intelligence.services.search_service import SearchMode, SearchResult
|
|
35
|
+
from semantic_code_intelligence.storage.vector_store import VectorStore
|
|
36
|
+
from semantic_code_intelligence.embeddings.generator import generate_embeddings
|
|
37
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
38
|
+
|
|
39
|
+
logger = get_logger("workspace")
|
|
40
|
+
|
|
41
|
+
WORKSPACE_FILE = "workspace.json"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# Data models
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
@dataclass
class RepoEntry:
    """Record describing one repository registered in a workspace."""

    name: str
    path: str  # absolute path
    last_indexed: float = 0.0  # epoch timestamp
    file_count: int = 0
    vector_count: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Return the entry's fields as a plain dictionary."""
        field_names = ("name", "path", "last_indexed", "file_count", "vector_count")
        return {key: getattr(self, key) for key in field_names}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "RepoEntry":
        """Build a :class:`RepoEntry` from a dictionary.

        ``name`` and ``path`` are required (``KeyError`` when absent); the
        remaining fields fall back to their defaults. Unknown keys are
        ignored.
        """
        kwargs: dict[str, Any] = {"name": data["name"], "path": data["path"]}
        optional_defaults = {"last_indexed": 0.0, "file_count": 0, "vector_count": 0}
        for key, fallback in optional_defaults.items():
            kwargs[key] = data.get(key, fallback)
        return cls(**kwargs)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class WorkspaceManifest:
    """Workspace manifest that converts to and from a plain dictionary."""

    version: str = "1.0.0"
    repos: list[RepoEntry] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the manifest, including every repo entry, as a dictionary."""
        serialised_repos = []
        for repo in self.repos:
            serialised_repos.append(repo.to_dict())
        return {"version": self.version, "repos": serialised_repos}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "WorkspaceManifest":
        """Build a :class:`WorkspaceManifest` from a dictionary.

        A missing ``version`` defaults to ``"1.0.0"`` and a missing
        ``repos`` list to no repositories.
        """
        manifest_version = data.get("version", "1.0.0")
        entries = [RepoEntry.from_dict(raw) for raw in data.get("repos", [])]
        return cls(version=manifest_version, repos=entries)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
# Workspace
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
class Workspace:
|
|
108
|
+
"""Multi-repository workspace manager.
|
|
109
|
+
|
|
110
|
+
Keeps a manifest of registered repos, provides per-repo indexing,
|
|
111
|
+
and supports cross-repo merged search.
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
def __init__(self, root: Path, manifest: WorkspaceManifest | None = None) -> None:
|
|
115
|
+
self._root = root.resolve()
|
|
116
|
+
self._manifest = manifest or WorkspaceManifest()
|
|
117
|
+
|
|
118
|
+
# --- persistence -------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
@property
|
|
121
|
+
def root(self) -> Path:
|
|
122
|
+
"""Absolute path to the workspace root directory."""
|
|
123
|
+
return self._root
|
|
124
|
+
|
|
125
|
+
@property
|
|
126
|
+
def config_dir(self) -> Path:
|
|
127
|
+
"""Path to the ``.codexa`` configuration directory."""
|
|
128
|
+
return self._root / ".codexa"
|
|
129
|
+
|
|
130
|
+
@property
|
|
131
|
+
def repos_dir(self) -> Path:
|
|
132
|
+
"""Path to the per-repo index storage directory."""
|
|
133
|
+
return self.config_dir / "repos"
|
|
134
|
+
|
|
135
|
+
@property
def manifest_path(self) -> Path:
    """Location of the workspace manifest JSON inside ``.codexa``."""
    return self.config_dir.joinpath(WORKSPACE_FILE)
|
|
139
|
+
|
|
140
|
+
@property
|
|
141
|
+
def repos(self) -> list[RepoEntry]:
|
|
142
|
+
"""Snapshot of all registered repositories."""
|
|
143
|
+
return list(self._manifest.repos)
|
|
144
|
+
|
|
145
|
+
def save(self) -> Path:
    """Write the manifest to disk as indented JSON.

    The ``.codexa`` and per-repo index directories are created first if
    they do not exist.

    Returns:
        Path of the manifest file that was written.
    """
    for directory in (self.config_dir, self.repos_dir):
        directory.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(self._manifest.to_dict(), indent=2)
    target = self.manifest_path
    target.write_text(payload, encoding="utf-8")
    return target
|
|
152
|
+
|
|
153
|
+
@classmethod
def load(cls, root: Path) -> "Workspace":
    """Open the workspace stored under *root*.

    Raises:
        FileNotFoundError: If no manifest file exists under
            ``<root>/.codexa``.
    """
    base = root.resolve()
    manifest_file = base / ".codexa" / WORKSPACE_FILE
    if manifest_file.exists():
        payload = json.loads(manifest_file.read_text(encoding="utf-8"))
        return cls(base, WorkspaceManifest.from_dict(payload))
    raise FileNotFoundError(f"No workspace found at {base}")
|
|
163
|
+
|
|
164
|
+
@classmethod
def load_or_create(cls, root: Path) -> "Workspace":
    """Return the workspace at *root*, creating and saving one if absent."""
    try:
        workspace = cls.load(root)
    except FileNotFoundError:
        # Nothing on disk yet: start empty and persist it immediately.
        workspace = cls(root)
        workspace.save()
    return workspace
|
|
173
|
+
|
|
174
|
+
# --- repo management ---------------------------------------------------
|
|
175
|
+
|
|
176
|
+
def get_repo(self, name: str) -> RepoEntry | None:
|
|
177
|
+
"""Look up a repository by name, or return ``None``."""
|
|
178
|
+
for r in self._manifest.repos:
|
|
179
|
+
if r.name == name:
|
|
180
|
+
return r
|
|
181
|
+
return None
|
|
182
|
+
|
|
183
|
+
def add_repo(self, name: str, path: Path) -> RepoEntry:
|
|
184
|
+
"""Register a repository in the workspace.
|
|
185
|
+
|
|
186
|
+
Raises ValueError if *name* is already registered.
|
|
187
|
+
"""
|
|
188
|
+
if self.get_repo(name) is not None:
|
|
189
|
+
raise ValueError(f"Repository '{name}' already registered")
|
|
190
|
+
|
|
191
|
+
resolved = path.resolve()
|
|
192
|
+
if not resolved.is_dir():
|
|
193
|
+
raise FileNotFoundError(f"Directory not found: {resolved}")
|
|
194
|
+
|
|
195
|
+
entry = RepoEntry(name=name, path=str(resolved))
|
|
196
|
+
self._manifest.repos.append(entry)
|
|
197
|
+
return entry
|
|
198
|
+
|
|
199
|
+
def remove_repo(self, name: str) -> bool:
|
|
200
|
+
"""Unregister a repository. Returns True if found and removed."""
|
|
201
|
+
for i, r in enumerate(self._manifest.repos):
|
|
202
|
+
if r.name == name:
|
|
203
|
+
self._manifest.repos.pop(i)
|
|
204
|
+
return True
|
|
205
|
+
return False
|
|
206
|
+
|
|
207
|
+
def repo_index_dir(self, name: str) -> Path:
|
|
208
|
+
"""Return the per-repo index directory."""
|
|
209
|
+
return self.repos_dir / name
|
|
210
|
+
|
|
211
|
+
# --- indexing -----------------------------------------------------------
|
|
212
|
+
|
|
213
|
+
def index_repo(self, name: str, force: bool = False) -> IndexingResult:
|
|
214
|
+
"""Index a single repository."""
|
|
215
|
+
entry = self.get_repo(name)
|
|
216
|
+
if entry is None:
|
|
217
|
+
raise KeyError(f"Repository '{name}' not registered")
|
|
218
|
+
|
|
219
|
+
repo_root = Path(entry.path)
|
|
220
|
+
index_dir = self.repo_index_dir(name)
|
|
221
|
+
index_dir.mkdir(parents=True, exist_ok=True)
|
|
222
|
+
|
|
223
|
+
config = load_config(repo_root)
|
|
224
|
+
|
|
225
|
+
# Run the indexing pipeline but store in workspace-local index dir
|
|
226
|
+
result = _index_repo_into(repo_root, index_dir, config, force=force)
|
|
227
|
+
|
|
228
|
+
entry.last_indexed = time.time()
|
|
229
|
+
entry.file_count = result.files_indexed
|
|
230
|
+
entry.vector_count = result.total_vectors
|
|
231
|
+
return result
|
|
232
|
+
|
|
233
|
+
def index_all(self, force: bool = False) -> dict[str, IndexingResult]:
    """Index every registered repository, then save the manifest.

    Returns:
        Mapping of repository name to its indexing result.
    """
    outcomes: dict[str, IndexingResult] = {
        repo.name: self.index_repo(repo.name, force=force)
        for repo in self._manifest.repos
    }
    self.save()
    return outcomes
|
|
240
|
+
|
|
241
|
+
# --- search -------------------------------------------------------------
|
|
242
|
+
|
|
243
|
+
def search(
|
|
244
|
+
self,
|
|
245
|
+
query: str,
|
|
246
|
+
top_k: int = 10,
|
|
247
|
+
threshold: float = 0.3,
|
|
248
|
+
repos: list[str] | None = None,
|
|
249
|
+
mode: SearchMode = "semantic",
|
|
250
|
+
case_insensitive: bool = True,
|
|
251
|
+
) -> list[dict[str, Any]]:
|
|
252
|
+
"""Search across repositories and return merged results.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
query: Natural language search query, keywords, or regex.
|
|
256
|
+
top_k: Number of top results per repo.
|
|
257
|
+
threshold: Minimum similarity score (semantic/hybrid modes).
|
|
258
|
+
repos: Restrict search to these repo names. None = all.
|
|
259
|
+
mode: Search mode — ``"semantic"``, ``"keyword"``,
|
|
260
|
+
``"regex"``, or ``"hybrid"``.
|
|
261
|
+
case_insensitive: For regex mode, whether to ignore case.
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
List of result dicts sorted by score (descending), each with
|
|
265
|
+
an extra ``repo`` key identifying the source repository.
|
|
266
|
+
"""
|
|
267
|
+
targets = repos or [r.name for r in self._manifest.repos]
|
|
268
|
+
all_results: list[dict[str, Any]] = []
|
|
269
|
+
|
|
270
|
+
config = load_config(self._root)
|
|
271
|
+
model_name = config.embedding.model_name
|
|
272
|
+
|
|
273
|
+
# Pre-compute query embedding for semantic/hybrid modes
|
|
274
|
+
query_embedding = None
|
|
275
|
+
if mode in ("semantic", "hybrid"):
|
|
276
|
+
query_embedding = generate_embeddings([query], model_name=model_name)[0]
|
|
277
|
+
|
|
278
|
+
for repo_name in targets:
|
|
279
|
+
idx_dir = self.repo_index_dir(repo_name)
|
|
280
|
+
try:
|
|
281
|
+
store = VectorStore.load(idx_dir)
|
|
282
|
+
except FileNotFoundError:
|
|
283
|
+
logger.debug("No index for repo %s, skipping.", repo_name)
|
|
284
|
+
continue
|
|
285
|
+
|
|
286
|
+
raw: list[tuple[Any, float]] = []
|
|
287
|
+
|
|
288
|
+
if mode == "keyword":
|
|
289
|
+
from semantic_code_intelligence.search.keyword_search import keyword_search
|
|
290
|
+
hits = keyword_search(query, store, idx_dir, top_k=top_k)
|
|
291
|
+
raw = [(h, h.score) for h in hits]
|
|
292
|
+
elif mode == "regex":
|
|
293
|
+
from semantic_code_intelligence.search.keyword_search import regex_search
|
|
294
|
+
hits = regex_search(query, store, top_k=top_k, case_insensitive=case_insensitive)
|
|
295
|
+
raw = [(h, h.score) for h in hits]
|
|
296
|
+
elif mode == "hybrid":
|
|
297
|
+
from semantic_code_intelligence.search.hybrid_search import hybrid_search
|
|
298
|
+
hits = hybrid_search(query, store, idx_dir, model_name=model_name, top_k=top_k)
|
|
299
|
+
raw = [(h, h.score) for h in hits]
|
|
300
|
+
else:
|
|
301
|
+
# semantic (default)
|
|
302
|
+
assert query_embedding is not None
|
|
303
|
+
raw_store = store.search(query_embedding, top_k=top_k)
|
|
304
|
+
raw = [(meta, score) for meta, score in raw_store]
|
|
305
|
+
|
|
306
|
+
for item, score in raw:
|
|
307
|
+
if mode in ("semantic", "hybrid") and score < threshold:
|
|
308
|
+
continue
|
|
309
|
+
# Normalise to dict — item may be ChunkMetadata or a hit dataclass
|
|
310
|
+
file_path = getattr(item, "file_path", "")
|
|
311
|
+
all_results.append({
|
|
312
|
+
"repo": repo_name,
|
|
313
|
+
"file_path": file_path,
|
|
314
|
+
"start_line": getattr(item, "start_line", 0),
|
|
315
|
+
"end_line": getattr(item, "end_line", 0),
|
|
316
|
+
"language": getattr(item, "language", ""),
|
|
317
|
+
"content": getattr(item, "content", ""),
|
|
318
|
+
"score": round(float(score), 4),
|
|
319
|
+
"chunk_index": getattr(item, "chunk_index", 0),
|
|
320
|
+
})
|
|
321
|
+
|
|
322
|
+
all_results.sort(key=lambda r: r["score"], reverse=True)
|
|
323
|
+
return all_results[:top_k]
|
|
324
|
+
|
|
325
|
+
# --- info ---------------------------------------------------------------
|
|
326
|
+
|
|
327
|
+
def summary(self) -> dict[str, Any]:
    """Describe the workspace as a plain dictionary.

    Returns:
        A dict with four keys: ``root`` (the workspace root rendered
        with ``str``), ``repo_count`` (number of repos in the manifest),
        ``repos`` (each manifest repo converted via ``to_dict()``) and
        ``version`` (the manifest version).
    """
    root_text = str(self._root)
    registered = self._manifest.repos
    repo_total = len(registered)
    repo_dicts = [entry.to_dict() for entry in self._manifest.repos]
    overview = {
        "root": root_text,
        "repo_count": repo_total,
        "repos": repo_dicts,
        "version": self._manifest.version,
    }
    return overview
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
# ---------------------------------------------------------------------------
|
|
338
|
+
# Internal helpers
|
|
339
|
+
# ---------------------------------------------------------------------------
|
|
340
|
+
|
|
341
|
+
def _index_repo_into(
    repo_root: Path,
    index_dir: Path,
    config: AppConfig,
    force: bool = False,
) -> IndexingResult:
    """Scan *repo_root*, embed its files, and persist the index in *index_dir*.

    Args:
        repo_root: Repository directory handed to ``scan_repository``.
        index_dir: Directory the hash store and vector store are loaded
            from and saved to.
        config: Application config; ``config.index`` drives scanning and
            ``config.embedding`` supplies chunk size, chunk overlap and
            the embedding model name.
        force: When true, every scanned file is chunked again and a new
            ``VectorStore`` is built instead of loading the saved one.
            Otherwise only files the hash store reports as changed are
            processed, and they are added to the saved store when one
            can be loaded.

    Returns:
        An ``IndexingResult`` carrying the scanned / skipped / indexed
        file counts and the number of chunks created. ``total_vectors``
        is only filled in when at least one chunk was embedded.
    """
    from semantic_code_intelligence.indexing.chunker import chunk_file
    from semantic_code_intelligence.storage.hash_store import HashStore

    outcome = IndexingResult()
    discovered = scan_repository(repo_root, config.index)
    outcome.files_scanned = len(discovered)
    if not discovered:
        return outcome

    hashes = HashStore.load(index_dir)

    if force:
        pending = discovered
    else:
        # Keep only files whose content hash differs from the recorded one.
        pending = [
            entry
            for entry in discovered
            if hashes.has_changed(entry.relative_path, entry.content_hash)
        ]
        outcome.files_skipped += len(discovered) - len(pending)

    def _remember_hashes() -> None:
        # Record the current hash of every processed file and flush to disk.
        for entry in pending:
            hashes.set(entry.relative_path, entry.content_hash)
        hashes.save(index_dir)

    # Each chunk travels with the content hash of the file it came from.
    tagged = []
    for entry in pending:
        pieces = chunk_file(
            entry.path,
            chunk_size=config.embedding.chunk_size,
            chunk_overlap=config.embedding.chunk_overlap,
        )
        tagged.extend((piece, entry.content_hash) for piece in pieces)
        outcome.files_indexed += 1

    outcome.chunks_created = len(tagged)

    if not tagged:
        # Nothing to embed: still record the hashes so these files are
        # not re-examined as "changed" on the next run.
        _remember_hashes()
        return outcome

    embeddings = generate_embeddings(
        [piece.content for piece, _ in tagged],
        model_name=config.embedding.model_name,
    )
    dimension = embeddings.shape[1]

    if not force:
        try:
            store = VectorStore.load(index_dir)
        except FileNotFoundError:
            store = VectorStore(dimension)
    else:
        store = VectorStore(dimension)

    from semantic_code_intelligence.storage.vector_store import ChunkMetadata

    metadata = [
        ChunkMetadata(
            file_path=piece.file_path,
            start_line=piece.start_line,
            end_line=piece.end_line,
            chunk_index=piece.chunk_index,
            language=piece.language,
            content=piece.content,
            content_hash=file_hash,
        )
        for piece, file_hash in tagged
    ]

    # NOTE(review): in incremental mode the new chunks are appended to the
    # loaded store; nothing here removes vectors belonging to a changed
    # file's previous version — confirm VectorStore.add accounts for that.
    store.add(embeddings, metadata)
    store.save(index_dir)

    _remember_hashes()

    outcome.total_vectors = store.size
    return outcome
|