codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,226 @@
1
+ """Visualization generators — Mermaid-compatible text graphs.
2
+
3
+ All functions return plain strings (Mermaid diagram markup) so they can
4
+ be rendered by any compatible viewer, embedded in Markdown, or displayed
5
+ in the terminal as-is.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+
15
def render_call_graph(
    edges: list[dict[str, Any]],
    *,
    title: str = "Call Graph",
    direction: str = "LR",
) -> str:
    """Render a call graph as a Mermaid flowchart.

    Args:
        edges: List of dicts; only the ``caller`` and ``callee`` keys are
            read. Edges missing either value are skipped.
        title: Graph title, emitted in the Mermaid front-matter block.
        direction: Mermaid direction (``LR``, ``TD``, ``TB``, ``RL``);
            inserted verbatim after ``flowchart``.

    Returns:
        Mermaid flowchart source text. When no usable edge exists a single
        placeholder node is emitted instead.
    """
    lines: list[str] = ["---", f"title: {title}", "---", f"flowchart {direction}"]

    has_edges = False
    for edge in edges:
        raw_caller = edge.get("caller", "")
        raw_callee = edge.get("callee", "")
        if not raw_caller or not raw_callee:
            continue
        has_edges = True

        caller = _sanitize_id(raw_caller)
        callee = _sanitize_id(raw_callee)

        # Labels sit inside a double-quoted Mermaid string, so a literal
        # double quote must be written as the ``#quot;`` entity code.
        caller_label = _short_label(raw_caller).replace('"', "#quot;")
        callee_label = _short_label(raw_callee).replace('"', "#quot;")
        lines.append(f' {caller}["{caller_label}"] --> {callee}["{callee_label}"]')

    if not has_edges:
        lines.append(' empty["No call edges found"]')

    return "\n".join(lines)
52
+
53
+
54
def render_dependency_graph(
    deps: dict[str, Any],
    *,
    title: str = "Dependency Graph",
    direction: str = "TD",
) -> str:
    """Render file-level dependencies as a Mermaid flowchart.

    Args:
        deps: Dict with a ``dependencies`` key. The value is either a list
            of ``{source_file, import_text}`` dicts, or a dict whose list
            values hold such entries (per-file nesting); non-list values of
            the nested form are ignored.
        title: Graph title, emitted in the Mermaid front-matter block.
        direction: Mermaid direction; inserted verbatim after ``flowchart``.

    Returns:
        Mermaid flowchart source text. Entries missing either field are
        skipped, and edges that map to the same pair of node IDs are
        emitted once. A placeholder node is emitted when no edge remains.
    """
    lines: list[str] = ["---", f"title: {title}", "---", f"flowchart {direction}"]

    dep_list = deps.get("dependencies", [])
    if isinstance(dep_list, dict):
        # Some formats nest per-file; flatten the list values.
        flat: list[dict[str, str]] = []
        for entries in dep_list.values():
            if isinstance(entries, list):
                flat.extend(entries)
        dep_list = flat

    seen: set[str] = set()
    for entry in dep_list:
        src = entry.get("source_file", "")
        imp = entry.get("import_text", "")
        if not src or not imp:
            continue

        src_id = _sanitize_id(src)
        imp_id = _sanitize_id(imp)
        edge_key = f"{src_id}-->{imp_id}"
        if edge_key in seen:
            continue
        seen.add(edge_key)

        src_label = Path(src).name
        imp_label = imp.split()[-1] if " " in imp else imp
        # Truncate long import labels (before escaping, so the limit counts
        # visible characters rather than entity codes).
        if len(imp_label) > 40:
            imp_label = imp_label[:37] + "..."

        # Import text frequently carries its own quotes (``import "fmt"``,
        # ``#include "x.h"``); inside a quoted Mermaid label they must be
        # written as the ``#quot;`` entity code.
        src_label = src_label.replace('"', "#quot;")
        imp_label = imp_label.replace('"', "#quot;")
        lines.append(f' {src_id}["{src_label}"] --> {imp_id}["{imp_label}"]')

    if not seen:
        lines.append(' empty["No dependencies found"]')

    return "\n".join(lines)
108
+
109
+
110
def render_workspace_graph(
    repos: list[dict[str, Any]],
    *,
    title: str = "Workspace Repositories",
) -> str:
    """Draw the workspace and its repositories as a Mermaid flowchart.

    Args:
        repos: Repo entry dicts; ``name``, ``file_count`` and
            ``vector_count`` are read, with ``"unknown"`` / ``"?"`` used
            for missing values.
        title: Graph title, emitted in the Mermaid front-matter block.

    Returns:
        Mermaid flowchart text with one central workspace node linked to
        one node per repository, or to a single placeholder node when
        *repos* is empty.
    """
    root_id = "workspace"
    out: list[str] = [
        "---",
        f"title: {title}",
        "---",
        "flowchart TD",
        f' {root_id}(("Workspace"))',
    ]

    if repos:
        for entry in repos:
            repo_name = entry.get("name", "unknown")
            node_id = _sanitize_id(f"repo_{repo_name}")
            files = entry.get("file_count", "?")
            vectors = entry.get("vector_count", "?")
            # The label carries a literal backslash-n sequence, not a newline.
            caption = f"{repo_name}\\n{files} files, {vectors} vectors"
            out.append(f' {root_id} --> {node_id}["{caption}"]')
    else:
        out.append(f' {root_id} --> none["No repositories"]')

    return "\n".join(out)
143
+
144
+
145
def render_symbol_map(
    symbols: list[dict[str, Any]],
    *,
    title: str = "Symbol Map",
    file_path: str = "",
) -> str:
    """Render a file's symbols as a Mermaid class diagram.

    Args:
        symbols: List of symbol dicts with ``name``, ``kind``, ``parent``.
            ``class`` symbols become classes, ``method`` symbols with a
            parent become members of that parent, and ``function`` symbols
            are grouped under a synthetic ``Functions`` class. Every other
            kind (including imports) and any symbol without a name is
            ignored.
        title: Graph title, emitted in the Mermaid front-matter block.
        file_path: Source file being mapped. Accepted for interface
            compatibility; it is not used when building the diagram.

    Returns:
        Mermaid class diagram source text; an ``Empty`` placeholder class
        is emitted when nothing could be drawn.
    """
    lines: list[str] = ["---", f"title: {title}", "---", "classDiagram"]

    classes: dict[str, list[str]] = {}  # class_name -> member list
    standalone_functions: list[str] = []

    for sym in symbols:
        name = sym.get("name", "")
        if not name:
            # A nameless symbol would render as ``+()`` or as a bogus
            # ``Unknown`` class, so leave it out of the diagram.
            continue
        kind = sym.get("kind", "")
        parent = sym.get("parent", "")

        if kind == "class":
            classes.setdefault(name, [])
        elif kind == "method" and parent:
            classes.setdefault(parent, []).append(f"+{name}()")
        elif kind == "function":
            standalone_functions.append(name)

    for cls_name, members in classes.items():
        lines.append(f" class {_sanitize_class_id(cls_name)} {{")
        lines.extend(f" {member}" for member in members)
        lines.append(" }")

    if standalone_functions:
        lines.append(" class Functions {")
        lines.extend(f" +{fn}()" for fn in standalone_functions)
        lines.append(" }")

    if not classes and not standalone_functions:
        lines.append(" class Empty {")
        lines.append(" No symbols found")
        lines.append(" }")

    return "\n".join(lines)
199
+
200
+
201
+ # ------------------------------------------------------------------
202
+ # Internal helpers
203
+ # ------------------------------------------------------------------
204
+
205
+ def _sanitize_id(text: str) -> str:
206
+ """Convert arbitrary text into a Mermaid-safe node ID."""
207
+ # Take just the filename/symbol portion
208
+ if ":" in text:
209
+ text = text.rsplit(":", 1)[-1]
210
+ text = Path(text).stem if "/" in text or "\\" in text else text
211
+ # Replace non-alphanumeric chars with underscores
212
+ return re.sub(r"[^a-zA-Z0-9_]", "_", text).strip("_")[:60] or "node"
213
+
214
+
215
+ def _sanitize_class_id(text: str) -> str:
216
+ """Convert a class name to a Mermaid-safe class diagram ID."""
217
+ return re.sub(r"[^a-zA-Z0-9_]", "_", text).strip("_")[:60] or "Unknown"
218
+
219
+
220
+ def _short_label(text: str) -> str:
221
+ """Extract a short display label from a caller/callee key."""
222
+ if ":" in text:
223
+ parts = text.rsplit(":", 1)
224
+ fname = Path(parts[0]).name
225
+ return f"{fname}:{parts[1]}"
226
+ return text
@@ -0,0 +1,427 @@
1
+ """Multi-repository workspace — manage, index, and search across multiple repos.
2
+
3
+ A *workspace* is a collection of repositories stored in a lightweight JSON
4
+ manifest at ``<root>/.codexa/workspace.json``. Each repository has its own
5
+ vector index under ``.codexa/repos/<repo_name>/``, enabling incremental
6
+ per-repo indexing while supporting merged cross-repo search.
7
+
8
+ Typical usage::
9
+
10
+ ws = Workspace.load_or_create(Path("/my/workspace"))
11
+ ws.add_repo("backend", Path("/my/workspace/backend"))
12
+ ws.add_repo("frontend", Path("/my/workspace/frontend"))
13
+ ws.save()
14
+
15
+ ws.index_all() # index every repo
16
+ results = ws.search("authentication") # merged results
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import time
23
+ from dataclasses import dataclass, field
24
+ from pathlib import Path
25
+ from typing import Any
26
+
27
+ from semantic_code_intelligence.config.settings import (
28
+ AppConfig,
29
+ IndexConfig,
30
+ load_config,
31
+ )
32
+ from semantic_code_intelligence.indexing.scanner import scan_repository
33
+ from semantic_code_intelligence.services.indexing_service import IndexingResult, run_indexing
34
+ from semantic_code_intelligence.services.search_service import SearchMode, SearchResult
35
+ from semantic_code_intelligence.storage.vector_store import VectorStore
36
+ from semantic_code_intelligence.embeddings.generator import generate_embeddings
37
+ from semantic_code_intelligence.utils.logging import get_logger
38
+
39
# Module-level logger obtained from the project's logging helper.
logger = get_logger("workspace")

# File name of the workspace manifest; it lives inside the workspace's
# ``.codexa`` directory.
WORKSPACE_FILE = "workspace.json"
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Data models
46
+ # ---------------------------------------------------------------------------
47
+
48
@dataclass
class RepoEntry:
    """One repository registered in a workspace, plus its indexing stats."""

    name: str
    path: str  # absolute path
    last_indexed: float = 0.0  # epoch timestamp
    file_count: int = 0
    vector_count: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Return the entry as a plain dictionary, keyed by field name."""
        keys = ("name", "path", "last_indexed", "file_count", "vector_count")
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "RepoEntry":
        """Build a :class:`RepoEntry` from a dictionary.

        ``name`` and ``path`` are required (``KeyError`` if absent); the
        remaining fields fall back to their zero defaults.
        """
        repo_name = data["name"]
        repo_path = data["path"]
        stats = {
            "last_indexed": data.get("last_indexed", 0.0),
            "file_count": data.get("file_count", 0),
            "vector_count": data.get("vector_count", 0),
        }
        return cls(name=repo_name, path=repo_path, **stats)
78
+
79
+
80
@dataclass
class WorkspaceManifest:
    """Workspace manifest: a format version plus the registered repositories."""

    version: str = "1.0.0"
    repos: list[RepoEntry] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the manifest as a plain dictionary, repos included."""
        serialised_repos = [entry.to_dict() for entry in self.repos]
        return {"version": self.version, "repos": serialised_repos}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "WorkspaceManifest":
        """Build a :class:`WorkspaceManifest` from a dictionary.

        A missing ``version`` defaults to ``"1.0.0"`` and a missing
        ``repos`` list to no repositories.
        """
        manifest_version = data.get("version", "1.0.0")
        entries = list(map(RepoEntry.from_dict, data.get("repos", [])))
        return cls(version=manifest_version, repos=entries)
101
+
102
+
103
+ # ---------------------------------------------------------------------------
104
+ # Workspace
105
+ # ---------------------------------------------------------------------------
106
+
107
class Workspace:
    """Multi-repository workspace manager.

    Keeps a manifest of registered repos, provides per-repo indexing,
    and supports cross-repo merged search. Changes to the manifest are
    held in memory until :meth:`save` is called.
    """

    def __init__(self, root: Path, manifest: WorkspaceManifest | None = None) -> None:
        """Create a workspace rooted at *root* with an optional manifest.

        Args:
            root: Workspace root directory; stored resolved.
            manifest: Existing manifest, or ``None`` for an empty one.
        """
        self._root = root.resolve()
        self._manifest = manifest or WorkspaceManifest()

    # --- persistence -------------------------------------------------------

    @property
    def root(self) -> Path:
        """Absolute path to the workspace root directory."""
        return self._root

    @property
    def config_dir(self) -> Path:
        """Path to the ``.codexa`` configuration directory."""
        return self._root / ".codexa"

    @property
    def repos_dir(self) -> Path:
        """Path to the per-repo index storage directory."""
        return self.config_dir / "repos"

    @property
    def manifest_path(self) -> Path:
        """Path to the workspace manifest JSON file."""
        return self.config_dir / WORKSPACE_FILE

    @property
    def repos(self) -> list[RepoEntry]:
        """Snapshot of all registered repositories.

        The list is a copy; the entries themselves are shared with the
        manifest.
        """
        return list(self._manifest.repos)

    def save(self) -> Path:
        """Persist the workspace manifest to disk.

        Creates the configuration and repo-index directories if needed.

        Returns:
            Path of the written manifest file.
        """
        self.config_dir.mkdir(parents=True, exist_ok=True)
        self.repos_dir.mkdir(parents=True, exist_ok=True)
        text = json.dumps(self._manifest.to_dict(), indent=2)
        self.manifest_path.write_text(text, encoding="utf-8")
        return self.manifest_path

    @classmethod
    def load(cls, root: Path) -> "Workspace":
        """Load an existing workspace.

        Raises:
            FileNotFoundError: If no manifest file exists under *root*.
        """
        root = root.resolve()
        path = root / ".codexa" / WORKSPACE_FILE
        if not path.exists():
            raise FileNotFoundError(f"No workspace found at {root}")
        data = json.loads(path.read_text(encoding="utf-8"))
        manifest = WorkspaceManifest.from_dict(data)
        return cls(root, manifest)

    @classmethod
    def load_or_create(cls, root: Path) -> "Workspace":
        """Load an existing workspace, or create and save a new empty one."""
        try:
            return cls.load(root)
        except FileNotFoundError:
            ws = cls(root)
            ws.save()
            return ws

    # --- repo management ---------------------------------------------------

    def get_repo(self, name: str) -> RepoEntry | None:
        """Look up a repository by name, or return ``None``."""
        for r in self._manifest.repos:
            if r.name == name:
                return r
        return None

    def add_repo(self, name: str, path: Path) -> RepoEntry:
        """Register a repository in the workspace.

        The manifest is changed in memory only; call :meth:`save` to
        persist it.

        Raises:
            ValueError: If *name* is already registered.
            FileNotFoundError: If *path* does not resolve to a directory.
        """
        if self.get_repo(name) is not None:
            raise ValueError(f"Repository '{name}' already registered")

        resolved = path.resolve()
        if not resolved.is_dir():
            raise FileNotFoundError(f"Directory not found: {resolved}")

        entry = RepoEntry(name=name, path=str(resolved))
        self._manifest.repos.append(entry)
        return entry

    def remove_repo(self, name: str) -> bool:
        """Unregister a repository. Returns True if found and removed.

        Only the manifest entry is dropped; nothing on disk is touched here.
        """
        for i, r in enumerate(self._manifest.repos):
            if r.name == name:
                self._manifest.repos.pop(i)
                return True
        return False

    def repo_index_dir(self, name: str) -> Path:
        """Return the per-repo index directory."""
        return self.repos_dir / name

    # --- indexing -----------------------------------------------------------

    def index_repo(self, name: str, force: bool = False) -> IndexingResult:
        """Index a single repository and refresh its manifest entry.

        The updated entry is not written to disk here; call :meth:`save`.

        Raises:
            KeyError: If *name* is not registered.
        """
        entry = self.get_repo(name)
        if entry is None:
            raise KeyError(f"Repository '{name}' not registered")

        repo_root = Path(entry.path)
        index_dir = self.repo_index_dir(name)
        index_dir.mkdir(parents=True, exist_ok=True)

        config = load_config(repo_root)

        # Run the indexing pipeline but store in workspace-local index dir
        result = _index_repo_into(repo_root, index_dir, config, force=force)

        entry.last_indexed = time.time()
        # NOTE(review): files_indexed counts the files processed in this
        # run, so an incremental run records only the changed files here.
        # Confirm whether files_scanned was intended.
        entry.file_count = result.files_indexed
        entry.vector_count = result.total_vectors
        return result

    def index_all(self, force: bool = False) -> dict[str, IndexingResult]:
        """Index all registered repositories, then save the manifest.

        Returns:
            Mapping of repo name to its indexing result.
        """
        results: dict[str, IndexingResult] = {}
        for entry in self._manifest.repos:
            results[entry.name] = self.index_repo(entry.name, force=force)
        self.save()
        return results

    # --- search -------------------------------------------------------------

    def search(
        self,
        query: str,
        top_k: int = 10,
        threshold: float = 0.3,
        repos: list[str] | None = None,
        mode: SearchMode = "semantic",
        case_insensitive: bool = True,
    ) -> list[dict[str, Any]]:
        """Search across repositories and return merged results.

        Args:
            query: Natural language search query, keywords, or regex.
            top_k: Maximum hits requested from each repo, and also the
                maximum length of the merged result list.
            threshold: Minimum similarity score (semantic/hybrid modes).
            repos: Restrict search to these repo names. None (or an empty
                list) = all registered repos.
            mode: Search mode — ``"semantic"``, ``"keyword"``,
                ``"regex"``, or ``"hybrid"``. Any other value is handled
                as a semantic search.
            case_insensitive: For regex mode, whether to ignore case.

        Returns:
            List of result dicts sorted by score (descending), each with
            an extra ``repo`` key identifying the source repository.
            Repos without a loadable index are skipped.
        """
        targets = repos or [r.name for r in self._manifest.repos]
        all_results: list[dict[str, Any]] = []

        # NOTE(review): the embedding model comes from the workspace-root
        # config, while index_repo() loads each repo's own config. Confirm
        # the two always name the same model.
        config = load_config(self._root)
        model_name = config.embedding.model_name

        # Embedded lazily, once, and only when a semantic lookup actually
        # runs; hybrid_search receives the raw query and model name instead.
        query_embedding = None
        apply_threshold = mode in ("semantic", "hybrid")

        for repo_name in targets:
            idx_dir = self.repo_index_dir(repo_name)
            try:
                store = VectorStore.load(idx_dir)
            except FileNotFoundError:
                logger.debug("No index for repo %s, skipping.", repo_name)
                continue

            raw: list[tuple[Any, float]] = []

            if mode == "keyword":
                from semantic_code_intelligence.search.keyword_search import keyword_search
                hits = keyword_search(query, store, idx_dir, top_k=top_k)
                raw = [(h, h.score) for h in hits]
            elif mode == "regex":
                from semantic_code_intelligence.search.keyword_search import regex_search
                hits = regex_search(query, store, top_k=top_k, case_insensitive=case_insensitive)
                raw = [(h, h.score) for h in hits]
            elif mode == "hybrid":
                from semantic_code_intelligence.search.hybrid_search import hybrid_search
                hits = hybrid_search(query, store, idx_dir, model_name=model_name, top_k=top_k)
                raw = [(h, h.score) for h in hits]
            else:
                # semantic (default)
                if query_embedding is None:
                    query_embedding = generate_embeddings([query], model_name=model_name)[0]
                raw_store = store.search(query_embedding, top_k=top_k)
                raw = [(meta, score) for meta, score in raw_store]

            for item, score in raw:
                if apply_threshold and score < threshold:
                    continue
                # Normalise to dict — item may be ChunkMetadata or a hit dataclass
                file_path = getattr(item, "file_path", "")
                all_results.append({
                    "repo": repo_name,
                    "file_path": file_path,
                    "start_line": getattr(item, "start_line", 0),
                    "end_line": getattr(item, "end_line", 0),
                    "language": getattr(item, "language", ""),
                    "content": getattr(item, "content", ""),
                    "score": round(float(score), 4),
                    "chunk_index": getattr(item, "chunk_index", 0),
                })

        all_results.sort(key=lambda r: r["score"], reverse=True)
        return all_results[:top_k]

    # --- info ---------------------------------------------------------------

    def summary(self) -> dict[str, Any]:
        """Return a summary dict of the workspace."""
        return {
            "root": str(self._root),
            "repo_count": len(self._manifest.repos),
            "repos": [r.to_dict() for r in self._manifest.repos],
            "version": self._manifest.version,
        }
335
+
336
+
337
+ # ---------------------------------------------------------------------------
338
+ # Internal helpers
339
+ # ---------------------------------------------------------------------------
340
+
341
def _index_repo_into(
    repo_root: Path,
    index_dir: Path,
    config: AppConfig,
    force: bool = False,
) -> IndexingResult:
    """Run the indexing pipeline, storing artefacts into *index_dir*.

    Args:
        repo_root: Repository directory to scan.
        index_dir: Directory holding this repo's hash store and vector store.
        config: Configuration supplying scan and embedding settings.
        force: When true, re-index every scanned file into a fresh vector
            store; otherwise only files whose content hash changed are
            processed and appended to the existing store (if one loads).

    Returns:
        Counters for the run. ``total_vectors`` is the size of the vector
        store after the run, including incremental runs that add nothing.
    """
    from semantic_code_intelligence.indexing.chunker import chunk_file
    from semantic_code_intelligence.storage.hash_store import HashStore

    result = IndexingResult()
    scanned = scan_repository(repo_root, config.index)
    result.files_scanned = len(scanned)

    if not scanned:
        return result

    hash_store = HashStore.load(index_dir)
    to_index = []

    if force:
        to_index = scanned
    else:
        for sf in scanned:
            if hash_store.has_changed(sf.relative_path, sf.content_hash):
                to_index.append(sf)
            else:
                result.files_skipped += 1

    all_chunks = []
    chunk_hashes: list[str] = []  # parallel to all_chunks: hash of the owning file

    for sf in to_index:
        chunks = chunk_file(
            sf.path,
            chunk_size=config.embedding.chunk_size,
            chunk_overlap=config.embedding.chunk_overlap,
        )
        for c in chunks:
            all_chunks.append(c)
            chunk_hashes.append(sf.content_hash)
        result.files_indexed += 1

    result.chunks_created = len(all_chunks)

    if not all_chunks:
        for sf in to_index:
            hash_store.set(sf.relative_path, sf.content_hash)
        hash_store.save(index_dir)
        if not force:
            # Nothing new was embedded, but an earlier run may have built an
            # index. Report its size so callers do not record zero vectors
            # after a no-op incremental run.
            try:
                result.total_vectors = VectorStore.load(index_dir).size
            except FileNotFoundError:
                # No index on disk yet; the default count stands.
                pass
        return result

    texts = [c.content for c in all_chunks]
    embeddings = generate_embeddings(texts, model_name=config.embedding.model_name)
    dimension = embeddings.shape[1]

    if force:
        store = VectorStore(dimension)
    else:
        try:
            store = VectorStore.load(index_dir)
        except FileNotFoundError:
            store = VectorStore(dimension)

    from semantic_code_intelligence.storage.vector_store import ChunkMetadata

    # NOTE(review): on an incremental run, vectors previously stored for a
    # changed file are not removed before the new ones are added. Confirm
    # whether stale entries need pruning.
    meta_list = [
        ChunkMetadata(
            file_path=c.file_path,
            start_line=c.start_line,
            end_line=c.end_line,
            chunk_index=c.chunk_index,
            language=c.language,
            content=c.content,
            content_hash=file_hash,
        )
        for c, file_hash in zip(all_chunks, chunk_hashes)
    ]

    store.add(embeddings, meta_list)
    store.save(index_dir)

    # Record hashes only after the vectors are saved, so a failed run is
    # retried next time.
    for sf in to_index:
        hash_store.set(sf.relative_path, sf.content_hash)
    hash_store.save(index_dir)

    result.total_vectors = store.size
    return result