code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1288 @@
1
+ """CodeGraphWiki CLI — direct command interface for Claude Code custom commands.
2
+
3
+ Provides the same operations as the MCP server, but as a synchronous CLI
4
+ with stdout output. Progress messages are printed inline so they appear
5
+ directly in the Claude Code conversation.
6
+
7
+ Usage:
8
+ python3 -m code_graph_builder.commands_cli <command> [args...]
9
+
10
+ Commands:
11
+ init Initialize repository (graph → api-docs → embeddings → wiki)
12
+ graph-build Build knowledge graph only (step 1)
13
+ api-doc-gen Generate API docs from existing graph (step 2)
14
+ embed-gen Rebuild embeddings only (step 3, reuses graph)
15
+ wiki-gen Regenerate wiki only (step 4, reuses graph + embeddings)
16
+ list-repos List all indexed repositories in the workspace
17
+ switch-repo Switch active repository to a previously indexed one
18
+ info Show active repository info and graph statistics
19
+ query Translate natural-language question to Cypher and execute
20
+ snippet Retrieve source code by qualified name
21
+ search Semantic vector search
22
+ list-wiki List generated wiki pages
23
+ get-wiki Read a wiki page
24
+ locate Locate function via Tree-sitter AST
25
+ list-api List public API interfaces from graph
26
+ api-docs Browse hierarchical API documentation (L1/L2)
27
+ api-doc Read detailed API doc for a function (L3)
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import argparse
33
+ import json
34
+ import os
35
+ import pickle
36
+ import sys
37
+ from pathlib import Path
38
+
39
+ from dotenv import load_dotenv
40
+
41
+ load_dotenv()
42
+
43
+ from .settings import load_settings # noqa: E402
44
+
45
+ load_settings()
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Workspace helper
50
+ # ---------------------------------------------------------------------------
51
+
52
class Workspace:
    """Manages the CodeGraphWiki workspace directory.

    The workspace root contains one artifact directory per indexed
    repository plus an ``active.txt`` marker naming the selected one.
    """

    def __init__(self) -> None:
        # CGB_WORKSPACE overrides the default ~/.code-graph-builder location.
        configured = os.environ.get("CGB_WORKSPACE", Path.home() / ".code-graph-builder")
        self.root = Path(configured).expanduser().resolve()
        self.root.mkdir(parents=True, exist_ok=True)

    def active_artifact_dir(self) -> Path | None:
        """Return the active artifact directory, or None if unset/missing."""
        marker = self.root / "active.txt"
        if not marker.exists():
            return None
        selected = self.root / marker.read_text(encoding="utf-8").strip()
        return selected if selected.exists() else None

    def load_meta(self) -> dict | None:
        """Load meta.json for the active repository; None when unavailable."""
        active = self.active_artifact_dir()
        if active is None:
            return None
        manifest = active / "meta.json"
        if not manifest.exists():
            return None
        return json.loads(manifest.read_text(encoding="utf-8"))

    def set_active(self, artifact_dir: Path) -> None:
        """Record *artifact_dir* (by directory name) as the active repository."""
        marker = self.root / "active.txt"
        marker.write_text(artifact_dir.name, encoding="utf-8")

    def require_active(self) -> Path:
        """Return the active artifact dir, aborting the process when none is set."""
        active = self.active_artifact_dir()
        if active is None:
            _die("No repository indexed yet. Run: /init-repo <path>")
        return active  # type: ignore[return-value]
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Output helpers
90
+ # ---------------------------------------------------------------------------
91
+
92
+ def _progress(msg: str) -> None:
93
+ """Print a progress line that will show in the conversation."""
94
+ print(msg, flush=True)
95
+
96
+
97
+ def _result_json(data: dict | list) -> None:
98
+ """Print JSON result."""
99
+ print(json.dumps(data, ensure_ascii=False, indent=2, default=str))
100
+
101
+
102
+ def _die(msg: str) -> None:
103
+ print(f"ERROR: {msg}", file=sys.stderr)
104
+ sys.exit(1)
105
+
106
+
107
+ # ---------------------------------------------------------------------------
108
+ # Service loaders (lazy, per-invocation)
109
+ # ---------------------------------------------------------------------------
110
+
111
def _open_ingestor(artifact_dir: Path):
    """Open the Kuzu graph database under *artifact_dir*.

    The handle's context is entered here; the caller owns the lifetime and
    must call ``__exit__(None, None, None)`` when done.  Aborts the process
    when no graph database exists yet.
    """
    from .services.kuzu_service import KuzuIngestor

    graph_db = artifact_dir / "graph.db"
    if not graph_db.exists():
        _die(f"Graph database not found: {graph_db}")
    handle = KuzuIngestor(graph_db)
    handle.__enter__()  # caller is responsible for the matching __exit__()
    return handle
120
+
121
+
122
def _load_vector_store(vectors_path: Path):
    """Deserialize the persisted vector index, or return None when unusable.

    Supports two on-disk formats: a dict wrapping a ready-made
    ``MemoryVectorStore``, and a legacy bare list of ``VectorRecord``.
    """
    from .embeddings.vector_store import MemoryVectorStore, VectorRecord

    if not vectors_path.exists():
        return None

    # NOTE(review): the pickle is a locally produced artifact, not untrusted
    # input — loading it is acceptable here.
    with open(vectors_path, "rb") as fh:
        payload = pickle.load(fh)

    # Current format: dict carrying a ready-made store.
    if isinstance(payload, dict) and "vector_store" in payload:
        candidate = payload["vector_store"]
        if isinstance(candidate, MemoryVectorStore):
            return candidate

    # Legacy format: a plain list of VectorRecord — rebuild a store from it.
    if isinstance(payload, list) and payload:
        head = payload[0]
        if isinstance(head, VectorRecord):
            rebuilt = MemoryVectorStore(dimension=len(head.embedding))
            rebuilt.store_embeddings_batch(payload)
            return rebuilt

    return None
145
+
146
+
147
+ # ---------------------------------------------------------------------------
148
+ # Subcommand: init
149
+ # ---------------------------------------------------------------------------
150
+
151
def cmd_init(args: argparse.Namespace, ws: Workspace) -> None:
    """Orchestrate: graph-build → api-doc-gen → embed-gen → wiki-gen.

    Runs the full indexing pipeline for ``args.repo_path``.  ``--no-embed``
    skips embeddings (and therefore the wiki, which depends on them);
    ``--no-wiki`` skips only the wiki step.  On success the repository
    becomes the active one and a JSON summary is printed; any pipeline
    failure exits the process with status 1.
    """
    # Imported lazily so unrelated subcommands don't pay the import cost.
    from .examples.generate_wiki import MAX_PAGES_COMPREHENSIVE, MAX_PAGES_CONCISE
    from .mcp.pipeline import (
        artifact_dir_for,
        build_graph,
        build_vector_index,
        generate_api_docs_step,
        run_wiki_generation,
        save_meta,
    )

    repo_path = Path(args.repo_path).resolve()
    if not repo_path.exists():
        _die(f"Repository path does not exist: {repo_path}")

    rebuild = args.rebuild
    wiki_mode = args.mode
    backend = args.backend
    skip_embed = args.no_embed
    skip_wiki = args.no_wiki or skip_embed  # wiki requires embeddings
    comprehensive = wiki_mode != "concise"
    max_pages = MAX_PAGES_COMPREHENSIVE if comprehensive else MAX_PAGES_CONCISE

    # All artifacts for this repo live under one workspace subdirectory.
    artifact_dir = artifact_dir_for(ws.root, repo_path)
    artifact_dir.mkdir(parents=True, exist_ok=True)
    db_path = artifact_dir / "graph.db"
    vectors_path = artifact_dir / "vectors.pkl"
    wiki_dir = artifact_dir / "wiki"

    # Step count shown in progress output depends on which steps will run.
    total_steps = 4
    if skip_embed:
        total_steps = 2  # graph + api_docs only
    elif skip_wiki:
        total_steps = 3  # graph + api_docs + embeddings

    def step_progress(step: int, msg: str, pct: float = 0.0) -> None:
        # Prefix a percentage only when the callback reported one (> 0).
        prefix = f"[{pct:.0f}%] " if pct > 0 else ""
        _progress(f"{prefix}[Step {step}/{total_steps}] {msg}")

    step_names = "graph → api-docs"
    if not skip_embed:
        step_names += " → embeddings"
    if not skip_wiki:
        step_names += " → wiki"

    _progress(f"=== Initializing: {repo_path.name} ({step_names}) ===")
    _progress(f" Workspace: {artifact_dir}")
    _progress(f" Mode: {wiki_mode} | Backend: {backend} | Rebuild: {rebuild}")
    _progress("")

    try:
        # Step 1: build graph
        builder = build_graph(
            repo_path, db_path, rebuild,
            progress_cb=lambda msg, pct: step_progress(1, msg, pct),
            backend=backend,
        )

        # Step 2: generate API docs
        generate_api_docs_step(
            builder, artifact_dir, rebuild,
            progress_cb=lambda msg, pct: step_progress(2, msg, pct),
        )

        # Defaults reported when the wiki step is skipped.
        page_count = 0
        index_path = wiki_dir / "index.md"
        skipped = []

        if not skip_embed:
            # Step 3: build embeddings
            vector_store, embedder, func_map = build_vector_index(
                builder, repo_path, vectors_path, rebuild,
                progress_cb=lambda msg, pct: step_progress(3, msg, pct),
            )

            if not skip_wiki:
                # Step 4: generate wiki (reuses graph + embedding artifacts)
                index_path, page_count = run_wiki_generation(
                    builder=builder,
                    repo_path=repo_path,
                    output_dir=wiki_dir,
                    max_pages=max_pages,
                    rebuild=rebuild,
                    comprehensive=comprehensive,
                    vector_store=vector_store,
                    embedder=embedder,
                    func_map=func_map,
                    progress_cb=lambda msg, pct: step_progress(4, msg, pct),
                )
            else:
                skipped.append("wiki")
                step_progress(4, "Wiki generation skipped (--no-wiki).")
        else:
            skipped.extend(["embed", "wiki"])
            step_progress(3, "Embedding generation skipped (--no-embed).")
            step_progress(4, "Wiki generation skipped (requires embeddings).")

        # Persist metadata and mark this repository as the active one.
        save_meta(artifact_dir, repo_path, page_count)
        ws.set_active(artifact_dir)

        _progress("")
        _progress("=== Done ===")
        _result_json({
            "status": "success",
            "repo_path": str(repo_path),
            "artifact_dir": str(artifact_dir),
            "wiki_index": str(index_path),
            "wiki_pages": page_count,
            "skipped": skipped,
        })

    except Exception as exc:
        # CLI boundary: any step failure is reported and mapped to exit code 1.
        _progress(f"\nERROR: Pipeline failed: {exc}")
        sys.exit(1)
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # Subcommand: graph-build
270
+ # ---------------------------------------------------------------------------
271
+
272
def cmd_graph_build(args: argparse.Namespace, ws: Workspace) -> None:
    """Build the code knowledge graph only (step 1)."""
    from .mcp.pipeline import artifact_dir_for, build_graph, save_meta

    source_root = Path(args.repo_path).resolve()
    if not source_root.exists():
        _die(f"Repository path does not exist: {source_root}")

    artifact_dir = artifact_dir_for(ws.root, source_root)
    artifact_dir.mkdir(parents=True, exist_ok=True)

    def progress_cb(msg: str, pct: float = 0.0) -> None:
        # Prefix a percentage only when one was reported.
        head = f"[{pct:.0f}%] " if pct > 0 else ""
        _progress(f"{head}{msg}")

    _progress(f"=== Graph Build: {source_root.name} ===")
    _progress(f" Workspace: {artifact_dir}")
    _progress(f" Backend: {args.backend} | Rebuild: {args.rebuild}")
    _progress("")

    try:
        builder = build_graph(
            source_root, artifact_dir / "graph.db", args.rebuild, progress_cb,
            backend=args.backend,
        )
        stats = builder.get_statistics()
        # A graph-only build has no wiki pages yet, hence page count 0.
        save_meta(artifact_dir, source_root, 0)
        ws.set_active(artifact_dir)

        _progress("")
        _progress("=== Done ===")
        summary = {
            "status": "success",
            "repo_path": str(source_root),
            "artifact_dir": str(artifact_dir),
            "node_count": stats.get("node_count", 0),
            "relationship_count": stats.get("relationship_count", 0),
        }
        _result_json(summary)

    except Exception as exc:
        _progress(f"\nERROR: Graph build failed: {exc}")
        sys.exit(1)
316
+
317
+
318
+ # ---------------------------------------------------------------------------
319
+ # Subcommand: api-doc-gen
320
+ # ---------------------------------------------------------------------------
321
+
322
def cmd_api_doc_gen(args: argparse.Namespace, ws: Workspace) -> None:
    """Generate API docs from the existing knowledge graph (step 2).

    Requires a prior graph build; reads repository metadata from the
    workspace and writes docs into the active artifact directory.  Exits
    with status 1 on failure.
    """
    # Fixed: the previous version also imported `save_meta` without using it.
    from .mcp.pipeline import generate_api_docs_step

    artifact_dir = ws.require_active()
    meta = ws.load_meta()
    if meta is None:
        _die("No metadata found. Run /graph-build or /repo-init first.")

    repo_path = Path(meta["repo_path"]).resolve()
    db_path = artifact_dir / "graph.db"
    if not db_path.exists():
        _die("Graph database not found. Run /graph-build or /repo-init first.")

    rebuild = args.rebuild

    def progress_cb(msg: str, pct: float = 0.0) -> None:
        # Prefix a percentage only when one was reported.
        prefix = f"[{pct:.0f}%] " if pct > 0 else ""
        _progress(f"{prefix}{msg}")

    _progress(f"=== API Doc Generation: {repo_path.name} ===")
    _progress(f" Rebuild: {rebuild}")
    _progress("")

    try:
        ingestor = _open_ingestor(artifact_dir)
        try:
            result = generate_api_docs_step(ingestor, artifact_dir, rebuild, progress_cb)
        finally:
            # Fixed: release the DB handle even when generation raises
            # (previously the handle leaked on the error path).
            ingestor.__exit__(None, None, None)

        _progress("")
        _progress("=== Done ===")
        _result_json({
            "status": result.get("status", "success"),
            "repo_path": str(repo_path),
            "artifact_dir": str(artifact_dir),
            **{k: v for k, v in result.items() if k != "status"},
        })

    except Exception as exc:
        _progress(f"\nERROR: API doc generation failed: {exc}")
        sys.exit(1)
365
+
366
+
367
+ # ---------------------------------------------------------------------------
368
+ # Subcommand: list-repos
369
+ # ---------------------------------------------------------------------------
370
+
371
def cmd_list_repos(_args: argparse.Namespace, ws: Workspace) -> None:
    """List all indexed repositories in the workspace."""
    marker = ws.root / "active.txt"
    active_name = marker.read_text(encoding="utf-8").strip() if marker.exists() else ""

    entries = []
    for candidate in sorted(ws.root.iterdir()):
        manifest = candidate / "meta.json"
        # Only directories that carry a readable meta.json count as repos.
        if not candidate.is_dir() or not manifest.exists():
            continue
        try:
            meta = json.loads(manifest.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            continue  # skip corrupt or unreadable metadata

        entries.append({
            "artifact_dir": candidate.name,
            "repo_name": meta.get("repo_name", candidate.name),
            "repo_path": meta.get("repo_path", "unknown"),
            "indexed_at": meta.get("indexed_at"),
            "wiki_page_count": meta.get("wiki_page_count", 0),
            "steps": meta.get("steps", {}),
            "active": candidate.name == active_name,
        })

    _result_json({
        "workspace": str(ws.root),
        "repository_count": len(entries),
        "repositories": entries,
    })
405
+
406
+
407
+ # ---------------------------------------------------------------------------
408
+ # Subcommand: switch-repo
409
+ # ---------------------------------------------------------------------------
410
+
411
def cmd_switch_repo(args: argparse.Namespace, ws: Workspace) -> None:
    """Switch the active repository to a previously indexed one.

    Resolution order: exact artifact-directory name first, then the
    ``repo_name`` recorded in each artifact's meta.json.
    """
    repo_name = args.repo_name

    # Exact match on the artifact directory name — direct lookup instead of
    # scanning every workspace child (the name check guards against a
    # repo_name containing path separators).
    target = None
    candidate = ws.root / repo_name
    if candidate.is_dir() and candidate.name == repo_name:
        target = candidate

    # Fallback: match by repo_name recorded in meta.json.
    if target is None:
        for child in sorted(ws.root.iterdir()):
            if not child.is_dir():
                continue
            meta_file = child / "meta.json"
            if not meta_file.exists():
                continue
            try:
                meta = json.loads(meta_file.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, OSError):
                continue  # corrupt metadata: never a match
            if meta.get("repo_name") == repo_name:
                target = child
                break

    if target is None or not (target / "meta.json").exists():
        _die(f"Repository not found: {repo_name}. Run /list-repos to see available repos.")

    ws.set_active(target)
    meta = json.loads((target / "meta.json").read_text(encoding="utf-8"))

    _progress(f"Switched to: {meta.get('repo_name', target.name)}")
    _result_json({
        "status": "success",
        "active_repo": meta.get("repo_name", target.name),
        "repo_path": meta.get("repo_path"),
        "artifact_dir": str(target),
        "steps": meta.get("steps", {}),
    })
454
+
455
+
456
+ # ---------------------------------------------------------------------------
457
+ # Subcommand: info
458
+ # ---------------------------------------------------------------------------
459
+
460
def cmd_info(_args: argparse.Namespace, ws: Workspace) -> None:
    """Show active repository info: graph statistics, language breakdown,
    supported languages, and which optional services are available."""
    artifact_dir = ws.require_active()
    meta = ws.load_meta() or {}

    # Wiki pages live one level down: <artifact_dir>/wiki/wiki/*.md.
    wiki_pages = []
    wiki_subdir = artifact_dir / "wiki" / "wiki"
    if wiki_subdir.exists():
        wiki_pages = [p.stem for p in sorted(wiki_subdir.glob("*.md"))]

    result: dict = {
        "repo_path": meta.get("repo_path", "unknown"),
        "artifact_dir": str(artifact_dir),
        "indexed_at": meta.get("indexed_at"),
        "wiki_pages": wiki_pages,
    }

    # Graph statistics + language extraction stats
    db_path = artifact_dir / "graph.db"
    if db_path.exists():
        try:
            ingestor = _open_ingestor(artifact_dir)
            result["graph_stats"] = ingestor.get_statistics()

            # Language extraction stats: count files by extension
            try:
                file_rows = ingestor.query(
                    "MATCH (f:File) RETURN f.path AS path"
                )
                from .language_spec import get_language_for_extension
                lang_counts: dict[str, int] = {}
                total_files = 0
                for row in file_rows:
                    # Rows may be wrapped in a {"result": [...]} envelope or
                    # be the bare value — handle both shapes.
                    raw = row.get("result", row)
                    fpath = raw[0] if isinstance(raw, (list, tuple)) else raw
                    if isinstance(fpath, str):
                        ext = Path(fpath).suffix.lower()
                        lang = get_language_for_extension(ext)
                        if lang:
                            # Only files with a recognized language count
                            # toward total_code_files.
                            lang_name = lang.value
                            lang_counts[lang_name] = lang_counts.get(lang_name, 0) + 1
                            total_files += 1
                result["language_stats"] = {
                    "total_code_files": total_files,
                    # Sorted by descending file count.
                    "by_language": dict(sorted(lang_counts.items(), key=lambda x: -x[1])),
                }
            except Exception:
                pass  # language stats are optional

            ingestor.__exit__(None, None, None)
        except Exception as exc:
            # Degrade gracefully: report the error instead of aborting info.
            result["graph_stats"] = {"error": str(exc)}

    # Language support info
    from .constants import LANGUAGE_METADATA, LanguageStatus
    result["supported_languages"] = {
        "full": [m.display_name for lang, m in LANGUAGE_METADATA.items() if m.status == LanguageStatus.FULL],
        "in_development": [m.display_name for lang, m in LANGUAGE_METADATA.items() if m.status == LanguageStatus.DEV],
    }

    # Service availability
    from .rag.llm_backend import create_llm_backend

    llm = create_llm_backend()
    result["cypher_query_available"] = llm.available
    result["semantic_search_available"] = (artifact_dir / "vectors.pkl").exists()
    result["api_docs_available"] = (artifact_dir / "api_docs" / "index.md").exists()

    # Warnings for missing services
    warnings = []
    if not llm.available:
        warnings.append("LLM not configured — set LLM_API_KEY, OPENAI_API_KEY, or MOONSHOT_API_KEY.")
    if not (artifact_dir / "vectors.pkl").exists():
        warnings.append("Embeddings not built — semantic search unavailable.")
    if warnings:
        result["warnings"] = warnings

    _result_json(result)
537
+
538
+
539
+ # ---------------------------------------------------------------------------
540
+ # Subcommand: query
541
+ # ---------------------------------------------------------------------------
542
+
543
def cmd_query(args: argparse.Namespace, ws: Workspace) -> None:
    """Translate a natural-language question into Cypher and execute it.

    Prints the generated Cypher and a JSON result set.  Exits with status 1
    when the LLM is unconfigured, generation fails, or execution fails.
    """
    artifact_dir = ws.require_active()

    from .rag.cypher_generator import CypherGenerator
    from .rag.llm_backend import create_llm_backend

    llm = create_llm_backend()
    if not llm.available:
        _die(
            "LLM not configured. Set one of: LLM_API_KEY, OPENAI_API_KEY, "
            "or MOONSHOT_API_KEY."
        )

    ingestor = _open_ingestor(artifact_dir)
    cypher_gen = CypherGenerator(llm)

    question = args.question
    _progress(f"Question: {question}")

    # One try/finally guarantees the DB handle is released on every exit
    # path, including the SystemExit raised by _die (previously the
    # generation-failure branch duplicated the cleanup call by hand).
    try:
        try:
            cypher = cypher_gen.generate(question)
            _progress(f"Cypher: {cypher}")
        except Exception as exc:
            _die(f"Cypher generation failed: {exc}")

        try:
            # NOTE(review): LLM-generated Cypher runs unmodified against the
            # local graph DB — acceptable for a local read-only artifact, but
            # worth revisiting if the DB ever becomes shared or writable.
            rows = ingestor.query(cypher)
            serialisable = []
            for row in rows:
                raw = row.get("result", row)
                if isinstance(raw, (list, tuple)):
                    serialisable.append(list(raw))
                else:
                    serialisable.append(raw)
            _result_json({
                "question": question,
                "cypher": cypher,
                "row_count": len(serialisable),
                "rows": serialisable,
            })
        except Exception as exc:
            _die(f"Query execution failed: {exc}\nCypher: {cypher}")
    finally:
        ingestor.__exit__(None, None, None)
588
+
589
+
590
+ # ---------------------------------------------------------------------------
591
+ # Subcommand: snippet
592
+ # ---------------------------------------------------------------------------
593
+
594
def cmd_snippet(args: argparse.Namespace, ws: Workspace) -> None:
    """Print the source code of an entity looked up by qualified name.

    Falls back to reading the file at the recorded line range when the
    graph node has no stored source_code.
    """
    artifact_dir = ws.require_active()
    meta = ws.load_meta() or {}
    repo_path = Path(meta.get("repo_path", "."))

    ingestor = _open_ingestor(artifact_dir)
    qn = args.qualified_name

    # Escape single quotes so the name is safe inside the Cypher string literal.
    safe_qn = qn.replace("'", "\\'")
    cypher = (
        f"MATCH (n) WHERE n.qualified_name = '{safe_qn}' "
        "RETURN n.qualified_name, n.name, n.source_code, n.path, n.start_line, n.end_line "
        "LIMIT 1"
    )

    # Single try/finally replaces the previous per-branch __exit__ calls and
    # also closes the handle if row parsing raises unexpectedly (previously
    # that path leaked the handle).
    try:
        try:
            rows = ingestor.query(cypher)
        except Exception as exc:
            _die(f"Graph query failed: {exc}")

        if not rows:
            _die(f"Not found: {qn}")

        # Positional columns mirror the RETURN clause above.
        result = rows[0].get("result", [])
        qname = result[0] if len(result) > 0 else qn
        name = result[1] if len(result) > 1 else None
        source_code = result[2] if len(result) > 2 else None
        file_path = result[3] if len(result) > 3 else None
        start_line = result[4] if len(result) > 4 else None
        end_line = result[5] if len(result) > 5 else None

        # Fallback: reconstruct the snippet from the file on disk.
        if not source_code and file_path and start_line and end_line:
            fp = Path(str(file_path))
            if not fp.is_absolute():
                fp = repo_path / fp
            try:
                lines = fp.read_text(encoding="utf-8", errors="ignore").splitlines(keepends=True)
                s = max(0, int(start_line) - 1)
                e = min(len(lines), int(end_line))
                source_code = "".join(lines[s:e])
            except Exception:
                pass  # best-effort: leave source_code as None

        _result_json({
            "qualified_name": qname,
            "name": name,
            "file_path": file_path,
            "start_line": start_line,
            "end_line": end_line,
            "source_code": source_code,
        })
    finally:
        ingestor.__exit__(None, None, None)
648
+
649
+
650
+ # ---------------------------------------------------------------------------
651
+ # Subcommand: search
652
+ # ---------------------------------------------------------------------------
653
+
654
def cmd_search(args: argparse.Namespace, ws: Workspace) -> None:
    """Semantic vector search over the indexed repository."""
    artifact_dir = ws.require_active()

    vectors_path = artifact_dir / "vectors.pkl"
    if not vectors_path.exists():
        _die("Embeddings not found. Run /init-repo first to build vector index.")

    from .embeddings.qwen3_embedder import create_embedder
    from .tools.semantic_search import SemanticSearchService

    vector_store = _load_vector_store(vectors_path)
    if vector_store is None:
        _die("Failed to load vector store.")

    # Fixed: create the embedder BEFORE opening the graph DB, so a missing
    # embedding configuration cannot exit with the DB handle left open.
    try:
        embedder = create_embedder()
    except ValueError as exc:
        _die(f"Embedding API not configured: {exc}")

    ingestor = _open_ingestor(artifact_dir)
    service = SemanticSearchService(
        embedder=embedder, vector_store=vector_store, graph_service=ingestor,
    )

    query = args.query
    top_k = args.top_k
    _progress(f"Searching: \"{query}\" (top {top_k})")

    try:
        results = service.search(query, top_k=top_k)
        _result_json({
            "query": query,
            "result_count": len(results),
            "results": [
                {
                    "qualified_name": r.qualified_name,
                    "name": r.name,
                    "type": r.type,
                    "score": r.score,
                    "file_path": r.file_path,
                    "start_line": r.start_line,
                    "end_line": r.end_line,
                    "source_code": r.source_code,
                }
                for r in results
            ],
        })
    except Exception as exc:
        _die(f"Semantic search failed: {exc}")
    finally:
        # Release the DB handle on success and on the _die/SystemExit path.
        ingestor.__exit__(None, None, None)
704
+
705
+
706
+ # ---------------------------------------------------------------------------
707
+ # Subcommand: list-wiki
708
+ # ---------------------------------------------------------------------------
709
+
710
def cmd_list_wiki(_args: argparse.Namespace, ws: Workspace) -> None:
    """List the generated wiki pages for the active repository."""
    artifact_dir = ws.require_active()

    wiki_dir = artifact_dir / "wiki"
    if not wiki_dir.exists():
        _die("Wiki not generated yet. Run /init-repo first.")

    # Individual pages live one level down: <artifact_dir>/wiki/wiki/*.md.
    inner = wiki_dir / "wiki"
    pages = (
        [{"page_id": p.stem, "file": f"wiki/{p.name}"} for p in sorted(inner.glob("*.md"))]
        if inner.exists()
        else []
    )

    _result_json({
        "index_available": (wiki_dir / "index.md").exists(),
        "page_count": len(pages),
        "pages": pages,
        "hint": "Use /get-wiki index or /get-wiki page-1 to read a page.",
    })
730
+
731
+
732
+ # ---------------------------------------------------------------------------
733
+ # Subcommand: get-wiki
734
+ # ---------------------------------------------------------------------------
735
+
736
def cmd_get_wiki(args: argparse.Namespace, ws: Workspace) -> None:
    """Print the content of one wiki page (or the wiki index)."""
    artifact_dir = ws.require_active()

    wiki_dir = artifact_dir / "wiki"
    if not wiki_dir.exists():
        _die("Wiki not generated yet. Run /init-repo first.")

    page_id = args.page_id
    # "index" lives at the wiki root; every other page is under wiki/wiki/.
    if page_id == "index":
        target = wiki_dir / "index.md"
    else:
        target = wiki_dir / "wiki" / f"{page_id}.md"

    if not target.exists():
        _die(f"Wiki page not found: {page_id}")

    _result_json({
        "page_id": page_id,
        "file_path": str(target),
        "content": target.read_text(encoding="utf-8", errors="ignore"),
    })
758
+
759
+
760
+ # ---------------------------------------------------------------------------
761
+ # Subcommand: locate
762
+ # ---------------------------------------------------------------------------
763
+
764
def cmd_locate(args: argparse.Namespace, ws: Workspace) -> None:
    """Locate a function definition via Tree-sitter AST parsing."""
    # Called for its failure side effect only: aborts when nothing is indexed
    # (the previous version bound the result to an unused variable).
    ws.require_active()
    meta = ws.load_meta() or {}
    repo_path = Path(meta.get("repo_path", ".")).resolve()

    from .mcp.file_editor import FileEditor

    try:
        editor = FileEditor(repo_path)
    except Exception as exc:
        _die(f"Failed to initialize FileEditor: {exc}")

    file_path = args.file_path
    target = (repo_path / file_path).resolve()

    # Reject paths that escape the repository root (e.g. via "..").
    try:
        target.relative_to(repo_path)
    except ValueError:
        _die(f"Path outside repository root: {file_path}")

    if not target.exists():
        _die(f"File not found: {file_path}")

    # Fixed: getattr with a default instead of hasattr-then-access.
    line_number = getattr(args, "line", None)
    result = editor.locate_function(target, args.function_name, line_number)
    if result is None:
        _die(f"Function '{args.function_name}' not found in {file_path}")

    _result_json(result)
793
+
794
+
795
+ # ---------------------------------------------------------------------------
796
+ # Subcommand: list-api
797
+ # ---------------------------------------------------------------------------
798
+
799
def cmd_list_api(args: argparse.Namespace, ws: Workspace) -> None:
    """List API interfaces from the graph, grouped by module, as JSON.

    Fetches function-level APIs and, when ``--include-types`` is set and the
    ingestor supports it, type-level APIs (struct/enum/typedef), then emits a
    summary with per-module groupings and counts.
    """
    artifact_dir = ws.require_active()
    ingestor = _open_ingestor(artifact_dir)

    module = args.module
    visibility = args.visibility
    # "all" disables visibility filtering at the query level.
    vis_filter = None if visibility == "all" else visibility

    try:
        rows = ingestor.fetch_module_apis(module_qn=module, visibility=vis_filter)

        # Group function rows by module name. Each row is either a mapping
        # with a "result" key or is itself the result sequence — presumably
        # a backend difference; TODO confirm against the ingestor backends.
        by_module: dict[str, list] = {}
        for row in rows:
            raw = row.get("result", row)
            if isinstance(raw, (list, tuple)) and len(raw) >= 8:
                # Assumed column order: module, name, signature, return_type,
                # visibility, parameters, start_line, end_line — verify
                # against the fetch_module_apis query.
                mod_name = raw[0] or "unknown"
                entry = {
                    "name": raw[1],
                    "signature": raw[2],
                    "return_type": raw[3],
                    "visibility": raw[4],
                    "parameters": raw[5],
                    "start_line": raw[6],
                    "end_line": raw[7],
                    "entity_type": "function",
                }
            else:
                # Unrecognized row shape: pass the raw data through untouched.
                mod_name = "unknown"
                entry = {"raw": raw}

            if mod_name not in by_module:
                by_module[mod_name] = []
            by_module[mod_name].append(entry)

        # Types (struct/enum/typedef), only when the backend supports them.
        type_count = 0
        if args.include_types and hasattr(ingestor, "fetch_module_type_apis"):
            type_rows = ingestor.fetch_module_type_apis(module_qn=module)
            for row in type_rows:
                raw = row.get("result", row)
                if isinstance(raw, (list, tuple)) and len(raw) >= 6:
                    mod_name = raw[0] or "unknown"
                    entry = {
                        "name": raw[1],
                        "kind": raw[2],
                        "signature": raw[3],
                        # NOTE(review): since len(raw) >= 6 here, the test
                        # "len(raw) <= 5" is always false, so start_line is
                        # always raw[5]; end_line is raw[5] for 6-column rows
                        # and raw[6] otherwise — confirm this indexing
                        # against the type query's column order.
                        "start_line": raw[4 if len(raw) <= 5 else 5],
                        "end_line": raw[5 if len(raw) <= 6 else 6],
                        "entity_type": raw[2] or "type",
                    }
                else:
                    mod_name = "unknown"
                    entry = {"raw": raw}
                if mod_name not in by_module:
                    by_module[mod_name] = []
                by_module[mod_name].append(entry)
                type_count += 1

        total = sum(len(v) for v in by_module.values())
        _result_json({
            "total_apis": total,
            "function_count": total - type_count,
            "type_count": type_count,
            "module_count": len(by_module),
            "visibility_filter": visibility,
            "modules": by_module,
        })

    except Exception as exc:
        _die(f"Failed to list API interfaces: {exc}")
    finally:
        # Always release the graph handle, even when _die() exits.
        ingestor.__exit__(None, None, None)
871
+
872
+
873
+ # ---------------------------------------------------------------------------
874
+ # Subcommand: api-docs
875
+ # ---------------------------------------------------------------------------
876
+
877
def cmd_api_docs(args: argparse.Namespace, ws: Workspace) -> None:
    """Emit the API-doc index (L1) or one module page (L2) as JSON."""
    docs_root = ws.require_active() / "api_docs"
    index_file = docs_root / "index.md"
    if not index_file.exists():
        _die("API docs not generated yet. Run /init-repo first.")

    module = args.module
    if not module:
        # No module requested: return the top-level index page.
        _result_json({
            "level": "index",
            "content": index_file.read_text(encoding="utf-8", errors="ignore"),
        })
        return

    # Module names may contain path separators; flatten them the same way the
    # generator does when naming the per-module files.
    sanitized = module.replace("/", "_").replace("\\", "_")
    module_file = docs_root / "modules" / f"{sanitized}.md"
    if not module_file.exists():
        _die(f"Module doc not found: {module}. Use /api-docs (no args) to see all modules.")
    _result_json({
        "level": "module",
        "module": module,
        "content": module_file.read_text(encoding="utf-8", errors="ignore"),
    })
901
+
902
+
903
+ # ---------------------------------------------------------------------------
904
+ # Subcommand: api-doc
905
+ # ---------------------------------------------------------------------------
906
+
907
def cmd_api_doc(args: argparse.Namespace, ws: Workspace) -> None:
    """Emit the detailed (L3) API doc for one fully qualified function."""
    docs_root = ws.require_active() / "api_docs"
    if not (docs_root / "index.md").exists():
        _die("API docs not generated yet. Run /init-repo first.")

    qn = args.qualified_name
    # Flatten path separators to match the on-disk doc file naming scheme.
    flattened = qn.replace("/", "_").replace("\\", "_")
    doc_file = docs_root / "funcs" / f"{flattened}.md"
    if not doc_file.exists():
        _die(f"API doc not found: {qn}. Use /api-docs to browse modules first.")

    _result_json({
        "qualified_name": qn,
        "content": doc_file.read_text(encoding="utf-8", errors="ignore"),
    })
924
+
925
+
926
+ # ---------------------------------------------------------------------------
927
+ # Subcommand: api-find (aggregated: semantic search + API doc lookup)
928
+ # ---------------------------------------------------------------------------
929
+
930
def cmd_api_find(args: argparse.Namespace, ws: Workspace) -> None:
    """Semantic API search: vector search plus per-result API-doc lookup.

    Runs an embedding-based search for ``args.query``, then attaches the
    pre-generated per-function API doc (when one exists) to each hit and
    prints the combined results as JSON.
    """
    artifact_dir = ws.require_active()

    vectors_path = artifact_dir / "vectors.pkl"
    if not vectors_path.exists():
        _die("Embeddings not found. Run /repo-init first to build vector index.")

    from .embeddings.qwen3_embedder import create_embedder
    from .tools.semantic_search import SemanticSearchService

    vector_store = _load_vector_store(vectors_path)
    if vector_store is None:
        _die("Failed to load vector store.")

    ingestor = _open_ingestor(artifact_dir)
    # try/finally guarantees the graph handle is released on every exit path;
    # the original only closed it on the paths it explicitly anticipated, so
    # a failure while reading doc files leaked the handle.
    try:
        try:
            embedder = create_embedder()
        except ValueError as exc:
            _die(f"Embedding API not configured: {exc}")
        service = SemanticSearchService(
            embedder=embedder, vector_store=vector_store, graph_service=ingestor,
        )

        query = args.query
        top_k = args.top_k
        _progress(f"Searching APIs: \"{query}\" (top {top_k})")

        try:
            results = service.search(query, top_k=top_k)
        except Exception as exc:
            _die(f"Semantic search failed: {exc}")

        api_dir = artifact_dir / "api_docs"
        funcs_dir = api_dir / "funcs"
        has_api_docs = funcs_dir.exists()

        combined = []
        for r in results:
            entry: dict = {
                "qualified_name": r.qualified_name,
                "name": r.name,
                "type": r.type,
                "score": r.score,
                "file_path": r.file_path,
                "start_line": r.start_line,
                "end_line": r.end_line,
                "source_code": r.source_code,
            }

            # Attach the pre-generated API doc when one exists for this hit;
            # otherwise the key is present but None.
            entry["api_doc"] = None
            if has_api_docs and r.qualified_name:
                safe_qn = r.qualified_name.replace("/", "_").replace("\\", "_")
                doc_file = funcs_dir / f"{safe_qn}.md"
                if doc_file.exists():
                    entry["api_doc"] = doc_file.read_text(encoding="utf-8", errors="ignore")

            combined.append(entry)
    finally:
        ingestor.__exit__(None, None, None)

    _result_json({
        "query": query,
        "result_count": len(combined),
        "api_docs_available": has_api_docs,
        "results": combined,
    })
1002
+
1003
+
1004
+ # ---------------------------------------------------------------------------
1005
+ # Subcommand: wiki-gen (standalone wiki regeneration)
1006
+ # ---------------------------------------------------------------------------
1007
+
1008
def cmd_wiki_gen(args: argparse.Namespace, ws: Workspace) -> None:
    """Regenerate the wiki from the existing graph and embeddings.

    Requires that the graph database and vector index already exist; neither
    is rebuilt here. ``--rebuild`` only forces regeneration of the wiki
    structure cache and pages. Dies (or exits 1) on any failure.
    """
    from .examples.generate_wiki import MAX_PAGES_COMPREHENSIVE, MAX_PAGES_CONCISE
    # NOTE: the original also imported build_vector_index here, but it was
    # never used — embeddings are loaded from the pickle, not rebuilt.
    from .mcp.pipeline import run_wiki_generation, save_meta

    artifact_dir = ws.require_active()
    meta = ws.load_meta()
    if meta is None:
        _die("No metadata found. Run /repo-init first.")

    repo_path = Path(meta["repo_path"]).resolve()
    if not repo_path.exists():
        _die(f"Repository path no longer exists: {repo_path}")

    db_path = artifact_dir / "graph.db"
    if not db_path.exists():
        _die("Graph database not found. Run /repo-init first to build the graph.")

    vectors_path = artifact_dir / "vectors.pkl"
    if not vectors_path.exists():
        _die("Embeddings not found. Run /repo-init first to build embeddings.")

    wiki_mode = args.mode
    comprehensive = wiki_mode != "concise"
    max_pages = MAX_PAGES_COMPREHENSIVE if comprehensive else MAX_PAGES_CONCISE
    wiki_dir = artifact_dir / "wiki"
    rebuild = args.rebuild

    def progress_cb(msg: str, pct: float = 0.0) -> None:
        # Prefix progress lines with a percentage only when one is known.
        prefix = f"[{pct:.0f}%] " if pct > 0 else ""
        _progress(f"{prefix}{msg}")

    _progress(f"=== Wiki Generation: {repo_path.name} ===")
    _progress(f" Mode: {wiki_mode} | Rebuild: {rebuild}")
    _progress("")

    try:
        # Open existing graph (read-only)
        ingestor = _open_ingestor(artifact_dir)

        # Load existing embeddings (no re-computation)
        with open(vectors_path, "rb") as fh:
            cache = pickle.load(fh)
        vector_store = cache["vector_store"]
        func_map = cache["func_map"]

        from .embeddings.qwen3_embedder import create_embedder
        embedder = create_embedder()

        _progress("Loaded existing graph and embeddings. Starting wiki generation...")
        _progress("")

        # Delete the structure cache if rebuild was requested so the wiki
        # layout is re-derived from scratch.
        structure_cache = wiki_dir / f"{repo_path.name}_structure.pkl"
        if rebuild and structure_cache.exists():
            structure_cache.unlink()

        index_path, page_count = run_wiki_generation(
            builder=ingestor,
            repo_path=repo_path,
            output_dir=wiki_dir,
            max_pages=max_pages,
            rebuild=rebuild,
            comprehensive=comprehensive,
            vector_store=vector_store,
            embedder=embedder,
            func_map=func_map,
            progress_cb=progress_cb,
        )

        save_meta(artifact_dir, repo_path, page_count)
        ingestor.__exit__(None, None, None)

        _progress("")
        _progress("=== Done ===")
        _result_json({
            "status": "success",
            "repo_path": str(repo_path),
            "wiki_index": str(index_path),
            "wiki_pages": page_count,
        })

    except Exception as exc:
        # CLI boundary: surface the failure and exit non-zero; process exit
        # releases the graph handle if one was opened.
        _progress(f"\nERROR: Wiki generation failed: {exc}")
        sys.exit(1)
1092
+
1093
+
1094
+ # ---------------------------------------------------------------------------
1095
+ # Subcommand: embed-gen (standalone embedding rebuild)
1096
+ # ---------------------------------------------------------------------------
1097
+
1098
def cmd_embed_gen(args: argparse.Namespace, ws: Workspace) -> None:
    """Rebuild the vector embeddings from the already-built graph database."""
    from .mcp.pipeline import build_vector_index, save_meta

    artifact_dir = ws.require_active()
    metadata = ws.load_meta()
    if metadata is None:
        _die("No metadata found. Run /repo-init first.")

    repo_root = Path(metadata["repo_path"]).resolve()
    if not repo_root.exists():
        _die(f"Repository path no longer exists: {repo_root}")

    if not (artifact_dir / "graph.db").exists():
        _die("Graph database not found. Run /repo-init first to build the graph.")

    vectors_file = artifact_dir / "vectors.pkl"
    force = args.rebuild

    def report(msg: str, pct: float = 0.0) -> None:
        # Show a percentage prefix only when the caller supplied one.
        if pct > 0:
            _progress(f"[{pct:.0f}%] {msg}")
        else:
            _progress(msg)

    _progress(f"=== Embedding Generation: {repo_root.name} ===")
    _progress(f" Rebuild: {force}")
    _progress("")

    try:
        graph = _open_ingestor(artifact_dir)

        _progress("Loaded existing graph. Starting embedding generation...")
        _progress("")

        store, _embedder, _func_map = build_vector_index(
            graph, repo_root, vectors_file, force, report
        )

        save_meta(artifact_dir, repo_root, metadata.get("wiki_page_count", 0))
        graph.__exit__(None, None, None)

        _progress("")
        _progress("=== Done ===")
        _result_json({
            "status": "success",
            "repo_path": str(repo_root),
            "vectors_path": str(vectors_file),
            "embedding_count": len(store),
        })

    except Exception as exc:
        _progress(f"\nERROR: Embedding generation failed: {exc}")
        sys.exit(1)
1150
+
1151
+
1152
+ # ---------------------------------------------------------------------------
1153
+ # Main — argparse
1154
+ # ---------------------------------------------------------------------------
1155
+
1156
def main() -> None:
    """CLI entry point: build the argument parser and dispatch the subcommand."""
    parser = argparse.ArgumentParser(
        prog="cgb",
        description="CodeGraphWiki CLI — local command interface",
    )
    # required=True: argparse itself rejects an invocation with no subcommand.
    subparsers = parser.add_subparsers(dest="command", required=True)

    # init (orchestrator: graph-build → api-doc-gen → embed-gen → wiki-gen)
    p = subparsers.add_parser("init", help="Initialize repository (graph → api-docs → embeddings → wiki)")
    p.add_argument("repo_path", help="Absolute path to the repository")
    p.add_argument("--rebuild", action="store_true", help="Force rebuild everything")
    p.add_argument("--mode", choices=["comprehensive", "concise"], default="comprehensive",
                   help="Wiki mode: comprehensive (8-10 pages) or concise (4-5 pages)")
    p.add_argument("--backend", choices=["kuzu", "memgraph", "memory"], default="kuzu",
                   help="Graph database backend")
    p.add_argument("--no-wiki", action="store_true",
                   help="Skip wiki generation (graph + api-docs + embeddings only)")
    p.add_argument("--no-embed", action="store_true",
                   help="Skip embeddings and wiki (graph + api-docs only, fastest)")

    # graph-build (step 1: standalone)
    p = subparsers.add_parser("graph-build", help="Build knowledge graph only (step 1)")
    p.add_argument("repo_path", help="Absolute path to the repository")
    p.add_argument("--rebuild", action="store_true", help="Force rebuild graph")
    p.add_argument("--backend", choices=["kuzu", "memgraph", "memory"], default="kuzu",
                   help="Graph database backend")

    # api-doc-gen (step 2: standalone)
    p = subparsers.add_parser("api-doc-gen", help="Generate API docs from existing graph (step 2)")
    p.add_argument("--rebuild", action="store_true", help="Force regenerate API docs")

    # list-repos
    subparsers.add_parser("list-repos", help="List all indexed repositories in the workspace")

    # switch-repo
    p = subparsers.add_parser("switch-repo", help="Switch active repository")
    p.add_argument("repo_name", help="Repository name or artifact dir name (see /list-repos)")

    # info
    subparsers.add_parser("info", help="Show active repository info and graph statistics")

    # query
    p = subparsers.add_parser("query", help="Natural-language query → Cypher → execute")
    p.add_argument("question", help="Natural language question about the codebase")

    # snippet
    p = subparsers.add_parser("snippet", help="Get source code by qualified name")
    p.add_argument("qualified_name", help="e.g. 'mymodule.MyClass.my_method'")

    # search
    p = subparsers.add_parser("search", help="Semantic vector search")
    p.add_argument("query", help="Natural language description of what to find")
    p.add_argument("--top-k", type=int, default=5, help="Number of results (default: 5)")

    # list-wiki
    subparsers.add_parser("list-wiki", help="List generated wiki pages")

    # get-wiki
    p = subparsers.add_parser("get-wiki", help="Read a wiki page")
    p.add_argument("page_id", help="Page ID: 'index' or 'page-1', 'page-2', etc.")

    # locate
    p = subparsers.add_parser("locate", help="Locate function via Tree-sitter AST")
    p.add_argument("file_path", help="Relative path from repo root")
    p.add_argument("function_name", help="Function/method name (use 'Class.method' for methods)")
    p.add_argument("--line", type=int, default=None, help="Line number to disambiguate overloads")

    # list-api
    p = subparsers.add_parser("list-api", help="List public API interfaces from graph")
    p.add_argument("--module", default=None, help="Filter by module qualified name")
    p.add_argument("--visibility", choices=["public", "static", "extern", "all"],
                   default="public", help="Visibility filter (default: public)")
    # BooleanOptionalAction gives both --include-types and --no-include-types.
    # (The original used action="store_true" with default=True, which made the
    # flag a no-op that could never be disabled.)
    p.add_argument("--include-types", action=argparse.BooleanOptionalAction, default=True,
                   help="Include struct/enum/typedef definitions")

    # api-docs
    p = subparsers.add_parser("api-docs", help="Browse hierarchical API docs (L1 index or L2 module)")
    p.add_argument("--module", default=None, help="Module name for L2 detail (omit for L1 index)")

    # api-doc
    p = subparsers.add_parser("api-doc", help="Read detailed API doc for a function (L3)")
    p.add_argument("qualified_name", help="Fully qualified function name")

    # api-find
    p = subparsers.add_parser("api-find", help="Find APIs by natural language (search + doc lookup)")
    p.add_argument("query", help="Natural language description of what API to find")
    p.add_argument("--top-k", type=int, default=5, help="Number of results (default: 5)")

    # wiki-gen
    p = subparsers.add_parser("wiki-gen", help="Regenerate wiki only (reuses existing graph + embeddings)")
    p.add_argument("--rebuild", action="store_true", help="Force regenerate wiki structure and pages")
    p.add_argument("--mode", choices=["comprehensive", "concise"], default="comprehensive",
                   help="Wiki mode: comprehensive (8-10 pages) or concise (4-5 pages)")

    # embed-gen
    p = subparsers.add_parser("embed-gen", help="Rebuild embeddings only (reuses existing graph)")
    p.add_argument("--rebuild", action="store_true", help="Force rebuild embeddings even if cached")

    args = parser.parse_args()

    ws = Workspace()

    dispatch = {
        "init": cmd_init,
        "graph-build": cmd_graph_build,
        "api-doc-gen": cmd_api_doc_gen,
        "list-repos": cmd_list_repos,
        "switch-repo": cmd_switch_repo,
        "info": cmd_info,
        "query": cmd_query,
        "snippet": cmd_snippet,
        "search": cmd_search,
        "list-wiki": cmd_list_wiki,
        "get-wiki": cmd_get_wiki,
        "locate": cmd_locate,
        "list-api": cmd_list_api,
        "api-docs": cmd_api_docs,
        "api-doc": cmd_api_doc,
        "api-find": cmd_api_find,
        "wiki-gen": cmd_wiki_gen,
        "embed-gen": cmd_embed_gen,
    }

    # With required=True every parsed command is guaranteed to be a dispatch
    # key, so the lookup cannot fail (the original's None-check was dead code).
    dispatch[args.command](args, ws)


if __name__ == "__main__":
    main()