code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,640 @@
1
+ """RAG Engine for code graph-based retrieval and generation.
2
+
3
+ This module provides the main RAG engine that combines semantic search,
4
+ graph queries, and LLM generation for code analysis.
5
+
6
+ The RAG flow:
7
+ 1. Semantic search to find relevant code entities
8
+ 2. Graph traversal to gather context (callers, callees, related)
9
+ 3. Prompt assembly with retrieved context
10
+ 4. LLM generation (OpenAI-compatible API)
11
+ 5. Markdown output generation
12
+
13
+ Examples:
14
+ >>> from code_graph_builder.rag import RAGConfig, create_rag_engine
15
+ >>> from code_graph_builder.embeddings import create_embedder, create_vector_store
16
+ >>> from code_graph_builder.services import MemgraphIngestor
17
+ >>>
18
+ >>> config = RAGConfig.from_env()
19
+ >>> embedder = create_embedder()
20
+ >>> vector_store = create_vector_store(backend="memory", dimension=1536)
21
+ >>>
22
+ >>> with MemgraphIngestor("localhost", 7687) as graph_service:
23
+ ... engine = create_rag_engine(
24
+ ... config=config,
25
+ ... embedder=embedder,
26
+ ... vector_store=vector_store,
27
+ ... graph_service=graph_service,
28
+ ... )
29
+ ... result = engine.query("Explain the authentication flow")
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ from dataclasses import dataclass, field
35
+ from pathlib import Path
36
+ from typing import TYPE_CHECKING, Any, Protocol
37
+
38
+ from loguru import logger
39
+
40
+ from ..embeddings.qwen3_embedder import BaseEmbedder
41
+ from ..embeddings.vector_store import VectorStore
42
+ from ..tools.graph_query import GraphQueryService, create_graph_query_service
43
+ from ..tools.semantic_search import (
44
+ SemanticSearchService,
45
+ create_semantic_search_service,
46
+ )
47
+ from .config import RAGConfig
48
+ from .client import LLMClient, create_llm_client
49
+ from .markdown_generator import (
50
+ AnalysisResult,
51
+ MarkdownGenerator,
52
+ SourceReference,
53
+ )
54
+ from .prompt_templates import (
55
+ CodeContext,
56
+ RAGPrompts,
57
+ create_code_context,
58
+ )
59
+
60
+ if TYPE_CHECKING:
61
+ from ..types import ResultRow
62
+
63
+
64
@dataclass
class RAGResult:
    """Outcome of a single RAG query.

    Attributes:
        query: Original user query.
        response: Generated response text.
        sources: Source references that backed the response.
        contexts: Code contexts retrieved during the query.
        metadata: Additional metadata about the query run.
    """

    query: str
    response: str
    sources: list[SourceReference] = field(default_factory=list)
    contexts: list[CodeContext] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_markdown(self, generator: MarkdownGenerator | None = None) -> str:
        """Render this result as a markdown document.

        Args:
            generator: Markdown generator to use; a default one is
                created when omitted.

        Returns:
            The markdown document as a string.
        """
        gen = MarkdownGenerator() if generator is None else generator
        report = AnalysisResult(
            query=self.query,
            response=self.response,
            sources=self.sources,
            metadata=self.metadata,
        )
        return gen.generate_analysis_doc(title="Code Analysis", result=report)
105
+
106
+
107
class GraphServiceProtocol(Protocol):
    """Protocol for graph service operations.

    Structural interface satisfied by any graph backend (e.g. the
    Memgraph/Kuzu ingestors referenced in the module docstring) that can
    run a read query and return all rows.
    """

    def fetch_all(self, query: str, params: dict | None = None) -> list[ResultRow]:
        """Execute *query* with optional *params* and return every result row."""
        ...
111
+
112
+
113
class RAGEngine:
    """RAG Engine for code analysis.

    Combines semantic search, graph queries, and LLM generation
    to provide intelligent code analysis capabilities.

    Args:
        config: RAG configuration
        llm_client: LLM API client
        semantic_service: Semantic search service
        graph_service: Graph query service

    Example:
        >>> engine = RAGEngine(
        ...     config=config,
        ...     llm_client=llm_client,
        ...     semantic_service=semantic_service,
        ...     graph_service=graph_service,
        ... )
        >>> result = engine.query("How does authentication work?")
    """

    def __init__(
        self,
        config: RAGConfig,
        llm_client: LLMClient,
        semantic_service: SemanticSearchService,
        graph_service: GraphQueryService,
    ) -> None:
        self.config = config
        self.llm_client = llm_client
        self.semantic_service = semantic_service
        self.graph_service = graph_service
        # Prompt templates and markdown rendering are engine-owned, not injected.
        self.prompts = RAGPrompts()
        self.markdown_generator = MarkdownGenerator()

        logger.info("Initialized RAGEngine")

    def query(
        self,
        query: str,
        top_k: int | None = None,
        include_graph_context: bool = True,
    ) -> RAGResult:
        """Execute a RAG query.

        Pipeline: semantic search -> context building (optionally enriched
        with graph relationships) -> LLM generation -> source references.

        Args:
            query: User query string
            top_k: Number of results to retrieve (overrides config)
            include_graph_context: Whether to include graph relationships

        Returns:
            RAGResult with response and metadata
        """
        logger.info(f"RAG query: {query}")

        # Step 1: Semantic search
        semantic_results = self._semantic_search(query, top_k)
        if not semantic_results:
            # Short-circuit: no retrieval hits means no LLM call is made.
            return RAGResult(
                query=query,
                response="No relevant code found for your query.",
                metadata={"semantic_results": 0},
            )

        # Step 2: Build code contexts
        contexts = self._build_contexts(semantic_results, include_graph_context)

        # Step 3: Generate response
        response = self._generate_response(query, contexts)

        # Step 4: Build source references
        sources = self._build_sources(contexts)

        return RAGResult(
            query=query,
            response=response,
            sources=sources,
            contexts=contexts,
            metadata={
                "semantic_results": len(semantic_results),
                "contexts": len(contexts),
                "model": self.config.moonshot.model,
            },
        )

    def explain_code(
        self,
        qualified_name: str,
        include_related: bool = True,
    ) -> RAGResult:
        """Explain a specific code entity.

        Looks the entity up directly in the graph (no semantic search),
        optionally pulls related entities, and asks the LLM for an
        explanation.

        Args:
            qualified_name: Fully qualified name of the entity
            include_related: Whether to include related entities

        Returns:
            RAGResult with explanation
        """
        logger.info(f"Explaining code: {qualified_name}")

        # Fetch entity from graph
        node = self.graph_service.fetch_node_by_qualified_name(qualified_name)
        if not node:
            return RAGResult(
                query=f"Explain {qualified_name}",
                response=f"Entity '{qualified_name}' not found in the code graph.",
            )

        # Build context
        context = self._node_to_context(node)

        # Get related entities if requested
        contexts = [context]
        if include_related:
            related = self._get_related_contexts(node.node_id)
            contexts.extend(related)

        # Generate explanation. Note: only the primary entity's context is
        # sent in the prompt; related contexts are returned on the result
        # but not fed to the LLM here.
        system_prompt = self.prompts.analysis.get_system_prompt()
        user_prompt = self.prompts.analysis.format_explain_prompt(context)

        chat_response = self.llm_client.chat_with_messages([
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ])

        sources = [SourceReference(
            name=node.name,
            qualified_name=node.qualified_name,
            file_path=node.path or "",
            line_start=node.start_line,
            line_end=node.end_line,
            entity_type=node.type,
        )]

        return RAGResult(
            query=f"Explain {qualified_name}",
            response=chat_response.content,
            sources=sources,
            contexts=contexts,
            metadata={
                "entity": qualified_name,
                "type": node.type,
            },
        )

    def analyze_architecture(
        self,
        module_name: str,
    ) -> RAGResult:
        """Analyze architecture of a module.

        Collects up to 20 entities whose qualified name starts with
        *module_name*, then asks the LLM for an architecture analysis of
        (at most) the first 5 that carry source code.

        Args:
            module_name: Module or package name

        Returns:
            RAGResult with architecture analysis
        """
        logger.info(f"Analyzing architecture: {module_name}")

        # Query for module entities. Prefix match on qualified_name; the
        # LIMIT keeps the prompt size bounded.
        query = """
        MATCH (n)
        WHERE n.qualified_name STARTS WITH $module_name
        RETURN n.node_id AS node_id,
               n.qualified_name AS qualified_name,
               n.name AS name,
               labels(n) AS labels,
               n.path AS path,
               n.start_line AS start_line,
               n.end_line AS end_line,
               n.source_code AS source_code
        LIMIT 20
        """

        results = self.graph_service.execute_cypher(query, {"module_name": module_name})

        if not results:
            return RAGResult(
                query=f"Analyze architecture of {module_name}",
                response=f"No entities found for module '{module_name}'.",
            )

        # Build contexts — rows without stored source_code are skipped.
        contexts = []
        for row in results:
            source_code = row.get("source_code", "")
            if source_code:
                contexts.append(create_code_context(
                    source_code=source_code,
                    file_path=row.get("path"),
                    qualified_name=row.get("qualified_name"),
                    entity_type=row.get("labels", ["Unknown"])[0] if row.get("labels") else "Unknown",
                ))

        # Generate analysis.
        # NOTE(review): the argument below is a CodeContext when exactly one
        # context exists, but a joined *string* otherwise — confirm
        # format_architecture_prompt accepts both types.
        system_prompt = self.prompts.analysis.get_system_prompt()
        user_prompt = self.prompts.analysis.format_architecture_prompt(
            contexts[0] if len(contexts) == 1 else
            "\n\n".join(f"### Entity {i+1}\n{ctx.format_context()}"
                        for i, ctx in enumerate(contexts[:5]))
        )

        chat_response = self.llm_client.chat_with_messages([
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ])

        # Sources cover up to 10 raw rows, even ones skipped above for
        # lacking source_code.
        sources = [
            SourceReference(
                name=row.get("name", ""),
                qualified_name=row.get("qualified_name", ""),
                file_path=row.get("path", ""),
                entity_type=row.get("labels", ["Unknown"])[0] if row.get("labels") else "Unknown",
            )
            for row in results[:10]
        ]

        return RAGResult(
            query=f"Analyze architecture of {module_name}",
            response=chat_response.content,
            sources=sources,
            contexts=contexts,
            metadata={
                "module": module_name,
                "entities_analyzed": len(results),
            },
        )

    def _semantic_search(
        self,
        query: str,
        top_k: int | None = None,
    ) -> list[Any]:
        """Execute semantic search.

        Args:
            query: Search query
            top_k: Number of results; falls back to
                config.retrieval.semantic_top_k when None

        Returns:
            List of semantic search results
        """
        k = top_k or self.config.retrieval.semantic_top_k
        results = self.semantic_service.search(query, top_k=k)
        logger.debug(f"Semantic search returned {len(results)} results")
        return results

    def _build_contexts(
        self,
        semantic_results: list[Any],
        include_graph_context: bool,
    ) -> list[CodeContext]:
        """Build code contexts from semantic results.

        Results for which no source code can be obtained (neither stored
        on the result nor fetchable from the graph) are dropped.

        Args:
            semantic_results: Results from semantic search
            include_graph_context: Whether to include graph relationships

        Returns:
            List of code contexts
        """
        contexts = []

        for result in semantic_results:
            # Get source code: prefer what the search result carries,
            # otherwise fall back to the graph service.
            source_code = result.source_code
            if not source_code and self.semantic_service.graph_service:
                source_code = self.semantic_service.get_source_code(result.node_id)

            if not source_code:
                continue

            # Build base context
            context = create_code_context(
                source_code=source_code,
                file_path=result.file_path,
                qualified_name=result.qualified_name,
                entity_type=result.type,
            )

            # Enrich with graph context if requested
            if include_graph_context:
                context = self._enrich_context(context, result.node_id)

            contexts.append(context)

        logger.debug(f"Built {len(contexts)} code contexts")
        return contexts

    def _enrich_context(
        self,
        context: CodeContext,
        node_id: int,
    ) -> CodeContext:
        """Enrich context with graph relationships (callers/callees).

        Mutates and returns the same context object. Enrichment failures
        are logged and swallowed — the base context is still usable.

        NOTE(review): *node_id* is currently unused; lookups go through
        context.qualified_name instead — confirm whether it can be removed
        or should be used for the graph lookups.

        Args:
            context: Base code context
            node_id: Node ID in graph

        Returns:
            Enriched context
        """
        try:
            # Get callers (capped at 5 to bound prompt size)
            if self.config.retrieval.include_callers:
                callers = self.graph_service.fetch_callers(context.qualified_name or "")
                context.callers = [c.qualified_name for c in callers[:5]]

            # Get callees (same cap)
            if self.config.retrieval.include_callees:
                callees = self.graph_service.fetch_callees(context.qualified_name or "")
                context.callees = [c.qualified_name for c in callees[:5]]

        except Exception as e:
            logger.warning(f"Failed to enrich context: {e}")

        return context

    def _get_related_contexts(self, node_id: int) -> list[CodeContext]:
        """Get contexts for nodes related via CALLS/INHERITS/IMPORTS edges.

        Best-effort: failures are logged and an empty/partial list is
        returned. At most 5 related nodes are considered, and only those
        with a file path whose source can be read.

        Args:
            node_id: Node ID

        Returns:
            List of related contexts
        """
        contexts = []

        try:
            related = self.graph_service.fetch_related_nodes(
                node_id,
                relationship_types=["CALLS", "INHERITS", "IMPORTS"],
            )

            for node, rel_type in related[:5]:
                if node.path:
                    source = self.semantic_service.get_source_from_file(
                        node.path,
                        node.start_line or 0,
                        node.end_line or 0,
                    )
                    if source:
                        contexts.append(create_code_context(
                            source_code=source,
                            file_path=node.path,
                            qualified_name=node.qualified_name,
                            entity_type=node.type,
                        ))

        except Exception as e:
            logger.warning(f"Failed to get related contexts: {e}")

        return contexts

    def _node_to_context(self, node: Any) -> CodeContext:
        """Convert graph node to code context.

        Source code is taken from the node's stored properties when
        present, otherwise read from the file on disk; if both fail the
        context is built with an empty source string.

        Args:
            node: Graph node

        Returns:
            Code context
        """
        source_code = ""

        # Try to get source from graph
        if hasattr(node, "properties") and node.properties:
            source_code = node.properties.get("source_code", "")

        # Fallback to file
        if not source_code and node.path:
            source_code = self.semantic_service.get_source_from_file(
                node.path,
                node.start_line or 0,
                node.end_line or 0,
            ) or ""

        return create_code_context(
            source_code=source_code,
            file_path=node.path,
            qualified_name=node.qualified_name,
            entity_type=node.type,
        )

    def _generate_response(
        self,
        query: str,
        contexts: list[CodeContext],
    ) -> str:
        """Generate response using LLM.

        LLM failures are not raised: an error string is returned instead
        so query() always yields a RAGResult.

        Args:
            query: User query
            contexts: Retrieved code contexts

        Returns:
            Generated response
        """
        system_prompt, user_prompt = self.prompts.format_rag_query(
            query=query,
            contexts=contexts,
        )

        try:
            response = self.llm_client.chat_with_messages([
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ])
            return response.content
        except Exception as e:
            logger.error(f"Failed to generate response: {e}")
            return f"Error generating response: {e}"

    def _build_sources(self, contexts: list[CodeContext]) -> list[SourceReference]:
        """Build source references from contexts.

        Contexts missing either a qualified name or a file path are
        skipped.

        Args:
            contexts: Code contexts

        Returns:
            List of source references
        """
        sources = []
        for ctx in contexts:
            if ctx.qualified_name and ctx.file_path:
                sources.append(SourceReference(
                    # Short name = last dotted segment of the qualified name.
                    name=ctx.qualified_name.split(".")[-1],
                    qualified_name=ctx.qualified_name,
                    file_path=ctx.file_path,
                    entity_type=ctx.entity_type,
                ))
        return sources

    def save_result(
        self,
        result: RAGResult,
        output_path: str | Path | None = None,
    ) -> Path:
        """Save RAG result to markdown file.

        When no path is given, a filename is derived from the first 50
        characters of the query (non-alphanumerics replaced with '_')
        under config.output.output_dir, which is created if needed.

        Args:
            result: RAG result to save
            output_path: Output file path (optional)

        Returns:
            Path to saved file
        """
        if output_path is None:
            output_dir = Path(self.config.output.output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)
            safe_query = "".join(c if c.isalnum() else "_" for c in result.query[:50])
            output_path = output_dir / f"rag_result_{safe_query}.md"

        markdown = result.to_markdown(self.markdown_generator)
        return self.markdown_generator.save_document(markdown, output_path)
574
+
575
+
576
def create_rag_engine(
    config: RAGConfig | None = None,
    embedder: BaseEmbedder | None = None,
    vector_store: VectorStore | None = None,
    graph_service: GraphServiceProtocol | None = None,
    llm_client: LLMClient | None = None,
) -> RAGEngine:
    """Factory function to create RAG engine.

    Args:
        config: RAG configuration (from env if not provided)
        embedder: Embedder for semantic search
        vector_store: Vector store for embeddings
        graph_service: Graph service for queries
        llm_client: LLM API client

    Returns:
        Configured RAGEngine

    Raises:
        ValueError: If required dependencies are missing
    """
    if config is None:
        config = RAGConfig.from_env()

    config.validate()

    # Validate all required dependencies up front, before constructing the
    # LLM client or any services. Previously the client (and the semantic
    # search service, with a possibly-None graph_service) were built first,
    # so missing dependencies failed late and after side-effectful setup.
    if embedder is None or vector_store is None:
        raise ValueError(
            "embedder and vector_store are required for semantic search. "
            "Use create_embedder() and create_vector_store() to create them."
        )

    if graph_service is None:
        raise ValueError(
            "graph_service is required. "
            "Use MemgraphIngestor or KuzuIngestor as context manager."
        )

    # Create LLM client if not provided
    if llm_client is None:
        llm_client = create_llm_client(
            api_key=config.moonshot.api_key,
            model=config.moonshot.model,
            base_url=config.moonshot.base_url,
            max_tokens=config.moonshot.max_tokens,
            temperature=config.moonshot.temperature,
        )

    # Create semantic search service (graph_service is guaranteed non-None here)
    semantic_service = create_semantic_search_service(
        embedder=embedder,
        vector_store=vector_store,
        graph_service=graph_service,
    )

    # Create graph query service
    graph_query_service = create_graph_query_service(graph_service)

    return RAGEngine(
        config=config,
        llm_client=llm_client,
        semantic_service=semantic_service,
        graph_service=graph_query_service,
    )
+ )