code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,298 @@
1
+ """OpenAI-compatible LLM client for RAG.
2
+
3
+ This module provides a client for interacting with any OpenAI-compatible LLM API.
4
+ Supported providers include Moonshot (Kimi), OpenAI, DeepSeek, and others.
5
+
6
+ Examples:
7
+ >>> from code_graph_builder.rag.client import LLMClient
8
+ >>> client = LLMClient(api_key="sk-xxxxx")
9
+ >>> response = client.chat("Explain this code", context="def foo(): pass")
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass
15
+ from typing import Any
16
+
17
+ import requests
18
+ from loguru import logger
19
+
20
+
21
@dataclass
class ChatResponse:
    """Parsed result of a single chat-completion call.

    Attributes:
        content: Text generated by the model.
        usage: Token accounting as returned by the API (may be empty).
        model: Name of the model that produced the completion.
        finish_reason: Why generation stopped (e.g. "stop", "length").
    """

    content: str
    usage: dict[str, int]
    model: str
    finish_reason: str
36
+
37
+
38
class LLMClient:
    """Client for OpenAI-compatible LLM API.

    Provides a simple interface for chat completions with any OpenAI-compatible model.

    Args:
        api_key: LLM API key
        model: Model name (default: kimi-k2.5)
        base_url: API base URL
        max_tokens: Maximum tokens for generation
        temperature: Sampling temperature
        timeout: Request timeout in seconds

    Examples:
        >>> client = LLMClient(api_key="sk-xxxxx")
        >>> response = client.chat(
        ...     query="What does this function do?",
        ...     context="def add(a, b): return a + b"
        ... )
        >>> print(response.content)

    Note:
        DEFAULT_MODEL and DEFAULT_BASE_URL default to Moonshot/Kimi but can be
        overridden via constructor arguments or ``create_llm_client()`` auto-detection.
    """

    DEFAULT_MODEL = "kimi-k2.5"
    DEFAULT_BASE_URL = "https://api.moonshot.cn/v1"

    def __init__(
        self,
        api_key: str | None = None,
        model: str = DEFAULT_MODEL,
        base_url: str = DEFAULT_BASE_URL,
        max_tokens: int = 4096,
        temperature: float = 1.0,
        timeout: int = 300,
    ):
        self.api_key = api_key
        self.model = model
        # Normalize so path joins below never produce a double slash.
        self.base_url = base_url.rstrip("/")
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.timeout = timeout

        if not self.api_key:
            raise ValueError(
                "LLM API key is required. "
                "Set one of: LLM_API_KEY, OPENAI_API_KEY, or MOONSHOT_API_KEY "
                "environment variable, or pass api_key directly. "
                "Use create_llm_client() for automatic provider detection."
            )

        logger.info(f"Initialized LLMClient with model: {self.model}")

    def _get_headers(self) -> dict[str, str]:
        """Get API request headers."""
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _build_payload(
        self,
        messages: list[dict[str, str]],
        max_tokens: int | None,
        temperature: float | None,
    ) -> dict[str, Any]:
        """Assemble the chat-completions request body.

        Uses explicit ``is None`` checks so callers may legitimately pass
        ``0`` / ``0.0`` overrides; the previous ``x or default`` pattern
        silently ignored falsy values such as ``temperature=0.0``.
        """
        return {
            "model": self.model,
            "messages": messages,
            "max_tokens": self.max_tokens if max_tokens is None else max_tokens,
            "temperature": self.temperature if temperature is None else temperature,
        }

    def _post_chat(self, payload: dict[str, Any]) -> ChatResponse:
        """POST *payload* to ``/chat/completions`` and parse the first choice.

        Shared by :meth:`chat` and :meth:`chat_with_messages` so both get
        identical HTTP-error / timeout diagnostics.

        Raises:
            RuntimeError: If the request fails, times out, or the API
                returns an error status.
        """
        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self._get_headers(),
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
            data = response.json()

            choice = data["choices"][0]
            return ChatResponse(
                content=choice["message"]["content"],
                usage=data.get("usage", {}),
                model=data.get("model", self.model),
                finish_reason=choice.get("finish_reason", "unknown"),
            )

        except requests.exceptions.HTTPError as e:
            logger.error(f"HTTP error: {e}")
            # Best effort: surface the provider's own error message if the
            # error body is JSON in the OpenAI {"error": {"message": ...}} shape.
            try:
                error_data = e.response.json() if e.response else {}
                error_msg = error_data.get("error", {}).get("message", str(e))
            except Exception:
                error_msg = str(e)
            raise RuntimeError(f"API request failed: {error_msg}") from e

        except requests.exceptions.Timeout as e:
            logger.error("Request timeout")
            raise RuntimeError(f"API request timeout after {self.timeout}s") from e

        except Exception as e:
            logger.error(f"Request failed: {e}")
            raise RuntimeError(f"API request failed: {e}") from e

    def chat(
        self,
        query: str,
        context: str | None = None,
        system_prompt: str | None = None,
        max_tokens: int | None = None,
        temperature: float | None = None,
    ) -> ChatResponse:
        """Send a chat completion request.

        Args:
            query: User query
            context: Optional context to include
            system_prompt: Optional system prompt
            max_tokens: Override max tokens (0 is honored, not ignored)
            temperature: Override temperature (0.0 is honored, not ignored)

        Returns:
            ChatResponse with generated content

        Raises:
            RuntimeError: If API request fails
        """
        messages = []

        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        if context:
            content = f"Context:\n{context}\n\nQuery: {query}"
        else:
            content = query

        messages.append({"role": "user", "content": content})

        return self._post_chat(self._build_payload(messages, max_tokens, temperature))

    def chat_with_messages(
        self,
        messages: list[dict[str, str]],
        max_tokens: int | None = None,
        temperature: float | None = None,
    ) -> ChatResponse:
        """Send a chat completion request with raw messages.

        Args:
            messages: List of message dicts with 'role' and 'content'
            max_tokens: Override max tokens
            temperature: Override temperature

        Returns:
            ChatResponse with generated content

        Raises:
            RuntimeError: If API request fails
        """
        return self._post_chat(self._build_payload(messages, max_tokens, temperature))

    def health_check(self) -> bool:
        """Check if API is accessible.

        Returns:
            True if healthy, False otherwise
        """
        try:
            response = requests.get(
                f"{self.base_url}/models",
                headers=self._get_headers(),
                timeout=10,
            )
            return response.status_code == 200
        except Exception as e:
            logger.error(f"Health check failed: {e}")
            return False
238
+
239
+
240
def create_llm_client(
    api_key: str | None = None,
    model: str | None = None,
    base_url: str | None = None,
    **kwargs: Any,
) -> LLMClient:
    """Factory function to create LLMClient with auto-detection.

    Auto-detects API credentials from environment variables in this priority:

    1. ``LLM_API_KEY`` / ``LLM_BASE_URL`` / ``LLM_MODEL`` (generic, highest)
    2. ``OPENAI_API_KEY`` / ``OPENAI_BASE_URL`` / ``OPENAI_MODEL``
    3. ``MOONSHOT_API_KEY`` / ``MOONSHOT_BASE_URL`` / ``MOONSHOT_MODEL``
       (Moonshot/Kimi; ``LLM_BASE_URL`` is still honored as a fallback)

    This allows any OpenAI-compatible model provider (DeepSeek, OpenAI,
    Moonshot, etc.) to be used seamlessly.

    Args:
        api_key: API key (auto-detected from env if not provided)
        model: Model name (auto-detected from env if not provided)
        base_url: API base URL (auto-detected from env if not provided)
        **kwargs: Additional arguments for LLMClient

    Returns:
        Configured LLMClient
    """
    import os

    # Provider detection order:
    # (key_env, url_envs_in_priority_order, model_env, default_url, default_model)
    _providers = [
        ("LLM_API_KEY", ("LLM_BASE_URL",), "LLM_MODEL",
         "https://api.openai.com/v1", "gpt-4o"),
        ("OPENAI_API_KEY", ("OPENAI_BASE_URL",), "OPENAI_MODEL",
         "https://api.openai.com/v1", "gpt-4o"),
        # Fix: honor MOONSHOT_BASE_URL (as RAGConfig.from_env does) before the
        # generic LLM_BASE_URL, which this entry previously read exclusively.
        ("MOONSHOT_API_KEY", ("MOONSHOT_BASE_URL", "LLM_BASE_URL"), "MOONSHOT_MODEL",
         LLMClient.DEFAULT_BASE_URL, LLMClient.DEFAULT_MODEL),
    ]

    detected_key = api_key or ""
    detected_url = base_url or ""
    detected_model = model or ""

    # Only probe the environment when no explicit key was supplied; an
    # explicit key gives us no way to infer which provider it belongs to.
    if not detected_key:
        for key_env, url_envs, model_env, default_url, default_model in _providers:
            env_key = os.environ.get(key_env, "")
            if not env_key:
                continue
            detected_key = env_key
            if not detected_url:
                detected_url = next(
                    (os.environ[u] for u in url_envs if os.environ.get(u)),
                    default_url,
                )
            detected_model = detected_model or os.environ.get(model_env, default_model)
            logger.info(f"LLMClient: auto-detected provider via {key_env}")
            break

    # Apply defaults for any still-missing values (kept consistent with
    # LLMClient's own class-level defaults instead of duplicated literals).
    detected_model = detected_model or LLMClient.DEFAULT_MODEL
    detected_url = detected_url or LLMClient.DEFAULT_BASE_URL

    return LLMClient(
        api_key=detected_key or None,
        model=detected_model,
        base_url=detected_url,
        **kwargs,
    )
@@ -0,0 +1,239 @@
1
+ """Configuration for RAG module.
2
+
3
+ This module provides configuration classes for RAG components including
4
+ Moonshot API settings, retrieval parameters, and output options.
5
+
6
+ Examples:
7
+ >>> from code_graph_builder.rag.config import RAGConfig
8
+ >>> config = RAGConfig.from_env()
9
+ >>> print(config.moonshot.model)
10
+ kimi-k2.5
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ from dataclasses import dataclass, field
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+
21
+ @dataclass
22
+ class MoonshotConfig:
23
+ """Configuration for Moonshot AI API (Kimi k2.5).
24
+
25
+ Args:
26
+ api_key: Moonshot API key (or from MOONSHOT_API_KEY env var)
27
+ model: Model name (default: kimi-k2.5)
28
+ base_url: API base URL
29
+ max_tokens: Maximum tokens for generation
30
+ temperature: Sampling temperature (0-2)
31
+ timeout: Request timeout in seconds
32
+
33
+ Examples:
34
+ >>> config = MoonshotConfig(api_key="sk-xxxxx")
35
+ >>> config = MoonshotConfig(
36
+ ... api_key="sk-xxxxx",
37
+ ... model="kimi-k2.5",
38
+ ... temperature=0.7
39
+ ... )
40
+ """
41
+
42
+ api_key: str | None = None
43
+ model: str = "kimi-k2.5"
44
+ base_url: str = "https://api.moonshot.cn/v1"
45
+ max_tokens: int = 4096
46
+ temperature: float = 0.7
47
+ timeout: int = 120
48
+
49
+ def __post_init__(self):
50
+ """Load API key from environment if not provided."""
51
+ if self.api_key is None:
52
+ self.api_key = os.getenv("MOONSHOT_API_KEY")
53
+
54
+ def to_dict(self) -> dict[str, Any]:
55
+ """Convert to dictionary."""
56
+ return {
57
+ "api_key": self.api_key,
58
+ "model": self.model,
59
+ "base_url": self.base_url,
60
+ "max_tokens": self.max_tokens,
61
+ "temperature": self.temperature,
62
+ "timeout": self.timeout,
63
+ }
64
+
65
+ def validate(self) -> None:
66
+ """Validate configuration.
67
+
68
+ Raises:
69
+ ValueError: If configuration is invalid
70
+ """
71
+ if not self.api_key:
72
+ raise ValueError(
73
+ "Moonshot API key is required. "
74
+ "Set MOONSHOT_API_KEY environment variable or pass api_key."
75
+ )
76
+ if not self.api_key.startswith("sk-"):
77
+ raise ValueError(
78
+ "Moonshot API key format is invalid. Expected to start with 'sk-'."
79
+ )
80
+ if self.temperature < 0 or self.temperature > 2:
81
+ raise ValueError("Temperature must be between 0 and 2.")
82
+
83
+
84
@dataclass
class RetrievalConfig:
    """Settings controlling how code context is retrieved.

    Args:
        semantic_top_k: Number of semantic search results
        graph_max_depth: Maximum depth for graph traversal
        include_callers: Whether to include calling functions
        include_callees: Whether to include called functions
        include_related: Whether to include related nodes
        max_context_tokens: Maximum tokens for context
        code_chunk_size: Maximum size of code chunks

    Examples:
        >>> config = RetrievalConfig(semantic_top_k=10, include_callers=True)
    """

    semantic_top_k: int = 10
    graph_max_depth: int = 2
    include_callers: bool = True
    include_callees: bool = True
    include_related: bool = True
    max_context_tokens: int = 8000
    code_chunk_size: int = 2000

    def to_dict(self) -> dict[str, Any]:
        """Return the configuration as a plain dictionary."""
        return dict(
            semantic_top_k=self.semantic_top_k,
            graph_max_depth=self.graph_max_depth,
            include_callers=self.include_callers,
            include_callees=self.include_callees,
            include_related=self.include_related,
            max_context_tokens=self.max_context_tokens,
            code_chunk_size=self.code_chunk_size,
        )
120
+
121
+
122
+ @dataclass
123
+ class OutputConfig:
124
+ """Configuration for RAG output.
125
+
126
+ Args:
127
+ format: Output format (markdown, json)
128
+ include_source_links: Whether to include source code links
129
+ include_code_snippets: Whether to include code snippets
130
+ output_dir: Directory for output files
131
+
132
+ Examples:
133
+ >>> config = OutputConfig(format="markdown", include_source_links=True)
134
+ """
135
+
136
+ format: str = "markdown"
137
+ include_source_links: bool = True
138
+ include_code_snippets: bool = True
139
+ output_dir: str | Path = "./rag_output"
140
+
141
+ def __post_init__(self):
142
+ """Normalize output directory path."""
143
+ if isinstance(self.output_dir, str):
144
+ self.output_dir = Path(self.output_dir)
145
+
146
+ def to_dict(self) -> dict[str, Any]:
147
+ """Convert to dictionary."""
148
+ return {
149
+ "format": self.format,
150
+ "include_source_links": self.include_source_links,
151
+ "include_code_snippets": self.include_code_snippets,
152
+ "output_dir": str(self.output_dir),
153
+ }
154
+
155
+
156
@dataclass
class RAGConfig:
    """Top-level configuration for the RAG module.

    Bundles the Moonshot API, retrieval, and output sub-configurations
    plus a verbosity flag.

    Args:
        moonshot: Moonshot API configuration
        retrieval: Retrieval configuration
        output: Output configuration
        verbose: Enable verbose logging

    Examples:
        >>> # From environment variables
        >>> config = RAGConfig.from_env()
        >>>
        >>> # With explicit settings
        >>> config = RAGConfig(
        ...     moonshot=MoonshotConfig(api_key="sk-xxxxx"),
        ...     retrieval=RetrievalConfig(semantic_top_k=15)
        ... )
    """

    moonshot: MoonshotConfig = field(default_factory=MoonshotConfig)
    retrieval: RetrievalConfig = field(default_factory=RetrievalConfig)
    output: OutputConfig = field(default_factory=OutputConfig)
    verbose: bool = False

    @classmethod
    def from_env(cls) -> RAGConfig:
        """Build a configuration from environment variables.

        Environment variables:
            MOONSHOT_API_KEY: Moonshot API key
            MOONSHOT_MODEL: Model name (default: kimi-k2.5)
            MOONSHOT_BASE_URL: API base URL
            RAG_SEMANTIC_TOP_K: Number of semantic search results
            RAG_OUTPUT_FORMAT: Output format
            RAG_VERBOSE: Enable verbose logging

        Returns:
            RAGConfig instance
        """
        env = os.getenv
        return cls(
            moonshot=MoonshotConfig(
                api_key=env("MOONSHOT_API_KEY"),
                model=env("MOONSHOT_MODEL", "kimi-k2.5"),
                base_url=env("MOONSHOT_BASE_URL", "https://api.moonshot.cn/v1"),
            ),
            retrieval=RetrievalConfig(
                semantic_top_k=int(env("RAG_SEMANTIC_TOP_K", "10")),
            ),
            output=OutputConfig(
                format=env("RAG_OUTPUT_FORMAT", "markdown"),
                output_dir=env("RAG_OUTPUT_DIR", "./rag_output"),
            ),
            # Only the exact string "true" (any case) enables verbosity.
            verbose=env("RAG_VERBOSE", "false").lower() == "true",
        )

    def validate(self) -> None:
        """Validate sub-configurations (currently only the Moonshot settings
        define validation rules).

        Raises:
            ValueError: If any configuration is invalid
        """
        self.moonshot.validate()

    def to_dict(self) -> dict[str, Any]:
        """Return a nested plain-dict representation."""
        return {
            "moonshot": self.moonshot.to_dict(),
            "retrieval": self.retrieval.to_dict(),
            "output": self.output.to_dict(),
            "verbose": self.verbose,
        }
@@ -0,0 +1,67 @@
1
+ """Natural language to Cypher query translator.
2
+
3
+ Uses an LLM backend to convert user questions into Cypher queries
4
+ that can be executed against the code knowledge graph.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING, Any
10
+
11
+ from loguru import logger
12
+
13
+ if TYPE_CHECKING:
14
+ from .llm_backend import LLMBackend
15
+
16
# System prompt describing the graph schema for Cypher generation.
# NOTE(review): the node labels, properties, and relationship types below are
# assumed to match the Kùzu schema created by the graph builder — verify
# against the schema definition if either side changes.
_SCHEMA_PROMPT = """\
You are a Cypher query generator for a code knowledge graph stored in Kùzu.

Node labels: Project, Package, Folder, File, Module, Class, Function, Method, \
Interface, Enum, Type, Union, ExternalPackage.

Common properties: qualified_name (PK), name, path, start_line, end_line, \
docstring, return_type, signature, visibility, parameters (STRING[]), kind.

Relationship types: CONTAINS_PACKAGE, CONTAINS_FOLDER, CONTAINS_FILE, \
CONTAINS_MODULE, DEFINES, DEFINES_METHOD, IMPORTS, EXPORTS, EXPORTS_MODULE, \
IMPLEMENTS_MODULE, INHERITS, IMPLEMENTS, OVERRIDES, CALLS, DEPENDS_ON_EXTERNAL.

Rules:
- Output ONLY a single Cypher query, nothing else.
- Do NOT use OPTIONAL MATCH.
- Always LIMIT results to at most 50 unless the user specifies otherwise.
"""
35
+
36
+
37
class CypherGenerator:
    """Translates natural-language questions to Cypher queries using an LLM."""

    def __init__(self, llm: LLMBackend) -> None:
        self._llm = llm

    def generate(self, question: str) -> str:
        """Return a Cypher query string for *question*.

        Raises:
            RuntimeError: If the LLM backend has no API key configured.
        """
        if not self._llm.api_key:
            raise RuntimeError(
                "LLM backend has no API key configured. "
                "Set MOONSHOT_API_KEY to enable query_code_graph."
            )

        # Deterministic generation (temperature 0) with the schema as system prompt.
        reply = self._llm.chat(
            [
                {"role": "system", "content": _SCHEMA_PROMPT},
                {"role": "user", "content": question},
            ],
            temperature=0.0,
        )

        query = reply.strip()
        if query.startswith("```"):
            # Drop every markdown fence line, keeping only the query body.
            kept = [
                line
                for line in query.splitlines()
                if not line.strip().startswith("```")
            ]
            query = "\n".join(kept).strip()

        logger.debug(f"Generated Cypher: {query}")
        return query