code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,340 @@
1
+ """Configuration for Code Graph Builder.
2
+
3
+ This module provides configuration classes for different backends and
4
+ scanning options.
5
+
6
+ Examples:
7
+ >>> from code_graph_builder import CodeGraphBuilder
8
+ >>> from code_graph_builder.config import KuzuConfig, ScanConfig
9
+ >>>
10
+ >>> # Method 1: Using config objects
11
+ >>> backend_config = KuzuConfig(db_path="./my_graph.db", batch_size=1000)
12
+ >>> scan_config = ScanConfig(exclude_patterns={"tests", "docs"})
13
+ >>>
14
+ >>> builder = CodeGraphBuilder(
15
+ ... repo_path="/path/to/repo",
16
+ ... backend="kuzu",
17
+ ... backend_config=backend_config,
18
+ ... scan_config=scan_config
19
+ ... )
20
+ >>>
21
+ >>> # Method 2: Using dict (simpler)
22
+ >>> builder = CodeGraphBuilder(
23
+ ... repo_path="/path/to/repo",
24
+ ... backend="kuzu",
25
+ ... backend_config={"db_path": "./graph.db"},
26
+ ... scan_config={"exclude_patterns": {"tests"}}
27
+ ... )
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ from dataclasses import dataclass, field
33
+ from pathlib import Path
34
+ from typing import Any
35
+
36
+
37
@dataclass
class KuzuConfig:
    """Settings for the embedded Kùzu database backend.

    Attributes:
        db_path: Location where the Kùzu database files are stored.
        batch_size: How many nodes/relationships are batched before a write.
        read_only: Whether the database is opened in read-only mode.

    Examples:
        >>> config = KuzuConfig(db_path="./graph.db")
        >>> config = KuzuConfig(db_path="/data/graphs/myproj.db", batch_size=5000)
    """

    db_path: str | Path = "./code_graph.db"
    batch_size: int = 1000
    read_only: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict, with ``db_path`` rendered as ``str``."""
        serialized: dict[str, Any] = {"db_path": str(self.db_path)}
        serialized["batch_size"] = self.batch_size
        serialized["read_only"] = self.read_only
        return serialized
61
+
62
+
63
@dataclass
class MemgraphConfig:
    """Connection settings for the Memgraph database backend.

    Attributes:
        host: Hostname or address of the Memgraph server.
        port: Port the Memgraph server listens on.
        username: Optional user name for authentication.
        password: Optional password for authentication.
        batch_size: How many nodes/relationships are batched before a write.

    Examples:
        >>> config = MemgraphConfig(host="localhost", port=7687)
        >>> config = MemgraphConfig(
        ...     host="192.168.1.100",
        ...     port=7687,
        ...     username="user",
        ...     password="pass"
        ... )
    """

    host: str = "localhost"
    port: int = 7687
    username: str | None = None
    password: str | None = None
    batch_size: int = 1000

    def to_dict(self) -> dict[str, Any]:
        """Return every field, unchanged, as a plain dict."""
        return dict(
            host=self.host,
            port=self.port,
            username=self.username,
            password=self.password,
            batch_size=self.batch_size,
        )
98
+
99
+
100
@dataclass
class MemoryConfig:
    """Settings for the in-memory backend.

    Useful for testing and one-off analysis.

    Attributes:
        auto_save: Whether to auto-save to JSON on exit.
        save_path: Path to save JSON when ``auto_save`` is True.

    Examples:
        >>> config = MemoryConfig()
        >>> config = MemoryConfig(auto_save=True, save_path="./output.json")
    """

    auto_save: bool = False
    save_path: str | Path | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict.

        ``save_path`` is rendered as ``str``; any falsy value (``None`` or an
        empty string) is reported as ``None``.
        """
        if self.save_path:
            target = str(self.save_path)
        else:
            target = None
        return {"auto_save": self.auto_save, "save_path": target}
124
+
125
+
126
@dataclass
class ScanConfig:
    """Configuration for repository scanning.

    Controls what files are included/excluded from analysis.

    Args:
        exclude_patterns: Set of patterns to exclude (directories or file patterns)
        unignore_paths: Set of paths to unignore (override default ignores)
        include_languages: Set of languages to include (None = all supported)
        max_file_size: Maximum file size in bytes to process (None = no limit)
        follow_symlinks: Whether to follow symbolic links

    Examples:
        >>> # Exclude tests and documentation
        >>> config = ScanConfig(exclude_patterns={"tests", "docs", "*.md"})
        >>>
        >>> # Only scan Python files
        >>> config = ScanConfig(
        ...     exclude_patterns={"tests"},
        ...     include_languages={"python"}
        ... )
    """

    exclude_patterns: set[str] = field(default_factory=set)
    unignore_paths: set[str] = field(default_factory=set)
    include_languages: set[str] | None = None
    max_file_size: int | None = None  # bytes
    follow_symlinks: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary with deterministic list ordering.

        The set-valued fields are emitted as sorted lists. Iterating a set of
        strings directly yields an order that can differ between interpreter
        runs (string hash randomization), which makes serialized configs
        diff spuriously.

        Returns:
            A dict with one entry per field. ``include_languages`` is ``None``
            when the field is ``None`` or an empty set.
        """
        # key=str keeps sorting well-defined even if a caller mixed in
        # non-string entries such as Path objects.
        return {
            "exclude_patterns": sorted(self.exclude_patterns, key=str),
            "unignore_paths": sorted(self.unignore_paths, key=str),
            "include_languages": (
                sorted(self.include_languages, key=str)
                if self.include_languages
                else None
            ),
            "max_file_size": self.max_file_size,
            "follow_symlinks": self.follow_symlinks,
        }
164
+
165
+
166
@dataclass
class OutputConfig:
    """Settings describing which outputs are produced and where they go.

    Attributes:
        output_dir: Directory to save output files.
        export_json: Whether to export the graph to JSON.
        json_filename: Name of the JSON export file.
        export_statistics: Whether to export statistics.
        statistics_filename: Name of the statistics file.
        save_call_graph: Whether to save call relationships separately.
        call_graph_filename: File name used for the call graph output.
        save_functions_list: Flag for the functions list output
            (presumably toggles writing ``functions_filename`` -- confirm
            against the consumer of this config).
        functions_filename: File name used for the functions list output.
        verbose: Enable verbose logging.

    Examples:
        >>> config = OutputConfig(output_dir="./analysis_output")
        >>> config = OutputConfig(
        ...     output_dir="./output",
        ...     export_json=True,
        ...     json_filename="my_graph.json",
        ...     verbose=True
        ... )
    """

    output_dir: str | Path = "./code_graph_output"
    export_json: bool = True
    json_filename: str = "graph.json"
    export_statistics: bool = True
    statistics_filename: str = "statistics.json"
    save_call_graph: bool = True
    call_graph_filename: str = "call_graph.json"
    save_functions_list: bool = True
    functions_filename: str = "functions.txt"
    verbose: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict, with ``output_dir`` rendered as ``str``."""
        # Fields copied verbatim, listed in the order they appear in the result.
        verbatim_fields = (
            "export_json",
            "json_filename",
            "export_statistics",
            "statistics_filename",
            "save_call_graph",
            "call_graph_filename",
            "save_functions_list",
            "functions_filename",
            "verbose",
        )
        serialized: dict[str, Any] = {"output_dir": str(self.output_dir)}
        for field_name in verbatim_fields:
            serialized[field_name] = getattr(self, field_name)
        return serialized
215
+
216
+
217
@dataclass
class EmbeddingConfig:
    """Settings for API-based semantic embedding generation.

    Covers the Qwen3 embedding model settings (via Alibaba Cloud Bailian API)
    and the vector store backend.

    Attributes:
        enabled: Whether to enable embedding generation.
        api_key: DashScope API key. The original documentation mentions a
            ``DASHSCOPE_API_KEY`` environment variable as an alternative;
            that lookup is not performed by this class.
        model: API model name (defaults to ``text-embedding-v4``).
        base_url: API base URL. The field defaults to ``None``; the original
            documentation names https://dashscope.aliyuncs.com/api/v1 as the
            default endpoint, presumably applied by the consumer of this
            config -- confirm there.
        batch_size: Batch size for embedding generation (max 25 for API).
        max_retries: Maximum retries for failed API requests.
        vector_store_backend: Vector store backend ("memory" or "qdrant").
        vector_store_path: Path for vector store (for qdrant local mode).
        vector_dimension: Embedding dimension (defaults to 1536).

    Examples:
        >>> config = EmbeddingConfig(enabled=True)
        >>> config = EmbeddingConfig(
        ...     enabled=True,
        ...     api_key="sk-xxxxx",
        ...     batch_size=25
        ... )
    """

    enabled: bool = False
    api_key: str | None = None
    model: str = "text-embedding-v4"
    base_url: str | None = None
    batch_size: int = 25  # API limit
    max_retries: int = 3
    vector_store_backend: str = "memory"
    vector_store_path: str | Path | None = None
    vector_dimension: int = 1536  # text-embedding-v4 dimension

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict.

        ``vector_store_path`` is rendered as ``str``; any falsy value is
        reported as ``None``. Note that ``api_key`` is included as-is.
        """
        store_path = self.vector_store_path
        if store_path:
            store_path_text = str(store_path)
        else:
            store_path_text = None
        return dict(
            enabled=self.enabled,
            api_key=self.api_key,
            model=self.model,
            base_url=self.base_url,
            batch_size=self.batch_size,
            max_retries=self.max_retries,
            vector_store_backend=self.vector_store_backend,
            vector_store_path=store_path_text,
            vector_dimension=self.vector_dimension,
        )
266
+
267
+
268
# Type alias for backend configs: one of the three backend dataclasses, or a
# plain dict.
# NOTE: this union is evaluated when the module is imported (the lazy
# annotations future import does not apply to assignments), so the `X | Y`
# syntax between classes requires Python 3.10+.
BackendConfig = KuzuConfig | MemgraphConfig | MemoryConfig | dict[str, Any]


# Type alias for all config types
# NOTE(review): ScanConfig and OutputConfig are not members of this union --
# confirm whether that omission is intentional.
GraphBuilderConfig = KuzuConfig | MemgraphConfig | MemoryConfig | EmbeddingConfig | dict[str, Any]
274
+
275
+
276
class ConfigValidator:
    """Normalize backend configuration into plain dictionaries.

    Each backend has a fixed set of recognized keys. Keys missing from the
    input are filled from the matching config dataclass defaults, and keys
    that are not recognized are dropped. Values are not type- or
    range-checked.
    """

    @staticmethod
    def validate_backend_config(backend: str, config: BackendConfig | None) -> dict[str, Any]:
        """Validate and convert backend config to dict.

        Args:
            backend: Backend type ("kuzu", "memgraph", "memory")
            config: Configuration object or dict. ``None`` means "use all
                defaults". Any object exposing ``to_dict()`` is converted
                through that method.

        Returns:
            Configuration dictionary containing exactly the keys recognized
            for ``backend``. Path values are returned as strings.

        Raises:
            ValueError: If ``backend`` is unknown, or ``config`` is neither
                ``None``, a dict, nor an object with ``to_dict()``.
        """
        # Convert dataclass to dict
        if hasattr(config, "to_dict"):
            config = config.to_dict()
        elif config is None:
            config = {}
        elif not isinstance(config, dict):
            raise ValueError(f"Config must be a dict or dataclass, got {type(config)}")

        # Validate based on backend type
        if backend == "kuzu":
            return ConfigValidator._validate_kuzu_config(config)
        elif backend == "memgraph":
            return ConfigValidator._validate_memgraph_config(config)
        elif backend == "memory":
            return ConfigValidator._validate_memory_config(config)
        else:
            raise ValueError(f"Unknown backend: {backend}. Use 'kuzu', 'memgraph', or 'memory'")

    @staticmethod
    def _path_to_str(value: Any) -> Any:
        """Return ``str(value)`` for ``Path`` instances; pass anything else through.

        The config dataclasses already stringify their paths in ``to_dict()``;
        applying the same conversion to dict input keeps the result shape
        identical for both input forms.
        """
        return str(value) if isinstance(value, Path) else value

    @staticmethod
    def _validate_kuzu_config(config: dict[str, Any]) -> dict[str, Any]:
        """Pick the Kùzu keys from ``config``, defaulting from ``KuzuConfig``."""
        defaults = KuzuConfig()
        return {
            "db_path": ConfigValidator._path_to_str(config.get("db_path", defaults.db_path)),
            "batch_size": config.get("batch_size", defaults.batch_size),
            "read_only": config.get("read_only", defaults.read_only),
        }

    @staticmethod
    def _validate_memgraph_config(config: dict[str, Any]) -> dict[str, Any]:
        """Pick the Memgraph keys from ``config``, defaulting from ``MemgraphConfig``."""
        defaults = MemgraphConfig()
        return {
            "host": config.get("host", defaults.host),
            "port": config.get("port", defaults.port),
            "username": config.get("username", defaults.username),
            "password": config.get("password", defaults.password),
            "batch_size": config.get("batch_size", defaults.batch_size),
        }

    @staticmethod
    def _validate_memory_config(config: dict[str, Any]) -> dict[str, Any]:
        """Pick the in-memory keys from ``config``, defaulting from ``MemoryConfig``."""
        defaults = MemoryConfig()
        return {
            "auto_save": config.get("auto_save", defaults.auto_save),
            "save_path": ConfigValidator._path_to_str(config.get("save_path", defaults.save_path)),
        }