code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
"""Configuration for Code Graph Builder.
|
|
2
|
+
|
|
3
|
+
This module provides configuration classes for different backends and
|
|
4
|
+
scanning options.
|
|
5
|
+
|
|
6
|
+
Examples:
|
|
7
|
+
>>> from code_graph_builder import CodeGraphBuilder
|
|
8
|
+
>>> from code_graph_builder.config import KuzuConfig, ScanConfig
|
|
9
|
+
>>>
|
|
10
|
+
>>> # Method 1: Using config objects
|
|
11
|
+
>>> backend_config = KuzuConfig(db_path="./my_graph.db", batch_size=1000)
|
|
12
|
+
>>> scan_config = ScanConfig(exclude_patterns={"tests", "docs"})
|
|
13
|
+
>>>
|
|
14
|
+
>>> builder = CodeGraphBuilder(
|
|
15
|
+
... repo_path="/path/to/repo",
|
|
16
|
+
... backend="kuzu",
|
|
17
|
+
... backend_config=backend_config,
|
|
18
|
+
... scan_config=scan_config
|
|
19
|
+
... )
|
|
20
|
+
>>>
|
|
21
|
+
>>> # Method 2: Using dict (simpler)
|
|
22
|
+
>>> builder = CodeGraphBuilder(
|
|
23
|
+
... repo_path="/path/to/repo",
|
|
24
|
+
... backend="kuzu",
|
|
25
|
+
... backend_config={"db_path": "./graph.db"},
|
|
26
|
+
... scan_config={"exclude_patterns": {"tests"}}
|
|
27
|
+
... )
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
from dataclasses import dataclass, field
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import Any
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class KuzuConfig:
    """Settings for the embedded Kùzu database backend.

    Attributes:
        db_path: Where the Kùzu database files are stored.
        batch_size: Number of nodes/relationships to batch before writing.
        read_only: Open the database in read-only mode.

    Examples:
        >>> config = KuzuConfig(db_path="./graph.db")
        >>> config = KuzuConfig(db_path="/data/graphs/myproj.db", batch_size=5000)
    """

    db_path: str | Path = "./code_graph.db"
    batch_size: int = 1000
    read_only: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict.

        ``db_path`` is always rendered as a string, even when it was
        supplied as a ``Path``.
        """
        path_text = str(self.db_path)
        return dict(
            db_path=path_text,
            batch_size=self.batch_size,
            read_only=self.read_only,
        )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class MemgraphConfig:
    """Connection settings for the Memgraph database backend.

    Attributes:
        host: Memgraph server host.
        port: Memgraph server port.
        username: Authentication username, or None when not supplied.
        password: Authentication password, or None when not supplied.
        batch_size: Number of nodes/relationships to batch before writing.

    Examples:
        >>> config = MemgraphConfig(host="localhost", port=7687)
        >>> config = MemgraphConfig(
        ...     host="192.168.1.100",
        ...     port=7687,
        ...     username="user",
        ...     password="pass"
        ... )
    """

    host: str = "localhost"
    port: int = 7687
    username: str | None = None
    password: str | None = None
    batch_size: int = 1000

    def to_dict(self) -> dict[str, Any]:
        """Return every setting, unmodified, keyed by field name."""
        # Explicit tuple keeps the output key order fixed.
        exported = ("host", "port", "username", "password", "batch_size")
        return {key: getattr(self, key) for key in exported}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass
class MemoryConfig:
    """Settings for the in-memory backend.

    Useful for testing and one-off analysis.

    Attributes:
        auto_save: Whether to auto-save to JSON on exit.
        save_path: Path to save JSON when auto_save is True.

    Examples:
        >>> config = MemoryConfig()
        >>> config = MemoryConfig(auto_save=True, save_path="./output.json")
    """

    auto_save: bool = False
    save_path: str | Path | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict.

        A falsy ``save_path`` (None or an empty string) is reported as None;
        anything else is converted to a string.
        """
        target = self.save_path
        if target:
            rendered = str(target)
        else:
            rendered = None
        return dict(auto_save=self.auto_save, save_path=rendered)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
class ScanConfig:
    """Configuration for repository scanning.

    Controls what files are included/excluded from analysis.

    Args:
        exclude_patterns: Set of patterns to exclude (directories or file patterns)
        unignore_paths: Set of paths to unignore (override default ignores)
        include_languages: Set of languages to include (None = all supported)
        max_file_size: Maximum file size in bytes to process (None = no limit)
        follow_symlinks: Whether to follow symbolic links

    Examples:
        >>> # Exclude tests and documentation
        >>> config = ScanConfig(exclude_patterns={"tests", "docs", "*.md"})
        >>>
        >>> # Only scan Python files
        >>> config = ScanConfig(
        ...     exclude_patterns={"tests"},
        ...     include_languages={"python"}
        ... )
    """

    exclude_patterns: set[str] = field(default_factory=set)
    unignore_paths: set[str] = field(default_factory=set)
    include_languages: set[str] | None = None
    max_file_size: int | None = None  # bytes
    follow_symlinks: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary with deterministic list ordering.

        Returns:
            A dict keyed by field name. The set-valued fields are emitted as
            sorted lists; ``include_languages`` stays None only when it was
            None on the instance.
        """
        # Set iteration order for strings can differ between interpreter runs
        # (hash randomisation), so sort to keep the serialised form stable.
        languages = self.include_languages
        return {
            "exclude_patterns": sorted(self.exclude_patterns),
            "unignore_paths": sorted(self.unignore_paths),
            # Compare against None explicitly: an empty set must not collapse
            # into None, which is documented as "all supported".
            "include_languages": sorted(languages) if languages is not None else None,
            "max_file_size": self.max_file_size,
            "follow_symlinks": self.follow_symlinks,
        }
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@dataclass
class OutputConfig:
    """Settings that decide which output files are produced and where.

    Attributes:
        output_dir: Directory to save output files.
        export_json: Whether to export the graph to JSON.
        json_filename: Name of the JSON export file.
        export_statistics: Whether to export statistics.
        statistics_filename: Name of the statistics file.
        save_call_graph: Whether to save call relationships separately.
        call_graph_filename: File name for the call-graph output.
        save_functions_list: Whether to save the functions list.
        functions_filename: File name for the functions list.
        verbose: Enable verbose logging.

    Examples:
        >>> config = OutputConfig(output_dir="./analysis_output")
        >>> config = OutputConfig(
        ...     output_dir="./output",
        ...     export_json=True,
        ...     json_filename="my_graph.json",
        ...     verbose=True
        ... )
    """

    output_dir: str | Path = "./code_graph_output"
    export_json: bool = True
    json_filename: str = "graph.json"
    export_statistics: bool = True
    statistics_filename: str = "statistics.json"
    save_call_graph: bool = True
    call_graph_filename: str = "call_graph.json"
    save_functions_list: bool = True
    functions_filename: str = "functions.txt"
    verbose: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict, with ``output_dir`` as a string."""
        # output_dir is the only field needing conversion; it goes first so
        # the key order matches the field declaration order.
        payload: dict[str, Any] = {"output_dir": str(self.output_dir)}
        passthrough = (
            "export_json",
            "json_filename",
            "export_statistics",
            "statistics_filename",
            "save_call_graph",
            "call_graph_filename",
            "save_functions_list",
            "functions_filename",
            "verbose",
        )
        for key in passthrough:
            payload[key] = getattr(self, key)
        return payload
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@dataclass
class EmbeddingConfig:
    """Settings for API-based semantic embedding generation.

    Covers the Qwen3 embedding model (via Alibaba Cloud Bailian API) and the
    vector store backend.

    Attributes:
        enabled: Whether to enable embedding generation.
        api_key: DashScope API key (or set DASHSCOPE_API_KEY env var).
        model: API model name (default: text-embedding-v4 for Qwen3).
        base_url: API base URL. The field itself defaults to None; the
            service URL https://dashscope.aliyuncs.com/api/v1 is presumably
            applied by the consumer of this config (TODO confirm).
        batch_size: Batch size for embedding generation (max 25 for API).
        max_retries: Maximum retries for failed API requests.
        vector_store_backend: Vector store backend ("memory" or "qdrant").
        vector_store_path: Path for vector store (for qdrant local mode).
        vector_dimension: Embedding dimension (1536 for text-embedding-v4).

    Examples:
        >>> config = EmbeddingConfig(enabled=True)
        >>> config = EmbeddingConfig(
        ...     enabled=True,
        ...     api_key="sk-xxxxx",
        ...     batch_size=25
        ... )
    """

    enabled: bool = False
    api_key: str | None = None
    model: str = "text-embedding-v4"
    base_url: str | None = None
    batch_size: int = 25  # API limit
    max_retries: int = 3
    vector_store_backend: str = "memory"
    vector_store_path: str | Path | None = None
    vector_dimension: int = 1536  # text-embedding-v4 dimension

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict.

        ``vector_store_path`` is stringified when set and reported as None
        when falsy. Note that ``api_key`` is included verbatim, so the
        result should be treated as sensitive.
        """
        leading = (
            "enabled",
            "api_key",
            "model",
            "base_url",
            "batch_size",
            "max_retries",
            "vector_store_backend",
        )
        exported: dict[str, Any] = {key: getattr(self, key) for key in leading}

        store_path = self.vector_store_path
        if store_path:
            exported["vector_store_path"] = str(store_path)
        else:
            exported["vector_store_path"] = None

        exported["vector_dimension"] = self.vector_dimension
        return exported
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# Type alias for backend configs: any of the three backend dataclasses, or a
# plain dict.
BackendConfig = KuzuConfig | MemgraphConfig | MemoryConfig | dict[str, Any]


# Type alias covering the backend configs plus EmbeddingConfig (or a plain
# dict).
# NOTE(review): ScanConfig and OutputConfig are not part of this union —
# confirm whether that omission is intentional.
GraphBuilderConfig = KuzuConfig | MemgraphConfig | MemoryConfig | EmbeddingConfig | dict[str, Any]
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class ConfigValidator:
    """Normalises backend configuration into plain dictionaries."""

    @staticmethod
    def validate_backend_config(backend: str, config: BackendConfig | None) -> dict[str, Any]:
        """Turn a backend config into a dict filled with that backend's defaults.

        Args:
            backend: Backend type ("kuzu", "memgraph", "memory").
            config: An object exposing ``to_dict()``, a dict, or None.

        Returns:
            A dictionary holding exactly the keys known to the chosen backend;
            keys missing from ``config`` take the backend's default value.

        Raises:
            ValueError: If ``config`` is neither None, a dict, nor an object
                with ``to_dict``, or if ``backend`` is not recognised.
        """
        # Step 1: reduce whatever was passed in to a mapping.
        if hasattr(config, "to_dict"):
            settings = config.to_dict()
        elif config is None:
            settings = {}
        elif isinstance(config, dict):
            settings = config
        else:
            raise ValueError(f"Config must be a dict or dataclass, got {type(config)}")

        # Step 2: hand the mapping to the validator registered for the backend.
        handlers = (
            ("kuzu", ConfigValidator._validate_kuzu_config),
            ("memgraph", ConfigValidator._validate_memgraph_config),
            ("memory", ConfigValidator._validate_memory_config),
        )
        for name, handler in handlers:
            if backend == name:
                return handler(settings)

        raise ValueError(f"Unknown backend: {backend}. Use 'kuzu', 'memgraph', or 'memory'")

    @staticmethod
    def _validate_kuzu_config(config: dict[str, Any]) -> dict[str, Any]:
        """Pick the Kùzu keys from ``config``, falling back to KuzuConfig defaults."""
        fallback = KuzuConfig()
        result: dict[str, Any] = {}
        for key in ("db_path", "batch_size", "read_only"):
            result[key] = config.get(key, getattr(fallback, key))
        return result

    @staticmethod
    def _validate_memgraph_config(config: dict[str, Any]) -> dict[str, Any]:
        """Pick the Memgraph keys from ``config``, falling back to MemgraphConfig defaults."""
        fallback = MemgraphConfig()
        result: dict[str, Any] = {}
        for key in ("host", "port", "username", "password", "batch_size"):
            result[key] = config.get(key, getattr(fallback, key))
        return result

    @staticmethod
    def _validate_memory_config(config: dict[str, Any]) -> dict[str, Any]:
        """Pick the in-memory keys from ``config``, falling back to MemoryConfig defaults."""
        fallback = MemoryConfig()
        result: dict[str, Any] = {}
        for key in ("auto_save", "save_path"):
            result[key] = config.get(key, getattr(fallback, key))
        return result
|