code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,495 @@
1
+ """Graph query layer for unified access to Kuzu and Memgraph backends.
2
+
3
+ This module provides a unified interface for querying graph data from
4
+ different backends (Memgraph and Kuzu), enabling seamless integration
5
+ with vector store search results.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING, Protocol, runtime_checkable
13
+
14
+ from loguru import logger
15
+
16
+ if TYPE_CHECKING:
17
+ from ..services import IngestorProtocol, QueryProtocol
18
+ from ..types import ResultRow
19
+
20
+
21
@dataclass
class GraphNode:
    """A single node of the code graph, as returned by the query layer.

    Only the first four fields are required; every other field defaults to
    ``None`` when the backend did not supply a value.

    Attributes:
        node_id: Unique node identifier.
        qualified_name: Fully qualified name (e.g., "module.Class.method").
        name: Simple name (e.g., "method").
        type: Node type (Function, Class, Method, Module, etc.).
        path: File path.
        start_line: Start line number in file.
        end_line: End line number in file.
        docstring: Documentation string if available.
        properties: Additional node properties.
    """

    # Required identity fields (positional order is part of the interface).
    node_id: int
    qualified_name: str
    name: str
    type: str
    # Optional source-location and documentation details.
    path: str | None = None
    start_line: int | None = None
    end_line: int | None = None
    docstring: str | None = None
    # Any extra key/value data attached to the node.
    properties: dict | None = None
46
+
47
+
48
@dataclass
class GraphRelationship:
    """A directed edge between two nodes of the code graph.

    Attributes:
        rel_type: Relationship type (CALLS, DEFINES, INHERITS, etc.).
        source_id: Source node ID.
        target_id: Target node ID.
        properties: Relationship properties; ``None`` when not provided.
    """

    # Edge label followed by its two endpoints (source -> target).
    rel_type: str
    source_id: int
    target_id: int
    # Optional key/value data attached to the edge.
    properties: dict | None = None
63
+
64
+
65
@runtime_checkable
class GraphQueryProtocol(Protocol):
    """Structural interface for objects that can answer code-graph queries.

    The protocol is runtime-checkable, so ``isinstance`` can be used to test
    whether an object provides these methods.
    """

    def fetch_nodes_by_ids(self, node_ids: list[int]) -> list[GraphNode]:
        """Return the nodes identified by ``node_ids``."""
        ...

    def fetch_node_by_qualified_name(self, qualified_name: str) -> GraphNode | None:
        """Return the node with the given qualified name, or ``None``."""
        ...

    def fetch_callers(self, function_name: str) -> list[GraphNode]:
        """Return the nodes that call ``function_name``."""
        ...

    def fetch_callees(self, function_name: str) -> list[GraphNode]:
        """Return the nodes called by ``function_name``."""
        ...

    def fetch_related_nodes(
        self, node_id: int, relationship_types: list[str] | None = None
    ) -> list[tuple[GraphNode, str]]:
        """Return ``(node, relationship_type)`` pairs related to ``node_id``."""
        ...

    def execute_cypher(self, query: str, params: dict | None = None) -> list[ResultRow]:
        """Run a raw Cypher query and return its result rows."""
        ...
82
+
83
+
84
class GraphQueryService:
    """Unified service for querying code graph data.

    Wraps a graph service object exposing ``fetch_all(query, params)`` and
    converts the returned rows into ``GraphNode`` objects. All query methods
    are best-effort: a backend failure is logged and an empty result
    (``[]``, ``{}`` or ``None``) is returned instead of raising.

    Example:
        >>> from code_graph_builder.services import MemgraphIngestor
        >>> from code_graph_builder.tools.graph_query import GraphQueryService
        >>>
        >>> with MemgraphIngestor("localhost", 7687) as ingestor:
        ...     query_service = GraphQueryService(ingestor)
        ...     node = query_service.fetch_node_by_qualified_name("myproject.utils.foo")
        ...     callers = query_service.fetch_callers("foo")
    """

    # Row columns that _row_to_node maps onto dedicated GraphNode fields;
    # every other column ends up in GraphNode.properties.
    _CORE_COLUMNS = frozenset(
        {
            "node_id", "id", "qualified_name", "name", "labels", "type",
            "path", "start_line", "end_line", "docstring",
        }
    )
    # Row keys probed, in this order, when looking for a node identifier.
    _ID_KEYS = ("node_id", "id", "n.node_id", "n.id")
    # Aliases under which the queries below return the raw node object.
    _NODE_ALIASES = ("n", "caller", "callee", "related")

    def __init__(self, graph_service: QueryProtocol, backend: str = "memgraph"):
        """Initialize graph query service.

        Args:
            graph_service: Graph service instance (MemgraphIngestor or KuzuIngestor)
            backend: Backend type ("memgraph" or "kuzu"); stored lower-cased.
        """
        self.graph_service = graph_service
        self.backend = backend.lower()

    def fetch_nodes_by_ids(self, node_ids: list[int]) -> list[GraphNode]:
        """Fetch multiple nodes by their IDs.

        Rows whose extracted identifier (see ``_extract_node_id``) is not one
        of the requested IDs are discarded.

        Args:
            node_ids: List of node identifiers

        Returns:
            List of GraphNode objects; empty if ``node_ids`` is empty or the
            query fails.
        """
        if not node_ids:
            return []

        query = self._build_nodes_by_id_query()
        # Set membership keeps the post-filter linear in the number of rows.
        wanted = set(node_ids)

        try:
            rows = self.graph_service.fetch_all(query, {"node_ids": node_ids})
            return [
                self._row_to_node(row)
                for row in rows
                if self._extract_node_id(row) in wanted
            ]
        except Exception as e:
            logger.error(f"Failed to fetch nodes by IDs: {e}")
            return []

    def fetch_node_by_qualified_name(self, qualified_name: str) -> GraphNode | None:
        """Fetch a single node by its qualified name.

        Args:
            qualified_name: Fully qualified name (e.g., "module.Class.method")

        Returns:
            GraphNode if found, None otherwise (including on query failure)
        """
        query = """
            MATCH (n)
            WHERE n.qualified_name = $qualified_name
            RETURN n,
                   n.node_id AS node_id,
                   n.id AS id,
                   n.qualified_name AS qualified_name,
                   n.name AS name,
                   labels(n) AS labels,
                   n.path AS path,
                   n.start_line AS start_line,
                   n.end_line AS end_line,
                   n.docstring AS docstring
            LIMIT 1
        """

        try:
            rows = self.graph_service.fetch_all(query, {"qualified_name": qualified_name})
            if rows:
                return self._row_to_node(rows[0])
        except Exception as e:
            logger.error(f"Failed to fetch node {qualified_name}: {e}")

        return None

    def fetch_callers(self, function_name: str) -> list[GraphNode]:
        """Find all functions that call the given function.

        Args:
            function_name: Function name or qualified name

        Returns:
            List of caller GraphNodes; empty on query failure
        """
        # A single query matches the callee by qualified name or simple name.
        query = """
            MATCH (caller:Function)-[:CALLS]->(callee)
            WHERE callee.qualified_name = $name
               OR callee.name = $name
            RETURN caller,
                   caller.node_id AS node_id,
                   caller.id AS id,
                   caller.qualified_name AS qualified_name,
                   caller.name AS name,
                   labels(caller) AS labels,
                   caller.path AS path,
                   caller.start_line AS start_line,
                   caller.end_line AS end_line
        """

        try:
            rows = self.graph_service.fetch_all(query, {"name": function_name})
            return [self._row_to_node(row) for row in rows]
        except Exception as e:
            logger.error(f"Failed to fetch callers of {function_name}: {e}")
            return []

    def fetch_callees(self, function_name: str) -> list[GraphNode]:
        """Find all functions called by the given function.

        Args:
            function_name: Function name or qualified name

        Returns:
            List of callee GraphNodes; empty on query failure
        """
        query = """
            MATCH (caller:Function)-[:CALLS]->(callee:Function)
            WHERE caller.qualified_name = $name
               OR caller.name = $name
            RETURN callee,
                   callee.node_id AS node_id,
                   callee.id AS id,
                   callee.qualified_name AS qualified_name,
                   callee.name AS name,
                   labels(callee) AS labels,
                   callee.path AS path,
                   callee.start_line AS start_line,
                   callee.end_line AS end_line
        """

        try:
            rows = self.graph_service.fetch_all(query, {"name": function_name})
            return [self._row_to_node(row) for row in rows]
        except Exception as e:
            logger.error(f"Failed to fetch callees of {function_name}: {e}")
            return []

    def fetch_related_nodes(
        self,
        node_id: int,
        relationship_types: list[str] | None = None,
        direction: str = "both",
    ) -> list[tuple[GraphNode, str]]:
        """Fetch nodes related to the given node.

        Args:
            node_id: Node identifier
            relationship_types: Optional filter for relationship types
            direction: Relationship direction ("in", "out", or "both");
                any other value is treated as "both"

        Returns:
            List of (GraphNode, relationship_type) tuples; empty on query
            failure
        """
        if direction == "in":
            pattern = "(related)-[r]->(n)"
        elif direction == "out":
            pattern = "(n)-[r]->(related)"
        else:
            pattern = "(n)-[r]-(related)"

        params: dict = {"node_id": node_id}
        rel_filter = ""
        if relationship_types:
            # Pass the types as a query parameter rather than formatting the
            # Python list into the query text (avoids injection and quoting
            # differences between Python repr and Cypher literals).
            rel_filter = "AND type(r) IN $rel_types"
            params["rel_types"] = list(relationship_types)

        # The ID alternatives are parenthesized so the optional relationship
        # filter applies to all of them; AND binds tighter than OR.
        query = f"""
            MATCH {pattern}
            WHERE (n.node_id = $node_id
               OR n.id = $node_id
               OR id(n) = $node_id)
            {rel_filter}
            RETURN related,
                   related.node_id AS node_id,
                   related.id AS id,
                   related.qualified_name AS qualified_name,
                   related.name AS name,
                   labels(related) AS labels,
                   related.path AS path,
                   related.start_line AS start_line,
                   related.end_line AS end_line,
                   type(r) AS rel_type
        """

        try:
            rows = self.graph_service.fetch_all(query, params)
            return [
                (self._row_to_node(row), str(row.get("rel_type", "UNKNOWN")))
                for row in rows
            ]
        except Exception as e:
            logger.error(f"Failed to fetch related nodes for {node_id}: {e}")
            return []

    def fetch_class_hierarchy(self, class_name: str) -> dict:
        """Fetch class hierarchy information.

        Args:
            class_name: Class name or qualified name

        Returns:
            Dictionary with keys "class", "superclass" and "subclasses" taken
            from the first result row, or an empty dict when nothing matched
            or the query failed
        """
        query = """
            MATCH (c:Class)
            WHERE c.qualified_name = $name OR c.name = $name
            OPTIONAL MATCH (c)-[:INHERITS]->(super:Class)
            OPTIONAL MATCH (sub:Class)-[:INHERITS]->(c)
            RETURN c,
                   super.qualified_name AS superclass,
                   collect(sub.qualified_name) AS subclasses
        """

        try:
            rows = self.graph_service.fetch_all(query, {"name": class_name})
            if rows:
                first = rows[0]
                return {
                    "class": first.get("c"),
                    "superclass": first.get("superclass"),
                    "subclasses": first.get("subclasses", []),
                }
        except Exception as e:
            logger.error(f"Failed to fetch class hierarchy for {class_name}: {e}")

        return {}

    def execute_cypher(self, query: str, params: dict | None = None) -> list[ResultRow]:
        """Execute a raw Cypher query.

        Args:
            query: Cypher query string
            params: Query parameters; ``None`` is sent as an empty dict

        Returns:
            Query results as list of dictionaries; empty on query failure
        """
        try:
            return self.graph_service.fetch_all(query, params or {})
        except Exception as e:
            logger.error(f"Cypher query failed: {e}")
            return []

    def _build_nodes_by_id_query(self) -> str:
        """Build query to fetch nodes by IDs.

        The query matches a node when its ``node_id`` property, its ``id``
        property, or its internal ``id(n)`` is in ``$node_ids``.
        Intended to be compatible with both Memgraph and Kuzu.
        """
        return """
            MATCH (n)
            WHERE n.node_id IN $node_ids
               OR n.id IN $node_ids
               OR id(n) IN $node_ids
            RETURN n,
                   n.node_id AS node_id,
                   n.id AS id,
                   n.qualified_name AS qualified_name,
                   n.name AS name,
                   labels(n) AS labels,
                   n.path AS path,
                   n.start_line AS start_line,
                   n.end_line AS end_line,
                   n.docstring AS docstring
        """

    def _extract_node_id(self, row: ResultRow) -> int:
        """Extract node ID from query result.

        Probes the keys in ``_ID_KEYS`` in order and returns the first value
        that is an int or can be converted with ``int()``. Returns 0 when no
        key yields a usable value.
        """
        for key in self._ID_KEYS:
            if key not in row:
                continue
            val = row[key]
            if isinstance(val, int):
                return val
            try:
                return int(val)
            except (ValueError, TypeError):
                # e.g. a NULL column: fall through to the next candidate key.
                continue
        return 0

    def _extract_type(self, row: ResultRow) -> str:
        """Extract node type from query result.

        Lookup order: the ``labels`` column (first label if it is a list),
        the ``type`` column, then the ``_label``/``labels``/``type`` entries
        of a raw node dict returned under one of ``_NODE_ALIASES``.
        Returns "Unknown" when none of these is present.
        """
        # Try labels first
        labels = row.get("labels")
        if labels:
            if isinstance(labels, list):
                return labels[0]
            return str(labels)

        # Try type property
        node_type = row.get("type")
        if node_type:
            return str(node_type)

        # Extract from node object if available
        node = next(
            (row.get(alias) for alias in self._NODE_ALIASES if row.get(alias)),
            None,
        )
        if node and isinstance(node, dict):
            node_labels = node.get("_label") or node.get("labels")
            if node_labels:
                if isinstance(node_labels, list):
                    return node_labels[0]
                return str(node_labels)
            node_type = node.get("type")
            if node_type:
                return str(node_type)

        return "Unknown"

    def _row_to_node(self, row: ResultRow) -> GraphNode:
        """Convert query result row to GraphNode.

        A missing or NULL ``qualified_name`` becomes ``""``; a missing or
        NULL ``name`` falls back to the last dotted segment of the qualified
        name. Columns not listed in ``_CORE_COLUMNS`` are collected into
        ``properties``.
        """
        node_id = self._extract_node_id(row)

        # The queries always return these columns, so a node without the
        # property yields an explicit None; str(None) would produce the
        # literal text "None", hence the explicit checks.
        raw_qualified_name = row.get("qualified_name")
        qualified_name = "" if raw_qualified_name is None else str(raw_qualified_name)

        raw_name = row.get("name")
        if raw_name is None:
            name = qualified_name.rsplit(".", 1)[-1]
        else:
            name = str(raw_name)

        node_type = self._extract_type(row)

        return GraphNode(
            node_id=node_id,
            qualified_name=qualified_name,
            name=name,
            type=node_type,
            path=str(row.get("path")) if row.get("path") else None,
            start_line=int(row["start_line"]) if row.get("start_line") is not None else None,
            end_line=int(row["end_line"]) if row.get("end_line") is not None else None,
            docstring=str(row["docstring"]) if row.get("docstring") else None,
            properties={
                k: v for k, v in row.items() if k not in self._CORE_COLUMNS
            },
        )
415
+
416
+
417
+ # Convenience factory and helper functions
418
+
419
+
420
def create_graph_query_service(
    graph_service: QueryProtocol,
    backend: str = "memgraph",
) -> GraphQueryService:
    """Create a graph query service for the given graph service.

    The backend is not auto-detected: ``backend`` is forwarded unchanged to
    ``GraphQueryService``, so callers using a non-default backend must pass
    it explicitly.

    Args:
        graph_service: Graph service instance
        backend: Backend type ("memgraph" or "kuzu")

    Returns:
        Configured GraphQueryService
    """
    return GraphQueryService(graph_service, backend)
434
+
435
+
436
def query_nodes_by_vector_results(
    vector_results: list,
    graph_service: QueryProtocol,
) -> list[GraphNode]:
    """Look up the graph nodes referenced by vector search results.

    This is the main integration point between vector store and graph database:
    the ``node_id`` attribute of every vector result is collected and the
    matching nodes are fetched through a ``GraphQueryService`` created with
    its default backend.

    Args:
        vector_results: Results from VectorStore.search_similar(); each item
            must expose a ``node_id`` attribute
        graph_service: Graph service to query

    Returns:
        List of GraphNode objects
    """
    ids_to_fetch = []
    for hit in vector_results:
        ids_to_fetch.append(hit.node_id)
    return GraphQueryService(graph_service).fetch_nodes_by_ids(ids_to_fetch)
454
+
455
+
456
def get_function_with_context(
    qualified_name: str,
    graph_service: QueryProtocol,
    include_callers: bool = True,
    include_callees: bool = True,
) -> dict:
    """Collect a function node together with its callers and callees.

    Args:
        qualified_name: Function qualified name
        graph_service: Graph service
        include_callers: Whether to include calling functions
        include_callees: Whether to include called functions

    Returns:
        Dictionary with the keys "function" (the node, or None when it was
        not found), "callers", "callees" and "related". The "related" list
        is always returned empty by this function.
    """
    query_service = GraphQueryService(graph_service)

    # Look up the function itself first; a falsy lookup result leaves None.
    node = query_service.fetch_node_by_qualified_name(qualified_name)

    context = {
        "function": node if node else None,
        "callers": [],
        "callees": [],
        "related": [],
    }

    # Callers and callees are looked up by the same qualified name, whether
    # or not the function node itself was found.
    if include_callers:
        context["callers"] = query_service.fetch_callers(qualified_name)
    if include_callees:
        context["callees"] = query_service.fetch_callees(qualified_name)

    return context