code_graph_builder-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,347 @@
1
+ #!/usr/bin/env python3
2
+ """Complete example of semantic search with embedding integration.
3
+
4
+ This example demonstrates:
5
+ 1. P0: GraphUpdater with embedding generation
6
+ 2. P1: Semantic search tools
7
+ 3. P2: Graph query layer with Kuzu/Memgraph compatibility
8
+
9
+ Usage:
10
+ # With Kuzu (no Docker required)
11
+ python example_semantic_search_full.py --backend kuzu --repo ./my_repo
12
+
13
+ # With Memgraph (requires Docker)
14
+ python example_semantic_search_full.py --backend memgraph --repo ./my_repo
15
+
16
+ # Search only (skip building)
17
+ python example_semantic_search_full.py --backend kuzu --search "recursive function"
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import sys
24
+ from pathlib import Path
25
+
26
+
27
def setup_environment():
    """Make the ``code_graph_builder`` package importable from a source checkout.

    This script lives at ``code_graph_builder/examples/<script>.py``. For
    ``import code_graph_builder`` to resolve, the directory that *contains*
    the package must be on ``sys.path`` -- three levels above this file --
    rather than the package directory itself.
    """
    # Resolve first so a relative ``__file__`` still yields absolute parents.
    project_root = str(Path(__file__).resolve().parent.parent.parent)
    # Skip the insert when the entry is already present so repeated calls do
    # not pile up duplicates.
    if project_root not in sys.path:
        sys.path.insert(0, project_root)
30
+
31
+
32
def parse_args():
    """Build the example's command-line parser and parse ``sys.argv``.

    Recognised options:
        --backend  "kuzu" (default) or "memgraph"
        --repo     repository path, converted to ``Path`` (default ./test_repo)
        --search   optional search query string
        --clean    flag; true when present

    Returns:
        The ``argparse.Namespace`` holding ``backend``, ``repo``, ``search``
        and ``clean``.
    """
    # Each entry is (flag, keyword arguments for ``add_argument``).
    option_table = (
        (
            "--backend",
            {
                "choices": ["kuzu", "memgraph"],
                "default": "kuzu",
                "help": "Graph database backend",
            },
        ),
        (
            "--repo",
            {
                "type": Path,
                "default": Path("./test_repo"),
                "help": "Path to code repository",
            },
        ),
        (
            "--search",
            {
                "type": str,
                "help": "Search query (skip building if provided)",
            },
        ),
        (
            "--clean",
            {
                "action": "store_true",
                "help": "Clean database before building",
            },
        ),
    )

    cli = argparse.ArgumentParser(description="Semantic search example")
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
58
+
59
+
60
def create_sample_repo(repo_path: Path) -> Path:
    """Populate ``repo_path`` with three small Python modules for the demo.

    If ``repo_path`` already exists it is left untouched. Otherwise the
    directory (and any missing parents) is created and ``math_utils.py``,
    ``string_utils.py`` and ``data_structures.py`` are written into it.

    Args:
        repo_path: Location of the sample repository.

    Returns:
        ``repo_path``, unchanged.
    """
    # File name -> source text, written in this order.
    sample_sources = {
        "math_utils.py": '''
def factorial(n):
    """Calculate factorial recursively."""
    if n <= 1:
        return 1
    return n * factorial(n - 1)

def fibonacci(n):
    """Calculate Fibonacci number recursively."""
    if n <= 1:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)

def add(a, b):
    """Add two numbers."""
    return a + b
''',
        "string_utils.py": '''
def reverse_string(s):
    """Reverse a string."""
    return s[::-1]

def is_palindrome(s):
    """Check if string is palindrome."""
    cleaned = s.lower().replace(" ", "")
    return cleaned == cleaned[::-1]
''',
        "data_structures.py": '''
class Stack:
    """A simple stack implementation."""

    def __init__(self):
        self.items = []

    def push(self, item):
        """Push item onto stack."""
        self.items.append(item)

    def pop(self):
        """Pop item from stack."""
        if not self.items:
            return None
        return self.items.pop()

    def peek(self):
        """View top item without removing."""
        if not self.items:
            return None
        return self.items[-1]


class Queue:
    """A simple queue implementation."""

    def __init__(self):
        self.items = []

    def enqueue(self, item):
        """Add item to queue."""
        self.items.append(item)

    def dequeue(self):
        """Remove and return first item."""
        if not self.items:
            return None
        return self.items.pop(0)
''',
    }

    # Only scaffold the repository when nothing is there yet.
    if not repo_path.exists():
        print(f"Creating sample repository at {repo_path}")
        repo_path.mkdir(parents=True, exist_ok=True)
        for file_name, source_text in sample_sources.items():
            (repo_path / file_name).write_text(source_text)

    return repo_path
140
+
141
+
142
def build_graph_with_embeddings(
    repo_path: Path,
    backend: str,
    clean: bool = False,
) -> tuple:
    """Create the graph service, vector store and embedder used by the example.

    No parsing or ingestion of ``repo_path`` happens here; the function only
    wires up the three components and prints what it selected.

    Args:
        repo_path: Path to repository (only reported in the banner).
        backend: "kuzu" selects ``KuzuIngestor`` on ``./example_graph.db``;
            any other value selects ``MemgraphIngestor`` at localhost:7687.
        clean: When true and ``backend`` is "kuzu", remove an existing
            ``./example_graph.db`` before opening it. Not applied to Memgraph.

    Returns:
        Tuple of (graph_service, vector_store, embedder)
    """
    from code_graph_builder.embeddings import create_embedder, create_vector_store
    from code_graph_builder.embeddings.qwen3_embedder import DummyEmbedder

    # Dimension used when falling back to the offline DummyEmbedder.
    fallback_dimension = 1536

    print(f"\n{'='*60}")
    print(f"Building code graph with {backend} backend")
    print(f"Repository: {repo_path}")
    print(f"{'='*60}\n")

    # Initialize embedding components
    print("Initializing embedding components...")

    # Prefer the real Qwen3 embedder; if it cannot be created, fall back to
    # DummyEmbedder so the example still runs.
    try:
        embedder = create_embedder(backend="qwen3")
        dimension = embedder.get_embedding_dimension()
        print(f" Using Qwen3 embedder (dimension: {dimension})")
    except Exception as e:
        # Deliberately broad: any failure here should degrade to the dummy
        # embedder rather than abort the example.
        print(f" Failed to create Qwen3 embedder: {e}")
        print(" Falling back to DummyEmbedder")
        dimension = fallback_dimension
        embedder = DummyEmbedder(dimension=dimension)

    # Size the store from the embedder actually in use so the two cannot
    # disagree on vector length.
    vector_store = create_vector_store(backend="memory", dimension=dimension)
    print(f" Using MemoryVectorStore (dimension: {dimension})")

    # Initialize graph service based on backend
    if backend == "kuzu":
        from code_graph_builder.services.kuzu_service import KuzuIngestor

        db_path = Path("./example_graph.db")
        if clean and db_path.exists():
            import shutil

            # The database path may be a directory or a single file depending
            # on how it was created; rmtree only handles real directories.
            if db_path.is_dir() and not db_path.is_symlink():
                shutil.rmtree(db_path)
            else:
                db_path.unlink()

        graph_service = KuzuIngestor(db_path)
        print(f" Using Kuzu database at {db_path}")
    else:
        from code_graph_builder.services.graph_service import MemgraphIngestor

        graph_service = MemgraphIngestor("localhost", 7687)
        print(" Using Memgraph at localhost:7687")

    # Note: Full graph building would require parser setup
    # This is simplified for the example
    print("\nNote: Full graph building requires parser setup")
    print("See code_graph_builder/graph_updater.py for complete implementation")

    return graph_service, vector_store, embedder
204
+
205
+
206
def perform_semantic_search(
    query: str,
    graph_service,
    vector_store,
    embedder,
) -> list:
    """Perform semantic code search and print the top results.

    If ``vector_store`` is empty it is first seeded with seven hand-written
    sample entries (embedded via ``embedder.embed_code``) so the search has
    something to match against.

    Args:
        query: Natural language query
        graph_service: Graph database service; only handed to the search
            service when it exposes a ``fetch_all`` attribute.
        vector_store: Vector store instance
        embedder: Embedder instance

    Returns:
        List of search results as returned by ``SemanticSearchService.search``
        with ``top_k=5``.
    """
    from code_graph_builder.tools.semantic_search import SemanticSearchService

    print(f"\n{'='*60}")
    print(f"Semantic Search: '{query}'")
    print(f"{'='*60}\n")

    # Create semantic search service
    search_service = SemanticSearchService(
        embedder=embedder,
        vector_store=vector_store,
        graph_service=graph_service if hasattr(graph_service, 'fetch_all') else None,
    )

    # Add some sample data if vector store is empty
    if len(vector_store) == 0:
        print("Adding sample embeddings to vector store...")
        # In real usage, these would be generated from actual code.
        # The node type is stated explicitly: every qualified name contains a
        # dot, so it cannot be used to tell functions from classes.
        sample_data = [
            (1, "math_utils.factorial", "Function", "Calculate factorial recursively"),
            (2, "math_utils.fibonacci", "Function", "Calculate Fibonacci recursively"),
            (3, "math_utils.add", "Function", "Add two numbers"),
            (4, "string_utils.reverse_string", "Function", "Reverse a string"),
            (5, "string_utils.is_palindrome", "Function", "Check if palindrome"),
            (6, "data_structures.Stack", "Class", "Stack implementation"),
            (7, "data_structures.Queue", "Class", "Queue implementation"),
        ]

        for node_id, qn, node_type, description in sample_data:
            # Generate embedding for description
            embedding = embedder.embed_code(description)
            vector_store.store_embedding(
                node_id=node_id,
                qualified_name=qn,
                embedding=embedding,
                metadata={"type": node_type},
            )
        print(f" Added {len(sample_data)} sample embeddings")

    # Perform search
    print(f"\nSearching for: '{query}'")
    results = search_service.search(query, top_k=5)

    print(f"\nFound {len(results)} results:\n")
    for i, result in enumerate(results, 1):
        print(f"{i}. {result.qualified_name}")
        print(f" Type: {result.type}")
        print(f" Score: {result.score:.3f}")
        if result.docstring:
            print(f" Doc: {result.docstring}")
        print()

    return results
275
+
276
+
277
def demonstrate_graph_query(graph_service, backend: str) -> None:
    """Print a short walkthrough of the graph query layer.

    A ``GraphQueryService`` is constructed for the given backend, but no
    query is executed; the example queries are only described on stdout.

    Args:
        graph_service: Graph database service
        backend: Backend type
    """
    from code_graph_builder.tools.graph_query import GraphQueryService

    rule = "=" * 60
    print("\n" + rule)
    print(f"Graph Query Layer ({backend})")
    print(rule + "\n")

    # Constructed after the header is printed, as the first step of the demo;
    # the instance itself is not used further.
    query_layer = GraphQueryService(graph_service, backend=backend)

    # Each example is described only; running it needs a populated database.
    walkthrough = (
        "Example: Fetch nodes by IDs",
        " Query: nodes [1, 2, 3]",
        " (Requires populated database)",
        "\nExample: Query by qualified name",
        " Query: math_utils.factorial",
        " (Requires populated database)",
    )
    for text in walkthrough:
        print(text)
303
+
304
+
305
def main():
    """Run the example end to end.

    Steps: adjust ``sys.path``, parse the CLI, ensure the sample repository
    exists, create the graph/vector/embedding components, run a semantic
    search when ``--search`` was given (otherwise print a usage tip), and
    finally show the graph query walkthrough.
    """
    setup_environment()
    cli_args = parse_args()

    banner = "=" * 60
    print("\n" + banner)
    print("Semantic Search with Embedding Integration Example")
    print(banner)

    # Create sample repo if needed
    sample_repo = create_sample_repo(cli_args.repo)

    # Build or load graph
    components = build_graph_with_embeddings(
        repo_path=sample_repo,
        backend=cli_args.backend,
        clean=cli_args.clean,
    )
    graph_service, vector_store, embedder = components

    if not cli_args.search:
        # No query supplied: explain how to run a search.
        print("\nTip: Use --search '<query>' to perform semantic search")
        print("Example: python example_semantic_search_full.py --search 'recursive function'")
    else:
        # The graph service is used as a context manager around the search.
        with graph_service:
            perform_semantic_search(
                query=cli_args.search,
                graph_service=graph_service,
                vector_store=vector_store,
                embedder=embedder,
            )

    # Demonstrate graph query layer (entered as a separate context)
    with graph_service:
        demonstrate_graph_query(graph_service, cli_args.backend)

    print("\n" + banner)
    print("Example complete!")
    print(banner)
344
+
345
+
346
# Script entry point: run the example only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()