code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Complete example of semantic search with embedding integration.
|
|
3
|
+
|
|
4
|
+
This example demonstrates:
|
|
5
|
+
1. P0: GraphUpdater with embedding generation
|
|
6
|
+
2. P1: Semantic search tools
|
|
7
|
+
3. P2: Graph query layer with Kuzu/Memgraph compatibility
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
# With Kuzu (no Docker required)
|
|
11
|
+
python example_semantic_search_full.py --backend kuzu --repo ./my_repo
|
|
12
|
+
|
|
13
|
+
# With Memgraph (requires Docker)
|
|
14
|
+
python example_semantic_search_full.py --backend memgraph --repo ./my_repo
|
|
15
|
+
|
|
16
|
+
# Run a semantic search (backend and embedding setup still run first)
|
|
17
|
+
python example_semantic_search_full.py --backend kuzu --search "recursive function"
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import sys
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def setup_environment():
    """Make the ``code_graph_builder`` package importable from a source checkout.

    This script is located in ``code_graph_builder/examples/``, so the
    directory that has to be on ``sys.path`` is the one *containing* the
    ``code_graph_builder`` package, i.e. three levels above this file.
    Adding the package directory itself would not make
    ``import code_graph_builder`` work and would expose the package's own
    modules (``types.py``, ``config.py``, ...) as top-level modules.

    The entry is inserted at most once, so calling this repeatedly is safe.
    """
    # .parent chains never raise, even for very shallow paths.
    project_root = str(Path(__file__).resolve().parent.parent.parent)
    if project_root not in sys.path:
        sys.path.insert(0, project_root)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parse_args(argv=None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) ``argparse`` reads ``sys.argv[1:]``, which is what the
            script entry point relies on. Passing an explicit list allows
            the parser to be driven programmatically.

    Returns:
        Namespace with ``backend`` ("kuzu" or "memgraph"), ``repo``
        (``Path``), ``search`` (``str`` or ``None``) and ``clean`` (``bool``).
    """
    parser = argparse.ArgumentParser(description="Semantic search example")
    parser.add_argument(
        "--backend",
        choices=["kuzu", "memgraph"],
        default="kuzu",
        help="Graph database backend",
    )
    parser.add_argument(
        "--repo",
        type=Path,
        default=Path("./test_repo"),
        help="Path to code repository",
    )
    parser.add_argument(
        "--search",
        type=str,
        help="Search query (skip building if provided)",
    )
    parser.add_argument(
        "--clean",
        action="store_true",
        help="Clean database before building",
    )
    return parser.parse_args(argv)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def create_sample_repo(repo_path: Path) -> Path:
    """Create a small sample repository for the demo if none exists.

    If ``repo_path`` already exists it is returned untouched (nothing is
    written into it). Otherwise the directory is created and populated with
    three small Python modules: ``math_utils.py``, ``string_utils.py`` and
    ``data_structures.py``.

    Args:
        repo_path: Location of the repository to use or create.

    Returns:
        ``repo_path``, unchanged.
    """
    if repo_path.exists():
        return repo_path

    print(f"Creating sample repository at {repo_path}")
    repo_path.mkdir(parents=True, exist_ok=True)

    # File name -> source text. The sources must be valid, properly
    # indented Python so that a parser can actually process them.
    sample_files = {
        "math_utils.py": '''
def factorial(n):
    """Calculate factorial recursively."""
    if n <= 1:
        return 1
    return n * factorial(n - 1)

def fibonacci(n):
    """Calculate Fibonacci number recursively."""
    if n <= 1:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)

def add(a, b):
    """Add two numbers."""
    return a + b
''',
        "string_utils.py": '''
def reverse_string(s):
    """Reverse a string."""
    return s[::-1]

def is_palindrome(s):
    """Check if string is palindrome."""
    cleaned = s.lower().replace(" ", "")
    return cleaned == cleaned[::-1]
''',
        "data_structures.py": '''
class Stack:
    """A simple stack implementation."""

    def __init__(self):
        self.items = []

    def push(self, item):
        """Push item onto stack."""
        self.items.append(item)

    def pop(self):
        """Pop item from stack."""
        if not self.items:
            return None
        return self.items.pop()

    def peek(self):
        """View top item without removing."""
        if not self.items:
            return None
        return self.items[-1]


class Queue:
    """A simple queue implementation."""

    def __init__(self):
        self.items = []

    def enqueue(self, item):
        """Add item to queue."""
        self.items.append(item)

    def dequeue(self):
        """Remove and return first item."""
        if not self.items:
            return None
        return self.items.pop(0)
''',
    }

    for file_name, source in sample_files.items():
        # Explicit encoding keeps the output independent of the platform
        # default locale.
        (repo_path / file_name).write_text(source, encoding="utf-8")

    return repo_path
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def build_graph_with_embeddings(
    repo_path: Path,
    backend: str,
    clean: bool = False,
) -> tuple:
    """Set up the embedder, vector store and graph service for the demo.

    No parsing or graph ingestion happens here; the function only creates
    the components and prints what was selected.

    Args:
        repo_path: Path to repository (only reported in the banner here).
        backend: "kuzu" selects ``KuzuIngestor``; any other value selects
            ``MemgraphIngestor`` on ``localhost:7687``.
        clean: For the Kuzu backend, remove an existing
            ``./example_graph.db`` before opening it.

    Returns:
        Tuple of (graph_service, vector_store, embedder)
    """
    from code_graph_builder.embeddings import create_embedder, create_vector_store
    from code_graph_builder.embeddings.qwen3_embedder import DummyEmbedder

    print(f"\n{'='*60}")
    print(f"Building code graph with {backend} backend")
    print(f"Repository: {repo_path}")
    print(f"{'='*60}\n")

    # Initialize embedding components
    print("Initializing embedding components...")

    # Prefer the real Qwen3 embedder; if it cannot be created (for example
    # because no API key is configured) fall back to DummyEmbedder so the
    # example still runs.
    fallback_dimension = 1536
    try:
        embedder = create_embedder(backend="qwen3")
        dimension = embedder.get_embedding_dimension()
        print(f" Using Qwen3 embedder (dimension: {dimension})")
    except Exception as e:
        print(f" Failed to create Qwen3 embedder: {e}")
        print(" Falling back to DummyEmbedder")
        dimension = fallback_dimension
        embedder = DummyEmbedder(dimension=dimension)

    # Size the store from the embedder actually in use rather than a fixed
    # constant, so stored vectors and the store always agree.
    vector_store = create_vector_store(backend="memory", dimension=dimension)
    print(f" Using MemoryVectorStore (dimension: {dimension})")

    # Initialize graph service based on backend
    if backend == "kuzu":
        from code_graph_builder.services.kuzu_service import KuzuIngestor

        db_path = Path("./example_graph.db")
        if clean and db_path.exists():
            import shutil

            # NOTE(review): depending on the Kuzu version the database may
            # be a directory or a single file; rmtree alone fails on a file.
            if db_path.is_dir():
                shutil.rmtree(db_path)
            else:
                db_path.unlink()

        graph_service = KuzuIngestor(db_path)
        print(f" Using Kuzu database at {db_path}")
    else:
        from code_graph_builder.services.graph_service import MemgraphIngestor

        graph_service = MemgraphIngestor("localhost", 7687)
        print(" Using Memgraph at localhost:7687")

    # Note: Full graph building would require parser setup
    # This is simplified for the example
    print("\nNote: Full graph building requires parser setup")
    print("See code_graph_builder/graph_updater.py for complete implementation")

    return graph_service, vector_store, embedder
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def perform_semantic_search(
    query: str,
    graph_service,
    vector_store,
    embedder,
) -> list:
    """Perform semantic code search and print the results.

    If the vector store is empty it is first seeded with a handful of
    sample embeddings describing the sample repository.

    Args:
        query: Natural language query
        graph_service: Graph database service; only handed to the search
            service when it exposes a ``fetch_all`` attribute
        vector_store: Vector store instance
        embedder: Embedder instance

    Returns:
        List of search results as returned by ``SemanticSearchService.search``
    """
    from code_graph_builder.tools.semantic_search import SemanticSearchService

    print(f"\n{'='*60}")
    print(f"Semantic Search: '{query}'")
    print(f"{'='*60}\n")

    # Create semantic search service
    search_service = SemanticSearchService(
        embedder=embedder,
        vector_store=vector_store,
        graph_service=graph_service if hasattr(graph_service, 'fetch_all') else None,
    )

    # Add some sample data if vector store is empty
    if len(vector_store) == 0:
        print("Adding sample embeddings to vector store...")
        # In real usage, these would be generated from actual code.
        # The node type is listed explicitly: every qualified name contains
        # a dot, so it cannot be derived from the name.
        sample_data = [
            (1, "math_utils.factorial", "Calculate factorial recursively", "Function"),
            (2, "math_utils.fibonacci", "Calculate Fibonacci recursively", "Function"),
            (3, "math_utils.add", "Add two numbers", "Function"),
            (4, "string_utils.reverse_string", "Reverse a string", "Function"),
            (5, "string_utils.is_palindrome", "Check if palindrome", "Function"),
            (6, "data_structures.Stack", "Stack implementation", "Class"),
            (7, "data_structures.Queue", "Queue implementation", "Class"),
        ]

        for node_id, qn, description, node_type in sample_data:
            # Generate embedding for description
            embedding = embedder.embed_code(description)
            vector_store.store_embedding(
                node_id=node_id,
                qualified_name=qn,
                embedding=embedding,
                metadata={"type": node_type},
            )
        print(f" Added {len(sample_data)} sample embeddings")

    # Perform search
    print(f"\nSearching for: '{query}'")
    results = search_service.search(query, top_k=5)

    print(f"\nFound {len(results)} results:\n")
    for i, result in enumerate(results, 1):
        print(f"{i}. {result.qualified_name}")
        print(f" Type: {result.type}")
        print(f" Score: {result.score:.3f}")
        if result.docstring:
            print(f" Doc: {result.docstring}")
        print()

    return results
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def demonstrate_graph_query(graph_service, backend: str) -> None:
    """Show what the graph query layer offers for the chosen backend.

    A ``GraphQueryService`` is constructed on top of ``graph_service``, but
    no query is executed; the function only prints a description of two
    example lookups.

    Args:
        graph_service: Graph database service
        backend: Backend type, passed through to ``GraphQueryService``
    """
    from code_graph_builder.tools.graph_query import GraphQueryService

    divider = "=" * 60
    print("\n" + divider)
    print("Graph Query Layer ({})".format(backend))
    print(divider + "\n")

    query_service = GraphQueryService(graph_service, backend=backend)

    # Both examples are descriptive only; running them needs real data.
    walkthrough = (
        "Example: Fetch nodes by IDs",
        " Query: nodes [1, 2, 3]",
        " (Requires populated database)",
        "\nExample: Query by qualified name",
        " Query: math_utils.factorial",
        " (Requires populated database)",
    )
    for message in walkthrough:
        print(message)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def main():
    """Run the example end to end.

    Steps: adjust ``sys.path``, parse the command line, make sure a
    repository exists, create the embedding/graph components, optionally
    run a semantic search, then show the graph query layer.
    """
    setup_environment()
    options = parse_args()

    rule = "=" * 60
    print("\n" + rule)
    print("Semantic Search with Embedding Integration Example")
    print(rule)

    # Fall back to a generated sample repository when the path is missing.
    repository = create_sample_repo(options.repo)

    graph_service, vector_store, embedder = build_graph_with_embeddings(
        repo_path=repository,
        backend=options.backend,
        clean=options.clean,
    )

    if not options.search:
        print("\nTip: Use --search '<query>' to perform semantic search")
        print("Example: python example_semantic_search_full.py --search 'recursive function'")
    else:
        # The graph service is used as a context manager around the search.
        with graph_service:
            perform_semantic_search(
                query=options.search,
                graph_service=graph_service,
                vector_store=vector_store,
                embedder=embedder,
            )

    # The query-layer demonstration runs regardless of --search.
    with graph_service:
        demonstrate_graph_query(graph_service, options.backend)

    print("\n" + rule)
    print("Example complete!")
    print(rule)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|