code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,100 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ from pathlib import Path
5
+ from typing import Annotated
6
+
7
+ import typer
8
+ from loguru import logger
9
+
10
+ sys.path.insert(0, str(Path(__file__).parent.parent))
11
+
12
+ from codebase_rag import cli_help as ch
13
+ from codebase_rag import logs
14
+ from codebase_rag.constants import (
15
+ DEFAULT_NAME,
16
+ KEY_EXPORTED_AT,
17
+ KEY_METADATA,
18
+ KEY_NAME,
19
+ KEY_NODE_LABELS,
20
+ KEY_RELATIONSHIP_TYPES,
21
+ KEY_TOTAL_NODES,
22
+ KEY_TOTAL_RELATIONSHIPS,
23
+ NodeLabel,
24
+ )
25
+ from codebase_rag.graph_loader import GraphLoader, load_graph
26
+ from codebase_rag.types_defs import GraphSummary
27
+
28
+
29
def log_summary(summary: GraphSummary) -> None:
    """Log the headline figures of a graph summary.

    Emits the summary banner, the total node count and the total
    relationship count (each falling back to 0 when its key is absent),
    and finally the export timestamp when the summary's metadata has one.

    Args:
        summary: Summary mapping, e.g. the value returned by ``graph.summary()``.
    """
    logger.info(logs.GRAPH_SUMMARY)

    node_total = summary.get(KEY_TOTAL_NODES, 0)
    logger.info(logs.GRAPH_TOTAL_NODES.format(count=node_total))

    rel_total = summary.get(KEY_TOTAL_RELATIONSHIPS, 0)
    logger.info(logs.GRAPH_TOTAL_RELS.format(count=rel_total))

    # The timestamp is optional: bail out quietly when either level is missing.
    if KEY_METADATA not in summary:
        return
    if KEY_EXPORTED_AT not in summary[KEY_METADATA]:
        return

    exported_at = summary[KEY_METADATA][KEY_EXPORTED_AT]
    logger.info(logs.GRAPH_EXPORTED_AT.format(timestamp=exported_at))
41
+
42
+
43
def log_node_and_relationship_types(summary: GraphSummary) -> None:
    """Log per-label node counts, then per-type relationship counts.

    Each section starts with its heading line; a section whose key is
    missing from ``summary`` prints only the heading.

    Args:
        summary: Summary mapping holding the label and relationship-type counts.
    """
    logger.info(logs.GRAPH_NODE_TYPES)
    label_counts = summary.get(KEY_NODE_LABELS, {})
    for node_label, node_count in label_counts.items():
        logger.info(logs.GRAPH_NODE_COUNT.format(label=node_label, count=node_count))

    logger.info(logs.GRAPH_REL_TYPES)
    rel_counts = summary.get(KEY_RELATIONSHIP_TYPES, {})
    for kind, rel_count in rel_counts.items():
        logger.info(logs.GRAPH_REL_COUNT.format(rel_type=kind, count=rel_count))
51
+
52
+
53
def log_example_nodes(graph: GraphLoader, node_label: str, limit: int = 5) -> None:
    """Log how many nodes carry ``node_label`` and list a few of their names.

    Args:
        graph: Loaded graph queried through ``find_nodes_by_label``.
        node_label: Label of the nodes to look up.
        limit: Maximum number of example names to print.
    """
    matches = graph.find_nodes_by_label(node_label)
    total = len(matches)
    logger.info(logs.GRAPH_FOUND_NODES.format(count=total, label=node_label))

    if not matches:
        return

    logger.info(logs.GRAPH_EXAMPLE_NAMES.format(label=node_label))
    for sample in matches[:limit]:
        display_name = sample.properties.get(KEY_NAME, DEFAULT_NAME)
        logger.info(logs.GRAPH_EXAMPLE_NAME.format(name=display_name))

    # Tell the reader how many nodes were left out of the listing.
    hidden = total - limit
    if hidden > 0:
        logger.info(logs.GRAPH_MORE_NODES.format(count=hidden))
64
+
65
+
66
def analyze_graph(graph_file: str) -> None:
    """Analyze an exported graph file, exiting with status 1 on any failure.

    Args:
        graph_file: Path of the exported graph file, as a string.

    Raises:
        SystemExit: With code 1 when the analysis raises an exception
            (the error is logged first).
    """
    logger.info(logs.GRAPH_ANALYZING.format(path=graph_file))

    try:
        _perform_graph_analysis(graph_file)
    except Exception as exc:
        # Script boundary: report the problem and turn it into an exit code.
        failure = logs.GRAPH_ANALYSIS_ERROR.format(error=exc)
        logger.error(failure)
        sys.exit(1)
74
+
75
+
76
def _perform_graph_analysis(graph_file: str) -> None:
    """Load the graph and log its summary, type counts and example nodes.

    Args:
        graph_file: Path of the exported graph file, as a string.
    """
    loaded = load_graph(graph_file)
    overview = loaded.summary()

    for report in (log_summary, log_node_and_relationship_types):
        report(overview)

    # Show a handful of example names for functions first, then classes.
    for label in (NodeLabel.FUNCTION, NodeLabel.CLASS):
        log_example_nodes(loaded, label)

    logger.success(logs.GRAPH_ANALYSIS_COMPLETE)
87
+
88
+
89
def main(
    graph_file: Annotated[Path, typer.Argument(help=ch.HELP_EXPORTED_GRAPH_FILE)],
) -> None:
    """CLI entry point: analyze ``graph_file`` if it exists.

    Args:
        graph_file: Path to the exported graph file given on the command line.

    Raises:
        typer.Exit: With code 1 when ``graph_file`` does not exist.
    """
    if graph_file.exists():
        analyze_graph(str(graph_file))
        return

    logger.error(logs.GRAPH_FILE_NOT_FOUND.format(path=graph_file))
    raise typer.Exit(1)


if __name__ == "__main__":
    typer.run(main)
@@ -0,0 +1,206 @@
1
+ """Example usage of the RAG module for code analysis.
2
+
3
+ This example demonstrates how to use the RAG module to:
4
+ 1. Query code using natural language
5
+ 2. Explain specific code entities
6
+ 3. Analyze module architecture
7
+ 4. Use CAMEL agents for specialized analysis
8
+
9
+ Prerequisites:
10
+ - Set MOONSHOT_API_KEY environment variable
11
+ - Have a code graph built with code_graph_builder
12
+ - Have embeddings generated in vector store
13
+
14
+ Example:
15
+ export MOONSHOT_API_KEY="sk-xxxxx"
16
+ uv run examples/rag_example.py
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ from pathlib import Path
23
+
24
+ from code_graph_builder.embeddings import create_embedder, create_vector_store
25
+ from code_graph_builder.rag import (
26
+ RAGConfig,
27
+ RAGEngine,
28
+ create_rag_engine,
29
+ )
30
+ from code_graph_builder.rag.camel_agent import CamelAgent, MultiAgentRAG
31
+ from code_graph_builder.services import MemgraphIngestor
32
+
33
+
34
def setup_rag_engine() -> RAGEngine:
    """Build a RAG engine from environment-driven configuration.

    Loads and validates ``RAGConfig`` from the environment, creates an
    embedder with an in-memory vector store sized to the embedder's
    dimension, connects a ``MemgraphIngestor`` using ``MEMGRAPH_HOST`` /
    ``MEMGRAPH_PORT`` (defaults ``localhost`` / ``7687``), and wires them
    together.

    Returns:
        The engine produced by ``create_rag_engine``.
    """
    # Configuration comes from the environment and is validated up front.
    rag_config = RAGConfig.from_env()
    rag_config.validate()

    print(f"Using model: {rag_config.moonshot.model}")
    print(f"Semantic top-k: {rag_config.retrieval.semantic_top_k}")

    # The vector store must use the same dimension the embedder produces.
    code_embedder = create_embedder()
    store = create_vector_store(
        backend="memory",
        dimension=code_embedder.get_embedding_dimension(),
    )

    # Graph database connection parameters, overridable through the environment.
    memgraph_host = os.getenv("MEMGRAPH_HOST", "localhost")
    memgraph_port = int(os.getenv("MEMGRAPH_PORT", "7687"))
    graph_db = MemgraphIngestor(host=memgraph_host, port=memgraph_port)

    return create_rag_engine(
        config=rag_config,
        embedder=code_embedder,
        vector_store=store,
        graph_service=graph_db,
    )
65
+
66
+
67
def example_natural_language_query(engine: RAGEngine) -> None:
    """Example: Query code using natural language.

    Sends one fixed question to ``engine.query`` with ``top_k=5``, prints
    the response text and the qualified name / file path of every source,
    then saves the result through ``engine.save_result`` and prints the
    location it returned.

    Args:
        engine: RAG engine used to answer the question and save the result.
    """
    print("\n" + "=" * 60)
    print("Example 1: Natural Language Query")
    print("=" * 60)

    query = "How does the authentication system work?"
    print(f"\nQuery: {query}")

    result = engine.query(query, top_k=5)

    print(f"\nResponse:\n{result.response}")
    # Plain literal: nothing is interpolated here, so no f-prefix is needed.
    print("\nSources used:")
    for source in result.sources:
        print(f" - {source.qualified_name} ({source.file_path})")

    # Save result to file
    output_path = engine.save_result(result)
    print(f"\nSaved to: {output_path}")
86
+
87
+
88
def example_explain_code(engine: RAGEngine) -> None:
    """Example: Explain a specific code entity.

    Asks ``engine.explain_code`` (with related entities included) about one
    hard-coded qualified name and prints the returned explanation.

    Args:
        engine: RAG engine that produces the explanation.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Example 2: Explain Code Entity")
    print(banner)

    # Sample target - change it to an entity that exists in your codebase.
    target = "code_graph_builder.rag.rag_engine.RAGEngine.query"
    print(f"\nExplaining: {target}")

    explanation = engine.explain_code(target, include_related=True)
    print(f"\nExplanation:\n{explanation.response}")
101
+
102
+
103
def example_architecture_analysis(engine: RAGEngine) -> None:
    """Example: Analyze module architecture.

    Runs ``engine.analyze_architecture`` on one hard-coded module name and
    prints the returned analysis text.

    Args:
        engine: RAG engine that performs the analysis.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Example 3: Architecture Analysis")
    print(banner)

    target_module = "code_graph_builder.rag"
    print(f"\nAnalyzing module: {target_module}")

    analysis = engine.analyze_architecture(target_module)
    print(f"\nArchitecture Analysis:\n{analysis.response}")
115
+
116
+
117
def example_camel_agent() -> None:
    """Example: Use CAMEL agent for code review.

    Creates a ``CamelAgent`` with a senior-Python-developer persona, prints
    a small sample function, then prints the agent's general review of it
    followed by its suggestions focused on readability and performance.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Example 4: CAMEL Agent Code Review")
    print(banner)

    # Persona handed to the specialized agent.
    persona = {
        "role": "Senior Python Developer",
        "goal": "Review code for best practices and potential issues",
        "backstory": "10+ years of Python development experience, expert in clean code",
    }
    reviewer = CamelAgent(**persona)

    # Sample snippet submitted for review.
    code = """
def process_data(data):
    result = []
    for i in range(len(data)):
        if data[i] > 0:
            result.append(data[i] * 2)
    return result
"""

    print("\nCode to review:")
    print(code)

    review = reviewer.review_code(code, review_type="general")
    print(f"\nReview:\n{review.content}")

    focus = ["readability", "performance"]
    advice = reviewer.suggest_improvements(code, focus_areas=focus)
    print(f"\nSuggestions:\n{advice.content}")
153
+
154
+
155
def example_multi_agent_analysis(engine: RAGEngine) -> None:
    """Example: Multi-agent comprehensive analysis.

    Wraps ``engine`` in ``MultiAgentRAG``, requests the "architecture" and
    "docs" analyses for one fixed query, and prints each response,
    truncated to 500 characters plus an ellipsis when it is longer.

    Args:
        engine: RAG engine handed to the multi-agent system.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Example 5: Multi-Agent Analysis")
    print(banner)

    coordinator = MultiAgentRAG(engine)

    query = "Explain the RAG engine implementation"
    print(f"\nQuery: {query}")

    wanted = ["architecture", "docs"]
    results = coordinator.analyze(query=query, analysis_types=wanted)

    for agent_type, response in results.items():
        print(f"\n--- {agent_type.upper()} ANALYSIS ---")
        if len(response.content) > 500:
            # Keep console output readable for long answers.
            print(response.content[:500] + "...")
        else:
            print(response.content)
176
+
177
+
178
def main() -> None:
    """Run all examples.

    Prints a title, returns early with a hint when ``MOONSHOT_API_KEY`` is
    not set, and otherwise builds the engine and runs the five examples in
    order. Any exception is printed and then re-raised.
    """
    print("RAG Module Examples")
    print("===================")

    api_key = os.getenv("MOONSHOT_API_KEY")
    if not api_key:
        print("\nError: MOONSHOT_API_KEY environment variable not set")
        print("Please set it before running: export MOONSHOT_API_KEY='your-key'")
        return

    try:
        rag = setup_rag_engine()

        # Engine-backed examples 1-3.
        example_natural_language_query(rag)
        example_explain_code(rag)
        example_architecture_analysis(rag)

        # Example 4 builds its own agent and needs no engine.
        example_camel_agent()

        example_multi_agent_analysis(rag)
    except Exception as exc:
        # Show a short message, but keep the traceback for debugging.
        print(f"\nError: {exc}")
        raise


if __name__ == "__main__":
    main()
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+ """Demonstration of Code Graph Builder CLI.
3
+
4
+ This script shows all CLI commands without actually running them.
5
+ For actual usage, run the commands directly in your terminal.
6
+ """
7
+
8
+ import subprocess
9
+ import sys
10
+
11
+
12
def run_command(cmd: str, description: str) -> None:
    """Print a framed description followed by the command line.

    The command is only displayed, prefixed with ``$``; it is never
    executed. A blank line is printed after it.

    Args:
        cmd: Command line to display.
        description: Heading printed between two 80-character rules.
    """
    rule = "=" * 80
    # The trailing empty item yields the blank separator line.
    print(rule, description, rule, f"$ {cmd}", "", sep="\n")
19
+
20
+
21
def main() -> None:
    """Show CLI examples, then run two read-only commands for real.

    The eight numbered examples are only printed through ``run_command``.
    After them, ``--version`` and ``stats --help`` are executed against
    ``code_graph_builder.cli`` with the current interpreter, and whatever
    they wrote (stdout, or stderr when stdout is empty) is echoed. The
    function ends with a short quick-reference listing.
    """
    repo_path = "/Users/jiaojeremy/CodeFile/tinycc"
    db_path = "/tmp/demo_graph.db"

    print("Code Graph Builder - CLI 演示")
    print()
    print("注意: 以下只是命令示例,不会实际执行")
    print("在实际终端中运行这些命令来体验完整功能")
    print()

    # Help
    run_command(
        "code-graph-builder --help",
        "1. 查看帮助信息",
    )

    # Scan: plain, then with filtering options
    run_command(
        f"code-graph-builder scan {repo_path} --db-path {db_path} --clean",
        "2. 扫描代码仓库",
    )

    run_command(
        f"code-graph-builder scan {repo_path} \\\n"
        f" --db-path {db_path} \\\n"
        " --exclude tests,win32,examples \\\n"
        " --language c \\\n"
        " --clean",
        "3. 扫描(带过滤选项)",
    )

    # Query: list functions, then callers of one function
    run_command(
        "code-graph-builder query \\\n"
        ' "MATCH (f:Function) RETURN f.name LIMIT 10" \\\n'
        f" --db-path {db_path}",
        "4. 查询函数",
    )

    run_command(
        "code-graph-builder query \\\n"
        ' "MATCH (caller:Function)-[:CALLS]->(callee:Function) \\\n'
        ' WHERE callee.name = \\\'parse_expr\\\' \\\n'
        ' RETURN caller.name" \\\n'
        f" --db-path {db_path}",
        "5. 查询调用关系",
    )

    # Stats
    run_command(
        f"code-graph-builder stats --db-path {db_path}",
        "6. 查看统计信息",
    )

    # Export
    run_command(
        f"code-graph-builder export {repo_path} \\\n"
        " --output /tmp/graph.json \\\n"
        " --build \\\n"
        " --exclude tests",
        "7. 导出为 JSON",
    )

    # Using a config file
    run_command(
        f"code-graph-builder scan {repo_path} \\\n"
        " --config code-graph-builder.example.yaml",
        "8. 使用配置文件",
    )

    print("=" * 80)
    print("实际运行测试")
    print("=" * 80)
    print()

    # From here on the commands really run; both are read-only.
    print("运行: code-graph-builder --version")
    result = subprocess.run(
        [sys.executable, "-m", "code_graph_builder.cli", "--version"],
        capture_output=True,
        text=True,
    )
    print(result.stdout or result.stderr)

    print("运行: code-graph-builder stats --help")
    result = subprocess.run(
        [sys.executable, "-m", "code_graph_builder.cli", "stats", "--help"],
        capture_output=True,
        text=True,
    )
    # Fall back to stderr like the call above, so a failing command shows
    # its error instead of an empty line.
    print(result.stdout or result.stderr)
    print()

    print("=" * 80)
    print("CLI 演示完成!")
    print("=" * 80)
    print()
    print("快速参考:")
    print(" code-graph-builder scan <repo> --db-path <path> # 扫描代码")
    print(" code-graph-builder query '<cypher>' # 查询")
    print(" code-graph-builder stats # 统计")
    print(" code-graph-builder export <repo> -o <file> # 导出")
    print()
    print("详细文档: CLI.md")


if __name__ == "__main__":
    main()
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env python3
2
+ """Example: Test Qwen3 Embedding via Alibaba Cloud Bailian API.
3
+
4
+ This script demonstrates how to use the API-based Qwen3 embedder.
5
+
6
+ Prerequisites:
7
+ 1. Set your API key:
8
+ export DASHSCOPE_API_KEY="sk-xxxxx"
9
+
10
+ 2. Or create a .env file in the project root with:
11
+ DASHSCOPE_API_KEY=sk-xxxxx
12
+
13
+ Usage:
14
+ python examples/test_embedding_api.py
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import os
20
+ import sys
21
+ from pathlib import Path
22
+
23
+ # Add parent directory to path
24
+ sys.path.insert(0, str(Path(__file__).parent.parent))
25
+
26
+ from dotenv import load_dotenv
27
+ from loguru import logger
28
+
29
+ # Load environment variables from .env file
30
+ load_dotenv(Path(__file__).parent.parent / ".env")
31
+
32
+
33
def test_embedder() -> None:
    """Test the Qwen3 embedder with API.

    Checks that ``DASHSCOPE_API_KEY`` is set, creates the embedder, runs its
    health check, then exercises single-code, batch and query embedding.
    Progress is reported through the logger.

    Raises:
        SystemExit: With code 1 when the API key is missing, the health
            check fails, or any embedding call raises.
    """
    from code_graph_builder.embeddings.qwen3_embedder import create_embedder

    # Check API key
    api_key = os.getenv("DASHSCOPE_API_KEY")
    if not api_key:
        logger.error("DASHSCOPE_API_KEY not set!")
        logger.info("Please set your API key:")
        logger.info(" export DASHSCOPE_API_KEY='sk-xxxxx'")
        sys.exit(1)

    # Never write key material to the log; the length is enough to confirm
    # that a value was picked up.
    logger.info(f"API Key found (length: {len(api_key)})")

    # Create embedder
    logger.info("Creating Qwen3 embedder...")
    embedder = create_embedder()

    # Health check
    logger.info("Running health check...")
    if embedder.health_check():
        logger.success("✓ API is accessible")
    else:
        logger.error("✗ API health check failed")
        sys.exit(1)

    # Test single embedding
    logger.info("\nTesting single code embedding...")
    code = """
def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
"""

    try:
        embedding = embedder.embed_code(code)
        logger.success(f"✓ Generated embedding with {len(embedding)} dimensions")
        logger.info(f" First 5 values: {embedding[:5]}")
    except Exception as e:
        logger.error(f"✗ Failed to generate embedding: {e}")
        sys.exit(1)

    # Test batch embedding
    logger.info("\nTesting batch embedding...")
    codes = [
        "def add(a, b): return a + b",
        "class Calculator:\n def multiply(self, x, y):\n return x * y",
        "import os\nprint(os.getcwd())",
    ]

    try:
        embeddings = embedder.embed_batch(codes, show_progress=True)
        logger.success(f"✓ Generated {len(embeddings)} embeddings")
        for position, emb in enumerate(embeddings, 1):
            logger.info(f" Code {position}: {len(emb)} dimensions")
    except Exception as e:
        logger.error(f"✗ Failed to generate batch embeddings: {e}")
        sys.exit(1)

    # Test query embedding (with instruction)
    logger.info("\nTesting query embedding (with instruction)...")
    query = "functions that calculate Fibonacci numbers"

    try:
        query_embedding = embedder.embed_query(query)
        logger.success(f"✓ Generated query embedding with {len(query_embedding)} dimensions")
    except Exception as e:
        logger.error(f"✗ Failed to generate query embedding: {e}")
        sys.exit(1)

    logger.info("\n" + "=" * 50)
    logger.success("All tests passed! ✓")
    logger.info("=" * 50)
107
+
108
+
109
def test_vector_store() -> None:
    """Test the vector store with embeddings.

    Embeds three small snippets, stores them in an in-memory vector store,
    logs the stored count, then embeds a query and logs the top-3 similar
    entries with their scores.
    """
    from code_graph_builder.embeddings.qwen3_embedder import create_embedder
    from code_graph_builder.embeddings.vector_store import create_vector_store

    logger.info("\nTesting Vector Store...")

    # Create embedder and vector store. The store is sized from the embedder
    # itself so the two cannot disagree when the configured model's output
    # dimension is not the previously hard-coded 1536.
    embedder = create_embedder()
    vector_store = create_vector_store(
        backend="memory",
        dimension=embedder.get_embedding_dimension(),
    )

    # Store some embeddings
    codes = [
        (1, "def add(a, b): return a + b"),
        (2, "def subtract(a, b): return a - b"),
        (3, "class Calculator:\n def multiply(self, x, y): return x * y"),
    ]

    logger.info("Storing embeddings...")
    for node_id, code in codes:
        embedding = embedder.embed_code(code)
        vector_store.store_embedding(
            node_id=node_id,
            qualified_name=f"module.function_{node_id}",
            embedding=embedding,
        )

    stats = vector_store.get_stats()
    logger.success(f"✓ Stored {stats['count']} embeddings")

    # Search
    logger.info("\nSearching for similar code...")
    query = "addition function"
    query_embedding = embedder.embed_query(query)

    results = vector_store.search_similar(query_embedding, top_k=3)

    logger.success(f"✓ Found {len(results)} results:")
    for i, result in enumerate(results, 1):
        logger.info(f" {i}. {result.qualified_name} (score: {result.score:.4f})")


if __name__ == "__main__":
    test_embedder()
    test_vector_store()