code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,434 @@
1
+ """Integration tests for semantic search and graph query features.
2
+
3
+ Tests the integration between:
4
+ - GraphUpdater embedding generation (P0)
5
+ - Semantic search tools (P1)
6
+ - Graph query layer with Kuzu/Memgraph compatibility (P2)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING
13
+ from unittest.mock import MagicMock, patch
14
+
15
+ import pytest
16
+
17
+ if TYPE_CHECKING:
18
+ from ..embeddings.qwen3_embedder import BaseEmbedder
19
+ from ..embeddings.vector_store import VectorStore
20
+
21
+
22
+ # =============================================================================
23
+ # Fixtures
24
+ # =============================================================================
25
+
26
+
27
@pytest.fixture
def mock_embedder() -> MagicMock:
    """Provide a ``MagicMock`` that stands in for an embedder.

    The embedding methods are stubbed with constant vectors of 1536
    elements (each ``0.1``) and ``get_embedding_dimension`` reports 1536,
    so tests using this fixture never call a real embedding backend.
    """
    dimension = 1536

    embedder = MagicMock()
    # Each stub gets its own list object, as separate literals would.
    embedder.embed_code.return_value = [0.1] * dimension
    embedder.embed_query.return_value = [0.1] * dimension
    embedder.embed_documents.return_value = [[0.1] * dimension]
    embedder.get_embedding_dimension.return_value = dimension
    return embedder
36
+
37
+
38
@pytest.fixture
def mock_vector_store() -> MagicMock:
    """Provide a ``MagicMock`` that stands in for a vector store.

    The two store methods return ``None``, ``search_similar`` returns an
    empty list, and ``get_stats`` reports zero entries at dimension 1536.
    """
    store = MagicMock()

    # Both write-style methods are stubbed to return nothing.
    for writer_name in ("store_embedding", "store_embeddings_batch"):
        getattr(store, writer_name).return_value = None

    store.search_similar.return_value = []
    store.get_stats.return_value = {"count": 0, "dimension": 1536}
    return store
47
+
48
+
49
@pytest.fixture
def sample_repo(tmp_path: Path) -> Path:
    """Create a tiny sample repository under pytest's ``tmp_path``.

    The repository is a directory named ``sample_project`` containing a
    single ``module.py`` with two functions (``add``, ``subtract``) and a
    ``Calculator`` class exposing ``multiply``.

    Args:
        tmp_path: Per-test temporary directory supplied by pytest.

    Returns:
        Path to the created ``sample_project`` directory.
    """
    repo = tmp_path / "sample_project"
    repo.mkdir()

    # Create a Python file with functions.  The encoding is pinned so the
    # file's bytes do not depend on the platform's locale default.
    py_file = repo / "module.py"
    py_file.write_text(
        '''
def add(a, b):
    """Add two numbers."""
    return a + b

def subtract(a, b):
    """Subtract b from a."""
    return a - b

class Calculator:
    """A simple calculator."""

    def multiply(self, a, b):
        """Multiply two numbers."""
        return a * b
''',
        encoding="utf-8",
    )

    return repo
75
+
76
+
77
+ # =============================================================================
78
+ # P0: GraphUpdater Integration Tests
79
+ # =============================================================================
80
+
81
+
82
class TestGraphUpdaterEmbeddingIntegration:
    """Checks on how ``GraphUpdater`` accepts its embedding collaborators."""

    @pytest.mark.skip(reason="Requires full parser setup")
    def test_graph_updater_initializes_with_embedding_config(
        self,
        sample_repo: Path,
        mock_embedder: MagicMock,
        mock_vector_store: MagicMock,
    ) -> None:
        """An enabled embedding config keeps the supplied embedder and store."""
        from ..graph_updater import GraphUpdater
        from ..services.memory_service import MemoryIngestor

        memory_ingestor = MemoryIngestor()
        config = {
            "enabled": True,
            "batch_size": 10,
            "api_key": "test-key",
        }

        graph_updater = GraphUpdater(
            ingestor=memory_ingestor,
            repo_path=sample_repo,
            parsers={},
            queries={},
            embedder=mock_embedder,
            vector_store=mock_vector_store,
            embedding_config=config,
        )

        # The very same objects must be exposed, not copies or wrappers.
        assert graph_updater.embedder is mock_embedder
        assert graph_updater.vector_store is mock_vector_store
        assert graph_updater._embedding_enabled is True

    def test_graph_updater_skips_embeddings_when_disabled(
        self,
        sample_repo: Path,
        mock_vector_store: MagicMock,
    ) -> None:
        """A disabled embedding config leaves embeddings off and no embedder."""
        from ..graph_updater import GraphUpdater
        from ..services.memory_service import MemoryIngestor

        memory_ingestor = MemoryIngestor()
        disabled_config = {"enabled": False}

        graph_updater = GraphUpdater(
            ingestor=memory_ingestor,
            repo_path=sample_repo,
            parsers={},
            queries={},
            embedder=None,
            vector_store=mock_vector_store,
            embedding_config=disabled_config,
        )

        assert graph_updater._embedding_enabled is False
        assert graph_updater.embedder is None
140
+
141
+
142
+ # =============================================================================
143
+ # P1: Semantic Search Tests
144
+ # =============================================================================
145
+
146
+
147
class TestSemanticSearchService:
    """Exercise ``SemanticSearchService`` against an in-memory vector store."""

    def test_semantic_search_with_mock_embedder(
        self,
        mock_embedder: MagicMock,
    ) -> None:
        """``search`` returns a list and embeds the query exactly once."""
        from ..tools.semantic_search import SemanticSearchService
        from ..embeddings.vector_store import MemoryVectorStore

        # Seed the store with a single embedding to search against.
        store = MemoryVectorStore(dimension=1536)
        store.store_embedding(
            node_id=1,
            qualified_name="module.add",
            embedding=[0.1] * 1536,
            metadata={"type": "Function"},
        )

        search_service = SemanticSearchService(
            embedder=mock_embedder,
            vector_store=store,
        )
        found = search_service.search("addition function", top_k=5)

        assert isinstance(found, list)
        mock_embedder.embed_query.assert_called_once_with("addition function")

    def test_semantic_search_result_structure(
        self,
        mock_embedder: MagicMock,
    ) -> None:
        """The first result, when one is returned, exposes the expected fields."""
        from ..tools.semantic_search import SemanticSearchService, SemanticSearchResult
        from ..embeddings.vector_store import MemoryVectorStore

        store = MemoryVectorStore(dimension=1536)
        store.store_embedding(
            node_id=1,
            qualified_name="test.module.function",
            embedding=[0.5] * 1536,
            metadata={"type": "Function"},
        )

        search_service = SemanticSearchService(
            embedder=mock_embedder,
            vector_store=store,
        )
        found = search_service.search("test query", top_k=1)

        # NOTE(review): an empty result list makes this test pass without
        # checking anything -- confirm whether that is intended.
        if found:
            first = found[0]
            for field_name in ("node_id", "qualified_name", "name", "type", "score"):
                assert hasattr(first, field_name)
207
+
208
+
209
class TestSemanticSearchConvenienceFunctions:
    """Cover the module-level helpers in ``tools.semantic_search``."""

    def test_semantic_code_search_function(self, mock_embedder: MagicMock) -> None:
        """``semantic_code_search`` returns a list when given an unseeded store."""
        from ..tools.semantic_search import semantic_code_search
        from ..embeddings.vector_store import MemoryVectorStore

        # No embeddings are stored before the search is issued.
        unseeded_store = MemoryVectorStore(dimension=1536)

        found = semantic_code_search(
            query="test",
            embedder=mock_embedder,
            vector_store=unseeded_store,
            top_k=5,
        )

        assert isinstance(found, list)
227
+
228
+
229
+ # =============================================================================
230
+ # P2: Graph Query Layer Tests
231
+ # =============================================================================
232
+
233
+
234
class TestGraphQueryService:
    """Exercise ``GraphQueryService`` on top of a mocked graph service."""

    def test_graph_query_service_initialization(self) -> None:
        """The constructor stores the backend name and the wrapped service."""
        from ..tools.graph_query import GraphQueryService

        backing_service = MagicMock()
        query_service = GraphQueryService(backing_service, backend="memgraph")

        assert query_service.backend == "memgraph"
        assert query_service.graph_service is backing_service

    def test_graph_query_service_kuzu_backend(self) -> None:
        """The ``kuzu`` backend name is stored as given."""
        from ..tools.graph_query import GraphQueryService

        backing_service = MagicMock()
        query_service = GraphQueryService(backing_service, backend="kuzu")

        assert query_service.backend == "kuzu"

    def test_fetch_nodes_by_ids_empty_list(self) -> None:
        """Asking for no IDs yields an empty list."""
        from ..tools.graph_query import GraphQueryService

        backing_service = MagicMock()
        query_service = GraphQueryService(backing_service)

        assert query_service.fetch_nodes_by_ids([]) == []

    def test_fetch_nodes_by_ids_with_results(self) -> None:
        """A row returned by ``fetch_all`` comes back as a ``GraphNode``."""
        from ..tools.graph_query import GraphQueryService, GraphNode

        row = {
            "node_id": 1,
            "qualified_name": "module.func",
            "name": "func",
            "labels": ["Function"],
            "path": "module.py",
            "start_line": 10,
            "end_line": 20,
        }
        backing_service = MagicMock()
        backing_service.fetch_all.return_value = [row]

        query_service = GraphQueryService(backing_service)
        nodes = query_service.fetch_nodes_by_ids([1])

        assert len(nodes) == 1
        node = nodes[0]
        assert isinstance(node, GraphNode)
        assert node.node_id == 1
        assert node.qualified_name == "module.func"

    def test_fetch_node_by_qualified_name(self) -> None:
        """Looking up a qualified name returns a single ``GraphNode``."""
        from ..tools.graph_query import GraphQueryService, GraphNode

        row = {
            "node_id": 42,
            "qualified_name": "myproject.utils.helper",
            "name": "helper",
            "labels": ["Function"],
            "path": "utils.py",
        }
        backing_service = MagicMock()
        backing_service.fetch_all.return_value = [row]

        query_service = GraphQueryService(backing_service)
        node = query_service.fetch_node_by_qualified_name("myproject.utils.helper")

        assert isinstance(node, GraphNode)
        assert node.node_id == 42
311
+
312
+
313
class TestGraphQueryWithVectorResults:
    """Check the hand-off from vector search hits to graph node lookups."""

    def test_query_nodes_by_vector_results(self) -> None:
        """Two vector hits resolve to two nodes via a single ``fetch_all`` call."""
        from ..tools.graph_query import query_nodes_by_vector_results
        from ..embeddings.vector_store import SearchResult

        # Hand-built vector search hits.
        hits = [
            SearchResult(node_id=1, score=0.95, qualified_name="module.func1"),
            SearchResult(node_id=2, score=0.85, qualified_name="module.func2"),
        ]

        # Rows the mocked graph service hands back for those hits.
        graph_rows = [
            {"node_id": 1, "qualified_name": "module.func1", "name": "func1", "labels": ["Function"]},
            {"node_id": 2, "qualified_name": "module.func2", "name": "func2", "labels": ["Function"]},
        ]
        graph_service = MagicMock()
        graph_service.fetch_all.return_value = graph_rows

        nodes = query_nodes_by_vector_results(hits, graph_service)

        assert len(nodes) == 2
        graph_service.fetch_all.assert_called_once()
337
+
338
+
339
+ # =============================================================================
340
+ # Backend Compatibility Tests
341
+ # =============================================================================
342
+
343
+
344
class TestBackendCompatibility:
    """Check query building and ID extraction for the Kuzu and Memgraph backends."""

    def test_cypher_query_compatibility_memgraph(self) -> None:
        """The Memgraph node-by-ID query refers to ``node_id`` or ``id(n)``."""
        from ..tools.graph_query import GraphQueryService

        query_service = GraphQueryService(MagicMock(), backend="memgraph")

        # Build the query and look for a Memgraph-compatible ID reference.
        cypher = query_service._build_nodes_by_id_query()

        assert any(id_ref in cypher for id_ref in ("node_id", "id(n)"))

    def test_cypher_query_compatibility_kuzu(self) -> None:
        """The Kuzu node-by-ID query contains ``MATCH`` and ``RETURN``."""
        from ..tools.graph_query import GraphQueryService

        query_service = GraphQueryService(MagicMock(), backend="kuzu")

        cypher = query_service._build_nodes_by_id_query()

        # Both keywords must be present in the generated query.
        for keyword in ("MATCH", "RETURN"):
            assert keyword in cypher

    def test_node_id_extraction_various_formats(self) -> None:
        """``_extract_node_id`` reads several key spellings and returns 0 for none."""
        from ..tools.graph_query import GraphQueryService

        query_service = GraphQueryService(MagicMock())

        # (row, expected ID) pairs covering each supported field name,
        # plus the empty-row fallback.
        cases = (
            ({"node_id": 42}, 42),
            ({"id": 42}, 42),
            ({"n.node_id": 42}, 42),
            ({}, 0),
        )
        for row, expected_id in cases:
            assert query_service._extract_node_id(row) == expected_id
384
+
385
+
386
+ # =============================================================================
387
+ # End-to-End Integration Tests
388
+ # =============================================================================
389
+
390
+
391
@pytest.mark.skip(reason="Requires full environment setup")
class TestEndToEndIntegration:
    """Whole-pipeline checks; the class is skipped unconditionally."""

    def test_full_workflow_memory_backend(
        self,
        sample_repo: Path,
    ) -> None:
        """Wire updater, embedder, store, and search together in memory."""
        from ..graph_updater import GraphUpdater
        from ..services.memory_service import MemoryIngestor
        from ..embeddings.qwen3_embedder import DummyEmbedder
        from ..embeddings.vector_store import MemoryVectorStore
        from ..tools.semantic_search import SemanticSearchService

        # In-memory collaborators shared by the updater and the search service.
        memory_ingestor = MemoryIngestor()
        dummy_embedder = DummyEmbedder(dimension=1536)
        store = MemoryVectorStore(dimension=1536)

        # The updater is only constructed here; it is never run because
        # running it would require full parser setup.
        updater = GraphUpdater(
            ingestor=memory_ingestor,
            repo_path=sample_repo,
            parsers={},
            queries={},
            embedder=dummy_embedder,
            vector_store=store,
            embedding_config={"enabled": True, "batch_size": 10},
        )

        # Semantic search over the same store, with the ingestor passed
        # as the graph service.
        semantic_search = SemanticSearchService(
            embedder=dummy_embedder,
            vector_store=store,
            graph_service=memory_ingestor,
        )

        found = semantic_search.search("calculator", top_k=5)
        assert isinstance(found, list)