code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,552 @@
|
|
|
1
|
+
"""Tests for MemoryVectorStore - In-memory vector storage for code embeddings.
|
|
2
|
+
|
|
3
|
+
These tests verify the MemoryVectorStore class correctly:
|
|
4
|
+
1. Stores embeddings with associated metadata
|
|
5
|
+
2. Searches for similar vectors using cosine similarity
|
|
6
|
+
3. Handles edge cases (empty store, single item, etc.)
|
|
7
|
+
4. Provides efficient similarity computation
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import math
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Sequence
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Module-level fixtures for all test classes
|
|
22
|
+
@pytest.fixture
def vector_store() -> "MemoryVectorStore":
    """Provide a newly constructed MemoryVectorStore with dimension 768."""
    # The import stays inside the fixture so that merely collecting this
    # module does not import the embeddings package.
    from code_graph_builder.embeddings.vector_store import MemoryVectorStore

    store = MemoryVectorStore(dimension=768)
    return store
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.fixture
def sample_embedding() -> list[float]:
    """Return a 768-element vector whose every component is 0.1."""
    dimension = 768
    return [0.1] * dimension
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@pytest.fixture
def sample_embeddings() -> list[list[float]]:
    """Return three 768-element vectors: two unit axes and an even mix of both."""
    # Zeros for every component after the first two; list concatenation below
    # builds a new list each time, so the three vectors share no storage.
    padding = [0.0] * 766
    first_axis = [1.0, 0.0] + padding  # First dimension high
    second_axis = [0.0, 1.0] + padding  # Second dimension high
    blended = [0.5, 0.5] + padding  # Mixed
    return [first_axis, second_axis, blended]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class TestMemoryVectorStore:
    """Test suite for MemoryVectorStore class.

    Covers storing, searching, updating, deleting and inspecting embeddings.
    Every test except ``test_store_initialization`` takes its store from the
    ``vector_store`` fixture.
    """

    @staticmethod
    def _populate(store: "MemoryVectorStore", embeddings: list[list[float]]) -> None:
        """Store each embedding under node ids 1..n, named ``test.func1``..``test.funcN``."""
        for node_id, embedding in enumerate(embeddings, start=1):
            store.store_embedding(
                node_id=node_id,
                embedding=embedding,
                qualified_name=f"test.func{node_id}",
            )

    def test_store_initialization(self) -> None:
        """Test MemoryVectorStore initializes correctly."""
        from code_graph_builder.embeddings.vector_store import MemoryVectorStore

        store = MemoryVectorStore(dimension=768)

        assert store is not None
        assert len(store) == 0

    def test_store_embedding_adds_item(
        self, vector_store: "MemoryVectorStore", sample_embedding: list[float]
    ) -> None:
        """Test store_embedding adds an item to the store."""
        vector_store.store_embedding(
            node_id=1,
            embedding=sample_embedding,
            qualified_name="test.module.function",
        )

        assert len(vector_store) == 1

    def test_store_embedding_stores_correct_data(
        self, vector_store: "MemoryVectorStore", sample_embedding: list[float]
    ) -> None:
        """Test store_embedding stores correct data."""
        vector_store.store_embedding(
            node_id=42,
            embedding=sample_embedding,
            qualified_name="myproject.utils.foo",
            start_line=10,
            end_line=20,
            path="/path/to/file.py",
        )

        results = vector_store.search_similar(sample_embedding, top_k=1)

        assert len(results) == 1
        assert results[0].node_id == 42
        assert results[0].qualified_name == "myproject.utils.foo"
        # NOTE(review): start_line, end_line and path are stored above but not
        # asserted here; how a search result exposes them is not visible from
        # this file -- confirm and extend the assertions.

    def test_store_multiple_embeddings(
        self, vector_store: "MemoryVectorStore", sample_embeddings: list[list[float]]
    ) -> None:
        """Test storing multiple embeddings."""
        self._populate(vector_store, sample_embeddings)

        assert len(vector_store) == 3

    def test_search_similar_returns_top_k(
        self, vector_store: "MemoryVectorStore", sample_embeddings: list[list[float]]
    ) -> None:
        """Test search_similar returns top_k results."""
        self._populate(vector_store, sample_embeddings)

        # Search with query similar to first embedding
        query = [0.9] + [0.1] * 767
        results = vector_store.search_similar(query, top_k=2)

        assert len(results) == 2

    def test_search_similar_orders_by_similarity(self, vector_store: "MemoryVectorStore") -> None:
        """Test search_similar orders results by similarity."""
        # Store embeddings with different directions
        embedding1 = [1.0, 0.0, 0.0] + [0.0] * 765  # Direction A
        embedding2 = [0.0, 1.0, 0.0] + [0.0] * 765  # Direction B (orthogonal)
        embedding3 = [0.99, 0.01, 0.0] + [0.0] * 765  # Similar to A

        vector_store.store_embedding(node_id=1, embedding=embedding1, qualified_name="func1")
        vector_store.store_embedding(node_id=2, embedding=embedding2, qualified_name="func2")
        vector_store.store_embedding(node_id=3, embedding=embedding3, qualified_name="func3")

        # Query identical to embedding1
        query = [1.0, 0.0, 0.0] + [0.0] * 765
        results = vector_store.search_similar(query, top_k=3)

        # The query equals embedding1, so the exact match must rank first and
        # the near-parallel vector second; accepting "1 or 3" would hide a
        # ranking bug.
        assert [r.node_id for r in results[:2]] == [1, 3]
        scores = [r.score for r in results]
        assert scores[0] > scores[1]
        # Whatever else is returned must not break the descending order.
        assert scores == sorted(scores, reverse=True)

    def test_search_similar_empty_store(self, vector_store: "MemoryVectorStore") -> None:
        """Test search_similar on empty store returns empty list."""
        query = [0.1] * 768
        results = vector_store.search_similar(query, top_k=5)

        assert results == []

    def test_search_similar_single_item(self, vector_store: "MemoryVectorStore") -> None:
        """Test search_similar with single item in store."""
        embedding = [0.1] * 768
        vector_store.store_embedding(
            node_id=1,
            embedding=embedding,
            qualified_name="test.func",
        )

        query = [0.1] * 768
        results = vector_store.search_similar(query, top_k=5)

        assert len(results) == 1
        assert results[0].node_id == 1

    def test_search_similar_top_k_larger_than_store(
        self, vector_store: "MemoryVectorStore", sample_embeddings: list[list[float]]
    ) -> None:
        """Test search_similar when top_k > store size."""
        self._populate(vector_store, sample_embeddings)

        query = [0.1] * 768
        results = vector_store.search_similar(query, top_k=10)

        assert len(results) == 3  # Only 3 items in store

    def test_search_similar_zero_top_k(
        self, vector_store: "MemoryVectorStore", sample_embedding: list[float]
    ) -> None:
        """Test search_similar with top_k=0 returns empty list."""
        vector_store.store_embedding(
            node_id=1,
            embedding=sample_embedding,
            qualified_name="test.func",
        )

        query = [0.1] * 768
        results = vector_store.search_similar(query, top_k=0)

        assert results == []

    def test_search_similar_negative_top_k(
        self, vector_store: "MemoryVectorStore", sample_embedding: list[float]
    ) -> None:
        """Test search_similar with negative top_k raises error."""
        vector_store.store_embedding(
            node_id=1,
            embedding=sample_embedding,
            qualified_name="test.func",
        )

        query = [0.1] * 768

        with pytest.raises(ValueError):
            vector_store.search_similar(query, top_k=-1)

    def test_update_existing_embedding(self, vector_store: "MemoryVectorStore") -> None:
        """Test updating an existing embedding by node_id."""
        embedding1 = [1.0] + [0.0] * 767
        embedding2 = [0.0, 1.0] + [0.0] * 766

        vector_store.store_embedding(
            node_id=1,
            embedding=embedding1,
            qualified_name="test.func",
        )

        # Update with new embedding
        vector_store.store_embedding(
            node_id=1,
            embedding=embedding2,
            qualified_name="test.func_updated",
        )

        assert len(vector_store) == 1

        # Search should find the updated embedding
        query = [0.0, 1.0] + [0.0] * 766
        results = vector_store.search_similar(query, top_k=1)

        # Check the count first so an empty result fails as an assertion
        # instead of an IndexError on the next line.
        assert len(results) == 1
        assert results[0].node_id == 1
        assert results[0].qualified_name == "test.func_updated"

    def test_delete_embedding(
        self, vector_store: "MemoryVectorStore", sample_embeddings: list[list[float]]
    ) -> None:
        """Test delete_embedding removes item."""
        self._populate(vector_store, sample_embeddings)

        vector_store.delete_embedding(node_id=2)

        assert len(vector_store) == 2

        # Verify it's gone: query with the deleted vector itself
        results = vector_store.search_similar(sample_embeddings[1], top_k=3)
        node_ids = [r.node_id for r in results]
        assert 2 not in node_ids

    def test_delete_nonexistent_embedding(
        self, vector_store: "MemoryVectorStore", sample_embedding: list[float]
    ) -> None:
        """Test delete_embedding for non-existent node_id is no-op."""
        vector_store.store_embedding(
            node_id=1,
            embedding=sample_embedding,
            qualified_name="test.func",
        )

        vector_store.delete_embedding(node_id=999)  # Non-existent

        assert len(vector_store) == 1

    def test_clear_store(
        self, vector_store: "MemoryVectorStore", sample_embeddings: list[list[float]]
    ) -> None:
        """Test clear removes all embeddings."""
        self._populate(vector_store, sample_embeddings)

        vector_store.clear()

        assert len(vector_store) == 0

    def test_get_stats(
        self, vector_store: "MemoryVectorStore", sample_embeddings: list[list[float]]
    ) -> None:
        """Test get_stats returns correct statistics."""
        self._populate(vector_store, sample_embeddings)

        stats = vector_store.get_stats()

        assert stats["count"] == 3
        assert stats["dimension"] == 768

    def test_get_embedding_by_node_id(
        self, vector_store: "MemoryVectorStore", sample_embedding: list[float]
    ) -> None:
        """Test get_embedding retrieves embedding by node_id."""
        vector_store.store_embedding(
            node_id=42,
            embedding=sample_embedding,
            qualified_name="test.func",
        )

        result = vector_store.get_embedding(node_id=42)

        assert result is not None
        assert result.node_id == 42
        assert result.embedding == sample_embedding

    def test_get_embedding_nonexistent(self, vector_store: "MemoryVectorStore") -> None:
        """Test get_embedding returns None for non-existent node_id."""
        result = vector_store.get_embedding(node_id=999)

        assert result is None

    def test_dimension_mismatch_raises_error(self, vector_store: "MemoryVectorStore") -> None:
        """Test storing embedding with wrong dimension raises error."""
        vector_store.store_embedding(
            node_id=1,
            embedding=[0.1] * 768,
            qualified_name="test.func1",
        )

        # Try to store with different dimension
        with pytest.raises(ValueError):
            vector_store.store_embedding(
                node_id=2,
                embedding=[0.1] * 512,  # Wrong dimension
                qualified_name="test.func2",
            )
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class TestCosineSimilarity:
    """Test suite for cosine similarity computation.

    Numeric expectations use ``pytest.approx`` with an absolute tolerance of
    1e-6 so that a failure reports the actual value next to the expected one.
    """

    def test_cosine_similarity_identical_vectors(self) -> None:
        """Test cosine similarity of identical vectors is 1.0."""
        from code_graph_builder.embeddings.vector_store import cosine_similarity

        v1 = [1.0, 2.0, 3.0]
        v2 = [1.0, 2.0, 3.0]

        result = cosine_similarity(v1, v2)

        assert result == pytest.approx(1.0, abs=1e-6)

    def test_cosine_similarity_opposite_vectors(self) -> None:
        """Test cosine similarity of opposite vectors is -1.0."""
        from code_graph_builder.embeddings.vector_store import cosine_similarity

        v1 = [1.0, 2.0, 3.0]
        v2 = [-1.0, -2.0, -3.0]

        result = cosine_similarity(v1, v2)

        assert result == pytest.approx(-1.0, abs=1e-6)

    def test_cosine_similarity_orthogonal_vectors(self) -> None:
        """Test cosine similarity of orthogonal vectors is 0.0."""
        from code_graph_builder.embeddings.vector_store import cosine_similarity

        v1 = [1.0, 0.0, 0.0]
        v2 = [0.0, 1.0, 0.0]

        result = cosine_similarity(v1, v2)

        # An explicit absolute tolerance is required here: a relative
        # tolerance is meaningless when the expected value is zero.
        assert result == pytest.approx(0.0, abs=1e-6)

    def test_cosine_similarity_different_magnitudes(self) -> None:
        """Test cosine similarity is independent of vector magnitude."""
        from code_graph_builder.embeddings.vector_store import cosine_similarity

        v1 = [1.0, 0.0, 0.0]
        v2 = [5.0, 0.0, 0.0]

        result = cosine_similarity(v1, v2)

        assert result == pytest.approx(1.0, abs=1e-6)

    def test_cosine_similarity_zero_vector_raises(self) -> None:
        """Test cosine similarity with zero vector raises error."""
        from code_graph_builder.embeddings.vector_store import cosine_similarity

        v1 = [1.0, 2.0, 3.0]
        v2 = [0.0, 0.0, 0.0]

        with pytest.raises(ValueError):
            cosine_similarity(v1, v2)

    def test_cosine_similarity_different_lengths_raises(self) -> None:
        """Test cosine similarity with different length vectors raises error."""
        from code_graph_builder.embeddings.vector_store import cosine_similarity

        v1 = [1.0, 2.0, 3.0]
        v2 = [1.0, 2.0]

        with pytest.raises(ValueError):
            cosine_similarity(v1, v2)

    def test_cosine_similarity_typical_case(self) -> None:
        """Test cosine similarity with typical vectors."""
        from code_graph_builder.embeddings.vector_store import cosine_similarity

        v1 = [1.0, 2.0, 3.0]
        v2 = [4.0, 5.0, 6.0]

        # Manual calculation: dot(v1, v2) / (|v1| * |v2|)
        dot_product = sum(a * b for a, b in zip(v1, v2))
        norm1 = math.sqrt(sum(a * a for a in v1))
        norm2 = math.sqrt(sum(a * a for a in v2))
        expected = dot_product / (norm1 * norm2)

        result = cosine_similarity(v1, v2)

        assert result == pytest.approx(expected, abs=1e-6)
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
class TestVectorStoreEdgeCases:
    """Test suite for edge cases in MemoryVectorStore.

    Covers empty and very large embeddings, unusual qualified names,
    negative node ids, and the type and range of similarity scores.
    """

    def test_empty_embedding_raises(self) -> None:
        """Test storing empty embedding raises error."""
        from code_graph_builder.embeddings.vector_store import MemoryVectorStore

        store = MemoryVectorStore(dimension=768)

        with pytest.raises(ValueError):
            store.store_embedding(
                node_id=1,
                embedding=[],
                qualified_name="test.func",
            )

    def test_very_large_embedding(self) -> None:
        """Test storing very large embedding."""
        from code_graph_builder.embeddings.vector_store import MemoryVectorStore

        large_embedding = [0.001] * 10000
        # The store dimension is taken from the vector so the two always agree.
        store = MemoryVectorStore(dimension=len(large_embedding))

        store.store_embedding(
            node_id=1,
            embedding=large_embedding,
            qualified_name="test.func",
        )

        assert len(store) == 1

    def test_special_characters_in_qualified_name(self) -> None:
        """Test storing with special characters in qualified_name."""
        from code_graph_builder.embeddings.vector_store import MemoryVectorStore

        store = MemoryVectorStore(dimension=768)

        store.store_embedding(
            node_id=1,
            embedding=[0.1] * 768,
            qualified_name="test.module.function<T>",
        )

        result = store.get_embedding(node_id=1)
        # Fail with a clear assertion, not an AttributeError, if the record is missing.
        assert result is not None
        assert result.qualified_name == "test.module.function<T>"

    def test_unicode_in_qualified_name(self) -> None:
        """Test storing with unicode characters in qualified_name."""
        from code_graph_builder.embeddings.vector_store import MemoryVectorStore

        store = MemoryVectorStore(dimension=768)

        store.store_embedding(
            node_id=1,
            embedding=[0.1] * 768,
            qualified_name="测试.函数.示例",
        )

        result = store.get_embedding(node_id=1)
        # Fail with a clear assertion, not an AttributeError, if the record is missing.
        assert result is not None
        assert result.qualified_name == "测试.函数.示例"

    def test_negative_node_id(self) -> None:
        """Test storing with negative node_id."""
        from code_graph_builder.embeddings.vector_store import MemoryVectorStore

        store = MemoryVectorStore(dimension=768)

        store.store_embedding(
            node_id=-1,
            embedding=[0.1] * 768,
            qualified_name="test.func",
        )

        result = store.get_embedding(node_id=-1)
        # Fail with a clear assertion, not an AttributeError, if the record is missing.
        assert result is not None
        assert result.node_id == -1

    def test_float_similarity_scores(self, vector_store: "MemoryVectorStore") -> None:
        """Test that similarity scores are valid floats between -1 and 1."""
        vector_store.store_embedding(
            node_id=1,
            embedding=[1.0, 0.0] + [0.0] * 766,
            qualified_name="test.func1",
        )

        vector_store.store_embedding(
            node_id=2,
            embedding=[0.0, 1.0] + [0.0] * 766,
            qualified_name="test.func2",
        )

        query = [0.5, 0.5] + [0.0] * 766
        results = vector_store.search_similar(query, top_k=2)

        # Without this check the loop below would pass vacuously on an
        # empty result list.
        assert len(results) == 2

        for result in results:
            assert isinstance(result.score, float)
            assert -1.0 <= result.score <= 1.0
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
class TestVectorStoreIntegration:
    """End-to-end checks that drive MemoryVectorStore through store-then-search flows."""

    def test_store_and_retrieve_roundtrip(self) -> None:
        """Store three vectors, search near the first, and expect it to rank top."""
        from code_graph_builder.embeddings.vector_store import MemoryVectorStore

        store = MemoryVectorStore(dimension=768)

        # Zeros for components 3..768; each concatenation below yields a new list.
        tail = [0.0] * 766
        named_vectors = {
            "func1": [1.0, 0.0] + tail,
            "func2": [0.0, 1.0] + tail,
            "func3": [0.5, 0.5] + tail,
        }

        # Dicts preserve insertion order, so node ids 1..3 follow the order above.
        for node_id, (name, vector) in enumerate(named_vectors.items(), start=1):
            store.store_embedding(
                node_id=node_id,
                embedding=vector,
                qualified_name=f"test.{name}",
            )

        # Query leaning heavily towards func1's direction.
        query = [0.9, 0.1] + tail
        hits = store.search_similar(query, top_k=3)

        assert len(hits) == 3
        # func1 should be most similar
        assert hits[0].qualified_name == "test.func1"

    def test_multiple_searches_consistency(self) -> None:
        """Run the same query twice and expect matching ids and near-equal scores."""
        from code_graph_builder.embeddings.vector_store import MemoryVectorStore

        store = MemoryVectorStore(dimension=768)

        tail = [0.0] * 766
        store.store_embedding(
            node_id=1,
            embedding=[1.0, 0.0] + tail,
            qualified_name="test.func1",
        )
        store.store_embedding(
            node_id=2,
            embedding=[0.0, 1.0] + tail,
            qualified_name="test.func2",
        )

        query = [0.5, 0.5] + tail

        first_run = store.search_similar(query, top_k=2)
        second_run = store.search_similar(query, top_k=2)

        assert len(first_run) == len(second_run)
        for earlier, later in zip(first_run, second_run):
            assert earlier.node_id == later.node_id
            assert abs(earlier.score - later.score) < 1e-6
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Tools for Code Graph Builder.
|
|
2
|
+
|
|
3
|
+
This module provides tools for semantic code search and retrieval,
|
|
4
|
+
compatible with CAMEL Agent framework.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
# Semantic search tools
|
|
10
|
+
from .semantic_search import (
|
|
11
|
+
SemanticSearchResult,
|
|
12
|
+
SemanticSearchService,
|
|
13
|
+
create_semantic_search_service,
|
|
14
|
+
get_function_source_by_node_id,
|
|
15
|
+
semantic_code_search,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Graph query tools
|
|
19
|
+
from .graph_query import (
|
|
20
|
+
GraphNode,
|
|
21
|
+
GraphQueryService,
|
|
22
|
+
create_graph_query_service,
|
|
23
|
+
get_function_with_context,
|
|
24
|
+
query_nodes_by_vector_results,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Names exported by ``from <this package> import *``, grouped by topic.
__all__ = [
    # Semantic search helpers, service and result type
    "semantic_code_search",
    "get_function_source_by_node_id",
    "create_semantic_search_service",
    "SemanticSearchService",
    "SemanticSearchResult",
    # Graph query node type, service and helpers
    "GraphNode",
    "GraphQueryService",
    "create_graph_query_service",
    "get_function_with_context",
    "query_nodes_by_vector_results",
]
|