cognee 0.5.0.dev1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +1 -0
- cognee/infrastructure/databases/relational/config.py +16 -1
- cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +24 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +9 -2
- cognee/infrastructure/llm/LLMGateway.py +0 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
- cognee/modules/data/models/Data.py +2 -1
- cognee/modules/retrieval/triplet_retriever.py +1 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +0 -18
- cognee/tasks/ingestion/data_item.py +8 -0
- cognee/tasks/ingestion/ingest_data.py +12 -1
- cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +252 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +268 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +226 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +218 -0
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +254 -0
- cognee/tests/{unit/modules/retrieval/structured_output_test.py → integration/retrieval/test_structured_output.py} +87 -77
- cognee/tests/integration/retrieval/test_summaries_retriever.py +184 -0
- cognee/tests/integration/retrieval/test_temporal_retriever.py +306 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +35 -0
- cognee/tests/test_custom_data_label.py +68 -0
- cognee/tests/test_search_db.py +334 -181
- cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
- cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
- cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +181 -199
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +454 -162
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +674 -156
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +625 -200
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +319 -203
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +189 -155
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +539 -58
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +218 -9
- cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
- cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
- cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +246 -0
- {cognee-0.5.0.dev1.dist-info → cognee-0.5.1.dev0.dist-info}/METADATA +1 -1
- {cognee-0.5.0.dev1.dist-info → cognee-0.5.1.dev0.dist-info}/RECORD +56 -42
- {cognee-0.5.0.dev1.dist-info → cognee-0.5.1.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.5.0.dev1.dist-info → cognee-0.5.1.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dev1.dist-info → cognee-0.5.1.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dev1.dist-info → cognee-0.5.1.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pytest
|
|
3
|
+
import pathlib
|
|
4
|
+
import pytest_asyncio
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
import cognee
|
|
7
|
+
|
|
8
|
+
from cognee.low_level import setup, DataPoint
|
|
9
|
+
from cognee.tasks.storage import add_data_points
|
|
10
|
+
from cognee.modules.graph.utils import resolve_edges_to_text
|
|
11
|
+
from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
|
|
12
|
+
GraphCompletionContextExtensionRetriever,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest_asyncio.fixture
async def setup_test_environment_simple():
    """Provide a pruned cognee environment seeded with a small company graph.

    Points cognee's system and data roots at directories dedicated to this
    test, prunes existing data and system state, runs ``setup()``, and stores
    two companies plus five people linked through ``works_for``. After the
    test, data and system state are pruned again on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_graph_completion_extension_context_simple"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_graph_completion_extension_context_simple"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a clean slate before seeding.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    class Company(DataPoint):
        name: str

    class Person(DataPoint):
        name: str
        works_for: Company

    figma = Company(name="Figma")
    canva = Company(name="Canva")
    people = [
        Person(name="Steve Rodger", works_for=figma),
        Person(name="Ike Loma", works_for=figma),
        Person(name="Jason Statham", works_for=figma),
        Person(name="Mike Broski", works_for=canva),
        Person(name="Christina Mayer", works_for=canva),
    ]

    await add_data_points([figma, canva, *people])

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@pytest_asyncio.fixture
async def setup_test_environment_complex():
    """Provide a pruned cognee environment seeded with a richer graph.

    Points cognee's system and data roots at directories dedicated to this
    test, prunes existing data and system state, runs ``setup()``, and stores
    two companies and five people; some people also own cars and/or a home
    with a location. After the test, data and system state are pruned again
    on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_graph_completion_extension_context_complex"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_graph_completion_extension_context_complex"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a clean slate before seeding.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    class Company(DataPoint):
        name: str
        metadata: dict = {"index_fields": ["name"]}

    class Car(DataPoint):
        brand: str
        model: str
        year: int

    class Location(DataPoint):
        country: str
        city: str

    class Home(DataPoint):
        location: Location
        rooms: int
        sqm: int

    class Person(DataPoint):
        name: str
        works_for: Company
        owns: Optional[list[Union[Car, Home]]] = None

    figma = Company(name="Figma")
    canva = Company(name="Canva")

    # Possessions are attached after construction, exactly as in the original
    # fixture, so the way the ``owns`` values are set does not change.
    mike_rodger = Person(name="Mike Rodger", works_for=figma)
    mike_rodger.owns = [Car(brand="Toyota", model="Camry", year=2020)]

    ike_loma = Person(name="Ike Loma", works_for=figma)
    ike_loma.owns = [
        Car(brand="Tesla", model="Model S", year=2021),
        Home(location=Location(country="USA", city="New York"), sqm=80, rooms=4),
    ]

    jason_statham = Person(name="Jason Statham", works_for=figma)

    mike_broski = Person(name="Mike Broski", works_for=canva)
    mike_broski.owns = [Car(brand="Ford", model="Mustang", year=1978)]

    christina_mayer = Person(name="Christina Mayer", works_for=canva)
    christina_mayer.owns = [Car(brand="Honda", model="Civic", year=2023)]

    await add_data_points(
        [figma, canva, mike_rodger, ike_loma, jason_statham, mike_broski, christina_mayer]
    )

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@pytest_asyncio.fixture
async def setup_test_environment_empty():
    """Provide a pruned cognee environment that contains no graph data.

    Points cognee's system and data roots at directories dedicated to this
    test, prunes existing data and system state, and runs ``setup()`` without
    adding any data points. After the test, data and system state are pruned
    again on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_get_graph_completion_extension_context_on_empty_graph"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_get_graph_completion_extension_context_on_empty_graph"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@pytest.mark.asyncio
async def test_graph_completion_extension_context_simple(setup_test_environment_simple):
    """Context and completion work against the simple company graph.

    The textual context must contain the ``works_for`` edges of both Canva
    employees, and the completion must be a list of non-empty strings.
    """
    question = "Who works at Canva?"
    retriever = GraphCompletionContextExtensionRetriever()

    edges = await retriever.get_context(question)
    context = await resolve_edges_to_text(edges)

    for employee in ("Mike Broski", "Christina Mayer"):
        assert f"{employee} --[works_for]--> Canva" in context, f"Failed to get {employee}"

    answer = await retriever.get_completion(question)

    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
    for item in answer:
        assert isinstance(item, str) and item.strip(), (
            "Answer must contain only non-empty strings"
        )
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@pytest.mark.asyncio
async def test_graph_completion_extension_context_complex(setup_test_environment_complex):
    """Context and completion work against the richer graph with ``top_k=20``.

    The textual context must contain the ``works_for`` edges of all three
    Figma employees, and the completion must be a list of non-empty strings.
    Note that the context and completion questions are intentionally kept as
    two different strings, as in the original test.
    """
    retriever = GraphCompletionContextExtensionRetriever(top_k=20)

    edges = await retriever.get_context("Who works at Figma and drives Tesla?")
    context = await resolve_edges_to_text(edges)

    for employee in ("Mike Rodger", "Ike Loma", "Jason Statham"):
        assert f"{employee} --[works_for]--> Figma" in context, f"Failed to get {employee}"

    answer = await retriever.get_completion("Who works at Figma?")

    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
    for item in answer:
        assert isinstance(item, str) and item.strip(), (
            "Answer must contain only non-empty strings"
        )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@pytest.mark.asyncio
async def test_get_graph_completion_extension_context_on_empty_graph(setup_test_environment_empty):
    """An empty graph yields an empty context and a well-formed completion.

    ``get_context`` must return ``[]``; ``get_completion`` must still return
    a list whose items (if any) are non-empty strings.
    """
    question = "Who works at Figma?"
    retriever = GraphCompletionContextExtensionRetriever()

    context = await retriever.get_context(question)
    assert context == [], "Context should be empty on an empty graph"

    answer = await retriever.get_completion(question)

    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
    for item in answer:
        assert isinstance(item, str) and item.strip(), (
            "Answer must contain only non-empty strings"
        )
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@pytest.mark.asyncio
async def test_graph_completion_extension_get_triplets_empty(setup_test_environment_empty):
    """``get_triplets`` returns an empty list when the graph has no data."""
    triplets = await GraphCompletionContextExtensionRetriever().get_triplets(
        "Who works at Figma?"
    )

    assert isinstance(triplets, list), "Triplets should be a list"
    assert not triplets, "Should return empty list on empty graph"
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pytest
|
|
3
|
+
import pathlib
|
|
4
|
+
import pytest_asyncio
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
import cognee
|
|
7
|
+
|
|
8
|
+
from cognee.low_level import setup, DataPoint
|
|
9
|
+
from cognee.modules.graph.utils import resolve_edges_to_text
|
|
10
|
+
from cognee.tasks.storage import add_data_points
|
|
11
|
+
from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest_asyncio.fixture
async def setup_test_environment_simple():
    """Provide a pruned cognee environment seeded with a small company graph.

    Points cognee's system and data roots at directories dedicated to this
    test, prunes existing data and system state, runs ``setup()``, and stores
    two companies plus five people linked through ``works_for``. After the
    test, data and system state are pruned again on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_graph_completion_cot_context_simple"
    )
    data_directory_path = str(base_dir / ".data_storage/test_graph_completion_cot_context_simple")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a clean slate before seeding.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    class Company(DataPoint):
        name: str

    class Person(DataPoint):
        name: str
        works_for: Company

    figma = Company(name="Figma")
    canva = Company(name="Canva")
    people = [
        Person(name="Steve Rodger", works_for=figma),
        Person(name="Ike Loma", works_for=figma),
        Person(name="Jason Statham", works_for=figma),
        Person(name="Mike Broski", works_for=canva),
        Person(name="Christina Mayer", works_for=canva),
    ]

    await add_data_points([figma, canva, *people])

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@pytest_asyncio.fixture
async def setup_test_environment_complex():
    """Provide a pruned cognee environment seeded with a richer graph.

    Points cognee's system and data roots at directories dedicated to this
    test, prunes existing data and system state, runs ``setup()``, and stores
    two companies and five people; some people also own cars and/or a home
    with a location. After the test, data and system state are pruned again
    on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_graph_completion_cot_context_complex"
    )
    data_directory_path = str(base_dir / ".data_storage/test_graph_completion_cot_context_complex")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a clean slate before seeding.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    class Company(DataPoint):
        name: str
        metadata: dict = {"index_fields": ["name"]}

    class Car(DataPoint):
        brand: str
        model: str
        year: int

    class Location(DataPoint):
        country: str
        city: str

    class Home(DataPoint):
        location: Location
        rooms: int
        sqm: int

    class Person(DataPoint):
        name: str
        works_for: Company
        owns: Optional[list[Union[Car, Home]]] = None

    figma = Company(name="Figma")
    canva = Company(name="Canva")

    # Possessions are attached after construction, exactly as in the original
    # fixture, so the way the ``owns`` values are set does not change.
    mike_rodger = Person(name="Mike Rodger", works_for=figma)
    mike_rodger.owns = [Car(brand="Toyota", model="Camry", year=2020)]

    ike_loma = Person(name="Ike Loma", works_for=figma)
    ike_loma.owns = [
        Car(brand="Tesla", model="Model S", year=2021),
        Home(location=Location(country="USA", city="New York"), sqm=80, rooms=4),
    ]

    jason_statham = Person(name="Jason Statham", works_for=figma)

    mike_broski = Person(name="Mike Broski", works_for=canva)
    mike_broski.owns = [Car(brand="Ford", model="Mustang", year=1978)]

    christina_mayer = Person(name="Christina Mayer", works_for=canva)
    christina_mayer.owns = [Car(brand="Honda", model="Civic", year=2023)]

    await add_data_points(
        [figma, canva, mike_rodger, ike_loma, jason_statham, mike_broski, christina_mayer]
    )

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@pytest_asyncio.fixture
async def setup_test_environment_empty():
    """Provide a pruned cognee environment that contains no graph data.

    Points cognee's system and data roots at directories dedicated to this
    test, prunes existing data and system state, and runs ``setup()`` without
    adding any data points. After the test, data and system state are pruned
    again on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_get_graph_completion_cot_context_on_empty_graph"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_get_graph_completion_cot_context_on_empty_graph"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@pytest.mark.asyncio
async def test_graph_completion_cot_context_simple(setup_test_environment_simple):
    """Context and completion work against the simple company graph.

    The textual context must contain the ``works_for`` edges of both Canva
    employees, and the completion must be a list of non-empty strings.
    """
    question = "Who works at Canva?"
    retriever = GraphCompletionCotRetriever()

    edges = await retriever.get_context(question)
    context = await resolve_edges_to_text(edges)

    for employee in ("Mike Broski", "Christina Mayer"):
        assert f"{employee} --[works_for]--> Canva" in context, f"Failed to get {employee}"

    answer = await retriever.get_completion(question)

    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
    for item in answer:
        assert isinstance(item, str) and item.strip(), (
            "Answer must contain only non-empty strings"
        )
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@pytest.mark.asyncio
async def test_graph_completion_cot_context_complex(setup_test_environment_complex):
    """Context and completion work against the richer graph with ``top_k=20``.

    The textual context must contain the ``works_for`` edges of all three
    Figma employees, and the completion must be a list of non-empty strings.
    """
    question = "Who works at Figma?"
    retriever = GraphCompletionCotRetriever(top_k=20)

    edges = await retriever.get_context(question)
    context = await resolve_edges_to_text(edges)

    for employee in ("Mike Rodger", "Ike Loma", "Jason Statham"):
        assert f"{employee} --[works_for]--> Figma" in context, f"Failed to get {employee}"

    answer = await retriever.get_completion(question)

    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
    for item in answer:
        assert isinstance(item, str) and item.strip(), (
            "Answer must contain only non-empty strings"
        )
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@pytest.mark.asyncio
async def test_get_graph_completion_cot_context_on_empty_graph(setup_test_environment_empty):
    """An empty graph yields an empty context and a well-formed completion.

    ``get_context`` must return ``[]``; ``get_completion`` must still return
    a list whose items (if any) are non-empty strings.
    """
    question = "Who works at Figma?"
    retriever = GraphCompletionCotRetriever()

    context = await retriever.get_context(question)
    assert context == [], "Context should be empty on an empty graph"

    answer = await retriever.get_completion(question)

    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
    for item in answer:
        assert isinstance(item, str) and item.strip(), (
            "Answer must contain only non-empty strings"
        )
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
@pytest.mark.asyncio
async def test_graph_completion_cot_get_triplets_empty(setup_test_environment_empty):
    """``get_triplets`` returns an empty list when the graph has no data."""
    triplets = await GraphCompletionCotRetriever().get_triplets("Who works at Figma?")

    assert isinstance(triplets, list), "Triplets should be a list"
    assert not triplets, "Should return empty list on empty graph"
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List
|
|
3
|
+
import pytest
|
|
4
|
+
import pathlib
|
|
5
|
+
import pytest_asyncio
|
|
6
|
+
import cognee
|
|
7
|
+
|
|
8
|
+
from cognee.low_level import setup
|
|
9
|
+
from cognee.tasks.storage import add_data_points
|
|
10
|
+
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
11
|
+
from cognee.modules.chunking.models import DocumentChunk
|
|
12
|
+
from cognee.modules.data.processing.document_types import TextDocument
|
|
13
|
+
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
|
14
|
+
from cognee.modules.retrieval.completion_retriever import CompletionRetriever
|
|
15
|
+
from cognee.infrastructure.engine import DataPoint
|
|
16
|
+
from cognee.modules.data.processing.document_types import Document
|
|
17
|
+
from cognee.modules.engine.models import Entity
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DocumentChunkWithEntities(DataPoint):
    # Data point describing a document chunk together with the entities it
    # contains. In this module it is only used as the ``payload_schema`` when
    # the "DocumentChunk_text" vector collection is created by hand.

    # Chunk payload.
    text: str
    chunk_size: int
    chunk_index: int
    cut_type: str

    # Relations to other data points.
    is_part_of: Document
    # NOTE(review): annotated as a list but defaults to None — kept as-is.
    contains: List[Entity] = None

    # "text" is declared as the index field for this data point.
    metadata: dict = {"index_fields": ["text"]}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest_asyncio.fixture
async def setup_test_environment_with_chunks_simple():
    """Provide a pruned cognee environment seeded with three document chunks.

    Points cognee's system and data roots at directories dedicated to this
    test, prunes existing data and system state, runs ``setup()``, and stores
    three chunks ("Steve Rodger", "Mike Broski", "Christina Mayer") that all
    belong to one text document. After the test, data and system state are
    pruned again on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(base_dir / ".cognee_system/test_rag_completion_context_simple")
    data_directory_path = str(base_dir / ".data_storage/test_rag_completion_context_simple")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a clean slate before seeding.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    document = TextDocument(
        name="Steve Rodger's career",
        raw_data_location="somewhere",
        external_metadata="",
        mime_type="text/plain",
    )

    # One chunk per name; chunk_index follows the position in the tuple.
    chunks = [
        DocumentChunk(
            text=text,
            chunk_size=2,
            chunk_index=index,
            cut_type="sentence_end",
            is_part_of=document,
            contains=[],
        )
        for index, text in enumerate(("Steve Rodger", "Mike Broski", "Christina Mayer"))
    ]

    await add_data_points(chunks)

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@pytest_asyncio.fixture
async def setup_test_environment_with_chunks_complex():
    """Provide a pruned cognee environment seeded with chunks of two documents.

    Points cognee's system and data roots at directories dedicated to this
    test, prunes existing data and system state, runs ``setup()``, and stores
    six chunks: three names belonging to an "Employee List" document and
    three car names belonging to a "Car List" document. After the test, data
    and system state are pruned again on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(base_dir / ".cognee_system/test_rag_completion_context_complex")
    data_directory_path = str(base_dir / ".data_storage/test_rag_completion_context_complex")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a clean slate before seeding.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    document1 = TextDocument(
        name="Employee List",
        raw_data_location="somewhere",
        external_metadata="",
        mime_type="text/plain",
    )

    document2 = TextDocument(
        name="Car List",
        raw_data_location="somewhere",
        external_metadata="",
        mime_type="text/plain",
    )

    # Each document contributes three chunks, indexed 0..2 within the document.
    chunk_sources = (
        (document1, ("Steve Rodger", "Mike Broski", "Christina Mayer")),
        (document2, ("Range Rover", "Hyundai", "Chrysler")),
    )

    entities = []
    for document, texts in chunk_sources:
        for index, text in enumerate(texts):
            entities.append(
                DocumentChunk(
                    text=text,
                    chunk_size=2,
                    chunk_index=index,
                    cut_type="sentence_end",
                    is_part_of=document,
                    contains=[],
                )
            )

    await add_data_points(entities)

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@pytest_asyncio.fixture
async def setup_test_environment_empty():
    """Provide a pruned cognee environment that contains no chunks.

    Points cognee's system and data roots at directories dedicated to this
    test and prunes existing data and system state. Nothing is seeded. After
    the test, data and system state are pruned again on a best-effort basis.
    """
    # Function-scope import: only needed to report teardown failures.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_get_rag_completion_context_on_empty_graph"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_get_rag_completion_context_on_empty_graph"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # NOTE(review): unlike the other fixtures in this module, setup() is not
    # called here — presumably intentional; confirm before changing.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    yield

    # Cleanup stays best-effort (it must not fail the test), but a failure is
    # logged instead of being silently discarded.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during fixture teardown",
            exc_info=True,
        )
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
@pytest.mark.asyncio
async def test_rag_completion_context_simple(setup_test_environment_with_chunks_simple):
    """Querying "Mike" returns a string context that includes "Mike Broski"."""
    context = await CompletionRetriever().get_context("Mike")

    assert isinstance(context, str), "Context should be a string"
    assert "Mike Broski" in context, "Failed to get Mike Broski"
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@pytest.mark.asyncio
async def test_rag_completion_context_multiple_chunks(setup_test_environment_with_chunks_simple):
    """Querying "Steve" returns a string context that includes "Steve Rodger".

    The environment holds several chunks; only the presence of the matching
    chunk's text is asserted.
    """
    context = await CompletionRetriever().get_context("Steve")

    assert isinstance(context, str), "Context should be a string"
    assert "Steve Rodger" in context, "Failed to get Steve Rodger"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
@pytest.mark.asyncio
async def test_rag_completion_context_complex(setup_test_environment_with_chunks_complex):
    """Querying "Christina" returns a context that starts with "Christina Mayer".

    Uses ``top_k=20`` against the two-document environment. The context is
    first checked to be a string (as in the sibling tests) so a wrong return
    type is reported clearly, then its prefix is checked with ``startswith``
    instead of a hard-coded slice length tied to the expected literal.
    """
    # TODO: top_k doesn't affect the output, it should be fixed.
    retriever = CompletionRetriever(top_k=20)

    context = await retriever.get_context("Christina")

    assert isinstance(context, str), "Context should be a string"
    assert context.startswith("Christina Mayer"), "Failed to get Christina Mayer"
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@pytest.mark.asyncio
async def test_get_rag_completion_context_on_empty_graph(setup_test_environment_empty):
    """Empty-store behaviour of ``CompletionRetriever.get_context``.

    Before the "DocumentChunk_text" collection exists the call must raise
    ``NoDataError``; once the (still empty) collection has been created the
    call must return an empty string.
    """
    query = "Christina Mayer"
    retriever = CompletionRetriever()

    with pytest.raises(NoDataError):
        await retriever.get_context(query)

    # Create the collection without inserting anything into it.
    engine = get_vector_engine()
    await engine.create_collection(
        "DocumentChunk_text", payload_schema=DocumentChunkWithEntities
    )

    context = await retriever.get_context(query)
    assert context == "", "Returned context should be empty on an empty graph"
|