cognee 0.5.1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. cognee/api/v1/add/add.py +2 -1
  2. cognee/api/v1/datasets/routers/get_datasets_router.py +1 -0
  3. cognee/api/v1/memify/routers/get_memify_router.py +1 -0
  4. cognee/api/v1/search/search.py +0 -4
  5. cognee/infrastructure/databases/relational/config.py +16 -1
  6. cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
  7. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +24 -2
  8. cognee/infrastructure/databases/vector/create_vector_engine.py +9 -2
  9. cognee/infrastructure/llm/LLMGateway.py +0 -13
  10. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
  11. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
  12. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
  13. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -5
  14. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
  15. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
  16. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
  17. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
  18. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
  19. cognee/modules/data/models/Data.py +2 -1
  20. cognee/modules/retrieval/triplet_retriever.py +1 -1
  21. cognee/modules/retrieval/utils/brute_force_triplet_search.py +0 -18
  22. cognee/modules/search/methods/search.py +18 -25
  23. cognee/tasks/ingestion/data_item.py +8 -0
  24. cognee/tasks/ingestion/ingest_data.py +12 -1
  25. cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
  26. cognee/tests/integration/retrieval/test_chunks_retriever.py +252 -0
  27. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +268 -0
  28. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +226 -0
  29. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +218 -0
  30. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +254 -0
  31. cognee/tests/{unit/modules/retrieval/structured_output_test.py → integration/retrieval/test_structured_output.py} +87 -77
  32. cognee/tests/integration/retrieval/test_summaries_retriever.py +184 -0
  33. cognee/tests/integration/retrieval/test_temporal_retriever.py +306 -0
  34. cognee/tests/integration/retrieval/test_triplet_retriever.py +35 -0
  35. cognee/tests/test_custom_data_label.py +68 -0
  36. cognee/tests/test_search_db.py +334 -181
  37. cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
  38. cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
  39. cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
  40. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +181 -199
  41. cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
  42. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +454 -162
  43. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +674 -156
  44. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +625 -200
  45. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +319 -203
  46. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +189 -155
  47. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +539 -58
  48. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +218 -9
  49. cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
  50. cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
  51. cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
  52. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +246 -0
  53. {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/METADATA +1 -1
  54. {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/RECORD +58 -45
  55. cognee/tests/unit/modules/search/test_search.py +0 -100
  56. {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/WHEEL +0 -0
  57. {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/entry_points.txt +0 -0
  58. {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/licenses/LICENSE +0 -0
  59. {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,226 @@
1
+ import os
2
+ import pytest
3
+ import pathlib
4
+ import pytest_asyncio
5
+ from typing import Optional, Union
6
+ import cognee
7
+
8
+ from cognee.low_level import setup, DataPoint
9
+ from cognee.tasks.storage import add_data_points
10
+ from cognee.modules.graph.utils import resolve_edges_to_text
11
+ from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
12
+ GraphCompletionContextExtensionRetriever,
13
+ )
14
+
15
+
16
@pytest_asyncio.fixture
async def setup_test_environment_simple():
    """Set up a clean test environment with simple graph data.

    Points cognee's system and data root directories at test-specific
    folders, prunes data and system state, runs ``setup()``, and stores two
    companies plus five people that reference them through ``works_for``.
    After the test has run, the same prune calls are repeated as a
    best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_graph_completion_extension_context_simple"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_graph_completion_extension_context_simple"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a pruned state before creating anything.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    class Company(DataPoint):
        name: str

    class Person(DataPoint):
        name: str
        works_for: Company

    company1 = Company(name="Figma")
    company2 = Company(name="Canva")
    person1 = Person(name="Steve Rodger", works_for=company1)
    person2 = Person(name="Ike Loma", works_for=company1)
    person3 = Person(name="Jason Statham", works_for=company1)
    person4 = Person(name="Mike Broski", works_for=company2)
    person5 = Person(name="Christina Mayer", works_for=company2)

    entities = [company1, company2, person1, person2, person3, person4, person5]

    await add_data_points(entities)

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently so that state
    # leaking into later tests can be diagnosed.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
60
+
61
+
62
@pytest_asyncio.fixture
async def setup_test_environment_complex():
    """Set up a clean test environment with complex graph data.

    Points cognee's system and data root directories at test-specific
    folders, prunes data and system state, runs ``setup()``, and stores two
    companies and five people. Four of the people own cars, and one of them
    also owns a home with a nested location. After the test has run, the
    same prune calls are repeated as a best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_graph_completion_extension_context_complex"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_graph_completion_extension_context_complex"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a pruned state before creating anything.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    class Company(DataPoint):
        name: str
        metadata: dict = {"index_fields": ["name"]}

    class Car(DataPoint):
        brand: str
        model: str
        year: int

    class Location(DataPoint):
        country: str
        city: str

    class Home(DataPoint):
        location: Location
        rooms: int
        sqm: int

    class Person(DataPoint):
        name: str
        works_for: Company
        owns: Optional[list[Union[Car, Home]]] = None

    company1 = Company(name="Figma")
    company2 = Company(name="Canva")

    # ``owns`` is assigned after construction, exactly as in the original data setup.
    person1 = Person(name="Mike Rodger", works_for=company1)
    person1.owns = [Car(brand="Toyota", model="Camry", year=2020)]

    person2 = Person(name="Ike Loma", works_for=company1)
    person2.owns = [
        Car(brand="Tesla", model="Model S", year=2021),
        Home(location=Location(country="USA", city="New York"), sqm=80, rooms=4),
    ]

    person3 = Person(name="Jason Statham", works_for=company1)

    person4 = Person(name="Mike Broski", works_for=company2)
    person4.owns = [Car(brand="Ford", model="Mustang", year=1978)]

    person5 = Person(name="Christina Mayer", works_for=company2)
    person5.owns = [Car(brand="Honda", model="Civic", year=2023)]

    entities = [company1, company2, person1, person2, person3, person4, person5]

    await add_data_points(entities)

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently so that state
    # leaking into later tests can be diagnosed.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
134
+
135
+
136
@pytest_asyncio.fixture
async def setup_test_environment_empty():
    """Set up a clean test environment without graph data.

    Points cognee's system and data root directories at test-specific
    folders, prunes data and system state, and runs ``setup()`` without
    adding any data points. After the test has run, the same prune calls are
    repeated as a best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_get_graph_completion_extension_context_on_empty_graph"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_get_graph_completion_extension_context_on_empty_graph"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
161
+
162
+
163
@pytest.mark.asyncio
async def test_graph_completion_extension_context_simple(setup_test_environment_simple):
    """Integration test: context and completion retrieval on the simple graph.

    Checks that the text resolved from the retrieved context mentions both
    Canva employees, and that the completion is a list of non-empty strings.
    """
    extension_retriever = GraphCompletionContextExtensionRetriever()

    retrieved_edges = await extension_retriever.get_context("Who works at Canva?")
    context_text = await resolve_edges_to_text(retrieved_edges)

    for employee in ("Mike Broski", "Christina Mayer"):
        assert f"{employee} --[works_for]--> Canva" in context_text, (
            f"Failed to get {employee}"
        )

    completion = await extension_retriever.get_completion("Who works at Canva?")

    assert isinstance(completion, list), f"Expected list, got {type(completion).__name__}"
    for entry in completion:
        assert isinstance(entry, str) and entry.strip(), (
            "Answer must contain only non-empty strings"
        )
179
+
180
+
181
@pytest.mark.asyncio
async def test_graph_completion_extension_context_complex(setup_test_environment_complex):
    """Integration test: context and completion retrieval on the complex graph.

    Uses ``top_k=20`` and checks that the text resolved from the retrieved
    context mentions all three Figma employees, then that the completion is
    a list of non-empty strings.
    """
    extension_retriever = GraphCompletionContextExtensionRetriever(top_k=20)

    retrieved_edges = await extension_retriever.get_context(
        "Who works at Figma and drives Tesla?"
    )
    context_text = await resolve_edges_to_text(retrieved_edges)

    for employee in ("Mike Rodger", "Ike Loma", "Jason Statham"):
        assert f"{employee} --[works_for]--> Figma" in context_text, (
            f"Failed to get {employee}"
        )

    completion = await extension_retriever.get_completion("Who works at Figma?")

    assert isinstance(completion, list), f"Expected list, got {type(completion).__name__}"
    for entry in completion:
        assert isinstance(entry, str) and entry.strip(), (
            "Answer must contain only non-empty strings"
        )
200
+
201
+
202
@pytest.mark.asyncio
async def test_get_graph_completion_extension_context_on_empty_graph(setup_test_environment_empty):
    """Integration test: retriever behaviour when no graph data was added.

    The context must be an empty list, and the completion must still be a
    list whose items (if any) are non-empty strings.
    """
    extension_retriever = GraphCompletionContextExtensionRetriever()

    empty_context = await extension_retriever.get_context("Who works at Figma?")
    assert empty_context == [], "Context should be empty on an empty graph"

    completion = await extension_retriever.get_completion("Who works at Figma?")

    assert isinstance(completion, list), f"Expected list, got {type(completion).__name__}"
    for entry in completion:
        assert isinstance(entry, str) and entry.strip(), (
            "Answer must contain only non-empty strings"
        )
216
+
217
+
218
@pytest.mark.asyncio
async def test_graph_completion_extension_get_triplets_empty(setup_test_environment_empty):
    """Integration test: ``get_triplets`` yields an empty list when no graph data was added."""
    extension_retriever = GraphCompletionContextExtensionRetriever()

    found_triplets = await extension_retriever.get_triplets("Who works at Figma?")

    assert isinstance(found_triplets, list), "Triplets should be a list"
    assert len(found_triplets) == 0, "Should return empty list on empty graph"
@@ -0,0 +1,218 @@
1
+ import os
2
+ import pytest
3
+ import pathlib
4
+ import pytest_asyncio
5
+ from typing import Optional, Union
6
+ import cognee
7
+
8
+ from cognee.low_level import setup, DataPoint
9
+ from cognee.modules.graph.utils import resolve_edges_to_text
10
+ from cognee.tasks.storage import add_data_points
11
+ from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
12
+
13
+
14
@pytest_asyncio.fixture
async def setup_test_environment_simple():
    """Set up a clean test environment with simple graph data.

    Points cognee's system and data root directories at test-specific
    folders, prunes data and system state, runs ``setup()``, and stores two
    companies plus five people that reference them through ``works_for``.
    After the test has run, the same prune calls are repeated as a
    best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_graph_completion_cot_context_simple"
    )
    data_directory_path = str(base_dir / ".data_storage/test_graph_completion_cot_context_simple")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a pruned state before creating anything.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    class Company(DataPoint):
        name: str

    class Person(DataPoint):
        name: str
        works_for: Company

    company1 = Company(name="Figma")
    company2 = Company(name="Canva")
    person1 = Person(name="Steve Rodger", works_for=company1)
    person2 = Person(name="Ike Loma", works_for=company1)
    person3 = Person(name="Jason Statham", works_for=company1)
    person4 = Person(name="Mike Broski", works_for=company2)
    person5 = Person(name="Christina Mayer", works_for=company2)

    entities = [company1, company2, person1, person2, person3, person4, person5]

    await add_data_points(entities)

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently so that state
    # leaking into later tests can be diagnosed.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
56
+
57
+
58
@pytest_asyncio.fixture
async def setup_test_environment_complex():
    """Set up a clean test environment with complex graph data.

    Points cognee's system and data root directories at test-specific
    folders, prunes data and system state, runs ``setup()``, and stores two
    companies and five people. Four of the people own cars, and one of them
    also owns a home with a nested location. After the test has run, the
    same prune calls are repeated as a best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_graph_completion_cot_context_complex"
    )
    data_directory_path = str(base_dir / ".data_storage/test_graph_completion_cot_context_complex")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a pruned state before creating anything.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    class Company(DataPoint):
        name: str
        metadata: dict = {"index_fields": ["name"]}

    class Car(DataPoint):
        brand: str
        model: str
        year: int

    class Location(DataPoint):
        country: str
        city: str

    class Home(DataPoint):
        location: Location
        rooms: int
        sqm: int

    class Person(DataPoint):
        name: str
        works_for: Company
        owns: Optional[list[Union[Car, Home]]] = None

    company1 = Company(name="Figma")
    company2 = Company(name="Canva")

    # ``owns`` is assigned after construction, exactly as in the original data setup.
    person1 = Person(name="Mike Rodger", works_for=company1)
    person1.owns = [Car(brand="Toyota", model="Camry", year=2020)]

    person2 = Person(name="Ike Loma", works_for=company1)
    person2.owns = [
        Car(brand="Tesla", model="Model S", year=2021),
        Home(location=Location(country="USA", city="New York"), sqm=80, rooms=4),
    ]

    person3 = Person(name="Jason Statham", works_for=company1)

    person4 = Person(name="Mike Broski", works_for=company2)
    person4.owns = [Car(brand="Ford", model="Mustang", year=1978)]

    person5 = Person(name="Christina Mayer", works_for=company2)
    person5.owns = [Car(brand="Honda", model="Civic", year=2023)]

    entities = [company1, company2, person1, person2, person3, person4, person5]

    await add_data_points(entities)

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently so that state
    # leaking into later tests can be diagnosed.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
128
+
129
+
130
@pytest_asyncio.fixture
async def setup_test_environment_empty():
    """Set up a clean test environment without graph data.

    Points cognee's system and data root directories at test-specific
    folders, prunes data and system state, and runs ``setup()`` without
    adding any data points. After the test has run, the same prune calls are
    repeated as a best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_get_graph_completion_cot_context_on_empty_graph"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_get_graph_completion_cot_context_on_empty_graph"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
155
+
156
+
157
@pytest.mark.asyncio
async def test_graph_completion_cot_context_simple(setup_test_environment_simple):
    """Integration test: context and completion retrieval on the simple graph.

    Checks that the text resolved from the retrieved context mentions both
    Canva employees, and that the completion is a list of non-empty strings.
    """
    cot_retriever = GraphCompletionCotRetriever()

    retrieved_edges = await cot_retriever.get_context("Who works at Canva?")
    context_text = await resolve_edges_to_text(retrieved_edges)

    for employee in ("Mike Broski", "Christina Mayer"):
        assert f"{employee} --[works_for]--> Canva" in context_text, (
            f"Failed to get {employee}"
        )

    completion = await cot_retriever.get_completion("Who works at Canva?")

    assert isinstance(completion, list), f"Expected list, got {type(completion).__name__}"
    for entry in completion:
        assert isinstance(entry, str) and entry.strip(), (
            "Answer must contain only non-empty strings"
        )
173
+
174
+
175
@pytest.mark.asyncio
async def test_graph_completion_cot_context_complex(setup_test_environment_complex):
    """Integration test: context and completion retrieval on the complex graph.

    Uses ``top_k=20`` and checks that the text resolved from the retrieved
    context mentions all three Figma employees, then that the completion is
    a list of non-empty strings.
    """
    cot_retriever = GraphCompletionCotRetriever(top_k=20)

    retrieved_edges = await cot_retriever.get_context("Who works at Figma?")
    context_text = await resolve_edges_to_text(retrieved_edges)

    for employee in ("Mike Rodger", "Ike Loma", "Jason Statham"):
        assert f"{employee} --[works_for]--> Figma" in context_text, (
            f"Failed to get {employee}"
        )

    completion = await cot_retriever.get_completion("Who works at Figma?")

    assert isinstance(completion, list), f"Expected list, got {type(completion).__name__}"
    for entry in completion:
        assert isinstance(entry, str) and entry.strip(), (
            "Answer must contain only non-empty strings"
        )
192
+
193
+
194
@pytest.mark.asyncio
async def test_get_graph_completion_cot_context_on_empty_graph(setup_test_environment_empty):
    """Integration test: retriever behaviour when no graph data was added.

    The context must be an empty list, and the completion must still be a
    list whose items (if any) are non-empty strings.
    """
    cot_retriever = GraphCompletionCotRetriever()

    empty_context = await cot_retriever.get_context("Who works at Figma?")
    assert empty_context == [], "Context should be empty on an empty graph"

    completion = await cot_retriever.get_completion("Who works at Figma?")

    assert isinstance(completion, list), f"Expected list, got {type(completion).__name__}"
    for entry in completion:
        assert isinstance(entry, str) and entry.strip(), (
            "Answer must contain only non-empty strings"
        )
208
+
209
+
210
@pytest.mark.asyncio
async def test_graph_completion_cot_get_triplets_empty(setup_test_environment_empty):
    """Integration test: ``get_triplets`` yields an empty list when no graph data was added."""
    cot_retriever = GraphCompletionCotRetriever()

    found_triplets = await cot_retriever.get_triplets("Who works at Figma?")

    assert isinstance(found_triplets, list), "Triplets should be a list"
    assert len(found_triplets) == 0, "Should return empty list on empty graph"
@@ -0,0 +1,254 @@
1
+ import os
2
+ from typing import List
3
+ import pytest
4
+ import pathlib
5
+ import pytest_asyncio
6
+ import cognee
7
+
8
+ from cognee.low_level import setup
9
+ from cognee.tasks.storage import add_data_points
10
+ from cognee.infrastructure.databases.vector import get_vector_engine
11
+ from cognee.modules.chunking.models import DocumentChunk
12
+ from cognee.modules.data.processing.document_types import TextDocument
13
+ from cognee.modules.retrieval.exceptions.exceptions import NoDataError
14
+ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
15
+ from cognee.infrastructure.engine import DataPoint
16
+ from cognee.modules.data.processing.document_types import Document
17
+ from cognee.modules.engine.models import Entity
18
+
19
+
20
class DocumentChunkWithEntities(DataPoint):
    """Data point describing a document chunk together with the entities it contains.

    In this module it is only used as the ``payload_schema`` when the
    empty-graph test creates the ``DocumentChunk_text`` collection.
    """

    text: str
    chunk_size: int
    chunk_index: int
    cut_type: str
    is_part_of: Document
    # NOTE(review): annotated as a list but defaults to None; an Optional
    # annotation would describe the default more accurately.
    contains: List[Entity] = None

    # Declares ``text`` as the index field for this data point.
    metadata: dict = {"index_fields": ["text"]}
29
+
30
+
31
@pytest_asyncio.fixture
async def setup_test_environment_with_chunks_simple():
    """Set up a clean test environment with simple chunks.

    Points cognee's system and data root directories at test-specific
    folders, prunes data and system state, runs ``setup()``, and stores three
    single-name chunks that all belong to one text document. After the test
    has run, the same prune calls are repeated as a best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(base_dir / ".cognee_system/test_rag_completion_context_simple")
    data_directory_path = str(base_dir / ".data_storage/test_rag_completion_context_simple")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a pruned state before creating anything.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    document = TextDocument(
        name="Steve Rodger's career",
        raw_data_location="somewhere",
        external_metadata="",
        mime_type="text/plain",
    )

    # The chunks differ only in their text and their position in the document.
    chunk_texts = ("Steve Rodger", "Mike Broski", "Christina Mayer")
    entities = [
        DocumentChunk(
            text=chunk_text,
            chunk_size=2,
            chunk_index=chunk_index,
            cut_type="sentence_end",
            is_part_of=document,
            contains=[],
        )
        for chunk_index, chunk_text in enumerate(chunk_texts)
    ]

    await add_data_points(entities)

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently so that state
    # leaking into later tests can be diagnosed.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
88
+
89
+
90
@pytest_asyncio.fixture
async def setup_test_environment_with_chunks_complex():
    """Set up a clean test environment with complex chunks.

    Points cognee's system and data root directories at test-specific
    folders, prunes data and system state, runs ``setup()``, and stores six
    chunks split evenly across two text documents ("Employee List" and
    "Car List"). After the test has run, the same prune calls are repeated
    as a best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(base_dir / ".cognee_system/test_rag_completion_context_complex")
    data_directory_path = str(base_dir / ".data_storage/test_rag_completion_context_complex")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a pruned state before creating anything.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    document1 = TextDocument(
        name="Employee List",
        raw_data_location="somewhere",
        external_metadata="",
        mime_type="text/plain",
    )

    document2 = TextDocument(
        name="Car List",
        raw_data_location="somewhere",
        external_metadata="",
        mime_type="text/plain",
    )

    # Each document gets three chunks; chunk_index restarts at 0 per document.
    chunk_texts_by_document = (
        (document1, ("Steve Rodger", "Mike Broski", "Christina Mayer")),
        (document2, ("Range Rover", "Hyundai", "Chrysler")),
    )

    entities = []
    for document, chunk_texts in chunk_texts_by_document:
        entities.extend(
            DocumentChunk(
                text=chunk_text,
                chunk_size=2,
                chunk_index=chunk_index,
                cut_type="sentence_end",
                is_part_of=document,
                contains=[],
            )
            for chunk_index, chunk_text in enumerate(chunk_texts)
        )

    await add_data_points(entities)

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently so that state
    # leaking into later tests can be diagnosed.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
179
+
180
+
181
@pytest_asyncio.fixture
async def setup_test_environment_empty():
    """Set up a clean test environment without chunks.

    Points cognee's system and data root directories at test-specific
    folders and prunes data and system state; no data points are added.
    After the test has run, the same prune calls are repeated as a
    best-effort cleanup.
    """
    # Imported locally so the fixture does not rely on a module-level import.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(
        base_dir / ".cognee_system/test_get_rag_completion_context_on_empty_graph"
    )
    data_directory_path = str(
        base_dir / ".data_storage/test_get_rag_completion_context_on_empty_graph"
    )

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # NOTE(review): unlike the chunk fixtures in this module, ``setup()`` is
    # not called here. Left unchanged; confirm whether this is intentional.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    yield

    # Cleanup stays best-effort (a failure here must not fail the test), but
    # the failure is logged instead of being dropped silently.
    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        logging.getLogger(__name__).warning(
            "Failed to prune cognee data/system during test teardown", exc_info=True
        )
205
+
206
+
207
@pytest.mark.asyncio
async def test_rag_completion_context_simple(setup_test_environment_with_chunks_simple):
    """Integration test: querying "Mike" returns a string context containing "Mike Broski"."""
    completion_retriever = CompletionRetriever()

    retrieved_context = await completion_retriever.get_context("Mike")

    assert isinstance(retrieved_context, str), "Context should be a string"
    assert "Mike Broski" in retrieved_context, "Failed to get Mike Broski"
216
+
217
+
218
@pytest.mark.asyncio
async def test_rag_completion_context_multiple_chunks(setup_test_environment_with_chunks_simple):
    """Integration test: querying "Steve" returns a string context containing "Steve Rodger".

    Runs against the fixture that stores several chunks in one document.
    """
    completion_retriever = CompletionRetriever()

    retrieved_context = await completion_retriever.get_context("Steve")

    assert isinstance(retrieved_context, str), "Context should be a string"
    assert "Steve Rodger" in retrieved_context, "Failed to get Steve Rodger"
227
+
228
+
229
@pytest.mark.asyncio
async def test_rag_completion_context_complex(setup_test_environment_with_chunks_complex):
    """Integration test: on the two-document data set, the context for "Christina" starts with her chunk."""
    # TODO: top_k doesn't affect the output, it should be fixed.
    completion_retriever = CompletionRetriever(top_k=20)

    retrieved_context = await completion_retriever.get_context("Christina")

    # Only the leading characters are compared, so further text may follow.
    expected_prefix = "Christina Mayer"
    leading_text = retrieved_context[0 : len(expected_prefix)]
    assert leading_text == expected_prefix, "Failed to get Christina Mayer"
238
+
239
+
240
@pytest.mark.asyncio
async def test_get_rag_completion_context_on_empty_graph(setup_test_environment_empty):
    """Integration test: CompletionRetriever behaviour when no chunks were stored.

    First expects ``NoDataError`` from ``get_context``. Then creates the
    ``DocumentChunk_text`` collection and expects an empty string context.
    """
    completion_retriever = CompletionRetriever()
    query = "Christina Mayer"

    # Phase 1: before the collection is created, retrieval must raise.
    with pytest.raises(NoDataError):
        await completion_retriever.get_context(query)

    # Phase 2: with the (still empty) collection in place, the context is empty.
    engine = get_vector_engine()
    await engine.create_collection(
        "DocumentChunk_text", payload_schema=DocumentChunkWithEntities
    )

    retrieved_context = await completion_retriever.get_context(query)
    assert retrieved_context == "", "Returned context should be empty on an empty graph"