cognee 0.5.1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +1 -0
- cognee/api/v1/search/search.py +0 -4
- cognee/infrastructure/databases/relational/config.py +16 -1
- cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +24 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +9 -2
- cognee/infrastructure/llm/LLMGateway.py +0 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
- cognee/modules/data/models/Data.py +2 -1
- cognee/modules/retrieval/triplet_retriever.py +1 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +0 -18
- cognee/modules/search/methods/search.py +18 -25
- cognee/tasks/ingestion/data_item.py +8 -0
- cognee/tasks/ingestion/ingest_data.py +12 -1
- cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +252 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +268 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +226 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +218 -0
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +254 -0
- cognee/tests/{unit/modules/retrieval/structured_output_test.py → integration/retrieval/test_structured_output.py} +87 -77
- cognee/tests/integration/retrieval/test_summaries_retriever.py +184 -0
- cognee/tests/integration/retrieval/test_temporal_retriever.py +306 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +35 -0
- cognee/tests/test_custom_data_label.py +68 -0
- cognee/tests/test_search_db.py +334 -181
- cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
- cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
- cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +181 -199
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +454 -162
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +674 -156
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +625 -200
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +319 -203
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +189 -155
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +539 -58
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +218 -9
- cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
- cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
- cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +246 -0
- {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/METADATA +1 -1
- {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/RECORD +58 -45
- cognee/tests/unit/modules/search/test_search.py +0 -100
- {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.1.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,159 +1,193 @@
|
|
|
1
|
-
import os
|
|
2
1
|
import pytest
|
|
3
|
-
import pathlib
|
|
4
|
-
|
|
5
|
-
import cognee
|
|
6
|
-
from cognee.low_level import setup
|
|
7
|
-
from cognee.tasks.storage import add_data_points
|
|
8
|
-
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
9
|
-
from cognee.modules.chunking.models import DocumentChunk
|
|
10
|
-
from cognee.tasks.summarization.models import TextSummary
|
|
11
|
-
from cognee.modules.data.processing.document_types import TextDocument
|
|
12
|
-
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
|
2
|
+
from unittest.mock import AsyncMock, patch, MagicMock
|
|
3
|
+
|
|
13
4
|
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
|
|
5
|
+
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
|
6
|
+
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture
|
|
10
|
+
def mock_vector_engine():
|
|
11
|
+
"""Create a mock vector engine."""
|
|
12
|
+
engine = AsyncMock()
|
|
13
|
+
engine.search = AsyncMock()
|
|
14
|
+
return engine
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@pytest.mark.asyncio
|
|
18
|
+
async def test_get_context_success(mock_vector_engine):
|
|
19
|
+
"""Test successful retrieval of summary context."""
|
|
20
|
+
mock_result1 = MagicMock()
|
|
21
|
+
mock_result1.payload = {"text": "S.R.", "made_from": "chunk1"}
|
|
22
|
+
mock_result2 = MagicMock()
|
|
23
|
+
mock_result2.payload = {"text": "M.B.", "made_from": "chunk2"}
|
|
24
|
+
|
|
25
|
+
mock_vector_engine.search.return_value = [mock_result1, mock_result2]
|
|
26
|
+
|
|
27
|
+
retriever = SummariesRetriever(top_k=5)
|
|
28
|
+
|
|
29
|
+
with patch(
|
|
30
|
+
"cognee.modules.retrieval.summaries_retriever.get_vector_engine",
|
|
31
|
+
return_value=mock_vector_engine,
|
|
32
|
+
):
|
|
33
|
+
context = await retriever.get_context("test query")
|
|
34
|
+
|
|
35
|
+
assert len(context) == 2
|
|
36
|
+
assert context[0]["text"] == "S.R."
|
|
37
|
+
assert context[1]["text"] == "M.B."
|
|
38
|
+
mock_vector_engine.search.assert_awaited_once_with("TextSummary_text", "test query", limit=5)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@pytest.mark.asyncio
|
|
42
|
+
async def test_get_context_collection_not_found_error(mock_vector_engine):
|
|
43
|
+
"""Test that CollectionNotFoundError is converted to NoDataError."""
|
|
44
|
+
mock_vector_engine.search.side_effect = CollectionNotFoundError("Collection not found")
|
|
45
|
+
|
|
46
|
+
retriever = SummariesRetriever()
|
|
47
|
+
|
|
48
|
+
with patch(
|
|
49
|
+
"cognee.modules.retrieval.summaries_retriever.get_vector_engine",
|
|
50
|
+
return_value=mock_vector_engine,
|
|
51
|
+
):
|
|
52
|
+
with pytest.raises(NoDataError, match="No data found"):
|
|
53
|
+
await retriever.get_context("test query")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@pytest.mark.asyncio
|
|
57
|
+
async def test_get_context_empty_results(mock_vector_engine):
|
|
58
|
+
"""Test that empty list is returned when no summaries are found."""
|
|
59
|
+
mock_vector_engine.search.return_value = []
|
|
60
|
+
|
|
61
|
+
retriever = SummariesRetriever()
|
|
62
|
+
|
|
63
|
+
with patch(
|
|
64
|
+
"cognee.modules.retrieval.summaries_retriever.get_vector_engine",
|
|
65
|
+
return_value=mock_vector_engine,
|
|
66
|
+
):
|
|
67
|
+
context = await retriever.get_context("test query")
|
|
68
|
+
|
|
69
|
+
assert context == []
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@pytest.mark.asyncio
|
|
73
|
+
async def test_get_context_top_k_limit(mock_vector_engine):
|
|
74
|
+
"""Test that top_k parameter limits the number of results."""
|
|
75
|
+
mock_results = [MagicMock() for _ in range(3)]
|
|
76
|
+
for i, result in enumerate(mock_results):
|
|
77
|
+
result.payload = {"text": f"Summary {i}"}
|
|
78
|
+
|
|
79
|
+
mock_vector_engine.search.return_value = mock_results
|
|
80
|
+
|
|
81
|
+
retriever = SummariesRetriever(top_k=3)
|
|
82
|
+
|
|
83
|
+
with patch(
|
|
84
|
+
"cognee.modules.retrieval.summaries_retriever.get_vector_engine",
|
|
85
|
+
return_value=mock_vector_engine,
|
|
86
|
+
):
|
|
87
|
+
context = await retriever.get_context("test query")
|
|
88
|
+
|
|
89
|
+
assert len(context) == 3
|
|
90
|
+
mock_vector_engine.search.assert_awaited_once_with("TextSummary_text", "test query", limit=3)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@pytest.mark.asyncio
|
|
94
|
+
async def test_get_completion_with_context(mock_vector_engine):
|
|
95
|
+
"""Test get_completion returns provided context."""
|
|
96
|
+
retriever = SummariesRetriever()
|
|
97
|
+
|
|
98
|
+
provided_context = [{"text": "S.R."}, {"text": "M.B."}]
|
|
99
|
+
completion = await retriever.get_completion("test query", context=provided_context)
|
|
100
|
+
|
|
101
|
+
assert completion == provided_context
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@pytest.mark.asyncio
|
|
105
|
+
async def test_get_completion_without_context(mock_vector_engine):
|
|
106
|
+
"""Test get_completion retrieves context when not provided."""
|
|
107
|
+
mock_result = MagicMock()
|
|
108
|
+
mock_result.payload = {"text": "S.R."}
|
|
109
|
+
mock_vector_engine.search.return_value = [mock_result]
|
|
110
|
+
|
|
111
|
+
retriever = SummariesRetriever()
|
|
112
|
+
|
|
113
|
+
with patch(
|
|
114
|
+
"cognee.modules.retrieval.summaries_retriever.get_vector_engine",
|
|
115
|
+
return_value=mock_vector_engine,
|
|
116
|
+
):
|
|
117
|
+
completion = await retriever.get_completion("test query")
|
|
118
|
+
|
|
119
|
+
assert len(completion) == 1
|
|
120
|
+
assert completion[0]["text"] == "S.R."
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@pytest.mark.asyncio
|
|
124
|
+
async def test_init_defaults():
|
|
125
|
+
"""Test SummariesRetriever initialization with defaults."""
|
|
126
|
+
retriever = SummariesRetriever()
|
|
127
|
+
|
|
128
|
+
assert retriever.top_k == 5
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@pytest.mark.asyncio
|
|
132
|
+
async def test_init_custom_top_k():
|
|
133
|
+
"""Test SummariesRetriever initialization with custom top_k."""
|
|
134
|
+
retriever = SummariesRetriever(top_k=10)
|
|
135
|
+
|
|
136
|
+
assert retriever.top_k == 10
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@pytest.mark.asyncio
|
|
140
|
+
async def test_get_context_empty_payload(mock_vector_engine):
|
|
141
|
+
"""Test get_context handles empty payload."""
|
|
142
|
+
mock_result = MagicMock()
|
|
143
|
+
mock_result.payload = {}
|
|
144
|
+
|
|
145
|
+
mock_vector_engine.search.return_value = [mock_result]
|
|
146
|
+
|
|
147
|
+
retriever = SummariesRetriever()
|
|
148
|
+
|
|
149
|
+
with patch(
|
|
150
|
+
"cognee.modules.retrieval.summaries_retriever.get_vector_engine",
|
|
151
|
+
return_value=mock_vector_engine,
|
|
152
|
+
):
|
|
153
|
+
context = await retriever.get_context("test query")
|
|
154
|
+
|
|
155
|
+
assert len(context) == 1
|
|
156
|
+
assert context[0] == {}
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@pytest.mark.asyncio
|
|
160
|
+
async def test_get_completion_with_session_id(mock_vector_engine):
|
|
161
|
+
"""Test get_completion with session_id parameter."""
|
|
162
|
+
mock_result = MagicMock()
|
|
163
|
+
mock_result.payload = {"text": "S.R."}
|
|
164
|
+
mock_vector_engine.search.return_value = [mock_result]
|
|
165
|
+
|
|
166
|
+
retriever = SummariesRetriever()
|
|
167
|
+
|
|
168
|
+
with patch(
|
|
169
|
+
"cognee.modules.retrieval.summaries_retriever.get_vector_engine",
|
|
170
|
+
return_value=mock_vector_engine,
|
|
171
|
+
):
|
|
172
|
+
completion = await retriever.get_completion("test query", session_id="test_session")
|
|
173
|
+
|
|
174
|
+
assert len(completion) == 1
|
|
175
|
+
assert completion[0]["text"] == "S.R."
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@pytest.mark.asyncio
|
|
179
|
+
async def test_get_completion_with_kwargs(mock_vector_engine):
|
|
180
|
+
"""Test get_completion accepts additional kwargs."""
|
|
181
|
+
mock_result = MagicMock()
|
|
182
|
+
mock_result.payload = {"text": "S.R."}
|
|
183
|
+
mock_vector_engine.search.return_value = [mock_result]
|
|
184
|
+
|
|
185
|
+
retriever = SummariesRetriever()
|
|
14
186
|
|
|
187
|
+
with patch(
|
|
188
|
+
"cognee.modules.retrieval.summaries_retriever.get_vector_engine",
|
|
189
|
+
return_value=mock_vector_engine,
|
|
190
|
+
):
|
|
191
|
+
completion = await retriever.get_completion("test query", extra_param="value")
|
|
15
192
|
|
|
16
|
-
|
|
17
|
-
@pytest.mark.asyncio
|
|
18
|
-
async def test_chunk_context(self):
|
|
19
|
-
system_directory_path = os.path.join(
|
|
20
|
-
pathlib.Path(__file__).parent, ".cognee_system/test_chunk_context"
|
|
21
|
-
)
|
|
22
|
-
cognee.config.system_root_directory(system_directory_path)
|
|
23
|
-
data_directory_path = os.path.join(
|
|
24
|
-
pathlib.Path(__file__).parent, ".data_storage/test_chunk_context"
|
|
25
|
-
)
|
|
26
|
-
cognee.config.data_root_directory(data_directory_path)
|
|
27
|
-
|
|
28
|
-
await cognee.prune.prune_data()
|
|
29
|
-
await cognee.prune.prune_system(metadata=True)
|
|
30
|
-
await setup()
|
|
31
|
-
|
|
32
|
-
document1 = TextDocument(
|
|
33
|
-
name="Employee List",
|
|
34
|
-
raw_data_location="somewhere",
|
|
35
|
-
external_metadata="",
|
|
36
|
-
mime_type="text/plain",
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
document2 = TextDocument(
|
|
40
|
-
name="Car List",
|
|
41
|
-
raw_data_location="somewhere",
|
|
42
|
-
external_metadata="",
|
|
43
|
-
mime_type="text/plain",
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
chunk1 = DocumentChunk(
|
|
47
|
-
text="Steve Rodger",
|
|
48
|
-
chunk_size=2,
|
|
49
|
-
chunk_index=0,
|
|
50
|
-
cut_type="sentence_end",
|
|
51
|
-
is_part_of=document1,
|
|
52
|
-
contains=[],
|
|
53
|
-
)
|
|
54
|
-
chunk1_summary = TextSummary(
|
|
55
|
-
text="S.R.",
|
|
56
|
-
made_from=chunk1,
|
|
57
|
-
)
|
|
58
|
-
chunk2 = DocumentChunk(
|
|
59
|
-
text="Mike Broski",
|
|
60
|
-
chunk_size=2,
|
|
61
|
-
chunk_index=1,
|
|
62
|
-
cut_type="sentence_end",
|
|
63
|
-
is_part_of=document1,
|
|
64
|
-
contains=[],
|
|
65
|
-
)
|
|
66
|
-
chunk2_summary = TextSummary(
|
|
67
|
-
text="M.B.",
|
|
68
|
-
made_from=chunk2,
|
|
69
|
-
)
|
|
70
|
-
chunk3 = DocumentChunk(
|
|
71
|
-
text="Christina Mayer",
|
|
72
|
-
chunk_size=2,
|
|
73
|
-
chunk_index=2,
|
|
74
|
-
cut_type="sentence_end",
|
|
75
|
-
is_part_of=document1,
|
|
76
|
-
contains=[],
|
|
77
|
-
)
|
|
78
|
-
chunk3_summary = TextSummary(
|
|
79
|
-
text="C.M.",
|
|
80
|
-
made_from=chunk3,
|
|
81
|
-
)
|
|
82
|
-
chunk4 = DocumentChunk(
|
|
83
|
-
text="Range Rover",
|
|
84
|
-
chunk_size=2,
|
|
85
|
-
chunk_index=0,
|
|
86
|
-
cut_type="sentence_end",
|
|
87
|
-
is_part_of=document2,
|
|
88
|
-
contains=[],
|
|
89
|
-
)
|
|
90
|
-
chunk4_summary = TextSummary(
|
|
91
|
-
text="R.R.",
|
|
92
|
-
made_from=chunk4,
|
|
93
|
-
)
|
|
94
|
-
chunk5 = DocumentChunk(
|
|
95
|
-
text="Hyundai",
|
|
96
|
-
chunk_size=2,
|
|
97
|
-
chunk_index=1,
|
|
98
|
-
cut_type="sentence_end",
|
|
99
|
-
is_part_of=document2,
|
|
100
|
-
contains=[],
|
|
101
|
-
)
|
|
102
|
-
chunk5_summary = TextSummary(
|
|
103
|
-
text="H.Y.",
|
|
104
|
-
made_from=chunk5,
|
|
105
|
-
)
|
|
106
|
-
chunk6 = DocumentChunk(
|
|
107
|
-
text="Chrysler",
|
|
108
|
-
chunk_size=2,
|
|
109
|
-
chunk_index=2,
|
|
110
|
-
cut_type="sentence_end",
|
|
111
|
-
is_part_of=document2,
|
|
112
|
-
contains=[],
|
|
113
|
-
)
|
|
114
|
-
chunk6_summary = TextSummary(
|
|
115
|
-
text="C.H.",
|
|
116
|
-
made_from=chunk6,
|
|
117
|
-
)
|
|
118
|
-
|
|
119
|
-
entities = [
|
|
120
|
-
chunk1_summary,
|
|
121
|
-
chunk2_summary,
|
|
122
|
-
chunk3_summary,
|
|
123
|
-
chunk4_summary,
|
|
124
|
-
chunk5_summary,
|
|
125
|
-
chunk6_summary,
|
|
126
|
-
]
|
|
127
|
-
|
|
128
|
-
await add_data_points(entities)
|
|
129
|
-
|
|
130
|
-
retriever = SummariesRetriever(top_k=20)
|
|
131
|
-
|
|
132
|
-
context = await retriever.get_context("Christina")
|
|
133
|
-
|
|
134
|
-
assert context[0]["text"] == "C.M.", "Failed to get Christina Mayer"
|
|
135
|
-
|
|
136
|
-
@pytest.mark.asyncio
|
|
137
|
-
async def test_chunk_context_on_empty_graph(self):
|
|
138
|
-
system_directory_path = os.path.join(
|
|
139
|
-
pathlib.Path(__file__).parent, ".cognee_system/test_chunk_context_on_empty_graph"
|
|
140
|
-
)
|
|
141
|
-
cognee.config.system_root_directory(system_directory_path)
|
|
142
|
-
data_directory_path = os.path.join(
|
|
143
|
-
pathlib.Path(__file__).parent, ".data_storage/test_chunk_context_on_empty_graph"
|
|
144
|
-
)
|
|
145
|
-
cognee.config.data_root_directory(data_directory_path)
|
|
146
|
-
|
|
147
|
-
await cognee.prune.prune_data()
|
|
148
|
-
await cognee.prune.prune_system(metadata=True)
|
|
149
|
-
|
|
150
|
-
retriever = SummariesRetriever()
|
|
151
|
-
|
|
152
|
-
with pytest.raises(NoDataError):
|
|
153
|
-
await retriever.get_context("Christina Mayer")
|
|
154
|
-
|
|
155
|
-
vector_engine = get_vector_engine()
|
|
156
|
-
await vector_engine.create_collection("TextSummary_text", payload_schema=TextSummary)
|
|
157
|
-
|
|
158
|
-
context = await retriever.get_context("Christina Mayer")
|
|
159
|
-
assert context == [], "Returned context should be empty on an empty graph"
|
|
193
|
+
assert len(completion) == 1
|