cognee 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
- cognee/api/v1/prune/prune.py +2 -2
- cognee/api/v1/search/search.py +1 -1
- cognee/api/v1/sync/sync.py +16 -5
- cognee/base_config.py +19 -1
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
- cognee/infrastructure/databases/relational/ModelBase.py +2 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
- cognee/infrastructure/databases/vector/config.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
- cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
- cognee/infrastructure/files/storage/StorageManager.py +18 -0
- cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
- cognee/infrastructure/utils/run_async.py +9 -4
- cognee/infrastructure/utils/run_sync.py +4 -3
- cognee/modules/cloud/operations/check_api_key.py +4 -1
- cognee/modules/data/deletion/prune_system.py +5 -1
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/notebooks/methods/create_notebook.py +34 -0
- cognee/modules/notebooks/methods/get_notebook.py +2 -2
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/notebooks/methods/update_notebook.py +0 -1
- cognee/modules/notebooks/models/Notebook.py +206 -1
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
- cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -3
- cognee/modules/retrieval/user_qa_feedback.py +1 -1
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/search.py +12 -13
- cognee/modules/search/utils/prepare_search_result.py +31 -9
- cognee/modules/search/utils/transform_context_to_graph.py +1 -1
- cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
- cognee/modules/users/methods/create_user.py +4 -24
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +10 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +10 -0
- cognee/root_dir.py +5 -0
- cognee/shared/cache.py +346 -0
- cognee/shared/utils.py +12 -0
- cognee/tasks/graph/extract_graph_from_data.py +53 -10
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
- cognee/tasks/temporal_graph/models.py +11 -6
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
- cognee/tests/test_cognee_server_start.py +4 -4
- cognee/tests/test_temporal_graph.py +6 -34
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/RECORD +93 -86
- cognee-0.3.4.dist-info/entry_points.txt +2 -0
- cognee/api/v1/save/save.py +0 -335
- cognee/tests/test_save_export_path.py +0 -116
- cognee-0.3.2.dist-info/entry_points.txt +0 -2
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pytest
|
|
3
|
+
from unittest.mock import AsyncMock, patch, MagicMock
|
|
4
|
+
import hashlib
|
|
5
|
+
import time
|
|
6
|
+
from uuid import uuid4
|
|
7
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import zipfile
|
|
10
|
+
from cognee.shared.cache import get_tutorial_data_dir
|
|
11
|
+
|
|
12
|
+
from cognee.modules.notebooks.methods.create_notebook import _create_tutorial_notebook
|
|
13
|
+
from cognee.modules.notebooks.models.Notebook import Notebook
|
|
14
|
+
import cognee
|
|
15
|
+
from cognee.shared.logging_utils import get_logger
|
|
16
|
+
|
|
17
|
+
logger = get_logger()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Module-level fixtures available to all test classes
|
|
21
|
+
@pytest.fixture
|
|
22
|
+
def mock_session():
|
|
23
|
+
"""Mock database session."""
|
|
24
|
+
session = AsyncMock(spec=AsyncSession)
|
|
25
|
+
session.add = MagicMock()
|
|
26
|
+
session.commit = AsyncMock()
|
|
27
|
+
return session
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.fixture
|
|
31
|
+
def sample_jupyter_notebook():
|
|
32
|
+
"""Sample Jupyter notebook content for testing."""
|
|
33
|
+
return {
|
|
34
|
+
"cells": [
|
|
35
|
+
{
|
|
36
|
+
"cell_type": "markdown",
|
|
37
|
+
"metadata": {},
|
|
38
|
+
"source": ["# Tutorial Introduction\n", "\n", "This is a tutorial notebook."],
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"cell_type": "code",
|
|
42
|
+
"execution_count": None,
|
|
43
|
+
"metadata": {},
|
|
44
|
+
"outputs": [],
|
|
45
|
+
"source": ["import cognee\n", "print('Hello, Cognee!')"],
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"cell_type": "markdown",
|
|
49
|
+
"metadata": {},
|
|
50
|
+
"source": ["## Step 1: Data Ingestion\n", "\n", "Let's add some data."],
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"cell_type": "code",
|
|
54
|
+
"execution_count": None,
|
|
55
|
+
"metadata": {},
|
|
56
|
+
"outputs": [],
|
|
57
|
+
"source": ["# Add your data here\n", "# await cognee.add('data.txt')"],
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"cell_type": "raw",
|
|
61
|
+
"metadata": {},
|
|
62
|
+
"source": ["This is a raw cell that should be skipped"],
|
|
63
|
+
},
|
|
64
|
+
],
|
|
65
|
+
"metadata": {
|
|
66
|
+
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}
|
|
67
|
+
},
|
|
68
|
+
"nbformat": 4,
|
|
69
|
+
"nbformat_minor": 4,
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class TestTutorialNotebookCreation:
|
|
74
|
+
"""Test cases for tutorial notebook creation functionality."""
|
|
75
|
+
|
|
76
|
+
@pytest.mark.asyncio
|
|
77
|
+
async def test_notebook_from_ipynb_string_success(self, sample_jupyter_notebook):
|
|
78
|
+
"""Test successful creation of notebook from JSON string."""
|
|
79
|
+
notebook_json = json.dumps(sample_jupyter_notebook)
|
|
80
|
+
user_id = uuid4()
|
|
81
|
+
|
|
82
|
+
notebook = Notebook.from_ipynb_string(
|
|
83
|
+
notebook_content=notebook_json, owner_id=user_id, name="String Test Notebook"
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
assert notebook.owner_id == user_id
|
|
87
|
+
assert notebook.name == "String Test Notebook"
|
|
88
|
+
assert len(notebook.cells) == 4 # Should skip the raw cell
|
|
89
|
+
assert notebook.cells[0].type == "markdown"
|
|
90
|
+
assert notebook.cells[1].type == "code"
|
|
91
|
+
|
|
92
|
+
@pytest.mark.asyncio
|
|
93
|
+
async def test_notebook_cell_name_generation(self, sample_jupyter_notebook):
|
|
94
|
+
"""Test that cell names are generated correctly from markdown headers."""
|
|
95
|
+
user_id = uuid4()
|
|
96
|
+
notebook_json = json.dumps(sample_jupyter_notebook)
|
|
97
|
+
|
|
98
|
+
notebook = Notebook.from_ipynb_string(notebook_content=notebook_json, owner_id=user_id)
|
|
99
|
+
|
|
100
|
+
# Check markdown header extraction
|
|
101
|
+
assert notebook.cells[0].name == "Tutorial Introduction"
|
|
102
|
+
assert notebook.cells[2].name == "Step 1: Data Ingestion"
|
|
103
|
+
|
|
104
|
+
# Check code cell naming
|
|
105
|
+
assert notebook.cells[1].name == "Code Cell"
|
|
106
|
+
assert notebook.cells[3].name == "Code Cell"
|
|
107
|
+
|
|
108
|
+
@pytest.mark.asyncio
|
|
109
|
+
async def test_notebook_from_ipynb_string_with_default_name(self, sample_jupyter_notebook):
|
|
110
|
+
"""Test notebook creation uses kernelspec display_name when no name provided."""
|
|
111
|
+
user_id = uuid4()
|
|
112
|
+
notebook_json = json.dumps(sample_jupyter_notebook)
|
|
113
|
+
|
|
114
|
+
notebook = Notebook.from_ipynb_string(notebook_content=notebook_json, owner_id=user_id)
|
|
115
|
+
|
|
116
|
+
assert notebook.name == "Python 3" # From kernelspec.display_name
|
|
117
|
+
|
|
118
|
+
@pytest.mark.asyncio
|
|
119
|
+
async def test_notebook_from_ipynb_string_fallback_name(self):
|
|
120
|
+
"""Test fallback naming when kernelspec is missing."""
|
|
121
|
+
minimal_notebook = {
|
|
122
|
+
"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Test"]}],
|
|
123
|
+
"metadata": {}, # No kernelspec
|
|
124
|
+
"nbformat": 4,
|
|
125
|
+
"nbformat_minor": 4,
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
user_id = uuid4()
|
|
129
|
+
notebook_json = json.dumps(minimal_notebook)
|
|
130
|
+
|
|
131
|
+
notebook = Notebook.from_ipynb_string(notebook_content=notebook_json, owner_id=user_id)
|
|
132
|
+
|
|
133
|
+
assert notebook.name == "Imported Notebook" # Fallback name
|
|
134
|
+
|
|
135
|
+
@pytest.mark.asyncio
|
|
136
|
+
async def test_notebook_from_ipynb_string_invalid_json(self):
|
|
137
|
+
"""Test error handling for invalid JSON."""
|
|
138
|
+
user_id = uuid4()
|
|
139
|
+
invalid_json = "{ invalid json content"
|
|
140
|
+
|
|
141
|
+
from nbformat.reader import NotJSONError
|
|
142
|
+
|
|
143
|
+
with pytest.raises(NotJSONError):
|
|
144
|
+
Notebook.from_ipynb_string(notebook_content=invalid_json, owner_id=user_id)
|
|
145
|
+
|
|
146
|
+
@pytest.mark.asyncio
|
|
147
|
+
@patch.object(Notebook, "from_ipynb_zip_url")
|
|
148
|
+
async def test_create_tutorial_notebook_error_propagated(self, mock_from_zip_url, mock_session):
|
|
149
|
+
"""Test that errors are propagated when zip fetch fails."""
|
|
150
|
+
user_id = uuid4()
|
|
151
|
+
mock_from_zip_url.side_effect = Exception("Network error")
|
|
152
|
+
|
|
153
|
+
# Should raise the exception (not catch it)
|
|
154
|
+
with pytest.raises(Exception, match="Network error"):
|
|
155
|
+
await _create_tutorial_notebook(user_id, mock_session)
|
|
156
|
+
|
|
157
|
+
# Verify error handling path was taken
|
|
158
|
+
mock_from_zip_url.assert_called_once()
|
|
159
|
+
mock_session.add.assert_not_called()
|
|
160
|
+
mock_session.commit.assert_not_called()
|
|
161
|
+
|
|
162
|
+
def test_generate_cell_name_code_cell(self):
|
|
163
|
+
"""Test cell name generation for code cells."""
|
|
164
|
+
from nbformat.notebooknode import NotebookNode
|
|
165
|
+
|
|
166
|
+
mock_cell = NotebookNode(
|
|
167
|
+
{"cell_type": "code", "source": 'import pandas as pd\nprint("Hello world")'}
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
result = Notebook._generate_cell_name(mock_cell)
|
|
171
|
+
assert result == "Code Cell"
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class TestTutorialNotebookZipFunctionality:
|
|
175
|
+
"""Test cases for zip-based tutorial functionality."""
|
|
176
|
+
|
|
177
|
+
@pytest.mark.asyncio
|
|
178
|
+
async def test_notebook_from_ipynb_zip_url_missing_notebook(
|
|
179
|
+
self,
|
|
180
|
+
):
|
|
181
|
+
"""Test error handling when notebook file is missing from zip."""
|
|
182
|
+
user_id = uuid4()
|
|
183
|
+
|
|
184
|
+
with pytest.raises(
|
|
185
|
+
FileNotFoundError,
|
|
186
|
+
match="Notebook file 'super_random_tutorial_name.ipynb' not found in zip",
|
|
187
|
+
):
|
|
188
|
+
await Notebook.from_ipynb_zip_url(
|
|
189
|
+
zip_url="https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip",
|
|
190
|
+
owner_id=user_id,
|
|
191
|
+
notebook_filename="super_random_tutorial_name.ipynb",
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
@pytest.mark.asyncio
|
|
195
|
+
async def test_notebook_from_ipynb_zip_url_download_failure(self):
|
|
196
|
+
"""Test error handling when zip download fails."""
|
|
197
|
+
user_id = uuid4()
|
|
198
|
+
with pytest.raises(RuntimeError, match="Failed to download tutorial zip"):
|
|
199
|
+
await Notebook.from_ipynb_zip_url(
|
|
200
|
+
zip_url="https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/nonexistent_tutorial_name.zip",
|
|
201
|
+
owner_id=user_id,
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
@pytest.mark.asyncio
|
|
205
|
+
async def test_create_tutorial_notebook_zip_success(self, mock_session):
|
|
206
|
+
"""Test successful tutorial notebook creation with zip."""
|
|
207
|
+
await cognee.prune.prune_data()
|
|
208
|
+
await cognee.prune.prune_system(metadata=True)
|
|
209
|
+
|
|
210
|
+
user_id = uuid4()
|
|
211
|
+
|
|
212
|
+
# Check that tutorial data directory is empty using storage-aware method
|
|
213
|
+
tutorial_data_dir_path = await get_tutorial_data_dir()
|
|
214
|
+
tutorial_data_dir = Path(tutorial_data_dir_path)
|
|
215
|
+
if tutorial_data_dir.exists():
|
|
216
|
+
assert not any(tutorial_data_dir.iterdir()), "Tutorial data directory should be empty"
|
|
217
|
+
|
|
218
|
+
await _create_tutorial_notebook(user_id, mock_session)
|
|
219
|
+
|
|
220
|
+
items = list(tutorial_data_dir.iterdir())
|
|
221
|
+
assert len(items) == 1, "Tutorial data directory should contain exactly one item"
|
|
222
|
+
assert items[0].is_dir(), "Tutorial data directory item should be a directory"
|
|
223
|
+
|
|
224
|
+
# Verify the structure inside the tutorial directory
|
|
225
|
+
tutorial_dir = items[0]
|
|
226
|
+
|
|
227
|
+
# Check for tutorial.ipynb file
|
|
228
|
+
notebook_file = tutorial_dir / "tutorial.ipynb"
|
|
229
|
+
assert notebook_file.exists(), f"tutorial.ipynb should exist in {tutorial_dir}"
|
|
230
|
+
assert notebook_file.is_file(), "tutorial.ipynb should be a file"
|
|
231
|
+
|
|
232
|
+
# Check for data subfolder with contents
|
|
233
|
+
data_folder = tutorial_dir / "data"
|
|
234
|
+
assert data_folder.exists(), f"data subfolder should exist in {tutorial_dir}"
|
|
235
|
+
assert data_folder.is_dir(), "data should be a directory"
|
|
236
|
+
|
|
237
|
+
data_items = list(data_folder.iterdir())
|
|
238
|
+
assert len(data_items) > 0, (
|
|
239
|
+
f"data folder should contain files, but found {len(data_items)} items"
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
@pytest.mark.asyncio
|
|
243
|
+
async def test_create_tutorial_notebook_with_force_refresh(self, mock_session):
|
|
244
|
+
"""Test tutorial notebook creation with force refresh."""
|
|
245
|
+
await cognee.prune.prune_data()
|
|
246
|
+
await cognee.prune.prune_system(metadata=True)
|
|
247
|
+
|
|
248
|
+
user_id = uuid4()
|
|
249
|
+
|
|
250
|
+
# Check that tutorial data directory is empty using storage-aware method
|
|
251
|
+
tutorial_data_dir_path = await get_tutorial_data_dir()
|
|
252
|
+
tutorial_data_dir = Path(tutorial_data_dir_path)
|
|
253
|
+
if tutorial_data_dir.exists():
|
|
254
|
+
assert not any(tutorial_data_dir.iterdir()), "Tutorial data directory should be empty"
|
|
255
|
+
|
|
256
|
+
# First creation (without force refresh)
|
|
257
|
+
await _create_tutorial_notebook(user_id, mock_session, force_refresh=False)
|
|
258
|
+
|
|
259
|
+
items_first = list(tutorial_data_dir.iterdir())
|
|
260
|
+
assert len(items_first) == 1, (
|
|
261
|
+
"Tutorial data directory should contain exactly one item after first creation"
|
|
262
|
+
)
|
|
263
|
+
first_dir = items_first[0]
|
|
264
|
+
assert first_dir.is_dir(), "Tutorial data directory item should be a directory"
|
|
265
|
+
|
|
266
|
+
# Verify the structure inside the tutorial directory (first creation)
|
|
267
|
+
notebook_file = first_dir / "tutorial.ipynb"
|
|
268
|
+
assert notebook_file.exists(), f"tutorial.ipynb should exist in {first_dir}"
|
|
269
|
+
assert notebook_file.is_file(), "tutorial.ipynb should be a file"
|
|
270
|
+
|
|
271
|
+
data_folder = first_dir / "data"
|
|
272
|
+
assert data_folder.exists(), f"data subfolder should exist in {first_dir}"
|
|
273
|
+
assert data_folder.is_dir(), "data should be a directory"
|
|
274
|
+
|
|
275
|
+
data_items = list(data_folder.iterdir())
|
|
276
|
+
assert len(data_items) > 0, (
|
|
277
|
+
f"data folder should contain files, but found {len(data_items)} items"
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
# Capture metadata from first creation
|
|
281
|
+
|
|
282
|
+
first_creation_metadata = {}
|
|
283
|
+
|
|
284
|
+
for file_path in first_dir.rglob("*"):
|
|
285
|
+
if file_path.is_file():
|
|
286
|
+
relative_path = file_path.relative_to(first_dir)
|
|
287
|
+
stat = file_path.stat()
|
|
288
|
+
|
|
289
|
+
# Store multiple metadata points
|
|
290
|
+
with open(file_path, "rb") as f:
|
|
291
|
+
content = f.read()
|
|
292
|
+
|
|
293
|
+
first_creation_metadata[str(relative_path)] = {
|
|
294
|
+
"mtime": stat.st_mtime,
|
|
295
|
+
"size": stat.st_size,
|
|
296
|
+
"hash": hashlib.md5(content).hexdigest(),
|
|
297
|
+
"first_bytes": content[:100]
|
|
298
|
+
if content
|
|
299
|
+
else b"", # First 100 bytes as fingerprint
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
# Wait a moment to ensure different timestamps
|
|
303
|
+
time.sleep(0.1)
|
|
304
|
+
|
|
305
|
+
# Force refresh - should create new files with different metadata
|
|
306
|
+
await _create_tutorial_notebook(user_id, mock_session, force_refresh=True)
|
|
307
|
+
|
|
308
|
+
items_second = list(tutorial_data_dir.iterdir())
|
|
309
|
+
assert len(items_second) == 1, (
|
|
310
|
+
"Tutorial data directory should contain exactly one item after force refresh"
|
|
311
|
+
)
|
|
312
|
+
second_dir = items_second[0]
|
|
313
|
+
|
|
314
|
+
# Verify the structure is maintained after force refresh
|
|
315
|
+
notebook_file_second = second_dir / "tutorial.ipynb"
|
|
316
|
+
assert notebook_file_second.exists(), (
|
|
317
|
+
f"tutorial.ipynb should exist in {second_dir} after force refresh"
|
|
318
|
+
)
|
|
319
|
+
assert notebook_file_second.is_file(), "tutorial.ipynb should be a file after force refresh"
|
|
320
|
+
|
|
321
|
+
data_folder_second = second_dir / "data"
|
|
322
|
+
assert data_folder_second.exists(), (
|
|
323
|
+
f"data subfolder should exist in {second_dir} after force refresh"
|
|
324
|
+
)
|
|
325
|
+
assert data_folder_second.is_dir(), "data should be a directory after force refresh"
|
|
326
|
+
|
|
327
|
+
data_items_second = list(data_folder_second.iterdir())
|
|
328
|
+
assert len(data_items_second) > 0, (
|
|
329
|
+
f"data folder should still contain files after force refresh, but found {len(data_items_second)} items"
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
# Compare metadata to ensure files are actually different
|
|
333
|
+
files_with_changed_metadata = 0
|
|
334
|
+
|
|
335
|
+
for file_path in second_dir.rglob("*"):
|
|
336
|
+
if file_path.is_file():
|
|
337
|
+
relative_path = file_path.relative_to(second_dir)
|
|
338
|
+
relative_path_str = str(relative_path)
|
|
339
|
+
|
|
340
|
+
# File should exist from first creation
|
|
341
|
+
assert relative_path_str in first_creation_metadata, (
|
|
342
|
+
f"File {relative_path_str} missing from first creation"
|
|
343
|
+
)
|
|
344
|
+
|
|
345
|
+
old_metadata = first_creation_metadata[relative_path_str]
|
|
346
|
+
|
|
347
|
+
# Get new metadata
|
|
348
|
+
stat = file_path.stat()
|
|
349
|
+
with open(file_path, "rb") as f:
|
|
350
|
+
new_content = f.read()
|
|
351
|
+
|
|
352
|
+
new_metadata = {
|
|
353
|
+
"mtime": stat.st_mtime,
|
|
354
|
+
"size": stat.st_size,
|
|
355
|
+
"hash": hashlib.md5(new_content).hexdigest(),
|
|
356
|
+
"first_bytes": new_content[:100] if new_content else b"",
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
# Check if any metadata changed (indicating file was refreshed)
|
|
360
|
+
metadata_changed = (
|
|
361
|
+
new_metadata["mtime"] > old_metadata["mtime"] # Newer modification time
|
|
362
|
+
or new_metadata["hash"] != old_metadata["hash"] # Different content hash
|
|
363
|
+
or new_metadata["size"] != old_metadata["size"] # Different file size
|
|
364
|
+
or new_metadata["first_bytes"]
|
|
365
|
+
!= old_metadata["first_bytes"] # Different content
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
if metadata_changed:
|
|
369
|
+
files_with_changed_metadata += 1
|
|
370
|
+
|
|
371
|
+
# Assert that force refresh actually updated files
|
|
372
|
+
assert files_with_changed_metadata > 0, (
|
|
373
|
+
f"Force refresh should have updated at least some files, but all {len(first_creation_metadata)} "
|
|
374
|
+
f"files appear to have identical metadata. This suggests force refresh didn't work."
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
mock_session.commit.assert_called()
|
|
378
|
+
|
|
379
|
+
@pytest.mark.asyncio
|
|
380
|
+
async def test_tutorial_zip_url_accessibility(self):
|
|
381
|
+
"""Test that the actual tutorial zip URL is accessible (integration test)."""
|
|
382
|
+
try:
|
|
383
|
+
import requests
|
|
384
|
+
|
|
385
|
+
response = requests.get(
|
|
386
|
+
"https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip",
|
|
387
|
+
timeout=10,
|
|
388
|
+
)
|
|
389
|
+
response.raise_for_status()
|
|
390
|
+
|
|
391
|
+
# Verify it's a valid zip file by checking headers
|
|
392
|
+
assert response.headers.get("content-type") in [
|
|
393
|
+
"application/zip",
|
|
394
|
+
"application/octet-stream",
|
|
395
|
+
"application/x-zip-compressed",
|
|
396
|
+
] or response.content.startswith(b"PK") # Zip file signature
|
|
397
|
+
|
|
398
|
+
except Exception:
|
|
399
|
+
pytest.skip("Network request failed or zip not available - skipping integration test")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cognee
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
|
|
5
5
|
Project-URL: Homepage, https://www.cognee.ai
|
|
6
6
|
Project-URL: Repository, https://github.com/topoteretes/cognee
|
|
@@ -25,6 +25,7 @@ Requires-Dist: dlt[sqlalchemy]<2,>=1.9.0
|
|
|
25
25
|
Requires-Dist: fastapi-users[sqlalchemy]<15.0.0,>=14.0.1
|
|
26
26
|
Requires-Dist: fastapi<1.0.0,>=0.115.7
|
|
27
27
|
Requires-Dist: filetype<2.0.0,>=1.2.0
|
|
28
|
+
Requires-Dist: gunicorn<24,>=20.1.0
|
|
28
29
|
Requires-Dist: instructor<2.0.0,>=1.9.1
|
|
29
30
|
Requires-Dist: jinja2<4,>=3.1.3
|
|
30
31
|
Requires-Dist: kuzu==0.11.0
|
|
@@ -33,6 +34,7 @@ Requires-Dist: langfuse<3,>=2.32.0
|
|
|
33
34
|
Requires-Dist: limits<5,>=4.4.1
|
|
34
35
|
Requires-Dist: litellm<2.0.0,>=1.71.0
|
|
35
36
|
Requires-Dist: matplotlib<4,>=3.8.3
|
|
37
|
+
Requires-Dist: nbformat<6.0.0,>=5.7.0
|
|
36
38
|
Requires-Dist: networkx<4,>=3.4.2
|
|
37
39
|
Requires-Dist: nltk<4.0.0,>=3.9.1
|
|
38
40
|
Requires-Dist: numpy<=4.0.0,>=1.26.4
|
|
@@ -56,17 +58,16 @@ Requires-Dist: sqlalchemy<3.0.0,>=2.0.39
|
|
|
56
58
|
Requires-Dist: structlog<26,>=25.2.0
|
|
57
59
|
Requires-Dist: tiktoken<1.0.0,>=0.8.0
|
|
58
60
|
Requires-Dist: typing-extensions<5.0.0,>=4.12.2
|
|
61
|
+
Requires-Dist: uvicorn<1.0.0,>=0.34.0
|
|
62
|
+
Requires-Dist: websockets<16.0.0,>=15.0.1
|
|
59
63
|
Provides-Extra: anthropic
|
|
60
|
-
Requires-Dist: anthropic
|
|
64
|
+
Requires-Dist: anthropic>=0.27; extra == 'anthropic'
|
|
61
65
|
Provides-Extra: api
|
|
62
|
-
Requires-Dist: gunicorn<24,>=20.1.0; extra == 'api'
|
|
63
|
-
Requires-Dist: uvicorn<1.0.0,>=0.34.0; extra == 'api'
|
|
64
|
-
Requires-Dist: websockets<16.0.0,>=15.0.1; extra == 'api'
|
|
65
66
|
Provides-Extra: aws
|
|
66
67
|
Requires-Dist: s3fs[boto3]==2025.3.2; extra == 'aws'
|
|
67
68
|
Provides-Extra: chromadb
|
|
68
|
-
Requires-Dist: chromadb<0.7,>=0.
|
|
69
|
-
Requires-Dist: pypika==0.48.
|
|
69
|
+
Requires-Dist: chromadb<0.7,>=0.6; extra == 'chromadb'
|
|
70
|
+
Requires-Dist: pypika==0.48.9; extra == 'chromadb'
|
|
70
71
|
Provides-Extra: codegraph
|
|
71
72
|
Requires-Dist: fastembed<=0.6.0; (python_version < '3.13') and extra == 'codegraph'
|
|
72
73
|
Requires-Dist: transformers<5,>=4.46.3; extra == 'codegraph'
|
|
@@ -316,7 +317,9 @@ You can also cognify your files and query using cognee UI.
|
|
|
316
317
|
|
|
317
318
|
<img src="assets/cognee-new-ui.webp" width="100%" alt="Cognee UI 2"></a>
|
|
318
319
|
|
|
319
|
-
|
|
320
|
+
### Running the UI
|
|
321
|
+
|
|
322
|
+
Try cognee UI by running ``` cognee-cli -ui ``` command on your terminal.
|
|
320
323
|
|
|
321
324
|
## Understand our architecture
|
|
322
325
|
|