cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,19 +1,29 @@
1
- import json
2
1
  import pytest
3
2
  from unittest.mock import AsyncMock, patch, MagicMock
4
- import hashlib
5
- import time
6
- from uuid import uuid4
7
- from sqlalchemy.ext.asyncio import AsyncSession
3
+ from uuid import uuid4, uuid5, NAMESPACE_OID
8
4
  from pathlib import Path
9
- import zipfile
10
- from cognee.shared.cache import get_tutorial_data_dir
11
-
12
- from cognee.modules.notebooks.methods.create_notebook import _create_tutorial_notebook
13
- from cognee.modules.notebooks.models.Notebook import Notebook
14
- import cognee
5
+ from sqlalchemy.ext.asyncio import AsyncSession
6
+ import tempfile
7
+ import shutil
8
+ import importlib
9
+
10
+ from cognee.modules.notebooks.methods.create_tutorial_notebooks import (
11
+ create_tutorial_notebooks,
12
+ _parse_cell_index,
13
+ _get_cell_type,
14
+ _extract_markdown_heading,
15
+ _get_cell_name,
16
+ _format_tutorial_name,
17
+ _load_tutorial_cells,
18
+ )
19
+
20
+ from cognee.modules.notebooks.models.Notebook import Notebook, NotebookCell
15
21
  from cognee.shared.logging_utils import get_logger
16
22
 
23
+ create_tutorial_notebooks_module = importlib.import_module(
24
+ "cognee.modules.notebooks.methods.create_tutorial_notebooks"
25
+ )
26
+
17
27
  logger = get_logger()
18
28
 
19
29
 
@@ -28,372 +38,576 @@ def mock_session():
28
38
 
29
39
 
30
40
  @pytest.fixture
31
- def sample_jupyter_notebook():
32
- """Sample Jupyter notebook content for testing."""
33
- return {
34
- "cells": [
35
- {
36
- "cell_type": "markdown",
37
- "metadata": {},
38
- "source": ["# Tutorial Introduction\n", "\n", "This is a tutorial notebook."],
39
- },
40
- {
41
- "cell_type": "code",
42
- "execution_count": None,
43
- "metadata": {},
44
- "outputs": [],
45
- "source": ["import cognee\n", "print('Hello, Cognee!')"],
46
- },
47
- {
48
- "cell_type": "markdown",
49
- "metadata": {},
50
- "source": ["## Step 1: Data Ingestion\n", "\n", "Let's add some data."],
51
- },
52
- {
53
- "cell_type": "code",
54
- "execution_count": None,
55
- "metadata": {},
56
- "outputs": [],
57
- "source": ["# Add your data here\n", "# await cognee.add('data.txt')"],
58
- },
59
- {
60
- "cell_type": "raw",
61
- "metadata": {},
62
- "source": ["This is a raw cell that should be skipped"],
63
- },
64
- ],
65
- "metadata": {
66
- "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}
67
- },
68
- "nbformat": 4,
69
- "nbformat_minor": 4,
70
- }
41
+ def temp_tutorials_dir():
42
+ """Create a temporary tutorials directory for testing."""
43
+ temp_dir = Path(tempfile.mkdtemp())
44
+ tutorials_dir = temp_dir / "tutorials"
45
+ tutorials_dir.mkdir(parents=True)
46
+ yield tutorials_dir
47
+ shutil.rmtree(temp_dir)
48
+
49
+
50
+ class TestTutorialNotebookHelperFunctions:
51
+ """Test cases for helper functions used in tutorial notebook creation."""
52
+
53
+ def test_parse_cell_index_valid(self):
54
+ """Test parsing valid cell index from filename."""
55
+ assert _parse_cell_index("cell-0.md") == 0
56
+ assert _parse_cell_index("cell-1.py") == 1
57
+ assert _parse_cell_index("cell-123.md") == 123
58
+ assert _parse_cell_index("cell-999.py") == 999
59
+
60
+ def test_parse_cell_index_invalid(self):
61
+ """Test parsing invalid cell index returns -1."""
62
+ assert _parse_cell_index("not-a-cell.md") == -1
63
+ assert _parse_cell_index("cell.md") == -1
64
+ assert _parse_cell_index("cell-.md") == -1
65
+ assert _parse_cell_index("") == -1
66
+
67
+ def test_get_cell_type_markdown(self):
68
+ """Test cell type detection for markdown files."""
69
+ assert _get_cell_type(Path("cell-1.md")) == "markdown"
70
+ assert _get_cell_type(Path("test.MD")) == "markdown"
71
+
72
+ def test_get_cell_type_code(self):
73
+ """Test cell type detection for Python files."""
74
+ assert _get_cell_type(Path("cell-1.py")) == "code"
75
+ assert _get_cell_type(Path("test.PY")) == "code"
76
+
77
+ def test_get_cell_type_unsupported(self):
78
+ """Test error handling for unsupported file types."""
79
+ with pytest.raises(ValueError, match="Unsupported cell file type"):
80
+ _get_cell_type(Path("cell-1.txt"))
81
+
82
+ def test_extract_markdown_heading_single_hash(self):
83
+ """Test extracting heading from markdown with single #."""
84
+ content = "# My Heading\nSome content here"
85
+ assert _extract_markdown_heading(content) == "My Heading"
86
+
87
+ def test_extract_markdown_heading_multiple_hash(self):
88
+ """Test extracting heading from markdown with multiple #."""
89
+ content = "## Subheading\nSome content"
90
+ assert _extract_markdown_heading(content) == "Subheading"
91
+
92
+ def test_extract_markdown_heading_with_whitespace(self):
93
+ """Test extracting heading with leading/trailing whitespace."""
94
+ content = " # Heading with spaces \nContent"
95
+ assert _extract_markdown_heading(content) == "Heading with spaces"
96
+
97
+ def test_extract_markdown_heading_no_heading(self):
98
+ """Test extracting heading when no heading exists."""
99
+ content = "Just some regular text\nNo heading here"
100
+ assert _extract_markdown_heading(content) is None
101
+
102
+ def test_extract_markdown_heading_empty(self):
103
+ """Test extracting heading from empty content."""
104
+ assert _extract_markdown_heading("") is None
105
+
106
+ def test_get_cell_name_code_cell(self):
107
+ """Test cell name generation for code cells."""
108
+ cell_file = Path("cell-1.py")
109
+ content = "print('Hello, World!')"
110
+ assert _get_cell_name(cell_file, "code", content) == "Code Cell"
111
+
112
+ def test_get_cell_name_markdown_with_heading(self):
113
+ """Test cell name generation for markdown cells with heading."""
114
+ cell_file = Path("cell-1.md")
115
+ content = "# My Tutorial Title\nSome content"
116
+ assert _get_cell_name(cell_file, "markdown", content) == "My Tutorial Title"
117
+
118
+ def test_get_cell_name_markdown_no_heading(self):
119
+ """Test cell name generation for markdown cells without heading."""
120
+ cell_file = Path("cell-1.md")
121
+ content = "Just some text without heading"
122
+ assert _get_cell_name(cell_file, "markdown", content) == "cell-1"
123
+
124
+ def test_format_tutorial_name_simple(self):
125
+ """Test formatting simple tutorial directory name."""
126
+ assert _format_tutorial_name("cognee-basics") == "Cognee basics - tutorial 🧠"
127
+
128
+ def test_format_tutorial_name_with_underscores(self):
129
+ """Test formatting tutorial name with underscores."""
130
+ assert _format_tutorial_name("python_development") == "Python development - tutorial 🧠"
131
+
132
+ def test_format_tutorial_name_mixed(self):
133
+ """Test formatting tutorial name with mixed separators."""
134
+ assert _format_tutorial_name("my-tutorial_name") == "My tutorial name - tutorial 🧠"
135
+
136
+
137
+ class TestLoadTutorialCells:
138
+ """Test cases for loading cells from tutorial directories."""
71
139
 
140
+ def test_load_tutorial_cells_sorted_order(self, temp_tutorials_dir):
141
+ """Test that cells are loaded in sorted order by index."""
142
+ # Create cells out of order
143
+ (temp_tutorials_dir / "cell-3.md").write_text("# Third")
144
+ (temp_tutorials_dir / "cell-1.md").write_text("# First")
145
+ (temp_tutorials_dir / "cell-2.py").write_text("print('second')")
72
146
 
73
- class TestTutorialNotebookCreation:
74
- """Test cases for tutorial notebook creation functionality."""
147
+ cells = _load_tutorial_cells(temp_tutorials_dir)
148
+
149
+ assert len(cells) == 3
150
+ assert cells[0].name == "First"
151
+ assert cells[1].name == "Code Cell"
152
+ assert cells[2].name == "Third"
153
+
154
+ def test_load_tutorial_cells_skips_non_cell_files(self, temp_tutorials_dir):
155
+ """Test that non-cell files are skipped."""
156
+ (temp_tutorials_dir / "cell-1.md").write_text("# First")
157
+ (temp_tutorials_dir / "config.json").write_text('{"name": "test"}')
158
+ (temp_tutorials_dir / "README.md").write_text("# Readme")
159
+ (temp_tutorials_dir / "data").mkdir()
160
+ (temp_tutorials_dir / "data" / "file.txt").write_text("data")
161
+
162
+ cells = _load_tutorial_cells(temp_tutorials_dir)
163
+
164
+ assert len(cells) == 1
165
+ assert cells[0].name == "First"
166
+
167
+ def test_load_tutorial_cells_skips_unsupported_extensions(self, temp_tutorials_dir):
168
+ """Test that unsupported file extensions are skipped."""
169
+ (temp_tutorials_dir / "cell-1.md").write_text("# First")
170
+ (temp_tutorials_dir / "cell-2.txt").write_text("Text file")
171
+ (temp_tutorials_dir / "cell-3.py").write_text("print('code')")
172
+
173
+ cells = _load_tutorial_cells(temp_tutorials_dir)
174
+
175
+ assert len(cells) == 2
176
+ assert cells[0].name == "First"
177
+ assert cells[1].name == "Code Cell"
178
+
179
+ def test_load_tutorial_cells_empty_directory(self, temp_tutorials_dir):
180
+ """Test loading cells from empty directory."""
181
+ cells = _load_tutorial_cells(temp_tutorials_dir)
182
+ assert len(cells) == 0
183
+
184
+ def test_load_tutorial_cells_preserves_content(self, temp_tutorials_dir):
185
+ """Test that cell content is preserved correctly."""
186
+ markdown_content = "# My Heading\n\nSome content here."
187
+ code_content = "import cognee\nprint('Hello')"
188
+
189
+ (temp_tutorials_dir / "cell-1.md").write_text(markdown_content)
190
+ (temp_tutorials_dir / "cell-2.py").write_text(code_content)
191
+
192
+ cells = _load_tutorial_cells(temp_tutorials_dir)
193
+
194
+ assert len(cells) == 2
195
+ assert cells[0].content == markdown_content
196
+ assert cells[0].type == "markdown"
197
+ assert cells[1].content == code_content
198
+ assert cells[1].type == "code"
199
+
200
+
201
+ class TestCreateTutorialNotebooks:
202
+ """Test cases for the main create_tutorial_notebooks function."""
75
203
 
76
204
  @pytest.mark.asyncio
77
- async def test_notebook_from_ipynb_string_success(self, sample_jupyter_notebook):
78
- """Test successful creation of notebook from JSON string."""
79
- notebook_json = json.dumps(sample_jupyter_notebook)
205
+ async def test_create_tutorial_notebooks_success_with_config(
206
+ self, mock_session, temp_tutorials_dir
207
+ ):
208
+ """Test successful creation of tutorial notebooks with config.json."""
209
+ import json
210
+
80
211
  user_id = uuid4()
81
212
 
82
- notebook = Notebook.from_ipynb_string(
83
- notebook_content=notebook_json, owner_id=user_id, name="String Test Notebook"
213
+ # Create a tutorial directory with cells and config.json
214
+ tutorial_dir = temp_tutorials_dir / "test-tutorial"
215
+ tutorial_dir.mkdir()
216
+ (tutorial_dir / "cell-1.md").write_text("# Introduction\nWelcome to the tutorial")
217
+ (tutorial_dir / "cell-2.py").write_text("print('Hello')")
218
+ (tutorial_dir / "config.json").write_text(
219
+ json.dumps({"name": "Custom Tutorial Name", "deletable": False})
84
220
  )
85
221
 
86
- assert notebook.owner_id == user_id
87
- assert notebook.name == "String Test Notebook"
88
- assert len(notebook.cells) == 4 # Should skip the raw cell
89
- assert notebook.cells[0].type == "markdown"
90
- assert notebook.cells[1].type == "code"
222
+ with patch.object(
223
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
224
+ ) as mock_get_dir:
225
+ mock_get_dir.return_value = temp_tutorials_dir
226
+
227
+ await create_tutorial_notebooks(user_id, mock_session)
228
+
229
+ # Verify notebook was added to session
230
+ assert mock_session.add.call_count == 1
231
+ added_notebook = mock_session.add.call_args[0][0]
232
+
233
+ assert isinstance(added_notebook, Notebook)
234
+ assert added_notebook.owner_id == user_id
235
+ assert added_notebook.name == "Custom Tutorial Name"
236
+ assert len(added_notebook.cells) == 2
237
+ assert added_notebook.deletable is False
238
+
239
+ # Verify deterministic ID generation based on config name
240
+ expected_id = uuid5(NAMESPACE_OID, name="Custom Tutorial Name")
241
+ assert added_notebook.id == expected_id
242
+
243
+ # Verify commit was called
244
+ mock_session.commit.assert_called_once()
91
245
 
92
246
  @pytest.mark.asyncio
93
- async def test_notebook_cell_name_generation(self, sample_jupyter_notebook):
94
- """Test that cell names are generated correctly from markdown headers."""
247
+ async def test_create_tutorial_notebooks_success_without_config(
248
+ self, mock_session, temp_tutorials_dir
249
+ ):
250
+ """Test successful creation of tutorial notebooks without config.json (fallback)."""
95
251
  user_id = uuid4()
96
- notebook_json = json.dumps(sample_jupyter_notebook)
97
252
 
98
- notebook = Notebook.from_ipynb_string(notebook_content=notebook_json, owner_id=user_id)
253
+ # Create a tutorial directory with cells but no config.json
254
+ tutorial_dir = temp_tutorials_dir / "test-tutorial"
255
+ tutorial_dir.mkdir()
256
+ (tutorial_dir / "cell-1.md").write_text("# Introduction\nWelcome to the tutorial")
257
+ (tutorial_dir / "cell-2.py").write_text("print('Hello')")
258
+
259
+ with patch.object(
260
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
261
+ ) as mock_get_dir:
262
+ mock_get_dir.return_value = temp_tutorials_dir
263
+
264
+ await create_tutorial_notebooks(user_id, mock_session)
265
+
266
+ # Verify notebook was added to session
267
+ assert mock_session.add.call_count == 1
268
+ added_notebook = mock_session.add.call_args[0][0]
99
269
 
100
- # Check markdown header extraction
101
- assert notebook.cells[0].name == "Tutorial Introduction"
102
- assert notebook.cells[2].name == "Step 1: Data Ingestion"
270
+ assert isinstance(added_notebook, Notebook)
271
+ assert added_notebook.owner_id == user_id
272
+ assert added_notebook.name == "Test tutorial - tutorial 🧠"
273
+ assert len(added_notebook.cells) == 2
274
+ assert added_notebook.deletable is False # Default for tutorials
103
275
 
104
- # Check code cell naming
105
- assert notebook.cells[1].name == "Code Cell"
106
- assert notebook.cells[3].name == "Code Cell"
276
+ # Verify deterministic ID generation
277
+ expected_id = uuid5(NAMESPACE_OID, name="Test tutorial - tutorial 🧠")
278
+ assert added_notebook.id == expected_id
279
+
280
+ # Verify commit was called
281
+ mock_session.commit.assert_called_once()
107
282
 
108
283
  @pytest.mark.asyncio
109
- async def test_notebook_from_ipynb_string_with_default_name(self, sample_jupyter_notebook):
110
- """Test notebook creation uses kernelspec display_name when no name provided."""
284
+ async def test_create_tutorial_notebooks_multiple_tutorials(
285
+ self, mock_session, temp_tutorials_dir
286
+ ):
287
+ """Test creation of multiple tutorial notebooks."""
111
288
  user_id = uuid4()
112
- notebook_json = json.dumps(sample_jupyter_notebook)
113
289
 
114
- notebook = Notebook.from_ipynb_string(notebook_content=notebook_json, owner_id=user_id)
290
+ # Create two tutorial directories
291
+ tutorial1 = temp_tutorials_dir / "tutorial-one"
292
+ tutorial1.mkdir()
293
+ (tutorial1 / "cell-1.md").write_text("# Tutorial One")
294
+
295
+ tutorial2 = temp_tutorials_dir / "tutorial-two"
296
+ tutorial2.mkdir()
297
+ (tutorial2 / "cell-1.md").write_text("# Tutorial Two")
298
+
299
+ with patch.object(
300
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
301
+ ) as mock_get_dir:
302
+ mock_get_dir.return_value = temp_tutorials_dir
115
303
 
116
- assert notebook.name == "Python 3" # From kernelspec.display_name
304
+ await create_tutorial_notebooks(user_id, mock_session)
305
+
306
+ # Verify both notebooks were added
307
+ assert mock_session.add.call_count == 2
308
+ mock_session.commit.assert_called_once()
117
309
 
118
310
  @pytest.mark.asyncio
119
- async def test_notebook_from_ipynb_string_fallback_name(self):
120
- """Test fallback naming when kernelspec is missing."""
121
- minimal_notebook = {
122
- "cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Test"]}],
123
- "metadata": {}, # No kernelspec
124
- "nbformat": 4,
125
- "nbformat_minor": 4,
126
- }
311
+ async def test_create_tutorial_notebooks_skips_empty_tutorials(
312
+ self, mock_session, temp_tutorials_dir
313
+ ):
314
+ """Test that tutorials with no cells are skipped."""
315
+ user_id = uuid4()
316
+
317
+ # Create tutorial directory without cells
318
+ tutorial_dir = temp_tutorials_dir / "empty-tutorial"
319
+ tutorial_dir.mkdir()
320
+
321
+ with patch.object(
322
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
323
+ ) as mock_get_dir:
324
+ mock_get_dir.return_value = temp_tutorials_dir
325
+
326
+ await create_tutorial_notebooks(user_id, mock_session)
327
+
328
+ # Verify no notebooks were added
329
+ mock_session.add.assert_not_called()
330
+ mock_session.commit.assert_not_called()
127
331
 
332
+ @pytest.mark.asyncio
333
+ async def test_create_tutorial_notebooks_skips_hidden_directories(
334
+ self, mock_session, temp_tutorials_dir
335
+ ):
336
+ """Test that hidden directories (starting with .) are skipped."""
128
337
  user_id = uuid4()
129
- notebook_json = json.dumps(minimal_notebook)
130
338
 
131
- notebook = Notebook.from_ipynb_string(notebook_content=notebook_json, owner_id=user_id)
339
+ # Create hidden tutorial directory
340
+ hidden_tutorial = temp_tutorials_dir / ".hidden-tutorial"
341
+ hidden_tutorial.mkdir()
342
+ (hidden_tutorial / "cell-1.md").write_text("# Hidden")
343
+
344
+ # Create visible tutorial directory
345
+ visible_tutorial = temp_tutorials_dir / "visible-tutorial"
346
+ visible_tutorial.mkdir()
347
+ (visible_tutorial / "cell-1.md").write_text("# Visible")
348
+
349
+ with patch.object(
350
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
351
+ ) as mock_get_dir:
352
+ mock_get_dir.return_value = temp_tutorials_dir
353
+
354
+ await create_tutorial_notebooks(user_id, mock_session)
132
355
 
133
- assert notebook.name == "Imported Notebook" # Fallback name
356
+ # Verify only visible tutorial was added
357
+ assert mock_session.add.call_count == 1
358
+ added_notebook = mock_session.add.call_args[0][0]
359
+ assert added_notebook.name == "Visible tutorial - tutorial 🧠"
134
360
 
135
361
  @pytest.mark.asyncio
136
- async def test_notebook_from_ipynb_string_invalid_json(self):
137
- """Test error handling for invalid JSON."""
362
+ async def test_create_tutorial_notebooks_missing_directory(self, mock_session):
363
+ """Test handling when tutorials directory doesn't exist."""
138
364
  user_id = uuid4()
139
- invalid_json = "{ invalid json content"
140
365
 
141
- from nbformat.reader import NotJSONError
366
+ with patch.object(
367
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
368
+ ) as mock_get_dir:
369
+ mock_get_dir.return_value = Path("/nonexistent/tutorials/dir")
142
370
 
143
- with pytest.raises(NotJSONError):
144
- Notebook.from_ipynb_string(notebook_content=invalid_json, owner_id=user_id)
371
+ await create_tutorial_notebooks(user_id, mock_session)
372
+
373
+ # Verify no notebooks were added and no commit
374
+ mock_session.add.assert_not_called()
375
+ mock_session.commit.assert_not_called()
145
376
 
146
377
  @pytest.mark.asyncio
147
- @patch.object(Notebook, "from_ipynb_zip_url")
148
- async def test_create_tutorial_notebook_error_propagated(self, mock_from_zip_url, mock_session):
149
- """Test that errors are propagated when zip fetch fails."""
378
+ async def test_create_tutorial_notebooks_empty_directory(
379
+ self, mock_session, temp_tutorials_dir
380
+ ):
381
+ """Test handling when tutorials directory is empty."""
150
382
  user_id = uuid4()
151
- mock_from_zip_url.side_effect = Exception("Network error")
152
383
 
153
- # Should raise the exception (not catch it)
154
- with pytest.raises(Exception, match="Network error"):
155
- await _create_tutorial_notebook(user_id, mock_session)
384
+ with patch.object(
385
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
386
+ ) as mock_get_dir:
387
+ mock_get_dir.return_value = temp_tutorials_dir
388
+
389
+ await create_tutorial_notebooks(user_id, mock_session)
156
390
 
157
- # Verify error handling path was taken
158
- mock_from_zip_url.assert_called_once()
391
+ # Verify no notebooks were added
159
392
  mock_session.add.assert_not_called()
160
393
  mock_session.commit.assert_not_called()
161
394
 
162
- def test_generate_cell_name_code_cell(self):
163
- """Test cell name generation for code cells."""
164
- from nbformat.notebooknode import NotebookNode
395
+ @pytest.mark.asyncio
396
+ async def test_create_tutorial_notebooks_handles_cell_loading_error(
397
+ self, mock_session, temp_tutorials_dir
398
+ ):
399
+ """Test that errors loading individual cells don't stop notebook creation."""
400
+ user_id = uuid4()
165
401
 
166
- mock_cell = NotebookNode(
167
- {"cell_type": "code", "source": 'import pandas as pd\nprint("Hello world")'}
168
- )
402
+ tutorial_dir = temp_tutorials_dir / "test-tutorial"
403
+ tutorial_dir.mkdir()
404
+ (tutorial_dir / "cell-1.md").write_text("# Valid Cell")
405
+ # Create a file that will cause an error (invalid extension that passes filter)
406
+ invalid_file = tutorial_dir / "cell-2.invalid"
407
+ invalid_file.write_text("Invalid content")
408
+
409
+ with patch.object(create_tutorial_notebooks_module, "_load_tutorial_cells") as mock_load:
410
+ # Simulate error loading one cell but others succeed
411
+ mock_load.return_value = [
412
+ NotebookCell(id=uuid4(), type="markdown", name="Valid Cell", content="# Valid Cell")
413
+ ]
169
414
 
170
- result = Notebook._generate_cell_name(mock_cell)
171
- assert result == "Code Cell"
415
+ with patch.object(
416
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
417
+ ) as mock_get_dir:
418
+ mock_get_dir.return_value = temp_tutorials_dir
172
419
 
420
+ await create_tutorial_notebooks(user_id, mock_session)
173
421
 
174
- class TestTutorialNotebookZipFunctionality:
175
- """Test cases for zip-based tutorial functionality."""
422
+ # Verify notebook was still created with valid cells
423
+ assert mock_session.add.call_count == 1
424
+ mock_session.commit.assert_called_once()
176
425
 
177
426
  @pytest.mark.asyncio
178
- async def test_notebook_from_ipynb_zip_url_missing_notebook(
179
- self,
427
+ async def test_create_tutorial_notebooks_handles_tutorial_error_gracefully(
428
+ self, mock_session, temp_tutorials_dir
180
429
  ):
181
- """Test error handling when notebook file is missing from zip."""
430
+ """Test that errors in one tutorial don't prevent others from being created."""
182
431
  user_id = uuid4()
183
432
 
184
- with pytest.raises(
185
- FileNotFoundError,
186
- match="Notebook file 'super_random_tutorial_name.ipynb' not found in zip",
187
- ):
188
- await Notebook.from_ipynb_zip_url(
189
- zip_url="https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip",
190
- owner_id=user_id,
191
- notebook_filename="super_random_tutorial_name.ipynb",
192
- )
433
+ # Create two tutorials - one will fail, one will succeed
434
+ tutorial1 = temp_tutorials_dir / "working-tutorial"
435
+ tutorial1.mkdir()
436
+ (tutorial1 / "cell-1.md").write_text("# Working")
437
+
438
+ tutorial2 = temp_tutorials_dir / "broken-tutorial"
439
+ tutorial2.mkdir()
440
+ # Create a file that will cause an error when trying to determine cell type
441
+ (tutorial2 / "cell-1.txt").write_text("Invalid")
442
+
443
+ with patch.object(
444
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
445
+ ) as mock_get_dir:
446
+ mock_get_dir.return_value = temp_tutorials_dir
447
+
448
+ await create_tutorial_notebooks(user_id, mock_session)
449
+
450
+ # Verify working tutorial was created
451
+ assert mock_session.add.call_count == 1
452
+ added_notebook = mock_session.add.call_args[0][0]
453
+ assert added_notebook.name == "Working tutorial - tutorial 🧠"
454
+ mock_session.commit.assert_called_once()
193
455
 
194
456
  @pytest.mark.asyncio
195
- async def test_notebook_from_ipynb_zip_url_download_failure(self):
196
- """Test error handling when zip download fails."""
457
+ async def test_create_tutorial_notebooks_deterministic_ids(
458
+ self, mock_session, temp_tutorials_dir
459
+ ):
460
+ """Test that tutorial notebooks have deterministic IDs based on name."""
197
461
  user_id = uuid4()
198
- with pytest.raises(RuntimeError, match="Failed to download tutorial zip"):
199
- await Notebook.from_ipynb_zip_url(
200
- zip_url="https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/nonexistent_tutorial_name.zip",
201
- owner_id=user_id,
202
- )
203
462
 
204
- @pytest.mark.asyncio
205
- async def test_create_tutorial_notebook_zip_success(self, mock_session):
206
- """Test successful tutorial notebook creation with zip."""
207
- await cognee.prune.prune_data()
208
- await cognee.prune.prune_system(metadata=True)
463
+ tutorial_dir = temp_tutorials_dir / "test-tutorial"
464
+ tutorial_dir.mkdir()
465
+ (tutorial_dir / "cell-1.md").write_text("# Test")
209
466
 
210
- user_id = uuid4()
467
+ with patch.object(
468
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
469
+ ) as mock_get_dir:
470
+ mock_get_dir.return_value = temp_tutorials_dir
211
471
 
212
- # Check that tutorial data directory is empty using storage-aware method
213
- tutorial_data_dir_path = await get_tutorial_data_dir()
214
- tutorial_data_dir = Path(tutorial_data_dir_path)
215
- if tutorial_data_dir.exists():
216
- assert not any(tutorial_data_dir.iterdir()), "Tutorial data directory should be empty"
472
+ # Create notebooks twice
473
+ await create_tutorial_notebooks(user_id, mock_session)
474
+ first_notebook = mock_session.add.call_args[0][0]
475
+ first_id = first_notebook.id
217
476
 
218
- await _create_tutorial_notebook(user_id, mock_session)
477
+ # Reset mocks
478
+ mock_session.add.reset_mock()
479
+ mock_session.commit.reset_mock()
219
480
 
220
- items = list(tutorial_data_dir.iterdir())
221
- assert len(items) == 1, "Tutorial data directory should contain exactly one item"
222
- assert items[0].is_dir(), "Tutorial data directory item should be a directory"
481
+ await create_tutorial_notebooks(user_id, mock_session)
482
+ second_notebook = mock_session.add.call_args[0][0]
483
+ second_id = second_notebook.id
223
484
 
224
- # Verify the structure inside the tutorial directory
225
- tutorial_dir = items[0]
485
+ # IDs should be the same (deterministic)
486
+ assert first_id == second_id
487
+ assert first_id == uuid5(NAMESPACE_OID, name="Test tutorial - tutorial 🧠")
226
488
 
227
- # Check for tutorial.ipynb file
228
- notebook_file = tutorial_dir / "tutorial.ipynb"
229
- assert notebook_file.exists(), f"tutorial.ipynb should exist in {tutorial_dir}"
230
- assert notebook_file.is_file(), "tutorial.ipynb should be a file"
489
+ @pytest.mark.asyncio
490
+ async def test_create_tutorial_notebooks_with_config_deletable(
491
+ self, mock_session, temp_tutorials_dir
492
+ ):
493
+ """Test that deletable flag from config.json is respected."""
494
+ import json
231
495
 
232
- # Check for data subfolder with contents
233
- data_folder = tutorial_dir / "data"
234
- assert data_folder.exists(), f"data subfolder should exist in {tutorial_dir}"
235
- assert data_folder.is_dir(), "data should be a directory"
496
+ user_id = uuid4()
236
497
 
237
- data_items = list(data_folder.iterdir())
238
- assert len(data_items) > 0, (
239
- f"data folder should contain files, but found {len(data_items)} items"
498
+ tutorial_dir = temp_tutorials_dir / "test-tutorial"
499
+ tutorial_dir.mkdir()
500
+ (tutorial_dir / "cell-1.md").write_text("# Test")
501
+ (tutorial_dir / "config.json").write_text(
502
+ json.dumps({"name": "Test Tutorial", "deletable": True})
240
503
  )
241
504
 
242
- @pytest.mark.asyncio
243
- async def test_create_tutorial_notebook_with_force_refresh(self, mock_session):
244
- """Test tutorial notebook creation with force refresh."""
245
- await cognee.prune.prune_data()
246
- await cognee.prune.prune_system(metadata=True)
505
+ with patch.object(
506
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
507
+ ) as mock_get_dir:
508
+ mock_get_dir.return_value = temp_tutorials_dir
247
509
 
248
- user_id = uuid4()
510
+ await create_tutorial_notebooks(user_id, mock_session)
249
511
 
250
- # Check that tutorial data directory is empty using storage-aware method
251
- tutorial_data_dir_path = await get_tutorial_data_dir()
252
- tutorial_data_dir = Path(tutorial_data_dir_path)
253
- if tutorial_data_dir.exists():
254
- assert not any(tutorial_data_dir.iterdir()), "Tutorial data directory should be empty"
512
+ added_notebook = mock_session.add.call_args[0][0]
513
+ assert added_notebook.deletable is True
255
514
 
256
- # First creation (without force refresh)
257
- await _create_tutorial_notebook(user_id, mock_session, force_refresh=False)
515
+ @pytest.mark.asyncio
516
+ async def test_create_tutorial_notebooks_config_missing_name(
517
+ self, mock_session, temp_tutorials_dir
518
+ ):
519
+ """Test that missing name in config.json falls back to formatted directory name."""
520
+ import json
258
521
 
259
- items_first = list(tutorial_data_dir.iterdir())
260
- assert len(items_first) == 1, (
261
- "Tutorial data directory should contain exactly one item after first creation"
262
- )
263
- first_dir = items_first[0]
264
- assert first_dir.is_dir(), "Tutorial data directory item should be a directory"
522
+ user_id = uuid4()
265
523
 
266
- # Verify the structure inside the tutorial directory (first creation)
267
- notebook_file = first_dir / "tutorial.ipynb"
268
- assert notebook_file.exists(), f"tutorial.ipynb should exist in {first_dir}"
269
- assert notebook_file.is_file(), "tutorial.ipynb should be a file"
524
+ tutorial_dir = temp_tutorials_dir / "test-tutorial"
525
+ tutorial_dir.mkdir()
526
+ (tutorial_dir / "cell-1.md").write_text("# Test")
527
+ (tutorial_dir / "config.json").write_text(json.dumps({"deletable": False}))
270
528
 
271
- data_folder = first_dir / "data"
272
- assert data_folder.exists(), f"data subfolder should exist in {first_dir}"
273
- assert data_folder.is_dir(), "data should be a directory"
529
+ with patch.object(
530
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
531
+ ) as mock_get_dir:
532
+ mock_get_dir.return_value = temp_tutorials_dir
274
533
 
275
- data_items = list(data_folder.iterdir())
276
- assert len(data_items) > 0, (
277
- f"data folder should contain files, but found {len(data_items)} items"
278
- )
534
+ await create_tutorial_notebooks(user_id, mock_session)
279
535
 
280
- # Capture metadata from first creation
536
+ added_notebook = mock_session.add.call_args[0][0]
537
+ assert added_notebook.name == "Test tutorial - tutorial 🧠"
281
538
 
282
- first_creation_metadata = {}
539
+ @pytest.mark.asyncio
540
+ async def test_create_tutorial_notebooks_invalid_config_json(
541
+ self, mock_session, temp_tutorials_dir
542
+ ):
543
+ """Test that invalid config.json is handled gracefully."""
544
+ user_id = uuid4()
283
545
 
284
- for file_path in first_dir.rglob("*"):
285
- if file_path.is_file():
286
- relative_path = file_path.relative_to(first_dir)
287
- stat = file_path.stat()
546
+ tutorial_dir = temp_tutorials_dir / "test-tutorial"
547
+ tutorial_dir.mkdir()
548
+ (tutorial_dir / "cell-1.md").write_text("# Test")
549
+ (tutorial_dir / "config.json").write_text("{ invalid json }")
288
550
 
289
- # Store multiple metadata points
290
- with open(file_path, "rb") as f:
291
- content = f.read()
551
+ with patch.object(
552
+ create_tutorial_notebooks_module, "_get_tutorials_directory"
553
+ ) as mock_get_dir:
554
+ mock_get_dir.return_value = temp_tutorials_dir
292
555
 
293
- first_creation_metadata[str(relative_path)] = {
294
- "mtime": stat.st_mtime,
295
- "size": stat.st_size,
296
- "hash": hashlib.md5(content).hexdigest(),
297
- "first_bytes": content[:100]
298
- if content
299
- else b"", # First 100 bytes as fingerprint
300
- }
556
+ # Should not raise, should fall back to formatted name
557
+ await create_tutorial_notebooks(user_id, mock_session)
301
558
 
302
- # Wait a moment to ensure different timestamps
303
- time.sleep(0.1)
559
+ added_notebook = mock_session.add.call_args[0][0]
560
+ assert added_notebook.name == "Test tutorial - tutorial 🧠"
304
561
 
305
- # Force refresh - should create new files with different metadata
306
- await _create_tutorial_notebook(user_id, mock_session, force_refresh=True)
307
562
 
308
- items_second = list(tutorial_data_dir.iterdir())
309
- assert len(items_second) == 1, (
310
- "Tutorial data directory should contain exactly one item after force refresh"
311
- )
312
- second_dir = items_second[0]
563
+ class TestNotebookFromIpynbString:
564
+ """Test cases for Notebook.from_ipynb_string (legacy method, still used)."""
313
565
 
314
- # Verify the structure is maintained after force refresh
315
- notebook_file_second = second_dir / "tutorial.ipynb"
316
- assert notebook_file_second.exists(), (
317
- f"tutorial.ipynb should exist in {second_dir} after force refresh"
318
- )
319
- assert notebook_file_second.is_file(), "tutorial.ipynb should be a file after force refresh"
566
+ def test_notebook_from_ipynb_string_success(self):
567
+ """Test successful creation of notebook from JSON string."""
568
+ import json
569
+
570
+ sample_notebook = {
571
+ "cells": [
572
+ {
573
+ "cell_type": "markdown",
574
+ "metadata": {},
575
+ "source": ["# Tutorial Introduction\n", "\n", "This is a tutorial notebook."],
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "execution_count": None,
580
+ "metadata": {},
581
+ "outputs": [],
582
+ "source": ["import cognee\n", "print('Hello, Cognee!')"],
583
+ },
584
+ ],
585
+ "metadata": {
586
+ "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}
587
+ },
588
+ "nbformat": 4,
589
+ "nbformat_minor": 4,
590
+ }
320
591
 
321
- data_folder_second = second_dir / "data"
322
- assert data_folder_second.exists(), (
323
- f"data subfolder should exist in {second_dir} after force refresh"
324
- )
325
- assert data_folder_second.is_dir(), "data should be a directory after force refresh"
592
+ notebook_json = json.dumps(sample_notebook)
593
+ user_id = uuid4()
326
594
 
327
- data_items_second = list(data_folder_second.iterdir())
328
- assert len(data_items_second) > 0, (
329
- f"data folder should still contain files after force refresh, but found {len(data_items_second)} items"
595
+ notebook = Notebook.from_ipynb_string(
596
+ notebook_content=notebook_json, owner_id=user_id, name="String Test Notebook"
330
597
  )
331
598
 
332
- # Compare metadata to ensure files are actually different
333
- files_with_changed_metadata = 0
334
-
335
- for file_path in second_dir.rglob("*"):
336
- if file_path.is_file():
337
- relative_path = file_path.relative_to(second_dir)
338
- relative_path_str = str(relative_path)
339
-
340
- # File should exist from first creation
341
- assert relative_path_str in first_creation_metadata, (
342
- f"File {relative_path_str} missing from first creation"
343
- )
344
-
345
- old_metadata = first_creation_metadata[relative_path_str]
346
-
347
- # Get new metadata
348
- stat = file_path.stat()
349
- with open(file_path, "rb") as f:
350
- new_content = f.read()
351
-
352
- new_metadata = {
353
- "mtime": stat.st_mtime,
354
- "size": stat.st_size,
355
- "hash": hashlib.md5(new_content).hexdigest(),
356
- "first_bytes": new_content[:100] if new_content else b"",
357
- }
358
-
359
- # Check if any metadata changed (indicating file was refreshed)
360
- metadata_changed = (
361
- new_metadata["mtime"] > old_metadata["mtime"] # Newer modification time
362
- or new_metadata["hash"] != old_metadata["hash"] # Different content hash
363
- or new_metadata["size"] != old_metadata["size"] # Different file size
364
- or new_metadata["first_bytes"]
365
- != old_metadata["first_bytes"] # Different content
366
- )
367
-
368
- if metadata_changed:
369
- files_with_changed_metadata += 1
370
-
371
- # Assert that force refresh actually updated files
372
- assert files_with_changed_metadata > 0, (
373
- f"Force refresh should have updated at least some files, but all {len(first_creation_metadata)} "
374
- f"files appear to have identical metadata. This suggests force refresh didn't work."
375
- )
599
+ assert notebook.owner_id == user_id
600
+ assert notebook.name == "String Test Notebook"
601
+ assert len(notebook.cells) == 2
602
+ assert notebook.cells[0].type == "markdown"
603
+ assert notebook.cells[1].type == "code"
376
604
 
377
- mock_session.commit.assert_called()
605
+ def test_notebook_from_ipynb_string_invalid_json(self):
606
+ """Test error handling for invalid JSON."""
607
+ user_id = uuid4()
608
+ invalid_json = "{ invalid json content"
378
609
 
379
- @pytest.mark.asyncio
380
- async def test_tutorial_zip_url_accessibility(self):
381
- """Test that the actual tutorial zip URL is accessible (integration test)."""
382
- try:
383
- import requests
384
-
385
- response = requests.get(
386
- "https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip",
387
- timeout=10,
388
- )
389
- response.raise_for_status()
390
-
391
- # Verify it's a valid zip file by checking headers
392
- assert response.headers.get("content-type") in [
393
- "application/zip",
394
- "application/octet-stream",
395
- "application/x-zip-compressed",
396
- ] or response.content.startswith(b"PK") # Zip file signature
397
-
398
- except Exception:
399
- pytest.skip("Network request failed or zip not available - skipping integration test")
610
+ from nbformat.reader import NotJSONError
611
+
612
+ with pytest.raises(NotJSONError):
613
+ Notebook.from_ipynb_string(notebook_content=invalid_json, owner_id=user_id)