cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from cognee.shared.logging_utils import get_logger
|
|
3
|
-
from
|
|
3
|
+
from cognee.modules.engine.utils.generate_edge_id import generate_edge_id
|
|
4
|
+
from typing import List, Dict, Union, Optional, Type, Iterable, Tuple, Callable, Any
|
|
4
5
|
|
|
5
6
|
from cognee.modules.graph.exceptions import (
|
|
6
7
|
EntityNotFoundError,
|
|
@@ -25,12 +26,16 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
25
26
|
|
|
26
27
|
nodes: Dict[str, Node]
|
|
27
28
|
edges: List[Edge]
|
|
29
|
+
edges_by_distance_key: Dict[str, List[Edge]]
|
|
28
30
|
directed: bool
|
|
31
|
+
triplet_distance_penalty: float
|
|
29
32
|
|
|
30
33
|
def __init__(self, directed: bool = True):
|
|
31
34
|
self.nodes = {}
|
|
32
35
|
self.edges = []
|
|
36
|
+
self.edges_by_distance_key = {}
|
|
33
37
|
self.directed = directed
|
|
38
|
+
self.triplet_distance_penalty = 3.5
|
|
34
39
|
|
|
35
40
|
def add_node(self, node: Node) -> None:
|
|
36
41
|
if node.id not in self.nodes:
|
|
@@ -40,8 +45,20 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
40
45
|
|
|
41
46
|
def add_edge(self, edge: Edge) -> None:
|
|
42
47
|
self.edges.append(edge)
|
|
48
|
+
|
|
49
|
+
edge_text = edge.attributes.get("edge_text") or edge.attributes.get("relationship_type")
|
|
50
|
+
edge.attributes["edge_type_id"] = (
|
|
51
|
+
generate_edge_id(edge_id=edge_text) if edge_text else None
|
|
52
|
+
) # Update edge with generated edge_type_id
|
|
53
|
+
|
|
43
54
|
edge.node1.add_skeleton_edge(edge)
|
|
44
55
|
edge.node2.add_skeleton_edge(edge)
|
|
56
|
+
key = edge.get_distance_key()
|
|
57
|
+
if not key:
|
|
58
|
+
return
|
|
59
|
+
if key not in self.edges_by_distance_key:
|
|
60
|
+
self.edges_by_distance_key[key] = []
|
|
61
|
+
self.edges_by_distance_key[key].append(edge)
|
|
45
62
|
|
|
46
63
|
def get_node(self, node_id: str) -> Node:
|
|
47
64
|
return self.nodes.get(node_id, None)
|
|
@@ -56,6 +73,29 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
56
73
|
def get_edges(self) -> List[Edge]:
|
|
57
74
|
return self.edges
|
|
58
75
|
|
|
76
|
+
def reset_distances(self, collection: Iterable[Union[Node, Edge]], query_count: int) -> None:
|
|
77
|
+
"""Reset vector distances for a collection of nodes or edges."""
|
|
78
|
+
for item in collection:
|
|
79
|
+
item.reset_vector_distances(query_count, self.triplet_distance_penalty)
|
|
80
|
+
|
|
81
|
+
def _normalize_query_distance_lists(
|
|
82
|
+
self, distances: List, query_list_length: Optional[int] = None, name: str = "distances"
|
|
83
|
+
) -> List:
|
|
84
|
+
"""Normalize shape: flat list -> single-query; nested list -> multi-query."""
|
|
85
|
+
if not distances:
|
|
86
|
+
return []
|
|
87
|
+
first_item = distances[0]
|
|
88
|
+
if isinstance(first_item, (list, tuple)):
|
|
89
|
+
per_query_lists = distances
|
|
90
|
+
else:
|
|
91
|
+
per_query_lists = [distances]
|
|
92
|
+
if query_list_length is not None and len(per_query_lists) != query_list_length:
|
|
93
|
+
raise ValueError(
|
|
94
|
+
f"{name} has {len(per_query_lists)} query lists, "
|
|
95
|
+
f"but query_list_length is {query_list_length}"
|
|
96
|
+
)
|
|
97
|
+
return per_query_lists
|
|
98
|
+
|
|
59
99
|
async def _get_nodeset_subgraph(
|
|
60
100
|
self,
|
|
61
101
|
adapter,
|
|
@@ -148,7 +188,7 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
148
188
|
adapter, memory_fragment_filter
|
|
149
189
|
)
|
|
150
190
|
|
|
151
|
-
|
|
191
|
+
self.triplet_distance_penalty = triplet_distance_penalty
|
|
152
192
|
|
|
153
193
|
start_time = time.time()
|
|
154
194
|
# Process nodes
|
|
@@ -182,9 +222,6 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
182
222
|
edge_penalty=triplet_distance_penalty,
|
|
183
223
|
)
|
|
184
224
|
self.add_edge(edge)
|
|
185
|
-
|
|
186
|
-
source_node.add_skeleton_edge(edge)
|
|
187
|
-
target_node.add_skeleton_edge(edge)
|
|
188
225
|
else:
|
|
189
226
|
raise EntityNotFoundError(
|
|
190
227
|
message=f"Edge references nonexistent nodes: {source_id} -> {target_id}"
|
|
@@ -200,41 +237,117 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
200
237
|
logger.error(f"Error during graph projection: {str(e)}")
|
|
201
238
|
raise
|
|
202
239
|
|
|
203
|
-
async def map_vector_distances_to_graph_nodes(
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
node = self.get_node(node_id)
|
|
210
|
-
if node:
|
|
211
|
-
node.add_attribute("vector_distance", score)
|
|
212
|
-
mapped_nodes += 1
|
|
213
|
-
|
|
214
|
-
async def map_vector_distances_to_graph_edges(self, edge_distances) -> None:
|
|
215
|
-
try:
|
|
216
|
-
if edge_distances is None:
|
|
217
|
-
return
|
|
240
|
+
async def map_vector_distances_to_graph_nodes(
|
|
241
|
+
self,
|
|
242
|
+
node_distances,
|
|
243
|
+
query_list_length: Optional[int] = None,
|
|
244
|
+
) -> None:
|
|
245
|
+
"""Map vector distances to nodes, supporting single- and multi-query input shapes."""
|
|
218
246
|
|
|
219
|
-
|
|
247
|
+
query_count = query_list_length or 1
|
|
220
248
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
249
|
+
self.reset_distances(self.nodes.values(), query_count)
|
|
250
|
+
|
|
251
|
+
for collection_name, scored_results in node_distances.items():
|
|
252
|
+
if not scored_results:
|
|
253
|
+
continue
|
|
254
|
+
|
|
255
|
+
per_query_scored_results = self._normalize_query_distance_lists(
|
|
256
|
+
scored_results, query_list_length, f"Collection '{collection_name}'"
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
for query_index, scored_results in enumerate(per_query_scored_results):
|
|
260
|
+
for result in scored_results:
|
|
261
|
+
node_id = str(getattr(result, "id", None))
|
|
262
|
+
if not node_id:
|
|
263
|
+
continue
|
|
264
|
+
node = self.get_node(node_id)
|
|
265
|
+
if node is None:
|
|
266
|
+
continue
|
|
267
|
+
score = float(getattr(result, "score", self.triplet_distance_penalty))
|
|
268
|
+
node.update_distance_for_query(
|
|
269
|
+
query_index=query_index,
|
|
270
|
+
score=score,
|
|
271
|
+
query_count=query_count,
|
|
272
|
+
default_penalty=self.triplet_distance_penalty,
|
|
273
|
+
)
|
|
228
274
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
275
|
+
async def map_vector_distances_to_graph_edges(
|
|
276
|
+
self,
|
|
277
|
+
edge_distances,
|
|
278
|
+
query_list_length: Optional[int] = None,
|
|
279
|
+
) -> None:
|
|
280
|
+
"""Map vector distances to graph edges, supporting single- and multi-query input shapes."""
|
|
281
|
+
query_count = query_list_length or 1
|
|
282
|
+
|
|
283
|
+
self.reset_distances(self.edges, query_count)
|
|
284
|
+
|
|
285
|
+
if not edge_distances:
|
|
286
|
+
return None
|
|
232
287
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
288
|
+
per_query_scored_results = self._normalize_query_distance_lists(
|
|
289
|
+
edge_distances, query_list_length, "edge_distances"
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
for query_index, scored_results in enumerate(per_query_scored_results):
|
|
293
|
+
for result in scored_results:
|
|
294
|
+
matching_edges = self.edges_by_distance_key.get(str(result.id))
|
|
295
|
+
if not matching_edges:
|
|
296
|
+
continue
|
|
297
|
+
for edge in matching_edges:
|
|
298
|
+
edge.update_distance_for_query(
|
|
299
|
+
query_index=query_index,
|
|
300
|
+
score=float(getattr(result, "score", self.triplet_distance_penalty)),
|
|
301
|
+
query_count=query_count,
|
|
302
|
+
default_penalty=self.triplet_distance_penalty,
|
|
303
|
+
)
|
|
304
|
+
|
|
305
|
+
def _calculate_query_top_triplet_importances(
|
|
306
|
+
self,
|
|
307
|
+
k: int,
|
|
308
|
+
query_index: int = 0,
|
|
309
|
+
) -> List[Edge]:
|
|
310
|
+
"""Calculate top k triplet importances for a specific query index."""
|
|
311
|
+
|
|
312
|
+
def score(edge: Edge) -> float:
|
|
313
|
+
elements = (
|
|
314
|
+
(edge.node1, f"node {edge.node1.id}"),
|
|
315
|
+
(edge.node2, f"node {edge.node2.id}"),
|
|
316
|
+
(edge, f"edge {edge.node1.id}->{edge.node2.id}"),
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
importances = []
|
|
320
|
+
for element, label in elements:
|
|
321
|
+
distances = element.attributes.get("vector_distance")
|
|
322
|
+
if not isinstance(distances, list) or query_index >= len(distances):
|
|
323
|
+
raise ValueError(
|
|
324
|
+
f"{label}: vector_distance must be a list with length > {query_index} "
|
|
325
|
+
f"before scoring (got {type(distances).__name__} with length "
|
|
326
|
+
f"{len(distances) if isinstance(distances, list) else 'n/a'})"
|
|
327
|
+
)
|
|
328
|
+
value = distances[query_index]
|
|
329
|
+
try:
|
|
330
|
+
importances.append(float(value))
|
|
331
|
+
except (TypeError, ValueError):
|
|
332
|
+
raise ValueError(
|
|
333
|
+
f"{label}: vector_distance[{query_index}] must be float-like, "
|
|
334
|
+
f"got {type(value).__name__}"
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
return sum(importances)
|
|
239
338
|
|
|
240
339
|
return heapq.nsmallest(k, self.edges, key=score)
|
|
340
|
+
|
|
341
|
+
async def calculate_top_triplet_importances(
|
|
342
|
+
self, k: int, query_list_length: Optional[int] = None
|
|
343
|
+
) -> Union[List[Edge], List[List[Edge]]]:
|
|
344
|
+
"""Calculate top k triplet importances, supporting both single and multi-query modes."""
|
|
345
|
+
query_count = query_list_length or 1
|
|
346
|
+
results = [
|
|
347
|
+
self._calculate_query_top_triplet_importances(k=k, query_index=i)
|
|
348
|
+
for i in range(query_count)
|
|
349
|
+
]
|
|
350
|
+
|
|
351
|
+
if query_list_length is None:
|
|
352
|
+
return results[0]
|
|
353
|
+
return results
|
|
@@ -30,11 +30,31 @@ class Node:
|
|
|
30
30
|
raise InvalidDimensionsError()
|
|
31
31
|
self.id = node_id
|
|
32
32
|
self.attributes = attributes if attributes is not None else {}
|
|
33
|
-
self.attributes["vector_distance"] =
|
|
33
|
+
self.attributes["vector_distance"] = None
|
|
34
34
|
self.skeleton_neighbours = []
|
|
35
35
|
self.skeleton_edges = []
|
|
36
36
|
self.status = np.ones(dimension, dtype=int)
|
|
37
37
|
|
|
38
|
+
def reset_vector_distances(self, query_count: int, default_penalty: float) -> None:
|
|
39
|
+
self.attributes["vector_distance"] = [default_penalty] * query_count
|
|
40
|
+
|
|
41
|
+
def ensure_vector_distance_list(self, query_count: int, default_penalty: float) -> List[float]:
|
|
42
|
+
distances = self.attributes.get("vector_distance")
|
|
43
|
+
if not isinstance(distances, list) or len(distances) != query_count:
|
|
44
|
+
distances = [default_penalty] * query_count
|
|
45
|
+
self.attributes["vector_distance"] = distances
|
|
46
|
+
return distances
|
|
47
|
+
|
|
48
|
+
def update_distance_for_query(
|
|
49
|
+
self,
|
|
50
|
+
query_index: int,
|
|
51
|
+
score: float,
|
|
52
|
+
query_count: int,
|
|
53
|
+
default_penalty: float,
|
|
54
|
+
) -> None:
|
|
55
|
+
distances = self.ensure_vector_distance_list(query_count, default_penalty)
|
|
56
|
+
distances[query_index] = score
|
|
57
|
+
|
|
38
58
|
def add_skeleton_neighbor(self, neighbor: "Node") -> None:
|
|
39
59
|
if neighbor not in self.skeleton_neighbours:
|
|
40
60
|
self.skeleton_neighbours.append(neighbor)
|
|
@@ -116,10 +136,36 @@ class Edge:
|
|
|
116
136
|
self.node1 = node1
|
|
117
137
|
self.node2 = node2
|
|
118
138
|
self.attributes = attributes if attributes is not None else {}
|
|
119
|
-
self.attributes["vector_distance"] =
|
|
139
|
+
self.attributes["vector_distance"] = None
|
|
120
140
|
self.directed = directed
|
|
121
141
|
self.status = np.ones(dimension, dtype=int)
|
|
122
142
|
|
|
143
|
+
def get_distance_key(self) -> Optional[str]:
|
|
144
|
+
key = self.attributes.get("edge_type_id")
|
|
145
|
+
if key is None:
|
|
146
|
+
return None
|
|
147
|
+
return str(key)
|
|
148
|
+
|
|
149
|
+
def reset_vector_distances(self, query_count: int, default_penalty: float) -> None:
|
|
150
|
+
self.attributes["vector_distance"] = [default_penalty] * query_count
|
|
151
|
+
|
|
152
|
+
def ensure_vector_distance_list(self, query_count: int, default_penalty: float) -> List[float]:
|
|
153
|
+
distances = self.attributes.get("vector_distance")
|
|
154
|
+
if not isinstance(distances, list) or len(distances) != query_count:
|
|
155
|
+
distances = [default_penalty] * query_count
|
|
156
|
+
self.attributes["vector_distance"] = distances
|
|
157
|
+
return distances
|
|
158
|
+
|
|
159
|
+
def update_distance_for_query(
|
|
160
|
+
self,
|
|
161
|
+
query_index: int,
|
|
162
|
+
score: float,
|
|
163
|
+
query_count: int,
|
|
164
|
+
default_penalty: float,
|
|
165
|
+
) -> None:
|
|
166
|
+
distances = self.ensure_vector_distance_list(query_count, default_penalty)
|
|
167
|
+
distances[query_index] = score
|
|
168
|
+
|
|
123
169
|
def is_edge_alive_in_dimension(self, dimension: int) -> bool:
|
|
124
170
|
if dimension < 0 or dimension >= len(self.status):
|
|
125
171
|
raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
|
|
@@ -5,3 +5,4 @@ from .retrieve_existing_edges import retrieve_existing_edges
|
|
|
5
5
|
from .convert_node_to_data_point import convert_node_to_data_point
|
|
6
6
|
from .deduplicate_nodes_and_edges import deduplicate_nodes_and_edges
|
|
7
7
|
from .resolve_edges_to_text import resolve_edges_to_text
|
|
8
|
+
from .get_entity_nodes_from_triplets import get_entity_nodes_from_triplets
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
def get_entity_nodes_from_triplets(triplets):
|
|
2
|
+
entity_nodes = []
|
|
3
|
+
seen_ids = set()
|
|
4
|
+
for triplet in triplets:
|
|
5
|
+
if hasattr(triplet, "node1") and triplet.node1 and triplet.node1.id not in seen_ids:
|
|
6
|
+
entity_nodes.append({"id": str(triplet.node1.id)})
|
|
7
|
+
seen_ids.add(triplet.node1.id)
|
|
8
|
+
if hasattr(triplet, "node2") and triplet.node2 and triplet.node2.id not in seen_ids:
|
|
9
|
+
entity_nodes.append({"id": str(triplet.node2.id)})
|
|
10
|
+
seen_ids.add(triplet.node2.id)
|
|
11
|
+
|
|
12
|
+
return entity_nodes
|
|
@@ -6,40 +6,6 @@ from cognee.infrastructure.databases.relational import with_async_session
|
|
|
6
6
|
|
|
7
7
|
from ..models.Notebook import Notebook, NotebookCell
|
|
8
8
|
|
|
9
|
-
TUTORIAL_NOTEBOOK_NAME = "Python Development with Cognee Tutorial 🧠"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
async def _create_tutorial_notebook(
|
|
13
|
-
user_id: UUID, session: AsyncSession, force_refresh: bool = False
|
|
14
|
-
) -> None:
|
|
15
|
-
"""
|
|
16
|
-
Create the default tutorial notebook for new users.
|
|
17
|
-
Dynamically fetches from: https://github.com/topoteretes/cognee/blob/notebook_tutorial/notebooks/starter_tutorial.zip
|
|
18
|
-
"""
|
|
19
|
-
TUTORIAL_ZIP_URL = (
|
|
20
|
-
"https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip"
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
try:
|
|
24
|
-
# Create notebook from remote zip file (includes notebook + data files)
|
|
25
|
-
notebook = await Notebook.from_ipynb_zip_url(
|
|
26
|
-
zip_url=TUTORIAL_ZIP_URL,
|
|
27
|
-
owner_id=user_id,
|
|
28
|
-
notebook_filename="tutorial.ipynb",
|
|
29
|
-
name=TUTORIAL_NOTEBOOK_NAME,
|
|
30
|
-
deletable=False,
|
|
31
|
-
force=force_refresh,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
# Add to session and commit
|
|
35
|
-
session.add(notebook)
|
|
36
|
-
await session.commit()
|
|
37
|
-
|
|
38
|
-
except Exception as e:
|
|
39
|
-
print(f"Failed to fetch tutorial notebook from {TUTORIAL_ZIP_URL}: {e}")
|
|
40
|
-
|
|
41
|
-
raise e
|
|
42
|
-
|
|
43
9
|
|
|
44
10
|
@with_async_session
|
|
45
11
|
async def create_notebook(
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from uuid import NAMESPACE_OID, UUID, uuid5, uuid4
|
|
3
|
+
from typing import List, Optional, Dict, Any
|
|
4
|
+
import re
|
|
5
|
+
import json
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
|
|
8
|
+
from cognee.shared.logging_utils import get_logger
|
|
9
|
+
from cognee.root_dir import ROOT_DIR
|
|
10
|
+
|
|
11
|
+
from ..models.Notebook import Notebook, NotebookCell
|
|
12
|
+
|
|
13
|
+
logger = get_logger()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _get_tutorials_directory() -> Path:
    """Return the tutorials directory: ROOT_DIR/modules/notebooks/tutorials."""
    notebooks_module_dir = ROOT_DIR / "modules" / "notebooks"
    return notebooks_module_dir / "tutorials"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _parse_cell_index(filename: str) -> int:
|
|
22
|
+
"""Extract cell index from filename like 'cell-0.md' or 'cell-123.py'."""
|
|
23
|
+
match = re.search(r"cell-(\d+)", filename)
|
|
24
|
+
if match:
|
|
25
|
+
return int(match.group(1))
|
|
26
|
+
return -1
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _get_cell_type(file_path: Path) -> str:
|
|
30
|
+
"""Determine cell type from file extension."""
|
|
31
|
+
extension = file_path.suffix.lower()
|
|
32
|
+
if extension == ".md":
|
|
33
|
+
return "markdown"
|
|
34
|
+
elif extension == ".py":
|
|
35
|
+
return "code"
|
|
36
|
+
else:
|
|
37
|
+
raise ValueError(f"Unsupported cell file type: {extension}")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _extract_markdown_heading(content: str) -> str | None:
|
|
41
|
+
"""Extract the first markdown heading from content."""
|
|
42
|
+
for line in content.splitlines():
|
|
43
|
+
line = line.strip()
|
|
44
|
+
# Match lines starting with one or more # followed by space and text
|
|
45
|
+
match = re.match(r"^#+\s+(.+)$", line)
|
|
46
|
+
if match:
|
|
47
|
+
return match.group(1).strip()
|
|
48
|
+
return None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _get_cell_name(cell_file: Path, cell_type: str, content: str) -> str:
|
|
52
|
+
"""Get the appropriate name for a cell."""
|
|
53
|
+
if cell_type == "code":
|
|
54
|
+
return "Code Cell"
|
|
55
|
+
elif cell_type == "markdown":
|
|
56
|
+
heading = _extract_markdown_heading(content)
|
|
57
|
+
if heading:
|
|
58
|
+
return heading
|
|
59
|
+
# Fallback to filename stem
|
|
60
|
+
return cell_file.stem
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _load_tutorial_cells(tutorial_dir: Path) -> List[NotebookCell]:
    """Load all cells from a tutorial directory, sorted by cell index.

    Only regular files named ``cell-*`` with a ``.md`` or ``.py`` extension
    (matched case-insensitively, like ``_get_cell_type``) are considered.
    A file that cannot be read or converted is logged and skipped, so one bad
    cell does not prevent the rest of the tutorial from loading.

    Args:
        tutorial_dir: Directory holding the tutorial's cell files.

    Returns:
        The cells in ascending index order; empty if nothing could be loaded.
    """
    supported_suffixes = {".md", ".py"}

    cell_files = [
        file_path
        for file_path in tutorial_dir.iterdir()
        if file_path.is_file()
        and file_path.name.startswith("cell-")
        and file_path.suffix.lower() in supported_suffixes
    ]

    # iterdir() yields entries in arbitrary order, so break index ties on the
    # file name to keep the resulting cell order reproducible.
    cell_files.sort(
        key=lambda file_path: (_parse_cell_index(file_path.name), file_path.name)
    )

    cells: List[NotebookCell] = []

    for cell_file in cell_files:
        try:
            cell_type = _get_cell_type(cell_file)
            content = cell_file.read_text(encoding="utf-8")
            cell_name = _get_cell_name(cell_file, cell_type, content)

            cells.append(
                NotebookCell(
                    id=uuid4(),
                    type=cell_type,
                    name=cell_name,
                    content=content,
                )
            )
        except Exception as e:
            # Best effort by design: skip the broken cell, keep the others.
            logger.warning(f"Failed to load cell {cell_file}: {e}")
            continue

    return cells
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _read_tutorial_config(tutorial_dir: Path) -> Optional[Dict[str, Any]]:
|
|
99
|
+
"""Read config.json from a tutorial directory if it exists."""
|
|
100
|
+
config_path = tutorial_dir / "config.json"
|
|
101
|
+
if config_path.exists():
|
|
102
|
+
try:
|
|
103
|
+
with open(config_path, "r", encoding="utf-8") as f:
|
|
104
|
+
return json.load(f)
|
|
105
|
+
except (json.JSONDecodeError, IOError) as e:
|
|
106
|
+
logger.warning(f"Failed to read config.json from {tutorial_dir}: {e}")
|
|
107
|
+
return None
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _format_tutorial_name(tutorial_dir_name: str) -> str:
|
|
112
|
+
"""Format tutorial directory name into a readable notebook name (fallback)."""
|
|
113
|
+
|
|
114
|
+
name = tutorial_dir_name.replace("-", " ").replace("_", " ")
|
|
115
|
+
return f"{name.capitalize()} - tutorial 🧠"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
async def create_tutorial_notebooks(user_id: UUID, session: AsyncSession) -> None:
    """
    Create tutorial notebooks for all tutorials found in the tutorials directory.
    Each tutorial directory will become a separate notebook.

    The notebook name and ``deletable`` flag come from the tutorial's
    config.json when present; otherwise the name is derived from the directory
    name and the notebook is not deletable. A tutorial that fails to build is
    logged and skipped. Failures are logged, never raised: this function
    always returns None.

    Args:
        user_id: Owner assigned to every created notebook.
        session: Async database session used to add and commit the notebooks.
    """
    try:
        tutorials_dir = _get_tutorials_directory()

        if not tutorials_dir.exists():
            logger.warning(f"Tutorials directory not found: {tutorials_dir}")
            return

        # Hidden directories (leading ".") are not tutorials.
        tutorial_dirs = [
            d for d in tutorials_dir.iterdir() if d.is_dir() and not d.name.startswith(".")
        ]

        if not tutorial_dirs:
            logger.warning(f"No tutorial directories found in {tutorials_dir}")
            return

        notebooks_to_add = []

        for tutorial_dir in tutorial_dirs:
            try:
                cells = _load_tutorial_cells(tutorial_dir)

                if not cells:
                    logger.warning(f"No cells found in tutorial directory: {tutorial_dir}")
                    continue

                config = _read_tutorial_config(tutorial_dir)

                # Use name from config.json, or fallback to formatted directory name
                if config and "name" in config:
                    notebook_name = config["name"]
                else:
                    notebook_name = _format_tutorial_name(tutorial_dir.name)
                    logger.warning(
                        f"No config.json or 'name' field found in {tutorial_dir}, "
                        f"using fallback name: {notebook_name}"
                    )

                # Use deletable flag from config.json, or default to False for tutorials
                deletable = False
                if config and "deletable" in config:
                    deletable = bool(config["deletable"])

                # NOTE(review): the id is derived from the notebook name only,
                # so it is identical for every user. If Notebook.id must be
                # unique across users, a second user's insert would collide.
                # Confirm against the model and any lookup that recomputes it.
                notebook_id = uuid5(NAMESPACE_OID, name=notebook_name)

                notebook = Notebook(
                    id=notebook_id,
                    owner_id=user_id,
                    name=notebook_name,
                    cells=cells,
                    deletable=deletable,
                )

                notebooks_to_add.append(notebook)
                logger.info(f"Created tutorial notebook: {notebook_name} with {len(cells)} cells")

            except Exception as e:
                logger.error(f"Failed to create tutorial notebook from {tutorial_dir}: {e}")
                continue

        if not notebooks_to_add:
            return

        for notebook in notebooks_to_add:
            session.add(notebook)

        try:
            await session.commit()
        except Exception:
            # A failed commit leaves the session needing an explicit rollback
            # before it can be used again; the caller keeps using this session.
            await session.rollback()
            raise

    except Exception as e:
        logger.error(f"Failed to create tutorial notebooks for user {user_id}: {e}")
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from uuid import UUID
|
|
1
|
+
from uuid import NAMESPACE_OID, UUID, uuid5
|
|
2
2
|
from typing import List
|
|
3
3
|
from sqlalchemy import select, and_
|
|
4
4
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
@@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
6
6
|
from cognee.infrastructure.databases.relational import with_async_session
|
|
7
7
|
|
|
8
8
|
from ..models.Notebook import Notebook
|
|
9
|
-
from .
|
|
9
|
+
from .create_tutorial_notebooks import create_tutorial_notebooks
|
|
10
10
|
|
|
11
11
|
from cognee.shared.logging_utils import get_logger
|
|
12
12
|
|
|
@@ -19,21 +19,25 @@ async def get_notebooks(
|
|
|
19
19
|
session: AsyncSession,
|
|
20
20
|
) -> List[Notebook]:
|
|
21
21
|
# Check if tutorial notebook already exists for this user
|
|
22
|
+
tutorial_notebook_ids = [
|
|
23
|
+
uuid5(NAMESPACE_OID, name="Cognee Basics - tutorial 🧠"),
|
|
24
|
+
uuid5(NAMESPACE_OID, name="Python Development with Cognee - tutorial 🧠"),
|
|
25
|
+
]
|
|
22
26
|
tutorial_query = select(Notebook).where(
|
|
23
27
|
and_(
|
|
24
28
|
Notebook.owner_id == user_id,
|
|
25
|
-
Notebook.
|
|
29
|
+
Notebook.id.in_(tutorial_notebook_ids),
|
|
26
30
|
~Notebook.deletable,
|
|
27
31
|
)
|
|
28
32
|
)
|
|
29
33
|
tutorial_result = await session.execute(tutorial_query)
|
|
30
|
-
|
|
34
|
+
tutorial_notebooks = tutorial_result.scalars().all()
|
|
31
35
|
|
|
32
|
-
# If tutorial
|
|
33
|
-
if
|
|
34
|
-
logger.info(f"Tutorial
|
|
36
|
+
# If tutorial notebooks don't exist, create them
|
|
37
|
+
if len(tutorial_notebooks) == 0:
|
|
38
|
+
logger.info(f"Tutorial notebooks not found for user {user_id}, creating them")
|
|
35
39
|
try:
|
|
36
|
-
await
|
|
40
|
+
await create_tutorial_notebooks(user_id, session)
|
|
37
41
|
except Exception as e:
|
|
38
42
|
# Log the error but continue to return existing notebooks
|
|
39
43
|
logger.error(f"Failed to create tutorial notebook for user {user_id}: {e}")
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# What You'll Learn in This Tutorial
|
|
2
|
+
|
|
3
|
+
In this tutorial, you'll learn how to use Cognee to transform scattered data into an intelligent knowledge system that enhances your workflow.
|
|
4
|
+
By the end, you'll have:
|
|
5
|
+
|
|
6
|
+
- Connected disparate data sources into a unified AI memory graph
|
|
7
|
+
- Built a memory layer that infers knowledge from provided data
|
|
8
|
+
- Learned how to use search capabilities that combine diverse context
|
|
9
|
+
|
|
10
|
+
This tutorial demonstrates the power of knowledge graphs and retrieval-augmented generation (RAG), showing you how to build systems that learn from data and infer knowledge.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# Cognee and Its Core Operations
|
|
2
|
+
|
|
3
|
+
Before we dive in, let's understand the core Cognee operations we'll be working with:
|
|
4
|
+
|
|
5
|
+
- `cognee.add()` - Ingests raw data into the system
|
|
6
|
+
- `cognee.cognify()` - Processes and structures data into a knowledge graph using AI
|
|
7
|
+
- `cognee.search()` - Queries the knowledge graph with natural language
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Add data one by one, or pass a list to add multiple items at once

# Single text items; every call in this cell passes dataset_name="cognee-basics".
await cognee.add(
    "Harry Potter is a student at Hogwarts and belongs to Gryffindor house. \
He is known for defeating Voldemort and his Patronus is a stag.",
    dataset_name="cognee-basics",
)

await cognee.add(
    "Hermione Granger is a student at Hogwarts and also belongs to Gryffindor house. \
She is known for her intelligence and deep knowledge of spells. Her Patronus is an otter.",
    dataset_name="cognee-basics",
)

await cognee.add(
    "Severus Snape is a professor at Hogwarts who teaches Potions. \
He belongs to Slytherin house and was secretly loyal to Albus Dumbledore.",
    dataset_name="cognee-basics",
)

# Several items passed as one list in a single call.
await cognee.add(
    [
        "Hogwarts is a magical school located in Scotland. During Harry Potter's time at school, the headmaster was Albus Dumbledore.",
        "A Horcrux is a dark magic object used to store a fragment of a wizard's soul. Voldemort created multiple Horcruxes to achieve immortality.",
        "The Elder Wand is a powerful wand believed to be unbeatable. Its final known owner was Harry Potter.",
    ],
    dataset_name="cognee-basics",
)
|