cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +44 -4
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +13 -3
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
- cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +252 -20
- cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
- cognee/modules/retrieval/chunks_retriever.py +23 -1
- cognee/modules/retrieval/code_retriever.py +66 -9
- cognee/modules/retrieval/completion_retriever.py +11 -9
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/insights_retriever.py +4 -0
- cognee/modules/retrieval/natural_language_retriever.py +9 -15
- cognee/modules/retrieval/summaries_retriever.py +23 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -8
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- cognee/tests/test_qdrant.py +0 -99
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
|
|
3
|
+
from cognee.shared.logging_utils import get_logger
|
|
4
|
+
from cognee.infrastructure.files.storage import get_file_storage, get_storage_config
|
|
5
|
+
from cognee.infrastructure.files.utils.get_file_metadata import get_file_metadata
|
|
6
|
+
|
|
7
|
+
logger = get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PyPdfLoader(LoaderInterface):
    """
    PDF loader using pypdf library.

    Extracts text from a PDF page by page, labels every non-empty page with
    its 1-based page number, and stores the combined text as a ``.txt`` file
    through the configured file storage.
    """

    @property
    def supported_extensions(self) -> List[str]:
        """File extensions (without a leading dot) accepted by this loader."""
        return ["pdf"]

    @property
    def supported_mime_types(self) -> List[str]:
        """MIME types accepted by this loader."""
        return ["application/pdf"]

    @property
    def loader_name(self) -> str:
        """Name identifying this loader."""
        return "pypdf_loader"

    def can_handle(self, extension: str, mime_type: str) -> bool:
        """Return True only when BOTH the extension and the MIME type are supported."""
        if extension in self.supported_extensions and mime_type in self.supported_mime_types:
            return True

        return False

    async def load(self, file_path: str, strict: bool = False, **kwargs) -> str:
        """
        Load PDF file, extract its text and store it as a text file.

        Args:
            file_path: Path to the PDF file
            strict: Whether to use strict mode for PDF reading (passed to PdfReader)
            **kwargs: Additional arguments (accepted but not used by this loader)

        Returns:
            The value returned by ``storage.store()`` for the stored text file
            (presumably its full path; confirm against the storage backend).

        Raises:
            ImportError: If pypdf is not installed
            Exception: If PDF processing fails
        """
        try:
            from pypdf import PdfReader
        except ImportError as e:
            raise ImportError(
                "pypdf is required for PDF processing. Install with: pip install pypdf"
            ) from e

        try:
            with open(file_path, "rb") as file:
                file_metadata = await get_file_metadata(file)
                # Name ingested file of current loader based on original file content hash
                storage_file_name = "text_" + file_metadata["content_hash"] + ".txt"

                logger.info(f"Reading PDF: {file_path}")
                reader = PdfReader(file, strict=strict)

                content_parts = []

                # Pages are extracted while the file is still open because the reader
                # works on the open stream.
                for page_num, page in enumerate(reader.pages, 1):
                    try:
                        page_text = page.extract_text()
                        if page_text.strip():  # Only add non-empty pages
                            content_parts.append(f"Page {page_num}:\n{page_text}\n")
                    except Exception as e:
                        # Best effort: one unreadable page must not abort the whole document.
                        logger.warning(f"Failed to extract text from page {page_num}: {e}")

            # Combine all content
            full_content = "\n".join(content_parts)

            storage_config = get_storage_config()
            data_root_directory = storage_config["data_root_directory"]
            storage = get_file_storage(data_root_directory)

            full_file_path = await storage.store(storage_file_name, full_content)

            return full_file_path

        except Exception as e:
            logger.error(f"Failed to process PDF {file_path}: {e}")
            raise Exception(f"PDF processing failed: {e}") from e
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
|
|
3
|
+
from cognee.shared.logging_utils import get_logger
|
|
4
|
+
from cognee.infrastructure.files.storage import get_file_storage, get_storage_config
|
|
5
|
+
from cognee.infrastructure.files.utils.get_file_metadata import get_file_metadata
|
|
6
|
+
|
|
7
|
+
logger = get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class UnstructuredLoader(LoaderInterface):
    """
    Loader backed by the ``unstructured`` library.

    Partitions office documents, spreadsheets, presentations, rich text,
    HTML, e-mail and e-book files into elements, joins the element texts and
    stores the result as a ``.txt`` file through the configured file storage.
    """

    @property
    def supported_extensions(self) -> List[str]:
        """File extensions (without a leading dot) accepted by this loader."""
        word_documents = ["docx", "doc", "odt"]
        spreadsheets = ["xlsx", "xls", "ods"]
        presentations = ["pptx", "ppt", "odp"]
        rich_text_and_html = ["rtf", "html", "htm"]
        email_formats = ["eml", "msg"]
        ebooks = ["epub"]
        return [
            *word_documents,
            *spreadsheets,
            *presentations,
            *rich_text_and_html,
            *email_formats,
            *ebooks,
        ]

    @property
    def supported_mime_types(self) -> List[str]:
        """MIME types accepted by this loader."""
        # NOTE(review): there is no entry for the "msg" extension here, and
        # can_handle() requires both extension and MIME type to match — confirm
        # whether .msg files are meant to be routed to this loader.
        return [
            # docx
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            # doc
            "application/msword",
            # odt
            "application/vnd.oasis.opendocument.text",
            # xlsx
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            # xls
            "application/vnd.ms-excel",
            # ods
            "application/vnd.oasis.opendocument.spreadsheet",
            # pptx
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            # ppt
            "application/vnd.ms-powerpoint",
            # odp
            "application/vnd.oasis.opendocument.presentation",
            # rtf
            "application/rtf",
            # html
            "text/html",
            # eml
            "message/rfc822",
            # epub
            "application/epub+zip",
        ]

    @property
    def loader_name(self) -> str:
        """Name identifying this loader."""
        return "unstructured_loader"

    def can_handle(self, extension: str, mime_type: str) -> bool:
        """Return True only when both the extension and the MIME type are supported."""
        extension_ok = extension in self.supported_extensions
        return extension_ok and mime_type in self.supported_mime_types

    async def load(self, file_path: str, strategy: str = "auto", **kwargs):
        """
        Partition a document with unstructured and store its text.

        Args:
            file_path: Path to the document file
            strategy: Partitioning strategy ("auto", "fast", "hi_res", "ocr_only")
            **kwargs: Additional arguments passed to unstructured partition

        Returns:
            The value returned by ``storage.store()`` for the stored text file
            (presumably its full path; confirm against the storage backend).

        Raises:
            ImportError: If unstructured is not installed
            Exception: If document processing fails
        """
        try:
            from unstructured.partition.auto import partition
        except ImportError as import_error:
            raise ImportError(
                "unstructured is required for document processing. "
                "Install with: pip install unstructured"
            ) from import_error

        try:
            logger.info(f"Processing document: {file_path}")

            with open(file_path, "rb") as source_file:
                metadata = await get_file_metadata(source_file)

            # The stored file is named after the content hash of the original document.
            storage_file_name = "text_" + metadata["content_hash"] + ".txt"

            # Caller-supplied kwargs are merged last, so they win over the defaults.
            options = {"filename": file_path, "strategy": strategy}
            options.update(kwargs)
            elements = partition(**options)

            # Keep only elements that still have text after trimming whitespace.
            stripped_texts = (str(element).strip() for element in elements)
            full_content = "\n\n".join([text for text in stripped_texts if text])

            data_root_directory = get_storage_config()["data_root_directory"]
            storage = get_file_storage(data_root_directory)

            return await storage.store(storage_file_name, full_content)

        except Exception as e:
            logger.error(f"Failed to process document {file_path}: {e}")
            raise Exception(f"Document processing failed: {e}") from e
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from functools import lru_cache
|
|
2
|
+
from .LoaderEngine import LoaderEngine
|
|
3
|
+
from .create_loader_engine import create_loader_engine
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@lru_cache
def get_loader_engine() -> LoaderEngine:
    """
    Return the shared loader engine.

    The engine is built by ``create_loader_engine()`` on the first call.
    Because this function takes no arguments, ``lru_cache`` hands back that
    same instance on every later call.

    Returns:
        Cached LoaderEngine instance
    """
    engine = create_loader_engine()
    return engine
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from cognee.infrastructure.loaders.external import PyPdfLoader
|
|
2
|
+
from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader
|
|
3
|
+
|
|
4
|
+
def _resolve_loader_name(loader_class) -> str:
    """Return the string key under which ``loader_class`` is registered."""
    name = loader_class.loader_name
    if isinstance(name, property):
        # loader_name is declared as an instance-level @property on the loaders, so
        # reading it from the class yields the property object, not the name string.
        # Read it from an instance so registry keys are plain strings, like the
        # string keys that use_loader() inserts.
        # NOTE(review): assumes loaders can be constructed without arguments — confirm.
        name = loader_class().loader_name
    return name


# Registry for loader implementations, keyed by loader name
supported_loaders = {
    _resolve_loader_name(loader_class): loader_class
    for loader_class in (PyPdfLoader, TextLoader, ImageLoader, AudioLoader)
}

# Try adding optional loaders
try:
    from cognee.infrastructure.loaders.external import UnstructuredLoader

    supported_loaders[_resolve_loader_name(UnstructuredLoader)] = UnstructuredLoader
except ImportError:
    # Optional loader is not available; keep only the built-in loaders.
    pass
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from .supported_loaders import supported_loaders
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def use_loader(loader_name: str, loader_class):
    """
    Add a loader class to the ``supported_loaders`` registry at runtime.

    External packages and custom loaders use this to plug into the loader
    system. An existing entry with the same name is replaced.

    Args:
        loader_name: Unique name for the loader
        loader_class: Loader class implementing LoaderInterface

    Example:
        from cognee.infrastructure.loaders import use_loader
        from my_package import MyCustomLoader

        use_loader("my_custom_loader", MyCustomLoader)
    """
    supported_loaders.update({loader_name: loader_class})
|
|
File without changes
|
|
@@ -6,6 +6,7 @@ from .get_dataset import get_dataset
|
|
|
6
6
|
from .get_datasets import get_datasets
|
|
7
7
|
from .get_datasets_by_name import get_datasets_by_name
|
|
8
8
|
from .get_dataset_data import get_dataset_data
|
|
9
|
+
from .get_authorized_dataset import get_authorized_dataset
|
|
9
10
|
from .get_data import get_data
|
|
10
11
|
from .get_unique_dataset_id import get_unique_dataset_id
|
|
11
12
|
from .get_authorized_existing_datasets import get_authorized_existing_datasets
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
|
|
4
|
+
from ..models import Dataset
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
async def get_authorized_dataset(
    user_id: UUID, dataset_id: UUID, permission_type="read"
) -> Optional[Dataset]:
    """
    Fetch one dataset, provided the user holds the requested permission on it.

    Args:
        user_id (UUID): user id
        dataset_id (UUID): dataset id
        permission_type (str): permission type(read, write, delete, share), default is read

    Returns:
        Optional[Dataset]: the first dataset returned by the permission lookup,
        or None when the lookup returns nothing
    """
    matching_datasets = await get_specific_user_permission_datasets(
        user_id, permission_type, [dataset_id]
    )

    if not matching_datasets:
        return None

    return matching_datasets[0]
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from datetime import datetime, timezone
|
|
2
2
|
from uuid import uuid4
|
|
3
3
|
from sqlalchemy import UUID, Column, DateTime, String, JSON, Integer
|
|
4
|
+
from sqlalchemy.ext.mutable import MutableDict
|
|
4
5
|
from sqlalchemy.orm import relationship
|
|
5
6
|
|
|
6
7
|
from cognee.infrastructure.databases.relational import Base
|
|
@@ -16,14 +17,23 @@ class Data(Base):
|
|
|
16
17
|
name = Column(String)
|
|
17
18
|
extension = Column(String)
|
|
18
19
|
mime_type = Column(String)
|
|
20
|
+
original_extension = Column(String, nullable=True)
|
|
21
|
+
original_mime_type = Column(String, nullable=True)
|
|
22
|
+
loader_engine = Column(String)
|
|
19
23
|
raw_data_location = Column(String)
|
|
24
|
+
original_data_location = Column(String)
|
|
20
25
|
owner_id = Column(UUID, index=True)
|
|
21
|
-
tenant_id = Column(UUID, index=True,
|
|
26
|
+
tenant_id = Column(UUID, index=True, nullable=True)
|
|
22
27
|
content_hash = Column(String)
|
|
28
|
+
raw_content_hash = Column(String)
|
|
23
29
|
external_metadata = Column(JSON)
|
|
24
|
-
|
|
30
|
+
# Store NodeSet as JSON list of strings
|
|
31
|
+
node_set = Column(JSON, nullable=True)
|
|
32
|
+
# MutableDict allows SQLAlchemy to notice key-value pair changes, without it changing a value for a key
|
|
33
|
+
# wouldn't be noticed when committing a database session
|
|
34
|
+
pipeline_status = Column(MutableDict.as_mutable(JSON))
|
|
25
35
|
token_count = Column(Integer)
|
|
26
|
-
data_size = Column(Integer) # File size in bytes
|
|
36
|
+
data_size = Column(Integer, nullable=True) # File size in bytes
|
|
27
37
|
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
|
28
38
|
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
|
29
39
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
2
1
|
from cognee.modules.chunking.Chunker import Chunker
|
|
2
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
3
3
|
|
|
4
4
|
from .Document import Document
|
|
5
5
|
|
|
@@ -8,7 +8,7 @@ class AudioDocument(Document):
|
|
|
8
8
|
type: str = "audio"
|
|
9
9
|
|
|
10
10
|
async def create_transcript(self):
|
|
11
|
-
result = await
|
|
11
|
+
result = await LLMGateway.create_transcript(self.raw_data_location)
|
|
12
12
|
return result.text
|
|
13
13
|
|
|
14
14
|
async def read(self, chunker_cls: Chunker, max_chunk_size: int):
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from cognee.infrastructure.llm.
|
|
1
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
2
2
|
from cognee.modules.chunking.Chunker import Chunker
|
|
3
3
|
|
|
4
4
|
from .Document import Document
|
|
@@ -8,7 +8,7 @@ class ImageDocument(Document):
|
|
|
8
8
|
type: str = "image"
|
|
9
9
|
|
|
10
10
|
async def transcribe_image(self):
|
|
11
|
-
result = await
|
|
11
|
+
result = await LLMGateway.transcribe_image(self.raw_data_location)
|
|
12
12
|
return result.choices[0].message.content
|
|
13
13
|
|
|
14
14
|
async def read(self, chunker_cls: Chunker, max_chunk_size: int):
|
|
@@ -5,7 +5,6 @@ from cognee.modules.chunking.Chunker import Chunker
|
|
|
5
5
|
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
|
6
6
|
|
|
7
7
|
from .Document import Document
|
|
8
|
-
from .exceptions.exceptions import PyPdfInternalError
|
|
9
8
|
|
|
10
9
|
logger = get_logger("PDFDocument")
|
|
11
10
|
|
|
@@ -17,18 +16,12 @@ class PdfDocument(Document):
|
|
|
17
16
|
async with open_data_file(self.raw_data_location, mode="rb") as stream:
|
|
18
17
|
logger.info(f"Reading PDF: {self.raw_data_location}")
|
|
19
18
|
|
|
20
|
-
|
|
21
|
-
file = PdfReader(stream, strict=False)
|
|
22
|
-
except Exception:
|
|
23
|
-
raise PyPdfInternalError()
|
|
19
|
+
file = PdfReader(stream, strict=False)
|
|
24
20
|
|
|
25
21
|
async def get_text():
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
yield page_text
|
|
30
|
-
except Exception:
|
|
31
|
-
raise PyPdfInternalError()
|
|
22
|
+
for page in file.pages:
|
|
23
|
+
page_text = page.extract_text()
|
|
24
|
+
yield page_text
|
|
32
25
|
|
|
33
26
|
chunker = chunker_cls(self, get_text=get_text, max_chunk_size=max_chunk_size)
|
|
34
27
|
|
|
@@ -18,11 +18,8 @@ class UnstructuredDocument(Document):
|
|
|
18
18
|
except ModuleNotFoundError:
|
|
19
19
|
raise UnstructuredLibraryImportError
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
elements = partition(file=f, content_type=self.mime_type)
|
|
24
|
-
else:
|
|
25
|
-
elements = partition(self.raw_data_location, content_type=self.mime_type)
|
|
21
|
+
async with open_data_file(self.raw_data_location, mode="rb") as f:
|
|
22
|
+
elements = partition(file=f, content_type=self.mime_type)
|
|
26
23
|
|
|
27
24
|
in_memory_file = StringIO("\n\n".join([str(el) for el in elements]))
|
|
28
25
|
in_memory_file.seek(0)
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import time
|
|
1
2
|
from cognee.shared.logging_utils import get_logger
|
|
2
3
|
from typing import List, Dict, Union, Optional, Type
|
|
3
4
|
|
|
@@ -8,7 +9,7 @@ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
|
|
|
8
9
|
from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
|
|
9
10
|
import heapq
|
|
10
11
|
|
|
11
|
-
logger = get_logger()
|
|
12
|
+
logger = get_logger("CogneeGraph")
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class CogneeGraph(CogneeAbstractGraph):
|
|
@@ -66,7 +67,13 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
66
67
|
) -> None:
|
|
67
68
|
if node_dimension < 1 or edge_dimension < 1:
|
|
68
69
|
raise InvalidValueError(message="Dimensions must be positive integers")
|
|
70
|
+
|
|
69
71
|
try:
|
|
72
|
+
import time
|
|
73
|
+
|
|
74
|
+
start_time = time.time()
|
|
75
|
+
|
|
76
|
+
# Determine projection strategy
|
|
70
77
|
if node_type is not None and node_name is not None:
|
|
71
78
|
nodes_data, edges_data = await adapter.get_nodeset_subgraph(
|
|
72
79
|
node_type=node_type, node_name=node_name
|
|
@@ -83,16 +90,17 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
83
90
|
nodes_data, edges_data = await adapter.get_filtered_graph_data(
|
|
84
91
|
attribute_filters=memory_fragment_filter
|
|
85
92
|
)
|
|
86
|
-
|
|
87
93
|
if not nodes_data or not edges_data:
|
|
88
94
|
raise EntityNotFoundError(
|
|
89
95
|
message="Empty filtered graph projected from the database."
|
|
90
96
|
)
|
|
91
97
|
|
|
98
|
+
# Process nodes
|
|
92
99
|
for node_id, properties in nodes_data:
|
|
93
100
|
node_attributes = {key: properties.get(key) for key in node_properties_to_project}
|
|
94
101
|
self.add_node(Node(str(node_id), node_attributes, dimension=node_dimension))
|
|
95
102
|
|
|
103
|
+
# Process edges
|
|
96
104
|
for source_id, target_id, relationship_type, properties in edges_data:
|
|
97
105
|
source_node = self.get_node(str(source_id))
|
|
98
106
|
target_node = self.get_node(str(target_id))
|
|
@@ -113,17 +121,23 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
113
121
|
|
|
114
122
|
source_node.add_skeleton_edge(edge)
|
|
115
123
|
target_node.add_skeleton_edge(edge)
|
|
116
|
-
|
|
117
124
|
else:
|
|
118
125
|
raise EntityNotFoundError(
|
|
119
126
|
message=f"Edge references nonexistent nodes: {source_id} -> {target_id}"
|
|
120
127
|
)
|
|
121
128
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
129
|
+
# Final statistics
|
|
130
|
+
projection_time = time.time() - start_time
|
|
131
|
+
logger.info(
|
|
132
|
+
f"Graph projection completed: {len(self.nodes)} nodes, {len(self.edges)} edges in {projection_time:.2f}s"
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
except Exception as e:
|
|
136
|
+
logger.error(f"Error during graph projection: {str(e)}")
|
|
137
|
+
raise
|
|
125
138
|
|
|
126
139
|
async def map_vector_distances_to_graph_nodes(self, node_distances) -> None:
|
|
140
|
+
mapped_nodes = 0
|
|
127
141
|
for category, scored_results in node_distances.items():
|
|
128
142
|
for scored_result in scored_results:
|
|
129
143
|
node_id = str(scored_result.id)
|
|
@@ -131,48 +145,44 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
131
145
|
node = self.get_node(node_id)
|
|
132
146
|
if node:
|
|
133
147
|
node.add_attribute("vector_distance", score)
|
|
148
|
+
mapped_nodes += 1
|
|
134
149
|
|
|
135
|
-
async def map_vector_distances_to_graph_edges(
|
|
150
|
+
async def map_vector_distances_to_graph_edges(
|
|
151
|
+
self, vector_engine, query_vector, edge_distances
|
|
152
|
+
) -> None:
|
|
136
153
|
try:
|
|
137
|
-
query_vector = await vector_engine.embed_data([query])
|
|
138
|
-
query_vector = query_vector[0]
|
|
139
154
|
if query_vector is None or len(query_vector) == 0:
|
|
140
155
|
raise ValueError("Failed to generate query embedding.")
|
|
141
156
|
|
|
142
|
-
edge_distances
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
157
|
+
if edge_distances is None:
|
|
158
|
+
start_time = time.time()
|
|
159
|
+
edge_distances = await vector_engine.search(
|
|
160
|
+
collection_name="EdgeType_relationship_name",
|
|
161
|
+
query_vector=query_vector,
|
|
162
|
+
limit=0,
|
|
163
|
+
)
|
|
164
|
+
projection_time = time.time() - start_time
|
|
165
|
+
logger.info(
|
|
166
|
+
f"Edge collection distances were calculated separately from nodes in {projection_time:.2f}s"
|
|
167
|
+
)
|
|
147
168
|
|
|
148
169
|
embedding_map = {result.payload["text"]: result.score for result in edge_distances}
|
|
149
170
|
|
|
150
171
|
for edge in self.edges:
|
|
151
172
|
relationship_type = edge.attributes.get("relationship_type")
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
edge.attributes["vector_distance"] = embedding_map[relationship_type]
|
|
173
|
+
distance = embedding_map.get(relationship_type, None)
|
|
174
|
+
if distance is not None:
|
|
175
|
+
edge.attributes["vector_distance"] = distance
|
|
157
176
|
|
|
158
177
|
except Exception as ex:
|
|
159
|
-
|
|
178
|
+
logger.error(f"Error mapping vector distances to edges: {str(ex)}")
|
|
160
179
|
raise ex
|
|
161
180
|
|
|
162
181
|
async def calculate_top_triplet_importances(self, k: int) -> List:
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
source_distance = source_node.attributes.get("vector_distance", 1) if source_node else 1
|
|
169
|
-
target_distance = target_node.attributes.get("vector_distance", 1) if target_node else 1
|
|
170
|
-
edge_distance = edge.attributes.get("vector_distance", 1)
|
|
171
|
-
|
|
172
|
-
total_distance = source_distance + target_distance + edge_distance
|
|
173
|
-
|
|
174
|
-
heapq.heappush(min_heap, (-total_distance, i, edge))
|
|
175
|
-
if len(min_heap) > k:
|
|
176
|
-
heapq.heappop(min_heap)
|
|
182
|
+
def score(edge):
|
|
183
|
+
n1 = edge.node1.attributes.get("vector_distance", 1)
|
|
184
|
+
n2 = edge.node2.attributes.get("vector_distance", 1)
|
|
185
|
+
e = edge.attributes.get("vector_distance", 1)
|
|
186
|
+
return n1 + n2 + e
|
|
177
187
|
|
|
178
|
-
return
|
|
188
|
+
return heapq.nsmallest(k, self.edges, key=score)
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
from uuid import UUID
|
|
2
2
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
3
3
|
from cognee.context_global_variables import set_database_global_context_variables
|
|
4
|
+
from cognee.modules.data.exceptions.exceptions import DatasetNotFoundError
|
|
5
|
+
from cognee.modules.data.methods import get_authorized_dataset
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
async def get_formatted_graph_data(dataset_id: UUID, user_id: UUID):
|
|
7
|
-
await
|
|
9
|
+
dataset = await get_authorized_dataset(user_id, dataset_id)
|
|
10
|
+
if not dataset:
|
|
11
|
+
raise DatasetNotFoundError(message="Dataset not found.")
|
|
12
|
+
|
|
13
|
+
await set_database_global_context_variables(dataset_id, dataset.owner_id)
|
|
8
14
|
|
|
9
15
|
graph_client = await get_graph_engine()
|
|
10
16
|
(nodes, edges) = await graph_client.get_graph_data()
|
|
@@ -33,7 +39,7 @@ async def get_formatted_graph_data(dataset_id: UUID, user_id: UUID):
|
|
|
33
39
|
lambda edge: {
|
|
34
40
|
"source": str(edge[0]),
|
|
35
41
|
"target": str(edge[1]),
|
|
36
|
-
"label": edge[2],
|
|
42
|
+
"label": str(edge[2]),
|
|
37
43
|
},
|
|
38
44
|
edges,
|
|
39
45
|
)
|