cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +41 -3
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -7
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +12 -7
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +1 -1
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +48 -13
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -15
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +11 -1
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +9 -18
- cognee/modules/graph/methods/get_formatted_graph_data.py +7 -1
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +251 -19
- cognee/modules/retrieval/code_retriever.py +3 -5
- cognee/modules/retrieval/completion_retriever.py +1 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/natural_language_retriever.py +3 -5
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -4
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_qdrant.py → test_neptune_analytics_vector.py} +86 -16
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/METADATA +12 -6
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/RECORD +195 -156
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/WHEEL +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,21 +1,31 @@
|
|
|
1
1
|
import os
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
2
4
|
from uuid import UUID
|
|
3
|
-
from typing import Any
|
|
5
|
+
from typing import Any, List
|
|
4
6
|
from functools import wraps
|
|
7
|
+
from sqlalchemy import select
|
|
5
8
|
|
|
9
|
+
import cognee.modules.ingestion as ingestion
|
|
6
10
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
7
11
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
8
12
|
from cognee.modules.pipelines.operations.run_tasks_distributed import run_tasks_distributed
|
|
9
13
|
from cognee.modules.users.models import User
|
|
14
|
+
from cognee.modules.data.models import Data
|
|
15
|
+
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
|
10
16
|
from cognee.shared.logging_utils import get_logger
|
|
11
17
|
from cognee.modules.users.methods import get_default_user
|
|
12
18
|
from cognee.modules.pipelines.utils import generate_pipeline_id
|
|
19
|
+
from cognee.modules.pipelines.exceptions import PipelineRunFailedError
|
|
20
|
+
from cognee.tasks.ingestion import save_data_item_to_storage, resolve_data_directories
|
|
13
21
|
from cognee.modules.pipelines.models.PipelineRunInfo import (
|
|
14
22
|
PipelineRunCompleted,
|
|
15
23
|
PipelineRunErrored,
|
|
16
24
|
PipelineRunStarted,
|
|
17
25
|
PipelineRunYield,
|
|
26
|
+
PipelineRunAlreadyCompleted,
|
|
18
27
|
)
|
|
28
|
+
from cognee.modules.pipelines.models.DataItemStatus import DataItemStatus
|
|
19
29
|
|
|
20
30
|
from cognee.modules.pipelines.operations import (
|
|
21
31
|
log_pipeline_run_start,
|
|
@@ -50,13 +60,184 @@ def override_run_tasks(new_gen):
|
|
|
50
60
|
|
|
51
61
|
@override_run_tasks(run_tasks_distributed)
|
|
52
62
|
async def run_tasks(
|
|
53
|
-
tasks:
|
|
63
|
+
tasks: List[Task],
|
|
54
64
|
dataset_id: UUID,
|
|
55
|
-
data: Any = None,
|
|
65
|
+
data: List[Any] = None,
|
|
56
66
|
user: User = None,
|
|
57
67
|
pipeline_name: str = "unknown_pipeline",
|
|
58
68
|
context: dict = None,
|
|
69
|
+
incremental_loading: bool = False,
|
|
59
70
|
):
|
|
71
|
+
async def _run_tasks_data_item_incremental(
|
|
72
|
+
data_item,
|
|
73
|
+
dataset,
|
|
74
|
+
tasks,
|
|
75
|
+
pipeline_name,
|
|
76
|
+
pipeline_id,
|
|
77
|
+
pipeline_run_id,
|
|
78
|
+
context,
|
|
79
|
+
user,
|
|
80
|
+
):
|
|
81
|
+
db_engine = get_relational_engine()
|
|
82
|
+
# If incremental_loading of data is set to True don't process documents already processed by pipeline
|
|
83
|
+
# If data is being added to Cognee for the first time calculate the id of the data
|
|
84
|
+
if not isinstance(data_item, Data):
|
|
85
|
+
file_path = await save_data_item_to_storage(data_item)
|
|
86
|
+
# Ingest data and add metadata
|
|
87
|
+
async with open_data_file(file_path) as file:
|
|
88
|
+
classified_data = ingestion.classify(file)
|
|
89
|
+
# data_id is the hash of file contents + owner id to avoid duplicate data
|
|
90
|
+
data_id = ingestion.identify(classified_data, user)
|
|
91
|
+
else:
|
|
92
|
+
# If data was already processed by Cognee get data id
|
|
93
|
+
data_id = data_item.id
|
|
94
|
+
|
|
95
|
+
# Check pipeline status, if Data already processed for pipeline before skip current processing
|
|
96
|
+
async with db_engine.get_async_session() as session:
|
|
97
|
+
data_point = (
|
|
98
|
+
await session.execute(select(Data).filter(Data.id == data_id))
|
|
99
|
+
).scalar_one_or_none()
|
|
100
|
+
if data_point:
|
|
101
|
+
if (
|
|
102
|
+
data_point.pipeline_status.get(pipeline_name, {}).get(str(dataset.id))
|
|
103
|
+
== DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED
|
|
104
|
+
):
|
|
105
|
+
yield {
|
|
106
|
+
"run_info": PipelineRunAlreadyCompleted(
|
|
107
|
+
pipeline_run_id=pipeline_run_id,
|
|
108
|
+
dataset_id=dataset.id,
|
|
109
|
+
dataset_name=dataset.name,
|
|
110
|
+
),
|
|
111
|
+
"data_id": data_id,
|
|
112
|
+
}
|
|
113
|
+
return
|
|
114
|
+
|
|
115
|
+
try:
|
|
116
|
+
# Process data based on data_item and list of tasks
|
|
117
|
+
async for result in run_tasks_with_telemetry(
|
|
118
|
+
tasks=tasks,
|
|
119
|
+
data=[data_item],
|
|
120
|
+
user=user,
|
|
121
|
+
pipeline_name=pipeline_id,
|
|
122
|
+
context=context,
|
|
123
|
+
):
|
|
124
|
+
yield PipelineRunYield(
|
|
125
|
+
pipeline_run_id=pipeline_run_id,
|
|
126
|
+
dataset_id=dataset.id,
|
|
127
|
+
dataset_name=dataset.name,
|
|
128
|
+
payload=result,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# Update pipeline status for Data element
|
|
132
|
+
async with db_engine.get_async_session() as session:
|
|
133
|
+
data_point = (
|
|
134
|
+
await session.execute(select(Data).filter(Data.id == data_id))
|
|
135
|
+
).scalar_one_or_none()
|
|
136
|
+
data_point.pipeline_status[pipeline_name] = {
|
|
137
|
+
str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED
|
|
138
|
+
}
|
|
139
|
+
await session.merge(data_point)
|
|
140
|
+
await session.commit()
|
|
141
|
+
|
|
142
|
+
yield {
|
|
143
|
+
"run_info": PipelineRunCompleted(
|
|
144
|
+
pipeline_run_id=pipeline_run_id,
|
|
145
|
+
dataset_id=dataset.id,
|
|
146
|
+
dataset_name=dataset.name,
|
|
147
|
+
),
|
|
148
|
+
"data_id": data_id,
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
except Exception as error:
|
|
152
|
+
# Temporarily swallow error and try to process rest of documents first, then re-raise error at end of data ingestion pipeline
|
|
153
|
+
logger.error(
|
|
154
|
+
f"Exception caught while processing data: {error}.\n Data processing failed for data item: {data_item}."
|
|
155
|
+
)
|
|
156
|
+
yield {
|
|
157
|
+
"run_info": PipelineRunErrored(
|
|
158
|
+
pipeline_run_id=pipeline_run_id,
|
|
159
|
+
payload=repr(error),
|
|
160
|
+
dataset_id=dataset.id,
|
|
161
|
+
dataset_name=dataset.name,
|
|
162
|
+
),
|
|
163
|
+
"data_id": data_id,
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
if os.getenv("RAISE_INCREMENTAL_LOADING_ERRORS", "true").lower() == "true":
|
|
167
|
+
raise error
|
|
168
|
+
|
|
169
|
+
async def _run_tasks_data_item_regular(
|
|
170
|
+
data_item,
|
|
171
|
+
dataset,
|
|
172
|
+
tasks,
|
|
173
|
+
pipeline_id,
|
|
174
|
+
pipeline_run_id,
|
|
175
|
+
context,
|
|
176
|
+
user,
|
|
177
|
+
):
|
|
178
|
+
# Process data based on data_item and list of tasks
|
|
179
|
+
async for result in run_tasks_with_telemetry(
|
|
180
|
+
tasks=tasks,
|
|
181
|
+
data=[data_item],
|
|
182
|
+
user=user,
|
|
183
|
+
pipeline_name=pipeline_id,
|
|
184
|
+
context=context,
|
|
185
|
+
):
|
|
186
|
+
yield PipelineRunYield(
|
|
187
|
+
pipeline_run_id=pipeline_run_id,
|
|
188
|
+
dataset_id=dataset.id,
|
|
189
|
+
dataset_name=dataset.name,
|
|
190
|
+
payload=result,
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
yield {
|
|
194
|
+
"run_info": PipelineRunCompleted(
|
|
195
|
+
pipeline_run_id=pipeline_run_id,
|
|
196
|
+
dataset_id=dataset.id,
|
|
197
|
+
dataset_name=dataset.name,
|
|
198
|
+
)
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
async def _run_tasks_data_item(
|
|
202
|
+
data_item,
|
|
203
|
+
dataset,
|
|
204
|
+
tasks,
|
|
205
|
+
pipeline_name,
|
|
206
|
+
pipeline_id,
|
|
207
|
+
pipeline_run_id,
|
|
208
|
+
context,
|
|
209
|
+
user,
|
|
210
|
+
incremental_loading,
|
|
211
|
+
):
|
|
212
|
+
# Go through async generator and return data item processing result. Result can be PipelineRunAlreadyCompleted when data item is skipped,
|
|
213
|
+
# PipelineRunCompleted when processing was successful and PipelineRunErrored if there were issues
|
|
214
|
+
result = None
|
|
215
|
+
if incremental_loading:
|
|
216
|
+
async for result in _run_tasks_data_item_incremental(
|
|
217
|
+
data_item=data_item,
|
|
218
|
+
dataset=dataset,
|
|
219
|
+
tasks=tasks,
|
|
220
|
+
pipeline_name=pipeline_name,
|
|
221
|
+
pipeline_id=pipeline_id,
|
|
222
|
+
pipeline_run_id=pipeline_run_id,
|
|
223
|
+
context=context,
|
|
224
|
+
user=user,
|
|
225
|
+
):
|
|
226
|
+
pass
|
|
227
|
+
else:
|
|
228
|
+
async for result in _run_tasks_data_item_regular(
|
|
229
|
+
data_item=data_item,
|
|
230
|
+
dataset=dataset,
|
|
231
|
+
tasks=tasks,
|
|
232
|
+
pipeline_id=pipeline_id,
|
|
233
|
+
pipeline_run_id=pipeline_run_id,
|
|
234
|
+
context=context,
|
|
235
|
+
user=user,
|
|
236
|
+
):
|
|
237
|
+
pass
|
|
238
|
+
|
|
239
|
+
return result
|
|
240
|
+
|
|
60
241
|
if not user:
|
|
61
242
|
user = await get_default_user()
|
|
62
243
|
|
|
@@ -68,9 +249,7 @@ async def run_tasks(
|
|
|
68
249
|
dataset = await session.get(Dataset, dataset_id)
|
|
69
250
|
|
|
70
251
|
pipeline_id = generate_pipeline_id(user.id, dataset.id, pipeline_name)
|
|
71
|
-
|
|
72
252
|
pipeline_run = await log_pipeline_run_start(pipeline_id, pipeline_name, dataset_id, data)
|
|
73
|
-
|
|
74
253
|
pipeline_run_id = pipeline_run.pipeline_run_id
|
|
75
254
|
|
|
76
255
|
yield PipelineRunStarted(
|
|
@@ -81,18 +260,65 @@ async def run_tasks(
|
|
|
81
260
|
)
|
|
82
261
|
|
|
83
262
|
try:
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
263
|
+
if not isinstance(data, list):
|
|
264
|
+
data = [data]
|
|
265
|
+
|
|
266
|
+
if incremental_loading:
|
|
267
|
+
data = await resolve_data_directories(data)
|
|
268
|
+
|
|
269
|
+
# TODO: Return to using async.gather for data items after Cognee release
|
|
270
|
+
# # Create async tasks per data item that will run the pipeline for the data item
|
|
271
|
+
# data_item_tasks = [
|
|
272
|
+
# asyncio.create_task(
|
|
273
|
+
# _run_tasks_data_item(
|
|
274
|
+
# data_item,
|
|
275
|
+
# dataset,
|
|
276
|
+
# tasks,
|
|
277
|
+
# pipeline_name,
|
|
278
|
+
# pipeline_id,
|
|
279
|
+
# pipeline_run_id,
|
|
280
|
+
# context,
|
|
281
|
+
# user,
|
|
282
|
+
# incremental_loading,
|
|
283
|
+
# )
|
|
284
|
+
# )
|
|
285
|
+
# for data_item in data
|
|
286
|
+
# ]
|
|
287
|
+
# results = await asyncio.gather(*data_item_tasks)
|
|
288
|
+
# # Remove skipped data items from results
|
|
289
|
+
# results = [result for result in results if result]
|
|
290
|
+
|
|
291
|
+
### TEMP sync data item handling
|
|
292
|
+
results = []
|
|
293
|
+
# Run the pipeline for each data_item sequentially, one after the other
|
|
294
|
+
for data_item in data:
|
|
295
|
+
result = await _run_tasks_data_item(
|
|
296
|
+
data_item,
|
|
297
|
+
dataset,
|
|
298
|
+
tasks,
|
|
299
|
+
pipeline_name,
|
|
300
|
+
pipeline_id,
|
|
301
|
+
pipeline_run_id,
|
|
302
|
+
context,
|
|
303
|
+
user,
|
|
304
|
+
incremental_loading,
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
# Skip items that returned a false-y value
|
|
308
|
+
if result:
|
|
309
|
+
results.append(result)
|
|
310
|
+
### END
|
|
311
|
+
|
|
312
|
+
# Remove skipped data items from results
|
|
313
|
+
results = [result for result in results if result]
|
|
314
|
+
|
|
315
|
+
# If any data item could not be processed propagate error
|
|
316
|
+
errored_results = [
|
|
317
|
+
result for result in results if isinstance(result["run_info"], PipelineRunErrored)
|
|
318
|
+
]
|
|
319
|
+
if errored_results:
|
|
320
|
+
raise PipelineRunFailedError(
|
|
321
|
+
message="Pipeline run failed. Data item could not be processed."
|
|
96
322
|
)
|
|
97
323
|
|
|
98
324
|
await log_pipeline_run_complete(
|
|
@@ -103,6 +329,7 @@ async def run_tasks(
|
|
|
103
329
|
pipeline_run_id=pipeline_run_id,
|
|
104
330
|
dataset_id=dataset.id,
|
|
105
331
|
dataset_name=dataset.name,
|
|
332
|
+
data_ingestion_info=results,
|
|
106
333
|
)
|
|
107
334
|
|
|
108
335
|
graph_engine = await get_graph_engine()
|
|
@@ -120,9 +347,14 @@ async def run_tasks(
|
|
|
120
347
|
|
|
121
348
|
yield PipelineRunErrored(
|
|
122
349
|
pipeline_run_id=pipeline_run_id,
|
|
123
|
-
payload=error,
|
|
350
|
+
payload=repr(error),
|
|
124
351
|
dataset_id=dataset.id,
|
|
125
352
|
dataset_name=dataset.name,
|
|
353
|
+
data_ingestion_info=locals().get(
|
|
354
|
+
"results"
|
|
355
|
+
), # Returns results if they exist or returns None
|
|
126
356
|
)
|
|
127
357
|
|
|
128
|
-
raise error
|
|
358
|
+
# In case of error during incremental loading of data just let the user know the pipeline Errored, don't raise error
|
|
359
|
+
if not isinstance(error, PipelineRunFailedError):
|
|
360
|
+
raise error
|
|
@@ -7,8 +7,7 @@ from cognee.shared.logging_utils import get_logger
|
|
|
7
7
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
8
8
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
9
9
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
10
|
-
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
11
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt
|
|
10
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
12
11
|
|
|
13
12
|
logger = get_logger("CodeRetriever")
|
|
14
13
|
|
|
@@ -42,11 +41,10 @@ class CodeRetriever(BaseRetriever):
|
|
|
42
41
|
f"Processing query with LLM: '{query[:100]}{'...' if len(query) > 100 else ''}'"
|
|
43
42
|
)
|
|
44
43
|
|
|
45
|
-
system_prompt = read_query_prompt("codegraph_retriever_system.txt")
|
|
46
|
-
llm_client = get_llm_client()
|
|
44
|
+
system_prompt = LLMGateway.read_query_prompt("codegraph_retriever_system.txt")
|
|
47
45
|
|
|
48
46
|
try:
|
|
49
|
-
result = await llm_client.acreate_structured_output(
|
|
47
|
+
result = await LLMGateway.acreate_structured_output(
|
|
50
48
|
text_input=query,
|
|
51
49
|
system_prompt=system_prompt,
|
|
52
50
|
response_model=self.CodeQueryInfo,
|
|
@@ -4,8 +4,6 @@ import asyncio
|
|
|
4
4
|
from cognee.infrastructure.context.BaseContextProvider import BaseContextProvider
|
|
5
5
|
from cognee.infrastructure.engine import DataPoint
|
|
6
6
|
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
|
|
7
|
-
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
8
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt
|
|
9
7
|
from cognee.modules.retrieval.utils.brute_force_triplet_search import (
|
|
10
8
|
brute_force_triplet_search,
|
|
11
9
|
format_triplets,
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
from typing import Any, Optional, List, Type
|
|
2
2
|
from cognee.shared.logging_utils import get_logger
|
|
3
|
-
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
4
3
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
5
4
|
from cognee.modules.retrieval.utils.completion import generate_completion
|
|
6
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
|
|
7
5
|
|
|
8
6
|
logger = get_logger()
|
|
9
7
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from typing import Any, Optional, List, Type
|
|
2
2
|
from cognee.shared.logging_utils import get_logger
|
|
3
|
-
|
|
3
|
+
|
|
4
4
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
5
5
|
from cognee.modules.retrieval.utils.completion import generate_completion
|
|
6
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
|
|
6
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
7
7
|
|
|
8
8
|
logger = get_logger()
|
|
9
9
|
|
|
@@ -73,7 +73,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
73
73
|
|
|
74
74
|
- List[str]: A list containing the generated answer to the user's query.
|
|
75
75
|
"""
|
|
76
|
-
llm_client = get_llm_client()
|
|
77
76
|
followup_question = ""
|
|
78
77
|
triplets = []
|
|
79
78
|
answer = [""]
|
|
@@ -95,27 +94,27 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
95
94
|
logger.info(f"Chain-of-thought: round {round_idx} - answer: {answer}")
|
|
96
95
|
if round_idx < max_iter:
|
|
97
96
|
valid_args = {"query": query, "answer": answer, "context": context}
|
|
98
|
-
valid_user_prompt = render_prompt(
|
|
97
|
+
valid_user_prompt = LLMGateway.render_prompt(
|
|
99
98
|
filename=self.validation_user_prompt_path, context=valid_args
|
|
100
99
|
)
|
|
101
|
-
valid_system_prompt = read_query_prompt(
|
|
100
|
+
valid_system_prompt = LLMGateway.read_query_prompt(
|
|
102
101
|
prompt_file_name=self.validation_system_prompt_path
|
|
103
102
|
)
|
|
104
103
|
|
|
105
|
-
reasoning = await llm_client.acreate_structured_output(
|
|
104
|
+
reasoning = await LLMGateway.acreate_structured_output(
|
|
106
105
|
text_input=valid_user_prompt,
|
|
107
106
|
system_prompt=valid_system_prompt,
|
|
108
107
|
response_model=str,
|
|
109
108
|
)
|
|
110
109
|
followup_args = {"query": query, "answer": answer, "reasoning": reasoning}
|
|
111
|
-
followup_prompt = render_prompt(
|
|
110
|
+
followup_prompt = LLMGateway.render_prompt(
|
|
112
111
|
filename=self.followup_user_prompt_path, context=followup_args
|
|
113
112
|
)
|
|
114
|
-
followup_system = read_query_prompt(
|
|
113
|
+
followup_system = LLMGateway.read_query_prompt(
|
|
115
114
|
prompt_file_name=self.followup_system_prompt_path
|
|
116
115
|
)
|
|
117
116
|
|
|
118
|
-
followup_question = await llm_client.acreate_structured_output(
|
|
117
|
+
followup_question = await LLMGateway.acreate_structured_output(
|
|
119
118
|
text_input=followup_prompt, system_prompt=followup_system, response_model=str
|
|
120
119
|
)
|
|
121
120
|
logger.info(
|
|
@@ -2,8 +2,7 @@ from typing import Any, Optional
|
|
|
2
2
|
from cognee.shared.logging_utils import get_logger
|
|
3
3
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
4
4
|
from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter
|
|
5
|
-
from cognee.infrastructure.llm.
|
|
6
|
-
from cognee.infrastructure.llm.prompts import render_prompt
|
|
5
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
7
6
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
8
7
|
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
|
|
9
8
|
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
|
|
@@ -51,8 +50,7 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
51
50
|
|
|
52
51
|
async def _generate_cypher_query(self, query: str, edge_schemas, previous_attempts=None) -> str:
|
|
53
52
|
"""Generate a Cypher query using LLM based on natural language query and schema information."""
|
|
54
|
-
|
|
55
|
-
system_prompt = render_prompt(
|
|
53
|
+
system_prompt = LLMGateway.render_prompt(
|
|
56
54
|
self.system_prompt_path,
|
|
57
55
|
context={
|
|
58
56
|
"edge_schemas": edge_schemas,
|
|
@@ -60,7 +58,7 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
60
58
|
},
|
|
61
59
|
)
|
|
62
60
|
|
|
63
|
-
return await
|
|
61
|
+
return await LLMGateway.acreate_structured_output(
|
|
64
62
|
text_input=query,
|
|
65
63
|
system_prompt=system_prompt,
|
|
66
64
|
response_model=str,
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
from cognee.infrastructure.llm.
|
|
2
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
|
|
1
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
3
2
|
|
|
4
3
|
|
|
5
4
|
async def generate_completion(
|
|
@@ -10,11 +9,10 @@ async def generate_completion(
|
|
|
10
9
|
) -> str:
|
|
11
10
|
"""Generates a completion using LLM with given context and prompts."""
|
|
12
11
|
args = {"question": query, "context": context}
|
|
13
|
-
user_prompt = render_prompt(user_prompt_path, args)
|
|
14
|
-
system_prompt = read_query_prompt(system_prompt_path)
|
|
12
|
+
user_prompt = LLMGateway.render_prompt(user_prompt_path, args)
|
|
13
|
+
system_prompt = LLMGateway.read_query_prompt(system_prompt_path)
|
|
15
14
|
|
|
16
|
-
|
|
17
|
-
return await llm_client.acreate_structured_output(
|
|
15
|
+
return await LLMGateway.acreate_structured_output(
|
|
18
16
|
text_input=user_prompt,
|
|
19
17
|
system_prompt=system_prompt,
|
|
20
18
|
response_model=str,
|
|
@@ -26,10 +24,9 @@ async def summarize_text(
|
|
|
26
24
|
prompt_path: str = "summarize_search_results.txt",
|
|
27
25
|
) -> str:
|
|
28
26
|
"""Summarizes text using LLM with the specified prompt."""
|
|
29
|
-
system_prompt = read_query_prompt(prompt_path)
|
|
30
|
-
llm_client = get_llm_client()
|
|
27
|
+
system_prompt = LLMGateway.read_query_prompt(prompt_path)
|
|
31
28
|
|
|
32
|
-
return await
|
|
29
|
+
return await LLMGateway.acreate_structured_output(
|
|
33
30
|
text_input=text,
|
|
34
31
|
system_prompt=system_prompt,
|
|
35
32
|
response_model=str,
|
|
@@ -9,7 +9,7 @@ from cognee.modules.users.methods import get_default_user
|
|
|
9
9
|
from cognee.modules.users.models import User
|
|
10
10
|
from cognee.shared.utils import send_telemetry
|
|
11
11
|
from cognee.modules.search.methods import search
|
|
12
|
-
from cognee.infrastructure.llm.
|
|
12
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
13
13
|
|
|
14
14
|
logger = get_logger(level=ERROR)
|
|
15
15
|
|
|
@@ -71,8 +71,7 @@ async def code_description_to_code_part(
|
|
|
71
71
|
if isinstance(obj, dict) and "description" in obj
|
|
72
72
|
)
|
|
73
73
|
|
|
74
|
-
|
|
75
|
-
context_from_documents = await llm_client.acreate_structured_output(
|
|
74
|
+
context_from_documents = await LLMGateway.acreate_structured_output(
|
|
76
75
|
text_input=f"The retrieved context from documents is {concatenated_descriptions}.",
|
|
77
76
|
system_prompt="You are a Senior Software Engineer, summarize the context from documents"
|
|
78
77
|
f" in a way that it is gonna be provided next to codeparts as context"
|
|
@@ -27,7 +27,7 @@ from cognee.modules.users.models import User
|
|
|
27
27
|
from cognee.modules.data.models import Dataset
|
|
28
28
|
from cognee.shared.utils import send_telemetry
|
|
29
29
|
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
|
|
30
|
-
from cognee.modules.search.operations import log_query, log_result
|
|
30
|
+
from cognee.modules.search.operations import log_query, log_result, select_search_type
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
async def search(
|
|
@@ -129,6 +129,10 @@ async def specific_search(
|
|
|
129
129
|
SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion,
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
+
# If the query type is FEELING_LUCKY, select the search type intelligently
|
|
133
|
+
if query_type is SearchType.FEELING_LUCKY:
|
|
134
|
+
query_type = await select_search_type(query)
|
|
135
|
+
|
|
132
136
|
search_task = search_tasks.get(query_type)
|
|
133
137
|
|
|
134
138
|
if search_task is None:
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from cognee.infrastructure.llm.prompts import read_query_prompt
|
|
2
|
+
from cognee.modules.search.types import SearchType
|
|
3
|
+
from cognee.shared.logging_utils import get_logger
|
|
4
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
5
|
+
|
|
6
|
+
logger = get_logger("SearchTypeSelector")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def select_search_type(
    query: str,
    system_prompt_path: str = "search_type_selector_prompt.txt",
) -> SearchType:
    """
    Ask the LLM which search type best fits the query.

    The LLM reply is normalized (surrounding whitespace and quote characters
    removed, upper-cased) and matched against the member names of SearchType.

    Args:
        query: The query to analyze.
        system_prompt_path: The path to the system prompt.

    Returns:
        The search type named by the LLM, or SearchType.RAG_COMPLETION when the
        reply does not name a usable search type or the LLM call fails.
    """
    default_search_type = SearchType.RAG_COMPLETION
    system_prompt = read_query_prompt(system_prompt_path)

    try:
        response = await LLMGateway.acreate_structured_output(
            text_input=query,
            system_prompt=system_prompt,
            response_model=str,
        )

        # Tolerate stray whitespace/newlines or quoting around the reply so that
        # an otherwise valid answer is not discarded.
        type_name = response.strip().strip("\"'").upper()

        # Resolve by member NAME, matching the membership test; SearchType(...)
        # would resolve by value and only works while names and values coincide.
        selected = SearchType.__members__.get(type_name)

        # FEELING_LUCKY is the request to pick a type, not a concrete type, so
        # returning it would hand the caller the same unresolved value back.
        # NOTE(review): assumes callers have no dedicated FEELING_LUCKY handler - confirm.
        if selected is not None and selected is not SearchType.FEELING_LUCKY:
            logger.info(f"Selected lucky search type: {type_name}")
            return selected

        # If the response is not a valid search type, return the default search type
        logger.info(f"LLM gives an invalid search type: {type_name}")
        return default_search_type
    except Exception as e:
        # Best-effort selection: any failure (LLM error, non-string reply)
        # degrades to the default search type instead of failing the search.
        logger.error(f"Failed to select search type intelligently from LLM: {str(e)}")
        return default_search_type
|
|
@@ -6,7 +6,7 @@ from cognee.infrastructure.databases.vector import get_vectordb_config
|
|
|
6
6
|
class VectorDBConfig(BaseModel):
|
|
7
7
|
url: str
|
|
8
8
|
api_key: str
|
|
9
|
-
provider: Union[Literal["lancedb"], Literal["
|
|
9
|
+
provider: Union[Literal["lancedb"], Literal["pgvector"]]
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
async def save_vector_db_config(vector_db_config: VectorDBConfig):
|
cognee/shared/data_models.py
CHANGED
|
@@ -4,7 +4,9 @@ from enum import Enum, auto
|
|
|
4
4
|
from typing import Any, Dict, List, Optional, Union
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, Field
|
|
7
|
-
from cognee.infrastructure.llm.config import
|
|
7
|
+
from cognee.infrastructure.llm.config import (
|
|
8
|
+
get_llm_config,
|
|
9
|
+
)
|
|
8
10
|
|
|
9
11
|
if get_llm_config().llm_provider.lower() == "gemini":
|
|
10
12
|
"""
|
cognee/shared/logging_utils.py
CHANGED
|
@@ -175,17 +175,13 @@ def log_database_configuration(logger):
|
|
|
175
175
|
try:
|
|
176
176
|
# Log relational database configuration
|
|
177
177
|
relational_config = get_relational_config()
|
|
178
|
-
logger.info(f"Relational database: {relational_config.db_provider}")
|
|
179
178
|
if relational_config.db_provider == "postgres":
|
|
180
179
|
logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
|
|
181
|
-
logger.info(f"Postgres database: {relational_config.db_name}")
|
|
182
180
|
elif relational_config.db_provider == "sqlite":
|
|
183
181
|
logger.info(f"SQLite path: {relational_config.db_path}")
|
|
184
|
-
logger.info(f"SQLite database: {relational_config.db_name}")
|
|
185
182
|
|
|
186
183
|
# Log vector database configuration
|
|
187
184
|
vector_config = get_vectordb_config()
|
|
188
|
-
logger.info(f"Vector database: {vector_config.vector_db_provider}")
|
|
189
185
|
if vector_config.vector_db_provider == "lancedb":
|
|
190
186
|
logger.info(f"Vector database path: {vector_config.vector_db_url}")
|
|
191
187
|
else:
|
|
@@ -193,7 +189,6 @@ def log_database_configuration(logger):
|
|
|
193
189
|
|
|
194
190
|
# Log graph database configuration
|
|
195
191
|
graph_config = get_graph_config()
|
|
196
|
-
logger.info(f"Graph database: {graph_config.graph_database_provider}")
|
|
197
192
|
if graph_config.graph_database_provider == "kuzu":
|
|
198
193
|
logger.info(f"Graph database path: {graph_config.graph_file_path}")
|
|
199
194
|
else:
|