cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +44 -4
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +13 -3
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
- cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +252 -20
- cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
- cognee/modules/retrieval/chunks_retriever.py +23 -1
- cognee/modules/retrieval/code_retriever.py +66 -9
- cognee/modules/retrieval/completion_retriever.py +11 -9
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/insights_retriever.py +4 -0
- cognee/modules/retrieval/natural_language_retriever.py +9 -15
- cognee/modules/retrieval/summaries_retriever.py +23 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -8
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- cognee/tests/test_qdrant.py +0 -99
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from typing import Any, Optional, List, Type
|
|
2
2
|
from cognee.shared.logging_utils import get_logger
|
|
3
|
-
|
|
3
|
+
|
|
4
4
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
5
5
|
from cognee.modules.retrieval.utils.completion import generate_completion
|
|
6
|
-
from cognee.infrastructure.llm.
|
|
6
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
7
7
|
|
|
8
8
|
logger = get_logger()
|
|
9
9
|
|
|
@@ -73,7 +73,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
73
73
|
|
|
74
74
|
- List[str]: A list containing the generated answer to the user's query.
|
|
75
75
|
"""
|
|
76
|
-
llm_client = get_llm_client()
|
|
77
76
|
followup_question = ""
|
|
78
77
|
triplets = []
|
|
79
78
|
answer = [""]
|
|
@@ -95,27 +94,27 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
95
94
|
logger.info(f"Chain-of-thought: round {round_idx} - answer: {answer}")
|
|
96
95
|
if round_idx < max_iter:
|
|
97
96
|
valid_args = {"query": query, "answer": answer, "context": context}
|
|
98
|
-
valid_user_prompt = render_prompt(
|
|
97
|
+
valid_user_prompt = LLMGateway.render_prompt(
|
|
99
98
|
filename=self.validation_user_prompt_path, context=valid_args
|
|
100
99
|
)
|
|
101
|
-
valid_system_prompt = read_query_prompt(
|
|
100
|
+
valid_system_prompt = LLMGateway.read_query_prompt(
|
|
102
101
|
prompt_file_name=self.validation_system_prompt_path
|
|
103
102
|
)
|
|
104
103
|
|
|
105
|
-
reasoning = await
|
|
104
|
+
reasoning = await LLMGateway.acreate_structured_output(
|
|
106
105
|
text_input=valid_user_prompt,
|
|
107
106
|
system_prompt=valid_system_prompt,
|
|
108
107
|
response_model=str,
|
|
109
108
|
)
|
|
110
109
|
followup_args = {"query": query, "answer": answer, "reasoning": reasoning}
|
|
111
|
-
followup_prompt = render_prompt(
|
|
110
|
+
followup_prompt = LLMGateway.render_prompt(
|
|
112
111
|
filename=self.followup_user_prompt_path, context=followup_args
|
|
113
112
|
)
|
|
114
|
-
followup_system = read_query_prompt(
|
|
113
|
+
followup_system = LLMGateway.read_query_prompt(
|
|
115
114
|
prompt_file_name=self.followup_system_prompt_path
|
|
116
115
|
)
|
|
117
116
|
|
|
118
|
-
followup_question = await
|
|
117
|
+
followup_question = await LLMGateway.acreate_structured_output(
|
|
119
118
|
text_input=followup_prompt, system_prompt=followup_system, response_model=str
|
|
120
119
|
)
|
|
121
120
|
logger.info(
|
|
@@ -10,7 +10,7 @@ from cognee.modules.retrieval.utils.completion import generate_completion
|
|
|
10
10
|
from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
|
|
11
11
|
from cognee.shared.logging_utils import get_logger
|
|
12
12
|
|
|
13
|
-
logger = get_logger()
|
|
13
|
+
logger = get_logger("GraphCompletionRetriever")
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class GraphCompletionRetriever(BaseRetriever):
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from typing import Any, Optional
|
|
3
3
|
|
|
4
|
+
from cognee.shared.logging_utils import get_logger
|
|
4
5
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
5
6
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
6
7
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
7
8
|
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
|
8
9
|
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
|
9
10
|
|
|
11
|
+
logger = get_logger("InsightsRetriever")
|
|
12
|
+
|
|
10
13
|
|
|
11
14
|
class InsightsRetriever(BaseRetriever):
|
|
12
15
|
"""
|
|
@@ -63,6 +66,7 @@ class InsightsRetriever(BaseRetriever):
|
|
|
63
66
|
vector_engine.search("EntityType_name", query_text=query, limit=self.top_k),
|
|
64
67
|
)
|
|
65
68
|
except CollectionNotFoundError as error:
|
|
69
|
+
logger.error("Entity collections not found")
|
|
66
70
|
raise NoDataError("No data found in the system, please add data first.") from error
|
|
67
71
|
|
|
68
72
|
results = [*results[0], *results[1]]
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
from typing import Any, Optional
|
|
2
|
-
import
|
|
2
|
+
from cognee.shared.logging_utils import get_logger
|
|
3
3
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
4
4
|
from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter
|
|
5
|
-
from cognee.infrastructure.llm.
|
|
6
|
-
from cognee.infrastructure.llm.prompts import render_prompt
|
|
5
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
7
6
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
8
7
|
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
|
|
9
8
|
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
|
|
10
9
|
|
|
11
|
-
logger =
|
|
10
|
+
logger = get_logger("NaturalLanguageRetriever")
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
class NaturalLanguageRetriever(BaseRetriever):
|
|
@@ -51,8 +50,7 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
51
50
|
|
|
52
51
|
async def _generate_cypher_query(self, query: str, edge_schemas, previous_attempts=None) -> str:
|
|
53
52
|
"""Generate a Cypher query using LLM based on natural language query and schema information."""
|
|
54
|
-
|
|
55
|
-
system_prompt = render_prompt(
|
|
53
|
+
system_prompt = LLMGateway.render_prompt(
|
|
56
54
|
self.system_prompt_path,
|
|
57
55
|
context={
|
|
58
56
|
"edge_schemas": edge_schemas,
|
|
@@ -60,7 +58,7 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
60
58
|
},
|
|
61
59
|
)
|
|
62
60
|
|
|
63
|
-
return await
|
|
61
|
+
return await LLMGateway.acreate_structured_output(
|
|
64
62
|
text_input=query,
|
|
65
63
|
system_prompt=system_prompt,
|
|
66
64
|
response_model=str,
|
|
@@ -123,16 +121,12 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
123
121
|
- Optional[Any]: Returns the context retrieved from the graph database based on the
|
|
124
122
|
query.
|
|
125
123
|
"""
|
|
126
|
-
|
|
127
|
-
graph_engine = await get_graph_engine()
|
|
124
|
+
graph_engine = await get_graph_engine()
|
|
128
125
|
|
|
129
|
-
|
|
130
|
-
|
|
126
|
+
if isinstance(graph_engine, (NetworkXAdapter)):
|
|
127
|
+
raise SearchTypeNotSupported("Natural language search type not supported.")
|
|
131
128
|
|
|
132
|
-
|
|
133
|
-
except Exception as e:
|
|
134
|
-
logger.error("Failed to execute natural language search retrieval: %s", str(e))
|
|
135
|
-
raise e
|
|
129
|
+
return await self._execute_cypher_query(query, graph_engine)
|
|
136
130
|
|
|
137
131
|
async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
|
|
138
132
|
"""
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
from typing import Any, Optional
|
|
2
2
|
|
|
3
|
+
from cognee.shared.logging_utils import get_logger
|
|
3
4
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
4
5
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
5
6
|
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
|
6
7
|
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
|
7
8
|
|
|
9
|
+
logger = get_logger("SummariesRetriever")
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
class SummariesRetriever(BaseRetriever):
|
|
10
13
|
"""
|
|
@@ -40,16 +43,24 @@ class SummariesRetriever(BaseRetriever):
|
|
|
40
43
|
|
|
41
44
|
- Any: A list of payloads from the retrieved summaries.
|
|
42
45
|
"""
|
|
46
|
+
logger.info(
|
|
47
|
+
f"Starting summary retrieval for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
|
|
48
|
+
)
|
|
49
|
+
|
|
43
50
|
vector_engine = get_vector_engine()
|
|
44
51
|
|
|
45
52
|
try:
|
|
46
53
|
summaries_results = await vector_engine.search(
|
|
47
54
|
"TextSummary_text", query, limit=self.top_k
|
|
48
55
|
)
|
|
56
|
+
logger.info(f"Found {len(summaries_results)} summaries from vector search")
|
|
49
57
|
except CollectionNotFoundError as error:
|
|
58
|
+
logger.error("TextSummary_text collection not found in vector database")
|
|
50
59
|
raise NoDataError("No data found in the system, please add data first.") from error
|
|
51
60
|
|
|
52
|
-
|
|
61
|
+
summary_payloads = [summary.payload for summary in summaries_results]
|
|
62
|
+
logger.info(f"Returning {len(summary_payloads)} summary payloads")
|
|
63
|
+
return summary_payloads
|
|
53
64
|
|
|
54
65
|
async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
|
|
55
66
|
"""
|
|
@@ -70,6 +81,17 @@ class SummariesRetriever(BaseRetriever):
|
|
|
70
81
|
|
|
71
82
|
- Any: The generated completion context, which is either provided or retrieved.
|
|
72
83
|
"""
|
|
84
|
+
logger.info(
|
|
85
|
+
f"Starting completion generation for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
|
|
86
|
+
)
|
|
87
|
+
|
|
73
88
|
if context is None:
|
|
89
|
+
logger.debug("No context provided, retrieving context from vector database")
|
|
74
90
|
context = await self.get_context(query)
|
|
91
|
+
else:
|
|
92
|
+
logger.debug("Using provided context")
|
|
93
|
+
|
|
94
|
+
logger.info(
|
|
95
|
+
f"Returning context with {len(context) if isinstance(context, list) else 1} item(s)"
|
|
96
|
+
)
|
|
75
97
|
return context
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import time
|
|
2
3
|
from typing import List, Optional, Type
|
|
3
4
|
|
|
4
5
|
from cognee.shared.logging_utils import get_logger, ERROR
|
|
@@ -59,13 +60,13 @@ async def get_memory_fragment(
|
|
|
59
60
|
node_name: Optional[List[str]] = None,
|
|
60
61
|
) -> CogneeGraph:
|
|
61
62
|
"""Creates and initializes a CogneeGraph memory fragment with optional property projections."""
|
|
62
|
-
graph_engine = await get_graph_engine()
|
|
63
|
-
memory_fragment = CogneeGraph()
|
|
64
|
-
|
|
65
63
|
if properties_to_project is None:
|
|
66
64
|
properties_to_project = ["id", "description", "name", "type", "text"]
|
|
67
65
|
|
|
68
66
|
try:
|
|
67
|
+
graph_engine = await get_graph_engine()
|
|
68
|
+
memory_fragment = CogneeGraph()
|
|
69
|
+
|
|
69
70
|
await memory_fragment.project_graph_from_db(
|
|
70
71
|
graph_engine,
|
|
71
72
|
node_properties_to_project=properties_to_project,
|
|
@@ -73,7 +74,13 @@ async def get_memory_fragment(
|
|
|
73
74
|
node_type=node_type,
|
|
74
75
|
node_name=node_name,
|
|
75
76
|
)
|
|
77
|
+
|
|
76
78
|
except EntityNotFoundError:
|
|
79
|
+
# This is expected behavior - continue with empty fragment
|
|
80
|
+
pass
|
|
81
|
+
except Exception as e:
|
|
82
|
+
logger.error(f"Error during memory fragment creation: {str(e)}")
|
|
83
|
+
# Still return the fragment even if projection failed
|
|
77
84
|
pass
|
|
78
85
|
|
|
79
86
|
return memory_fragment
|
|
@@ -168,6 +175,8 @@ async def brute_force_search(
|
|
|
168
175
|
return []
|
|
169
176
|
|
|
170
177
|
try:
|
|
178
|
+
start_time = time.time()
|
|
179
|
+
|
|
171
180
|
results = await asyncio.gather(
|
|
172
181
|
*[search_in_collection(collection_name) for collection_name in collections]
|
|
173
182
|
)
|
|
@@ -175,10 +184,20 @@ async def brute_force_search(
|
|
|
175
184
|
if all(not item for item in results):
|
|
176
185
|
return []
|
|
177
186
|
|
|
187
|
+
# Final statistics
|
|
188
|
+
projection_time = time.time() - start_time
|
|
189
|
+
logger.info(
|
|
190
|
+
f"Vector collection retrieval completed: Retrieved distances from {sum(1 for res in results if res)} collections in {projection_time:.2f}s"
|
|
191
|
+
)
|
|
192
|
+
|
|
178
193
|
node_distances = {collection: result for collection, result in zip(collections, results)}
|
|
179
194
|
|
|
195
|
+
edge_distances = node_distances.get("EdgeType_relationship_name", None)
|
|
196
|
+
|
|
180
197
|
await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
|
|
181
|
-
await memory_fragment.map_vector_distances_to_graph_edges(
|
|
198
|
+
await memory_fragment.map_vector_distances_to_graph_edges(
|
|
199
|
+
vector_engine=vector_engine, query_vector=query_vector, edge_distances=edge_distances
|
|
200
|
+
)
|
|
182
201
|
|
|
183
202
|
results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
|
|
184
203
|
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
from cognee.infrastructure.llm.
|
|
2
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
|
|
1
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
3
2
|
|
|
4
3
|
|
|
5
4
|
async def generate_completion(
|
|
@@ -10,11 +9,10 @@ async def generate_completion(
|
|
|
10
9
|
) -> str:
|
|
11
10
|
"""Generates a completion using LLM with given context and prompts."""
|
|
12
11
|
args = {"question": query, "context": context}
|
|
13
|
-
user_prompt = render_prompt(user_prompt_path, args)
|
|
14
|
-
system_prompt = read_query_prompt(system_prompt_path)
|
|
12
|
+
user_prompt = LLMGateway.render_prompt(user_prompt_path, args)
|
|
13
|
+
system_prompt = LLMGateway.read_query_prompt(system_prompt_path)
|
|
15
14
|
|
|
16
|
-
|
|
17
|
-
return await llm_client.acreate_structured_output(
|
|
15
|
+
return await LLMGateway.acreate_structured_output(
|
|
18
16
|
text_input=user_prompt,
|
|
19
17
|
system_prompt=system_prompt,
|
|
20
18
|
response_model=str,
|
|
@@ -26,10 +24,9 @@ async def summarize_text(
|
|
|
26
24
|
prompt_path: str = "summarize_search_results.txt",
|
|
27
25
|
) -> str:
|
|
28
26
|
"""Summarizes text using LLM with the specified prompt."""
|
|
29
|
-
system_prompt = read_query_prompt(prompt_path)
|
|
30
|
-
llm_client = get_llm_client()
|
|
27
|
+
system_prompt = LLMGateway.read_query_prompt(prompt_path)
|
|
31
28
|
|
|
32
|
-
return await
|
|
29
|
+
return await LLMGateway.acreate_structured_output(
|
|
33
30
|
text_input=text,
|
|
34
31
|
system_prompt=system_prompt,
|
|
35
32
|
response_model=str,
|
|
@@ -9,7 +9,7 @@ from cognee.modules.users.methods import get_default_user
|
|
|
9
9
|
from cognee.modules.users.models import User
|
|
10
10
|
from cognee.shared.utils import send_telemetry
|
|
11
11
|
from cognee.modules.search.methods import search
|
|
12
|
-
from cognee.infrastructure.llm.
|
|
12
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
13
13
|
|
|
14
14
|
logger = get_logger(level=ERROR)
|
|
15
15
|
|
|
@@ -71,8 +71,7 @@ async def code_description_to_code_part(
|
|
|
71
71
|
if isinstance(obj, dict) and "description" in obj
|
|
72
72
|
)
|
|
73
73
|
|
|
74
|
-
|
|
75
|
-
context_from_documents = await llm_client.acreate_structured_output(
|
|
74
|
+
context_from_documents = await LLMGateway.acreate_structured_output(
|
|
76
75
|
text_input=f"The retrieved context from documents is {concatenated_descriptions}.",
|
|
77
76
|
system_prompt="You are a Senior Software Engineer, summarize the context from documents"
|
|
78
77
|
f" in a way that it is gonna be provided next to codeparts as context"
|
|
@@ -27,7 +27,7 @@ from cognee.modules.users.models import User
|
|
|
27
27
|
from cognee.modules.data.models import Dataset
|
|
28
28
|
from cognee.shared.utils import send_telemetry
|
|
29
29
|
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
|
|
30
|
-
from cognee.modules.search.operations import log_query, log_result
|
|
30
|
+
from cognee.modules.search.operations import log_query, log_result, select_search_type
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
async def search(
|
|
@@ -129,6 +129,10 @@ async def specific_search(
|
|
|
129
129
|
SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion,
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
+
# If the query type is FEELING_LUCKY, select the search type intelligently
|
|
133
|
+
if query_type is SearchType.FEELING_LUCKY:
|
|
134
|
+
query_type = await select_search_type(query)
|
|
135
|
+
|
|
132
136
|
search_task = search_tasks.get(query_type)
|
|
133
137
|
|
|
134
138
|
if search_task is None:
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from cognee.infrastructure.llm.prompts import read_query_prompt
|
|
2
|
+
from cognee.modules.search.types import SearchType
|
|
3
|
+
from cognee.shared.logging_utils import get_logger
|
|
4
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
5
|
+
|
|
6
|
+
logger = get_logger("SearchTypeSelector")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def select_search_type(
    query: str,
    system_prompt_path: str = "search_type_selector_prompt.txt",
) -> SearchType:
    """
    Analyze the query and select the best search type.

    Args:
        query: The query to analyze.
        system_prompt_path: The path to the system prompt.

    Returns:
        The search type named by the LLM. Falls back to
        SearchType.RAG_COMPLETION when the answer is not the name of a
        concrete search type, or when the LLM call fails.
    """
    default_search_type = SearchType.RAG_COMPLETION
    system_prompt = read_query_prompt(system_prompt_path)

    try:
        response = await LLMGateway.acreate_structured_output(
            text_input=query,
            system_prompt=system_prompt,
            response_model=str,
        )

        # Normalize the answer: surrounding whitespace or quotes would
        # otherwise make a valid name fail the lookup.
        candidate = response.strip().strip("\"'`").upper()

        # Look the member up by NAME. `SearchType(candidate)` would be a lookup
        # by VALUE and only works while every value equals its member name.
        selected = SearchType.__members__.get(candidate)

        # FEELING_LUCKY is the placeholder this function exists to resolve;
        # returning it would leave the caller without a concrete search type.
        if selected is not None and selected is not SearchType.FEELING_LUCKY:
            logger.info(f"Selected lucky search type: {candidate}")
            return selected

        # If the response is not a valid search type, return the default search type
        logger.info(f"LLM gives an invalid search type: {candidate}")
        return default_search_type
    except Exception as e:
        # Best-effort selection: any failure (LLM error, non-string response)
        # degrades to the default instead of failing the whole search.
        logger.error(f"Failed to select search type intelligently from LLM: {str(e)}")
        return default_search_type
|
|
@@ -43,14 +43,6 @@ def get_settings() -> SettingsDict:
|
|
|
43
43
|
llm_config = get_llm_config()
|
|
44
44
|
|
|
45
45
|
vector_dbs = [
|
|
46
|
-
{
|
|
47
|
-
"value": "weaviate",
|
|
48
|
-
"label": "Weaviate",
|
|
49
|
-
},
|
|
50
|
-
{
|
|
51
|
-
"value": "qdrant",
|
|
52
|
-
"label": "Qdrant",
|
|
53
|
-
},
|
|
54
46
|
{
|
|
55
47
|
"value": "lancedb",
|
|
56
48
|
"label": "LanceDB",
|
|
@@ -6,7 +6,7 @@ from cognee.infrastructure.databases.vector import get_vectordb_config
|
|
|
6
6
|
class VectorDBConfig(BaseModel):
|
|
7
7
|
url: str
|
|
8
8
|
api_key: str
|
|
9
|
-
provider: Union[Literal["lancedb"], Literal["
|
|
9
|
+
provider: Union[Literal["lancedb"], Literal["pgvector"]]
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
async def save_vector_db_config(vector_db_config: VectorDBConfig):
|
cognee/shared/data_models.py
CHANGED
|
@@ -4,7 +4,9 @@ from enum import Enum, auto
|
|
|
4
4
|
from typing import Any, Dict, List, Optional, Union
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, Field
|
|
7
|
-
from cognee.infrastructure.llm.config import
|
|
7
|
+
from cognee.infrastructure.llm.config import (
|
|
8
|
+
get_llm_config,
|
|
9
|
+
)
|
|
8
10
|
|
|
9
11
|
if get_llm_config().llm_provider.lower() == "gemini":
|
|
10
12
|
"""
|
cognee/shared/logging_utils.py
CHANGED
|
@@ -175,17 +175,13 @@ def log_database_configuration(logger):
|
|
|
175
175
|
try:
|
|
176
176
|
# Log relational database configuration
|
|
177
177
|
relational_config = get_relational_config()
|
|
178
|
-
logger.info(f"Relational database: {relational_config.db_provider}")
|
|
179
178
|
if relational_config.db_provider == "postgres":
|
|
180
179
|
logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
|
|
181
|
-
logger.info(f"Postgres database: {relational_config.db_name}")
|
|
182
180
|
elif relational_config.db_provider == "sqlite":
|
|
183
181
|
logger.info(f"SQLite path: {relational_config.db_path}")
|
|
184
|
-
logger.info(f"SQLite database: {relational_config.db_name}")
|
|
185
182
|
|
|
186
183
|
# Log vector database configuration
|
|
187
184
|
vector_config = get_vectordb_config()
|
|
188
|
-
logger.info(f"Vector database: {vector_config.vector_db_provider}")
|
|
189
185
|
if vector_config.vector_db_provider == "lancedb":
|
|
190
186
|
logger.info(f"Vector database path: {vector_config.vector_db_url}")
|
|
191
187
|
else:
|
|
@@ -193,7 +189,6 @@ def log_database_configuration(logger):
|
|
|
193
189
|
|
|
194
190
|
# Log graph database configuration
|
|
195
191
|
graph_config = get_graph_config()
|
|
196
|
-
logger.info(f"Graph database: {graph_config.graph_database_provider}")
|
|
197
192
|
if graph_config.graph_database_provider == "kuzu":
|
|
198
193
|
logger.info(f"Graph database path: {graph_config.graph_file_path}")
|
|
199
194
|
else:
|
|
@@ -7,7 +7,7 @@ from pydantic import BaseModel
|
|
|
7
7
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
8
8
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
9
9
|
from cognee.infrastructure.engine.models import DataPoint
|
|
10
|
-
from cognee.
|
|
10
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
11
11
|
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
|
12
12
|
|
|
13
13
|
|
|
@@ -40,7 +40,7 @@ async def chunk_naive_llm_classifier(
|
|
|
40
40
|
return data_chunks
|
|
41
41
|
|
|
42
42
|
chunk_classifications = await asyncio.gather(
|
|
43
|
-
*[extract_categories(chunk.text, classification_model) for chunk in data_chunks],
|
|
43
|
+
*[LLMGateway.extract_categories(chunk.text, classification_model) for chunk in data_chunks],
|
|
44
44
|
)
|
|
45
45
|
|
|
46
46
|
classification_data_points = []
|
|
@@ -8,7 +8,6 @@ from cognee.modules.data.models import Data
|
|
|
8
8
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
9
9
|
from cognee.modules.chunking.TextChunker import TextChunker
|
|
10
10
|
from cognee.modules.chunking.Chunker import Chunker
|
|
11
|
-
from cognee.modules.data.processing.document_types.exceptions.exceptions import PyPdfInternalError
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
async def update_document_token_count(document_id: UUID, token_count: int) -> None:
|
|
@@ -40,15 +39,14 @@ async def extract_chunks_from_documents(
|
|
|
40
39
|
"""
|
|
41
40
|
for document in documents:
|
|
42
41
|
document_token_count = 0
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
pass
|
|
42
|
+
|
|
43
|
+
async for document_chunk in document.read(
|
|
44
|
+
max_chunk_size=max_chunk_size, chunker_cls=chunker
|
|
45
|
+
):
|
|
46
|
+
document_token_count += document_chunk.chunk_size
|
|
47
|
+
document_chunk.belongs_to_set = document.belongs_to_set
|
|
48
|
+
yield document_chunk
|
|
49
|
+
|
|
50
|
+
await update_document_token_count(document.id, document_token_count)
|
|
51
|
+
|
|
54
52
|
# todo rita
|
|
@@ -6,8 +6,7 @@ from pydantic import BaseModel
|
|
|
6
6
|
from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
|
|
7
7
|
from cognee.modules.engine.models import Entity
|
|
8
8
|
from cognee.modules.engine.models.EntityType import EntityType
|
|
9
|
-
from cognee.infrastructure.llm.
|
|
10
|
-
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
9
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
11
10
|
|
|
12
11
|
logger = get_logger("llm_entity_extractor")
|
|
13
12
|
|
|
@@ -51,11 +50,10 @@ class LLMEntityExtractor(BaseEntityExtractor):
|
|
|
51
50
|
try:
|
|
52
51
|
logger.info(f"Extracting entities from text: {text[:100]}...")
|
|
53
52
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
system_prompt = read_query_prompt(self.system_prompt_template)
|
|
53
|
+
user_prompt = LLMGateway.render_prompt(self.user_prompt_template, {"text": text})
|
|
54
|
+
system_prompt = LLMGateway.read_query_prompt(self.system_prompt_template)
|
|
57
55
|
|
|
58
|
-
response = await
|
|
56
|
+
response = await LLMGateway.acreate_structured_output(
|
|
59
57
|
text_input=user_prompt,
|
|
60
58
|
system_prompt=system_prompt,
|
|
61
59
|
response_model=EntityList,
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
from typing import List, Tuple
|
|
2
2
|
from pydantic import BaseModel
|
|
3
3
|
|
|
4
|
-
from cognee.infrastructure.llm.
|
|
5
|
-
from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
|
|
4
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
6
5
|
from cognee.root_dir import get_absolute_path
|
|
7
6
|
|
|
8
7
|
|
|
@@ -17,7 +16,6 @@ async def extract_content_nodes_and_relationship_names(
|
|
|
17
16
|
content: str, existing_nodes: List[str], n_rounds: int = 2
|
|
18
17
|
) -> Tuple[List[str], List[str]]:
|
|
19
18
|
"""Extracts node names and relationship_names from content through multiple rounds of analysis."""
|
|
20
|
-
llm_client = get_llm_client()
|
|
21
19
|
all_nodes: List[str] = existing_nodes.copy()
|
|
22
20
|
all_relationship_names: List[str] = []
|
|
23
21
|
existing_node_set = {node.lower() for node in all_nodes}
|
|
@@ -34,15 +32,15 @@ async def extract_content_nodes_and_relationship_names(
|
|
|
34
32
|
}
|
|
35
33
|
|
|
36
34
|
base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
|
|
37
|
-
text_input = render_prompt(
|
|
35
|
+
text_input = LLMGateway.render_prompt(
|
|
38
36
|
"extract_graph_relationship_names_prompt_input.txt",
|
|
39
37
|
context,
|
|
40
38
|
base_directory=base_directory,
|
|
41
39
|
)
|
|
42
|
-
system_prompt = read_query_prompt(
|
|
40
|
+
system_prompt = LLMGateway.read_query_prompt(
|
|
43
41
|
"extract_graph_relationship_names_prompt_system.txt", base_directory=base_directory
|
|
44
42
|
)
|
|
45
|
-
response = await
|
|
43
|
+
response = await LLMGateway.acreate_structured_output(
|
|
46
44
|
text_input=text_input,
|
|
47
45
|
system_prompt=system_prompt,
|
|
48
46
|
response_model=PotentialNodesAndRelationshipNames,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from typing import List
|
|
2
|
-
|
|
3
|
-
from cognee.infrastructure.llm.
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
4
4
|
from cognee.shared.data_models import KnowledgeGraph
|
|
5
5
|
from cognee.root_dir import get_absolute_path
|
|
6
6
|
|
|
@@ -9,7 +9,6 @@ async def extract_edge_triplets(
|
|
|
9
9
|
content: str, nodes: List[str], relationship_names: List[str], n_rounds: int = 2
|
|
10
10
|
) -> KnowledgeGraph:
|
|
11
11
|
"""Creates a knowledge graph by identifying relationships between the provided nodes."""
|
|
12
|
-
llm_client = get_llm_client()
|
|
13
12
|
final_graph = KnowledgeGraph(nodes=[], edges=[])
|
|
14
13
|
existing_nodes = set()
|
|
15
14
|
existing_node_ids = set()
|
|
@@ -27,13 +26,13 @@ async def extract_edge_triplets(
|
|
|
27
26
|
}
|
|
28
27
|
|
|
29
28
|
base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
|
|
30
|
-
text_input = render_prompt(
|
|
29
|
+
text_input = LLMGateway.render_prompt(
|
|
31
30
|
"extract_graph_edge_triplets_prompt_input.txt", context, base_directory=base_directory
|
|
32
31
|
)
|
|
33
|
-
system_prompt = read_query_prompt(
|
|
32
|
+
system_prompt = LLMGateway.read_query_prompt(
|
|
34
33
|
"extract_graph_edge_triplets_prompt_system.txt", base_directory=base_directory
|
|
35
34
|
)
|
|
36
|
-
extracted_graph = await
|
|
35
|
+
extracted_graph = await LLMGateway.acreate_structured_output(
|
|
37
36
|
text_input=text_input, system_prompt=system_prompt, response_model=KnowledgeGraph
|
|
38
37
|
)
|
|
39
38
|
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
from pydantic import BaseModel
|
|
3
3
|
|
|
4
|
-
from cognee.
|
|
5
|
-
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
6
|
-
from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
|
|
4
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
7
5
|
from cognee.root_dir import get_absolute_path
|
|
8
6
|
|
|
9
7
|
|
|
@@ -15,7 +13,6 @@ class PotentialNodes(BaseModel):
|
|
|
15
13
|
|
|
16
14
|
async def extract_nodes(text: str, n_rounds: int = 2) -> List[str]:
|
|
17
15
|
"""Extracts node names from content through multiple rounds of analysis."""
|
|
18
|
-
llm_client = get_llm_client()
|
|
19
16
|
all_nodes: List[str] = []
|
|
20
17
|
existing_nodes = set()
|
|
21
18
|
|
|
@@ -27,13 +24,13 @@ async def extract_nodes(text: str, n_rounds: int = 2) -> List[str]:
|
|
|
27
24
|
"text": text,
|
|
28
25
|
}
|
|
29
26
|
base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
|
|
30
|
-
text_input = render_prompt(
|
|
27
|
+
text_input = LLMGateway.render_prompt(
|
|
31
28
|
"extract_graph_nodes_prompt_input.txt", context, base_directory=base_directory
|
|
32
29
|
)
|
|
33
|
-
system_prompt = read_query_prompt(
|
|
30
|
+
system_prompt = LLMGateway.read_query_prompt(
|
|
34
31
|
"extract_graph_nodes_prompt_system.txt", base_directory=base_directory
|
|
35
32
|
)
|
|
36
|
-
response = await
|
|
33
|
+
response = await LLMGateway.acreate_structured_output(
|
|
37
34
|
text_input=text_input, system_prompt=system_prompt, response_model=PotentialNodes
|
|
38
35
|
)
|
|
39
36
|
|