cognee 0.2.3.dev0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__main__.py +4 -0
- cognee/api/v1/add/add.py +18 -11
- cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
- cognee/api/v1/cognify/cognify.py +22 -115
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/config/config.py +5 -13
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
- cognee/api/v1/delete/delete.py +1 -1
- cognee/api/v1/exceptions/__init__.py +13 -0
- cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/search.py +6 -7
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/exceptions/__init__.py +5 -5
- cognee/exceptions/exceptions.py +37 -17
- cognee/infrastructure/data/exceptions/__init__.py +7 -0
- cognee/infrastructure/data/exceptions/exceptions.py +22 -0
- cognee/infrastructure/data/utils/extract_keywords.py +3 -3
- cognee/infrastructure/databases/exceptions/__init__.py +3 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -8
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +14 -5
- cognee/infrastructure/llm/config.py +5 -5
- cognee/infrastructure/llm/exceptions.py +30 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +6 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +9 -7
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/modules/data/exceptions/exceptions.py +18 -5
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/delete_data.py +2 -4
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
- cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
- cognee/modules/graph/exceptions/__init__.py +2 -0
- cognee/modules/graph/exceptions/exceptions.py +25 -3
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/ingestion/exceptions/exceptions.py +2 -2
- cognee/modules/ontology/exceptions/exceptions.py +4 -4
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/exceptions.py +2 -2
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +23 -138
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/exceptions/exceptions.py +12 -6
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
- cognee/modules/retrieval/graph_completion_retriever.py +89 -5
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/exceptions/__init__.py +7 -0
- cognee/modules/search/exceptions/exceptions.py +15 -0
- cognee/modules/search/methods/search.py +47 -7
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/users/exceptions/exceptions.py +6 -6
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/exceptions/exceptions.py +2 -2
- cognee/shared/logging_utils.py +142 -31
- cognee/shared/utils.py +0 -1
- cognee/tasks/completion/exceptions/exceptions.py +3 -3
- cognee/tasks/documents/classify_documents.py +4 -0
- cognee/tasks/documents/exceptions/__init__.py +11 -0
- cognee/tasks/documents/exceptions/exceptions.py +36 -0
- cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
- cognee/tasks/graph/exceptions/__init__.py +12 -0
- cognee/tasks/graph/exceptions/exceptions.py +41 -0
- cognee/tasks/graph/extract_graph_from_data.py +34 -2
- cognee/tasks/ingestion/exceptions/__init__.py +8 -0
- cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
- cognee/tasks/ingestion/resolve_data_directories.py +5 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
- cognee/tasks/storage/add_data_points.py +41 -3
- cognee/tasks/storage/exceptions/__init__.py +9 -0
- cognee/tasks/storage/exceptions/exceptions.py +13 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/summarization/exceptions/__init__.py +9 -0
- cognee/tasks/summarization/exceptions/exceptions.py +14 -0
- cognee/tasks/summarization/summarize_text.py +8 -1
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/test_delete_by_id.py +1 -1
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_search_db.py +126 -7
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
- cognee/tests/unit/modules/search/search_methods_test.py +4 -2
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/RECORD +172 -121
- cognee-0.2.4.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
- cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/infrastructure/pipeline/models/__init__.py +0 -0
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
CHANGED

@@ -9,7 +9,7 @@ from sqlalchemy.exc import ProgrammingError
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
 from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError
 
-
+
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.engine import DataPoint
 from cognee.infrastructure.engine.utils import parse_id
@@ -17,6 +17,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
 
 from distributed.utils import override_distributed
 from distributed.tasks.queued_add_data_points import queued_add_data_points
+from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
 
 from ...relational.ModelBase import Base
 from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
@@ -275,7 +276,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
             return metadata.tables[collection_name]
         else:
             raise CollectionNotFoundError(
-                f"Collection '{collection_name}' not found!",
+                f"Collection '{collection_name}' not found!",
             )
 
     async def retrieve(self, collection_name: str, data_point_ids: List[str]):
@@ -302,7 +303,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         with_vector: bool = False,
     ) -> List[ScoredResult]:
         if query_text is None and query_vector is None:
-            raise
+            raise MissingQueryParameterError()
 
         if query_text and not query_vector:
             query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
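Reviewer note: the practical effect of the search() change is a typed, catchable error instead of a bare `raise` (which surfaces as "RuntimeError: No active exception to re-raise"). A minimal caller-side sketch, assuming the keyword signature shown in the hunk:

from cognee.infrastructure.databases.exceptions import MissingQueryParameterError

async def safe_search(adapter, collection_name: str):
    try:
        # Neither query_text nor query_vector is supplied here.
        return await adapter.search(collection_name=collection_name)
    except MissingQueryParameterError:
        # Before this change the same call path ended in a bare `raise`,
        # which is much harder to catch deliberately.
        return []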
cognee/infrastructure/files/utils/get_data_file_path.py
CHANGED

@@ -5,19 +5,24 @@ from urllib.parse import urlparse
 def get_data_file_path(file_path: str):
     # Check if this is a file URI BEFORE normalizing (which corrupts URIs)
     if file_path.startswith("file://"):
+        # Remove first occurrence of file:// prefix
+        pure_file_path = file_path.replace("file://", "", 1)
         # Normalize the file URI for Windows - replace backslashes with forward slashes
-        normalized_file_uri = os.path.normpath(
+        normalized_file_uri = os.path.normpath(pure_file_path)
 
-
-
-        # Convert URI path to file system path
+        # Convert path to proper file system path
         if os.name == "nt":  # Windows
             # Handle Windows drive letters correctly
-            fs_path =
-            if
-            fs_path
-
-
+            fs_path = normalized_file_uri
+            if (
+                (fs_path.startswith("/") or fs_path.startswith("\\"))
+                and len(fs_path) > 1
+                and fs_path[2] == ":"
+            ):
+                fs_path = fs_path[1:]
+        else:
+            # Unix - like systems
+            fs_path = normalized_file_uri
 
         # Now split the actual filesystem path
         actual_fs_path = os.path.normpath(fs_path)
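Reviewer note: the new branch strips the scheme exactly once before normalization, so the URI prefix no longer corrupts the resulting path. A sketch of the expected behavior on a Unix host (the exact return shape depends on the splitting logic after the hunk, so treat the output as illustrative):

from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path

# "file://" is removed once, then os.path.normpath runs on the remainder,
# yielding "/home/user/data/report.pdf" as the filesystem path being split.
get_data_file_path("file:///home/user/data/report.pdf")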
cognee/infrastructure/files/utils/get_file_metadata.py
CHANGED

@@ -1,6 +1,7 @@
 import io
 import os.path
 from typing import BinaryIO, TypedDict
+from pathlib import Path
 
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash
@@ -55,7 +56,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
     file_type = guess_file_type(file)
 
     file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
-    file_name =
+    file_name = Path(file_path).stem if file_path else None
 
     # Get file size
     pos = file.tell()  # remember current pointer
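Reviewer note: `Path(file_path).stem` is standard pathlib behavior, shown here for clarity:

from pathlib import Path

Path("/tmp/docs/report.pdf").stem  # "report" (directory and extension dropped)
Path("archive.tar.gz").stem        # "archive.tar" (only the last suffix is dropped)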
cognee/infrastructure/llm/LLMGateway.py
CHANGED

@@ -1,6 +1,5 @@
-from typing import Type
+from typing import Type, Optional, Coroutine
 from pydantic import BaseModel
-from typing import Coroutine
 from cognee.infrastructure.llm import get_llm_config
 
 
@@ -79,7 +78,10 @@ class LLMGateway:
 
     @staticmethod
     def extract_content_graph(
-        content: str,
+        content: str,
+        response_model: Type[BaseModel],
+        mode: str = "simple",
+        custom_prompt: Optional[str] = None,
     ) -> Coroutine:
         llm_config = get_llm_config()
         if llm_config.structured_output_framework.upper() == "BAML":
@@ -87,13 +89,20 @@ class LLMGateway:
                 extract_content_graph,
             )
 
-            return extract_content_graph(
+            return extract_content_graph(
+                content=content,
+                response_model=response_model,
+                mode=mode,
+                custom_prompt=custom_prompt,
+            )
         else:
             from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
                 extract_content_graph,
             )
 
-            return extract_content_graph(
+            return extract_content_graph(
+                content=content, response_model=response_model, custom_prompt=custom_prompt
+            )
 
     @staticmethod
     def extract_categories(content: str, response_model: Type[BaseModel]) -> Coroutine:
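Reviewer note: a hedged usage sketch for the widened signature; the response model below is hypothetical, only the parameter names come from the hunk:

from pydantic import BaseModel
from cognee.infrastructure.llm.LLMGateway import LLMGateway

class KnowledgeGraph(BaseModel):  # hypothetical model for illustration
    nodes: list
    edges: list

# mode and custom_prompt are the new knobs; omitting them keeps the old behavior.
coro = LLMGateway.extract_content_graph(
    content="Ada Lovelace worked with Charles Babbage.",
    response_model=KnowledgeGraph,
    mode="simple",
)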
cognee/infrastructure/llm/config.py
CHANGED

@@ -18,7 +18,7 @@ class LLMConfig(BaseSettings):
     - llm_api_version
     - llm_temperature
     - llm_streaming
-    -
+    - llm_max_completion_tokens
     - transcription_model
     - graph_prompt_path
     - llm_rate_limit_enabled
@@ -35,16 +35,16 @@ class LLMConfig(BaseSettings):
 
     structured_output_framework: str = "instructor"
     llm_provider: str = "openai"
-    llm_model: str = "gpt-
+    llm_model: str = "gpt-5-mini"
     llm_endpoint: str = ""
     llm_api_key: Optional[str] = None
     llm_api_version: Optional[str] = None
     llm_temperature: float = 0.0
     llm_streaming: bool = False
-
+    llm_max_completion_tokens: int = 16384
 
     baml_llm_provider: str = "openai"
-    baml_llm_model: str = "gpt-
+    baml_llm_model: str = "gpt-5-mini"
    baml_llm_endpoint: str = ""
     baml_llm_api_key: Optional[str] = None
     baml_llm_temperature: float = 0.0
@@ -171,7 +171,7 @@ class LLMConfig(BaseSettings):
             "api_version": self.llm_api_version,
             "temperature": self.llm_temperature,
             "streaming": self.llm_streaming,
-            "
+            "max_completion_tokens": self.llm_max_completion_tokens,
             "transcription_model": self.transcription_model,
             "graph_prompt_path": self.graph_prompt_path,
             "rate_limit_enabled": self.llm_rate_limit_enabled,
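Reviewer note: since LLMConfig is a pydantic BaseSettings subclass, the new field should be overridable from the environment. A sketch assuming the default field-name-to-env-var mapping and that the variable is set before the config is first read (and possibly cached):

import os

os.environ["LLM_MAX_COMPLETION_TOKENS"] = "8192"  # assumption: default env mapping

from cognee.infrastructure.llm.config import get_llm_config

get_llm_config().llm_max_completion_tokens  # 8192 if the mapping and read order hold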
cognee/infrastructure/llm/exceptions.py
CHANGED

@@ -1,5 +1,33 @@
-from cognee.exceptions.exceptions import
+from cognee.exceptions.exceptions import CogneeValidationError
 
 
-class ContentPolicyFilterError(
+class ContentPolicyFilterError(CogneeValidationError):
     pass
+
+
+class LLMAPIKeyNotSetError(CogneeValidationError):
+    """
+    Raised when the LLM API key is not set in the configuration.
+    """
+
+    def __init__(self, message: str = "LLM API key is not set."):
+        super().__init__(message=message, name="LLMAPIKeyNotSetError")
+
+
+class UnsupportedLLMProviderError(CogneeValidationError):
+    """
+    Raised when an unsupported LLM provider is specified in the configuration.
+    """
+
+    def __init__(self, provider: str):
+        message = f"Unsupported LLM provider: {provider}"
+        super().__init__(message=message, name="UnsupportedLLMProviderError")
+
+
+class MissingSystemPromptPathError(CogneeValidationError):
+    def __init__(
+        self,
+        name: str = "MissingSystemPromptPathError",
+    ):
+        message = "No system prompt path provided."
+        super().__init__(message, name)
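Reviewer note: the point of the new classes is that callers can branch on configuration failures by type instead of parsing messages. A small sketch using only the names defined above:

from cognee.infrastructure.llm.exceptions import (
    LLMAPIKeyNotSetError,
    UnsupportedLLMProviderError,
)

def hint_for(error: Exception) -> str:
    if isinstance(error, LLMAPIKeyNotSetError):
        return "set the LLM API key in the cognee configuration"
    if isinstance(error, UnsupportedLLMProviderError):
        return "check the llm_provider setting"
    raise error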
cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Type
+from typing import Type, Optional
 from pydantic import BaseModel
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.shared.logging_utils import get_logger, setup_logging
@@ -6,7 +6,10 @@ from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.asyn
 
 
 async def extract_content_graph(
-    content: str,
+    content: str,
+    response_model: Type[BaseModel],
+    mode: str = "simple",
+    custom_prompt: Optional[str] = None,
 ):
     config = get_llm_config()
     setup_logging()
@@ -26,8 +29,16 @@ async def extract_content_graph(
     # return graph
 
     # else:
-
-
-
+    if custom_prompt:
+        graph = await b.ExtractContentGraphGeneric(
+            content,
+            mode="custom",
+            custom_prompt_content=custom_prompt,
+            baml_options={"client_registry": config.baml_registry},
+        )
+    else:
+        graph = await b.ExtractContentGraphGeneric(
+            content, mode=mode, baml_options={"client_registry": config.baml_registry}
+        )
 
     return graph
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py
CHANGED

@@ -1,5 +1,5 @@
 import os
-from typing import Type
+from typing import Type, Optional
 from pydantic import BaseModel
 
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
@@ -8,21 +8,25 @@ from cognee.infrastructure.llm.config import (
 )
 
 
-async def extract_content_graph(
-
-
-
-
-    # Check if the prompt path is an absolute path or just a filename
-    if os.path.isabs(prompt_path):
-        # directory containing the file
-        base_directory = os.path.dirname(prompt_path)
-        # just the filename itself
-        prompt_path = os.path.basename(prompt_path)
+async def extract_content_graph(
+    content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None
+):
+    if custom_prompt:
+        system_prompt = custom_prompt
     else:
-
-
-
+        llm_config = get_llm_config()
+        prompt_path = llm_config.graph_prompt_path
+
+        # Check if the prompt path is an absolute path or just a filename
+        if os.path.isabs(prompt_path):
+            # directory containing the file
+            base_directory = os.path.dirname(prompt_path)
+            # just the filename itself
+            prompt_path = os.path.basename(prompt_path)
+        else:
+            base_directory = None
+
+        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
 
     content_graph = await LLMGateway.acreate_structured_output(
         content, system_prompt, response_model
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
CHANGED

@@ -2,7 +2,7 @@ from typing import Type
 from pydantic import BaseModel
 import instructor
 
-from cognee.exceptions import
+from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
@@ -23,7 +23,7 @@ class AnthropicAdapter(LLMInterface):
     name = "Anthropic"
     model: str
 
-    def __init__(self,
+    def __init__(self, max_completion_tokens: int, model: str = None):
         import anthropic
 
         self.aclient = instructor.patch(
@@ -31,7 +31,7 @@ class AnthropicAdapter(LLMInterface):
         )
 
         self.model = model
-        self.
+        self.max_completion_tokens = max_completion_tokens
 
     @sleep_and_retry_async()
     @rate_limit_async
@@ -57,7 +57,7 @@ class AnthropicAdapter(LLMInterface):
 
         return await self.aclient(
             model=self.model,
-
+            max_completion_tokens=4096,
             max_retries=5,
             messages=[
                 {
@@ -89,7 +89,7 @@ class AnthropicAdapter(LLMInterface):
         if not text_input:
             text_input = "No user input provided."
         if not system_prompt:
-            raise
+            raise MissingSystemPromptPathError()
 
         system_prompt = LLMGateway.read_query_prompt(system_prompt)
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
CHANGED

@@ -5,7 +5,7 @@ from litellm import acompletion, JSONSchemaValidationError
 
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.observability.get_observe import get_observe
-from cognee.exceptions import
+from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
@@ -34,7 +34,7 @@ class GeminiAdapter(LLMInterface):
         self,
         api_key: str,
         model: str,
-
+        max_completion_tokens: int,
         endpoint: Optional[str] = None,
         api_version: Optional[str] = None,
         streaming: bool = False,
@@ -44,7 +44,7 @@ class GeminiAdapter(LLMInterface):
         self.endpoint = endpoint
         self.api_version = api_version
         self.streaming = streaming
-        self.
+        self.max_completion_tokens = max_completion_tokens
 
     @observe(as_type="generation")
     @sleep_and_retry_async()
@@ -90,7 +90,7 @@ class GeminiAdapter(LLMInterface):
             model=f"{self.model}",
             messages=messages,
             api_key=self.api_key,
-
+            max_completion_tokens=self.max_completion_tokens,
             temperature=0.1,
             response_format=response_schema,
             timeout=100,
@@ -118,7 +118,7 @@ class GeminiAdapter(LLMInterface):
         """
         Format and display the prompt for a user query.
 
-        Raises an
+        Raises an MissingQueryParameterError if no system prompt is provided.
 
         Parameters:
         -----------
@@ -135,7 +135,7 @@ class GeminiAdapter(LLMInterface):
         if not text_input:
             text_input = "No user input provided."
         if not system_prompt:
-            raise
+            raise MissingSystemPromptPathError()
         system_prompt = LLMGateway.read_query_prompt(system_prompt)
 
         formatted_prompt = (
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
CHANGED

@@ -41,7 +41,7 @@ class GenericAPIAdapter(LLMInterface):
         api_key: str,
         model: str,
         name: str,
-
+        max_completion_tokens: int,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
@@ -50,7 +50,7 @@ class GenericAPIAdapter(LLMInterface):
         self.model = model
         self.api_key = api_key
         self.endpoint = endpoint
-        self.
+        self.max_completion_tokens = max_completion_tokens
 
         self.fallback_model = fallback_model
         self.fallback_api_key = fallback_api_key
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
CHANGED

@@ -2,11 +2,14 @@
 
 from enum import Enum
 
-from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.llm import get_llm_config
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import (
     OllamaAPIAdapter,
 )
+from cognee.infrastructure.llm.exceptions import (
+    LLMAPIKeyNotSetError,
+    UnsupportedLLMProviderError,
+)
 
 
 # Define an Enum for LLM Providers
@@ -35,7 +38,7 @@ def get_llm_client():
 
     This function retrieves the configuration for the LLM provider and model, and
     initializes the appropriate LLM client adapter accordingly. It raises an
-
+    LLMAPIKeyNotSetError if the LLM API key is not set for certain providers or if the provider
     is unsupported.
 
     Returns:
@@ -51,15 +54,19 @@ def get_llm_client():
     # Check if max_token value is defined in liteLLM for given model
     # if not use value from cognee configuration
     from cognee.infrastructure.llm.utils import (
-
+        get_model_max_completion_tokens,
     )  # imported here to avoid circular imports
 
-
-
+    model_max_completion_tokens = get_model_max_completion_tokens(llm_config.llm_model)
+    max_completion_tokens = (
+        model_max_completion_tokens
+        if model_max_completion_tokens
+        else llm_config.llm_max_completion_tokens
+    )
 
     if provider == LLMProvider.OPENAI:
         if llm_config.llm_api_key is None:
-            raise
+            raise LLMAPIKeyNotSetError()
 
         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
             OpenAIAdapter,
@@ -71,7 +78,7 @@ def get_llm_client():
             api_version=llm_config.llm_api_version,
             model=llm_config.llm_model,
             transcription_model=llm_config.transcription_model,
-
+            max_completion_tokens=max_completion_tokens,
             streaming=llm_config.llm_streaming,
             fallback_api_key=llm_config.fallback_api_key,
             fallback_endpoint=llm_config.fallback_endpoint,
@@ -80,7 +87,7 @@ def get_llm_client():
 
     elif provider == LLMProvider.OLLAMA:
         if llm_config.llm_api_key is None:
-            raise
+            raise LLMAPIKeyNotSetError()
 
         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
             GenericAPIAdapter,
@@ -91,7 +98,7 @@ def get_llm_client():
             llm_config.llm_api_key,
             llm_config.llm_model,
             "Ollama",
-
+            max_completion_tokens=max_completion_tokens,
         )
 
     elif provider == LLMProvider.ANTHROPIC:
@@ -99,11 +106,13 @@ def get_llm_client():
             AnthropicAdapter,
         )
 
-        return AnthropicAdapter(
+        return AnthropicAdapter(
+            max_completion_tokens=max_completion_tokens, model=llm_config.llm_model
+        )
 
     elif provider == LLMProvider.CUSTOM:
         if llm_config.llm_api_key is None:
-            raise
+            raise LLMAPIKeyNotSetError()
 
         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
             GenericAPIAdapter,
@@ -114,7 +123,7 @@ def get_llm_client():
             llm_config.llm_api_key,
             llm_config.llm_model,
             "Custom",
-
+            max_completion_tokens=max_completion_tokens,
             fallback_api_key=llm_config.fallback_api_key,
             fallback_endpoint=llm_config.fallback_endpoint,
             fallback_model=llm_config.fallback_model,
@@ -122,7 +131,7 @@ def get_llm_client():
 
     elif provider == LLMProvider.GEMINI:
         if llm_config.llm_api_key is None:
-            raise
+            raise LLMAPIKeyNotSetError()
 
         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import (
             GeminiAdapter,
@@ -131,11 +140,11 @@ def get_llm_client():
         return GeminiAdapter(
             api_key=llm_config.llm_api_key,
             model=llm_config.llm_model,
-
+            max_completion_tokens=max_completion_tokens,
             endpoint=llm_config.llm_endpoint,
             api_version=llm_config.llm_api_version,
             streaming=llm_config.llm_streaming,
         )
 
     else:
-        raise
+        raise UnsupportedLLMProviderError(provider)
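Reviewer note: the token-budget resolution added above is a two-step fallback; isolated, the logic is:

def resolve_max_completion_tokens(model_value, configured_default: int) -> int:
    # model_value is what get_model_max_completion_tokens returns
    # (falsy when liteLLM has no entry for the model).
    return model_value if model_value else configured_default

assert resolve_max_completion_tokens(None, 16384) == 16384  # unknown model: config wins
assert resolve_max_completion_tokens(8192, 16384) == 8192   # known model: liteLLM wins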
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
CHANGED

@@ -30,16 +30,18 @@ class OllamaAPIAdapter(LLMInterface):
     - model
     - api_key
     - endpoint
-    -
+    - max_completion_tokens
     - aclient
     """
 
-    def __init__(
+    def __init__(
+        self, endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int
+    ):
         self.name = name
         self.model = model
         self.api_key = api_key
         self.endpoint = endpoint
-        self.
+        self.max_completion_tokens = max_completion_tokens
 
         self.aclient = instructor.from_openai(
             OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
@@ -159,7 +161,7 @@ class OllamaAPIAdapter(LLMInterface):
                     ],
                 }
             ],
-
+            max_completion_tokens=300,
         )
 
         # Ensure response is valid before accessing .choices[0].message.content
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
CHANGED

@@ -7,12 +7,14 @@ from openai import ContentFilterFinishReasonError
 from litellm.exceptions import ContentPolicyViolationError
 from instructor.exceptions import InstructorRetryException
 
-from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
-from cognee.infrastructure.llm.exceptions import
+from cognee.infrastructure.llm.exceptions import (
+    ContentPolicyFilterError,
+    MissingSystemPromptPathError,
+)
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
@@ -62,7 +64,7 @@ class OpenAIAdapter(LLMInterface):
         api_version: str,
         model: str,
         transcription_model: str,
-
+        max_completion_tokens: int,
         streaming: bool = False,
         fallback_model: str = None,
         fallback_api_key: str = None,
@@ -75,7 +77,7 @@ class OpenAIAdapter(LLMInterface):
         self.api_key = api_key
         self.endpoint = endpoint
         self.api_version = api_version
-        self.
+        self.max_completion_tokens = max_completion_tokens
         self.streaming = streaming
 
         self.fallback_model = fallback_model
@@ -299,7 +301,7 @@ class OpenAIAdapter(LLMInterface):
             api_key=self.api_key,
             api_base=self.endpoint,
             api_version=self.api_version,
-
+            max_completion_tokens=300,
             max_retries=self.MAX_RETRIES,
         )
 
@@ -308,7 +310,7 @@ class OpenAIAdapter(LLMInterface):
         Format and display the prompt for a user query.
 
         This method formats the prompt using the provided user input and system prompt,
-        returning a string representation. Raises
+        returning a string representation. Raises MissingSystemPromptPathError if the system prompt is not
         provided.
 
         Parameters:
@@ -325,7 +327,7 @@ class OpenAIAdapter(LLMInterface):
         if not text_input:
             text_input = "No user input provided."
         if not system_prompt:
-            raise
+            raise MissingSystemPromptPathError()
         system_prompt = LLMGateway.read_query_prompt(system_prompt)
 
         formatted_prompt = (
cognee/infrastructure/llm/tokenizer/Gemini/adapter.py
CHANGED

@@ -17,10 +17,10 @@ class GeminiTokenizer(TokenizerInterface):
     def __init__(
         self,
         model: str,
-
+        max_completion_tokens: int = 3072,
     ):
         self.model = model
-        self.
+        self.max_completion_tokens = max_completion_tokens
 
         # Get LLM API key from config
         from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py
CHANGED

@@ -14,17 +14,17 @@ class HuggingFaceTokenizer(TokenizerInterface):
 
     Instance variables include:
     - model: str
-    -
+    - max_completion_tokens: int
     - tokenizer: AutoTokenizer
     """
 
     def __init__(
         self,
         model: str,
-
+        max_completion_tokens: int = 512,
     ):
         self.model = model
-        self.
+        self.max_completion_tokens = max_completion_tokens
 
         # Import here to make it an optional dependency
         from transformers import AutoTokenizer