cognee 0.2.3.dev0__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179)
  1. cognee/__main__.py +4 -0
  2. cognee/api/v1/add/add.py +18 -11
  3. cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
  4. cognee/api/v1/cognify/cognify.py +22 -115
  5. cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
  6. cognee/api/v1/config/config.py +5 -13
  7. cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
  8. cognee/api/v1/delete/delete.py +1 -1
  9. cognee/api/v1/exceptions/__init__.py +13 -0
  10. cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
  11. cognee/api/v1/responses/default_tools.py +4 -0
  12. cognee/api/v1/responses/dispatch_function.py +6 -1
  13. cognee/api/v1/responses/models.py +1 -1
  14. cognee/api/v1/search/search.py +6 -7
  15. cognee/cli/__init__.py +10 -0
  16. cognee/cli/_cognee.py +180 -0
  17. cognee/cli/commands/__init__.py +1 -0
  18. cognee/cli/commands/add_command.py +80 -0
  19. cognee/cli/commands/cognify_command.py +128 -0
  20. cognee/cli/commands/config_command.py +225 -0
  21. cognee/cli/commands/delete_command.py +80 -0
  22. cognee/cli/commands/search_command.py +149 -0
  23. cognee/cli/config.py +33 -0
  24. cognee/cli/debug.py +21 -0
  25. cognee/cli/echo.py +45 -0
  26. cognee/cli/exceptions.py +23 -0
  27. cognee/cli/minimal_cli.py +97 -0
  28. cognee/cli/reference.py +26 -0
  29. cognee/cli/suppress_logging.py +12 -0
  30. cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
  31. cognee/eval_framework/eval_config.py +1 -1
  32. cognee/exceptions/__init__.py +5 -5
  33. cognee/exceptions/exceptions.py +37 -17
  34. cognee/infrastructure/data/exceptions/__init__.py +7 -0
  35. cognee/infrastructure/data/exceptions/exceptions.py +22 -0
  36. cognee/infrastructure/data/utils/extract_keywords.py +3 -3
  37. cognee/infrastructure/databases/exceptions/__init__.py +3 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
  41. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
  42. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
  43. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
  45. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -8
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
  50. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
  51. cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
  52. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
  54. cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
  55. cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
  56. cognee/infrastructure/llm/LLMGateway.py +14 -5
  57. cognee/infrastructure/llm/config.py +5 -5
  58. cognee/infrastructure/llm/exceptions.py +30 -2
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +6 -6
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -15
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +9 -7
  67. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
  68. cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
  69. cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
  70. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
  71. cognee/infrastructure/llm/utils.py +7 -7
  72. cognee/modules/data/exceptions/exceptions.py +18 -5
  73. cognee/modules/data/methods/__init__.py +2 -0
  74. cognee/modules/data/methods/create_authorized_dataset.py +19 -0
  75. cognee/modules/data/methods/delete_data.py +2 -4
  76. cognee/modules/data/methods/get_authorized_dataset.py +11 -5
  77. cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
  78. cognee/modules/data/methods/load_or_create_datasets.py +2 -20
  79. cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
  80. cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
  81. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
  82. cognee/modules/graph/exceptions/__init__.py +2 -0
  83. cognee/modules/graph/exceptions/exceptions.py +25 -3
  84. cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
  85. cognee/modules/ingestion/exceptions/exceptions.py +2 -2
  86. cognee/modules/ontology/exceptions/exceptions.py +4 -4
  87. cognee/modules/pipelines/__init__.py +1 -1
  88. cognee/modules/pipelines/exceptions/exceptions.py +2 -2
  89. cognee/modules/pipelines/exceptions/tasks.py +18 -0
  90. cognee/modules/pipelines/layers/__init__.py +1 -0
  91. cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
  92. cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
  93. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
  94. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
  95. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
  96. cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
  97. cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
  98. cognee/modules/pipelines/methods/__init__.py +2 -0
  99. cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
  100. cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
  101. cognee/modules/pipelines/operations/__init__.py +0 -1
  102. cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
  103. cognee/modules/pipelines/operations/pipeline.py +23 -138
  104. cognee/modules/retrieval/base_feedback.py +11 -0
  105. cognee/modules/retrieval/cypher_search_retriever.py +1 -9
  106. cognee/modules/retrieval/exceptions/exceptions.py +12 -6
  107. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
  108. cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
  109. cognee/modules/retrieval/graph_completion_retriever.py +89 -5
  110. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  111. cognee/modules/retrieval/natural_language_retriever.py +0 -4
  112. cognee/modules/retrieval/user_qa_feedback.py +83 -0
  113. cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
  114. cognee/modules/retrieval/utils/models.py +40 -0
  115. cognee/modules/search/exceptions/__init__.py +7 -0
  116. cognee/modules/search/exceptions/exceptions.py +15 -0
  117. cognee/modules/search/methods/search.py +47 -7
  118. cognee/modules/search/types/SearchType.py +1 -0
  119. cognee/modules/settings/get_settings.py +2 -2
  120. cognee/modules/users/exceptions/exceptions.py +6 -6
  121. cognee/shared/CodeGraphEntities.py +1 -0
  122. cognee/shared/exceptions/exceptions.py +2 -2
  123. cognee/shared/logging_utils.py +142 -31
  124. cognee/shared/utils.py +0 -1
  125. cognee/tasks/completion/exceptions/exceptions.py +3 -3
  126. cognee/tasks/documents/classify_documents.py +4 -0
  127. cognee/tasks/documents/exceptions/__init__.py +11 -0
  128. cognee/tasks/documents/exceptions/exceptions.py +36 -0
  129. cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
  130. cognee/tasks/graph/exceptions/__init__.py +12 -0
  131. cognee/tasks/graph/exceptions/exceptions.py +41 -0
  132. cognee/tasks/graph/extract_graph_from_data.py +34 -2
  133. cognee/tasks/ingestion/exceptions/__init__.py +8 -0
  134. cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
  135. cognee/tasks/ingestion/resolve_data_directories.py +5 -0
  136. cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
  137. cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
  138. cognee/tasks/storage/add_data_points.py +41 -3
  139. cognee/tasks/storage/exceptions/__init__.py +9 -0
  140. cognee/tasks/storage/exceptions/exceptions.py +13 -0
  141. cognee/tasks/storage/index_data_points.py +1 -1
  142. cognee/tasks/summarization/exceptions/__init__.py +9 -0
  143. cognee/tasks/summarization/exceptions/exceptions.py +14 -0
  144. cognee/tasks/summarization/summarize_text.py +8 -1
  145. cognee/tests/integration/cli/__init__.py +3 -0
  146. cognee/tests/integration/cli/test_cli_integration.py +331 -0
  147. cognee/tests/integration/documents/PdfDocument_test.py +2 -2
  148. cognee/tests/integration/documents/TextDocument_test.py +2 -4
  149. cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
  150. cognee/tests/test_delete_by_id.py +1 -1
  151. cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
  152. cognee/tests/test_delete_soft.py +85 -0
  153. cognee/tests/test_kuzu.py +2 -2
  154. cognee/tests/test_neo4j.py +2 -2
  155. cognee/tests/test_search_db.py +126 -7
  156. cognee/tests/unit/cli/__init__.py +3 -0
  157. cognee/tests/unit/cli/test_cli_commands.py +483 -0
  158. cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
  159. cognee/tests/unit/cli/test_cli_main.py +173 -0
  160. cognee/tests/unit/cli/test_cli_runner.py +62 -0
  161. cognee/tests/unit/cli/test_cli_utils.py +127 -0
  162. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
  163. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
  164. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
  165. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
  166. cognee/tests/unit/modules/search/search_methods_test.py +4 -2
  167. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
  168. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/RECORD +172 -121
  169. cognee-0.2.4.dist-info/entry_points.txt +2 -0
  170. cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
  171. cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
  172. cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
  173. cognee/infrastructure/pipeline/models/Operation.py +0 -60
  174. cognee/infrastructure/pipeline/models/__init__.py +0 -0
  175. cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
  176. cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
  177. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
  178. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
  179. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py

@@ -9,7 +9,7 @@ from sqlalchemy.exc import ProgrammingError
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
 from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError

-from cognee.exceptions import InvalidValueError
+
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.engine import DataPoint
 from cognee.infrastructure.engine.utils import parse_id
@@ -17,6 +17,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine

 from distributed.utils import override_distributed
 from distributed.tasks.queued_add_data_points import queued_add_data_points
+from cognee.infrastructure.databases.exceptions import MissingQueryParameterError

 from ...relational.ModelBase import Base
 from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
@@ -275,7 +276,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
             return metadata.tables[collection_name]
         else:
             raise CollectionNotFoundError(
-                f"Collection '{collection_name}' not found!", log_level="DEBUG"
+                f"Collection '{collection_name}' not found!",
             )

     async def retrieve(self, collection_name: str, data_point_ids: List[str]):
@@ -302,7 +303,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         with_vector: bool = False,
     ) -> List[ScoredResult]:
         if query_text is None and query_vector is None:
-            raise InvalidValueError(message="One of query_text or query_vector must be provided!")
+            raise MissingQueryParameterError()

         if query_text and not query_vector:
             query_vector = (await self.embedding_engine.embed_text([query_text]))[0]
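The last hunk swaps the generic InvalidValueError for a typed MissingQueryParameterError when search() is given neither a query text nor a query vector. A minimal calling sketch under that assumption (adapter setup is elided; only the exception type and the keyword parameters come from this diff):

    from cognee.infrastructure.databases.exceptions import MissingQueryParameterError

    async def safe_search(adapter, collection_name: str, query_text=None, query_vector=None):
        # search() now raises the typed error instead of InvalidValueError
        # when both query_text and query_vector are missing.
        try:
            return await adapter.search(
                collection_name, query_text=query_text, query_vector=query_vector
            )
        except MissingQueryParameterError:
            return []  # hypothetical fallback; pick whatever suits the caller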
cognee/infrastructure/files/utils/get_data_file_path.py

@@ -5,19 +5,24 @@ from urllib.parse import urlparse
 def get_data_file_path(file_path: str):
     # Check if this is a file URI BEFORE normalizing (which corrupts URIs)
     if file_path.startswith("file://"):
+        # Remove first occurrence of file:// prefix
+        pure_file_path = file_path.replace("file://", "", 1)
         # Normalize the file URI for Windows - replace backslashes with forward slashes
-        normalized_file_uri = os.path.normpath(file_path)
+        normalized_file_uri = os.path.normpath(pure_file_path)

-        parsed_url = urlparse(normalized_file_uri)
-
-        # Convert URI path to file system path
+        # Convert path to proper file system path
         if os.name == "nt":  # Windows
             # Handle Windows drive letters correctly
-            fs_path = parsed_url.path
-            if fs_path.startswith("/") and len(fs_path) > 1 and fs_path[2] == ":":
-                fs_path = fs_path[1:]  # Remove leading slash for Windows drive paths
-        else:  # Unix-like systems
-            fs_path = parsed_url.path
+            fs_path = normalized_file_uri
+            if (
+                (fs_path.startswith("/") or fs_path.startswith("\\"))
+                and len(fs_path) > 1
+                and fs_path[2] == ":"
+            ):
+                fs_path = fs_path[1:]
+        else:
+            # Unix-like systems
+            fs_path = normalized_file_uri

        # Now split the actual filesystem path
        actual_fs_path = os.path.normpath(fs_path)
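The rewrite stops routing file URIs through urlparse and instead strips the literal file:// prefix once, treating the remainder as an ordinary path. An illustrative reduction of the Unix-like branch (the helper name is ours, not the module's):

    import os

    def strip_file_uri(file_path: str) -> str:
        # Drop the first "file://" occurrence, then normalize what is left,
        # mirroring the new Unix-like branch above.
        if file_path.startswith("file://"):
            file_path = file_path.replace("file://", "", 1)
        return os.path.normpath(file_path)

    print(strip_file_uri("file:///tmp/docs/report.txt"))  # /tmp/docs/report.txt on Unix-like systems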
cognee/infrastructure/files/utils/get_file_metadata.py

@@ -1,6 +1,7 @@
 import io
 import os.path
 from typing import BinaryIO, TypedDict
+from pathlib import Path

 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash
@@ -55,7 +56,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
     file_type = guess_file_type(file)

     file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
-    file_name = str(file_path).split("/")[-1].split(".")[0] if file_path else None
+    file_name = Path(file_path).stem if file_path else None

     # Get file size
     pos = file.tell()  # remember current pointer
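Switching to Path.stem changes the derived name for multi-dot filenames: the old expression cut at the first dot, while stem drops only the final suffix.

    from pathlib import Path

    file_path = "/data/report.v2.pdf"

    # Old behaviour: basename split at the first "."
    old_name = str(file_path).split("/")[-1].split(".")[0]  # "report"

    # New behaviour: Path.stem removes only the last suffix
    new_name = Path(file_path).stem  # "report.v2"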
cognee/infrastructure/llm/LLMGateway.py

@@ -1,6 +1,5 @@
-from typing import Type
+from typing import Type, Optional, Coroutine
 from pydantic import BaseModel
-from typing import Coroutine
 from cognee.infrastructure.llm import get_llm_config


@@ -79,7 +78,10 @@ class LLMGateway:

     @staticmethod
     def extract_content_graph(
-        content: str, response_model: Type[BaseModel], mode: str = "simple"
+        content: str,
+        response_model: Type[BaseModel],
+        mode: str = "simple",
+        custom_prompt: Optional[str] = None,
     ) -> Coroutine:
         llm_config = get_llm_config()
         if llm_config.structured_output_framework.upper() == "BAML":
@@ -87,13 +89,20 @@ class LLMGateway:
             extract_content_graph,
         )

-            return extract_content_graph(content=content, response_model=response_model, mode=mode)
+            return extract_content_graph(
+                content=content,
+                response_model=response_model,
+                mode=mode,
+                custom_prompt=custom_prompt,
+            )
         else:
             from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
                 extract_content_graph,
             )

-            return extract_content_graph(content=content, response_model=response_model)
+            return extract_content_graph(
+                content=content, response_model=response_model, custom_prompt=custom_prompt
+            )

     @staticmethod
     def extract_categories(content: str, response_model: Type[BaseModel]) -> Coroutine:
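The new custom_prompt parameter is threaded through both structured-output backends. A hedged usage sketch (the response model here is a stand-in for illustration; cognee ships its own graph models):

    from pydantic import BaseModel
    from cognee.infrastructure.llm.LLMGateway import LLMGateway

    class MiniGraph(BaseModel):  # illustrative stand-in response model
        summary: str

    async def build_graph(text: str):
        # When custom_prompt is omitted, behaviour is unchanged; when set,
        # it overrides the configured graph prompt for this call.
        return await LLMGateway.extract_content_graph(
            content=text,
            response_model=MiniGraph,
            custom_prompt="Extract only organisations and the people who lead them.",
        )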
cognee/infrastructure/llm/config.py

@@ -18,7 +18,7 @@ class LLMConfig(BaseSettings):
     - llm_api_version
     - llm_temperature
     - llm_streaming
-    - llm_max_tokens
+    - llm_max_completion_tokens
     - transcription_model
     - graph_prompt_path
     - llm_rate_limit_enabled
@@ -35,16 +35,16 @@ class LLMConfig(BaseSettings):

     structured_output_framework: str = "instructor"
     llm_provider: str = "openai"
-    llm_model: str = "gpt-4o-mini"
+    llm_model: str = "gpt-5-mini"
     llm_endpoint: str = ""
     llm_api_key: Optional[str] = None
     llm_api_version: Optional[str] = None
     llm_temperature: float = 0.0
     llm_streaming: bool = False
-    llm_max_tokens: int = 16384
+    llm_max_completion_tokens: int = 16384

     baml_llm_provider: str = "openai"
-    baml_llm_model: str = "gpt-4o-mini"
+    baml_llm_model: str = "gpt-5-mini"
     baml_llm_endpoint: str = ""
     baml_llm_api_key: Optional[str] = None
     baml_llm_temperature: float = 0.0
@@ -171,7 +171,7 @@ class LLMConfig(BaseSettings):
             "api_version": self.llm_api_version,
             "temperature": self.llm_temperature,
             "streaming": self.llm_streaming,
-            "max_tokens": self.llm_max_tokens,
+            "max_completion_tokens": self.llm_max_completion_tokens,
             "transcription_model": self.transcription_model,
             "graph_prompt_path": self.graph_prompt_path,
             "rate_limit_enabled": self.llm_rate_limit_enabled,
cognee/infrastructure/llm/exceptions.py

@@ -1,5 +1,33 @@
-from cognee.exceptions.exceptions import CriticalError
+from cognee.exceptions.exceptions import CogneeValidationError


-class ContentPolicyFilterError(CriticalError):
+class ContentPolicyFilterError(CogneeValidationError):
     pass
+
+
+class LLMAPIKeyNotSetError(CogneeValidationError):
+    """
+    Raised when the LLM API key is not set in the configuration.
+    """
+
+    def __init__(self, message: str = "LLM API key is not set."):
+        super().__init__(message=message, name="LLMAPIKeyNotSetError")
+
+
+class UnsupportedLLMProviderError(CogneeValidationError):
+    """
+    Raised when an unsupported LLM provider is specified in the configuration.
+    """
+
+    def __init__(self, provider: str):
+        message = f"Unsupported LLM provider: {provider}"
+        super().__init__(message=message, name="UnsupportedLLMProviderError")
+
+
+class MissingSystemPromptPathError(CogneeValidationError):
+    def __init__(
+        self,
+        name: str = "MissingSystemPromptPathError",
+    ):
+        message = "No system prompt path provided."
+        super().__init__(message, name)
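Because all three new classes derive from CogneeValidationError, callers can branch on the failure mode instead of matching message strings. A sketch against the provider factory changed later in this diff (the handler bodies are illustrative):

    from cognee.infrastructure.llm.exceptions import (
        LLMAPIKeyNotSetError,
        UnsupportedLLMProviderError,
    )
    from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
        get_llm_client,
    )

    try:
        client = get_llm_client()
    except LLMAPIKeyNotSetError:
        print("Set an LLM API key before starting cognee.")  # hypothetical handling
    except UnsupportedLLMProviderError as error:
        print(f"Configuration problem: {error}")  # hypothetical handling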
cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py

@@ -1,4 +1,4 @@
-from typing import Type
+from typing import Type, Optional
 from pydantic import BaseModel
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.shared.logging_utils import get_logger, setup_logging
@@ -6,7 +6,10 @@ from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.asyn


 async def extract_content_graph(
-    content: str, response_model: Type[BaseModel], mode: str = "simple"
+    content: str,
+    response_model: Type[BaseModel],
+    mode: str = "simple",
+    custom_prompt: Optional[str] = None,
 ):
     config = get_llm_config()
     setup_logging()
@@ -26,8 +29,16 @@ async def extract_content_graph(
     #     return graph

     # else:
-    graph = await b.ExtractContentGraphGeneric(
-        content, mode=mode, baml_options={"client_registry": config.baml_registry}
-    )
+    if custom_prompt:
+        graph = await b.ExtractContentGraphGeneric(
+            content,
+            mode="custom",
+            custom_prompt_content=custom_prompt,
+            baml_options={"client_registry": config.baml_registry},
+        )
+    else:
+        graph = await b.ExtractContentGraphGeneric(
+            content, mode=mode, baml_options={"client_registry": config.baml_registry}
+        )

     return graph
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py

@@ -1,5 +1,5 @@
 import os
-from typing import Type
+from typing import Type, Optional
 from pydantic import BaseModel

 from cognee.infrastructure.llm.LLMGateway import LLMGateway
@@ -8,21 +8,25 @@ from cognee.infrastructure.llm.config import (
 )


-async def extract_content_graph(content: str, response_model: Type[BaseModel]):
-    llm_config = get_llm_config()
-
-    prompt_path = llm_config.graph_prompt_path
-
-    # Check if the prompt path is an absolute path or just a filename
-    if os.path.isabs(prompt_path):
-        # directory containing the file
-        base_directory = os.path.dirname(prompt_path)
-        # just the filename itself
-        prompt_path = os.path.basename(prompt_path)
+async def extract_content_graph(
+    content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None
+):
+    if custom_prompt:
+        system_prompt = custom_prompt
     else:
-        base_directory = None
-
-    system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
+        llm_config = get_llm_config()
+        prompt_path = llm_config.graph_prompt_path
+
+        # Check if the prompt path is an absolute path or just a filename
+        if os.path.isabs(prompt_path):
+            # directory containing the file
+            base_directory = os.path.dirname(prompt_path)
+            # just the filename itself
+            prompt_path = os.path.basename(prompt_path)
+        else:
+            base_directory = None
+
+        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)

     content_graph = await LLMGateway.acreate_structured_output(
         content, system_prompt, response_model
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py

@@ -2,7 +2,7 @@ from typing import Type
 from pydantic import BaseModel
 import instructor

-from cognee.exceptions import InvalidValueError
+from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
@@ -23,7 +23,7 @@ class AnthropicAdapter(LLMInterface):
     name = "Anthropic"
     model: str

-    def __init__(self, max_tokens: int, model: str = None):
+    def __init__(self, max_completion_tokens: int, model: str = None):
         import anthropic

         self.aclient = instructor.patch(
@@ -31,7 +31,7 @@ class AnthropicAdapter(LLMInterface):
         )

         self.model = model
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens

     @sleep_and_retry_async()
     @rate_limit_async
@@ -57,7 +57,7 @@ class AnthropicAdapter(LLMInterface):

         return await self.aclient(
             model=self.model,
-            max_tokens=4096,
+            max_completion_tokens=4096,
             max_retries=5,
             messages=[
                 {
@@ -89,7 +89,7 @@ class AnthropicAdapter(LLMInterface):
         if not text_input:
             text_input = "No user input provided."
         if not system_prompt:
-            raise InvalidValueError(message="No system prompt path provided.")
+            raise MissingSystemPromptPathError()

         system_prompt = LLMGateway.read_query_prompt(system_prompt)
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py

@@ -5,7 +5,7 @@ from litellm import acompletion, JSONSchemaValidationError

 from cognee.shared.logging_utils import get_logger
 from cognee.modules.observability.get_observe import get_observe
-from cognee.exceptions import InvalidValueError
+from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
@@ -34,7 +34,7 @@ class GeminiAdapter(LLMInterface):
         self,
         api_key: str,
         model: str,
-        max_tokens: int,
+        max_completion_tokens: int,
         endpoint: Optional[str] = None,
         api_version: Optional[str] = None,
         streaming: bool = False,
@@ -44,7 +44,7 @@ class GeminiAdapter(LLMInterface):
         self.endpoint = endpoint
         self.api_version = api_version
         self.streaming = streaming
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens

     @observe(as_type="generation")
     @sleep_and_retry_async()
@@ -90,7 +90,7 @@ class GeminiAdapter(LLMInterface):
             model=f"{self.model}",
             messages=messages,
             api_key=self.api_key,
-            max_tokens=self.max_tokens,
+            max_completion_tokens=self.max_completion_tokens,
             temperature=0.1,
             response_format=response_schema,
             timeout=100,
@@ -118,7 +118,7 @@ class GeminiAdapter(LLMInterface):
         """
         Format and display the prompt for a user query.

-        Raises an InvalidValueError if no system prompt is provided.
+        Raises an MissingQueryParameterError if no system prompt is provided.

         Parameters:
         -----------
@@ -135,7 +135,7 @@ class GeminiAdapter(LLMInterface):
         if not text_input:
             text_input = "No user input provided."
         if not system_prompt:
-            raise InvalidValueError(message="No system prompt path provided.")
+            raise MissingSystemPromptPathError()
         system_prompt = LLMGateway.read_query_prompt(system_prompt)

         formatted_prompt = (
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py

@@ -41,7 +41,7 @@ class GenericAPIAdapter(LLMInterface):
         api_key: str,
         model: str,
         name: str,
-        max_tokens: int,
+        max_completion_tokens: int,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
@@ -50,7 +50,7 @@ class GenericAPIAdapter(LLMInterface):
         self.model = model
         self.api_key = api_key
         self.endpoint = endpoint
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens

         self.fallback_model = fallback_model
         self.fallback_api_key = fallback_api_key
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py

@@ -2,11 +2,14 @@

 from enum import Enum

-from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.llm import get_llm_config
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import (
     OllamaAPIAdapter,
 )
+from cognee.infrastructure.llm.exceptions import (
+    LLMAPIKeyNotSetError,
+    UnsupportedLLMProviderError,
+)


 # Define an Enum for LLM Providers
@@ -35,7 +38,7 @@ def get_llm_client():

     This function retrieves the configuration for the LLM provider and model, and
     initializes the appropriate LLM client adapter accordingly. It raises an
-    InvalidValueError if the LLM API key is not set for certain providers or if the provider
+    LLMAPIKeyNotSetError if the LLM API key is not set for certain providers or if the provider
     is unsupported.

     Returns:
@@ -51,15 +54,19 @@ def get_llm_client():
     # Check if max_token value is defined in liteLLM for given model
     # if not use value from cognee configuration
     from cognee.infrastructure.llm.utils import (
-        get_model_max_tokens,
+        get_model_max_completion_tokens,
     )  # imported here to avoid circular imports

-    model_max_tokens = get_model_max_tokens(llm_config.llm_model)
-    max_tokens = model_max_tokens if model_max_tokens else llm_config.llm_max_tokens
+    model_max_completion_tokens = get_model_max_completion_tokens(llm_config.llm_model)
+    max_completion_tokens = (
+        model_max_completion_tokens
+        if model_max_completion_tokens
+        else llm_config.llm_max_completion_tokens
+    )

     if provider == LLMProvider.OPENAI:
         if llm_config.llm_api_key is None:
-            raise InvalidValueError(message="LLM API key is not set.")
+            raise LLMAPIKeyNotSetError()

         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
             OpenAIAdapter,
@@ -71,7 +78,7 @@ def get_llm_client():
             api_version=llm_config.llm_api_version,
             model=llm_config.llm_model,
             transcription_model=llm_config.transcription_model,
-            max_tokens=max_tokens,
+            max_completion_tokens=max_completion_tokens,
             streaming=llm_config.llm_streaming,
             fallback_api_key=llm_config.fallback_api_key,
             fallback_endpoint=llm_config.fallback_endpoint,
@@ -80,7 +87,7 @@ def get_llm_client():

     elif provider == LLMProvider.OLLAMA:
         if llm_config.llm_api_key is None:
-            raise InvalidValueError(message="LLM API key is not set.")
+            raise LLMAPIKeyNotSetError()

         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
             GenericAPIAdapter,
@@ -91,7 +98,7 @@ def get_llm_client():
             llm_config.llm_api_key,
             llm_config.llm_model,
             "Ollama",
-            max_tokens=max_tokens,
+            max_completion_tokens=max_completion_tokens,
         )

     elif provider == LLMProvider.ANTHROPIC:
@@ -99,11 +106,13 @@ def get_llm_client():
             AnthropicAdapter,
         )

-        return AnthropicAdapter(max_tokens=max_tokens, model=llm_config.llm_model)
+        return AnthropicAdapter(
+            max_completion_tokens=max_completion_tokens, model=llm_config.llm_model
+        )

     elif provider == LLMProvider.CUSTOM:
         if llm_config.llm_api_key is None:
-            raise InvalidValueError(message="LLM API key is not set.")
+            raise LLMAPIKeyNotSetError()

         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
             GenericAPIAdapter,
@@ -114,7 +123,7 @@ def get_llm_client():
             llm_config.llm_api_key,
             llm_config.llm_model,
             "Custom",
-            max_tokens=max_tokens,
+            max_completion_tokens=max_completion_tokens,
             fallback_api_key=llm_config.fallback_api_key,
             fallback_endpoint=llm_config.fallback_endpoint,
             fallback_model=llm_config.fallback_model,
@@ -122,7 +131,7 @@ def get_llm_client():

     elif provider == LLMProvider.GEMINI:
         if llm_config.llm_api_key is None:
-            raise InvalidValueError(message="LLM API key is not set.")
+            raise LLMAPIKeyNotSetError()

         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import (
             GeminiAdapter,
@@ -131,11 +140,11 @@ def get_llm_client():
         return GeminiAdapter(
             api_key=llm_config.llm_api_key,
             model=llm_config.llm_model,
-            max_tokens=max_tokens,
+            max_completion_tokens=max_completion_tokens,
             endpoint=llm_config.llm_endpoint,
             api_version=llm_config.llm_api_version,
             streaming=llm_config.llm_streaming,
         )

     else:
-        raise InvalidValueError(message=f"Unsupported LLM provider: {provider}")
+        raise UnsupportedLLMProviderError(provider)
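The token-limit resolution above reduces to a simple preference rule: a per-model limit known to liteLLM wins, otherwise the configured default applies. In isolation (the function name is illustrative):

    def resolve_max_completion_tokens(model_limit, configured_default):
        # Mirrors the conditional expression in get_llm_client().
        return model_limit if model_limit else configured_default

    assert resolve_max_completion_tokens(None, 16384) == 16384
    assert resolve_max_completion_tokens(4096, 16384) == 4096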
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py

@@ -30,16 +30,18 @@ class OllamaAPIAdapter(LLMInterface):
     - model
     - api_key
     - endpoint
-    - max_tokens
+    - max_completion_tokens
     - aclient
     """

-    def __init__(self, endpoint: str, api_key: str, model: str, name: str, max_tokens: int):
+    def __init__(
+        self, endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int
+    ):
         self.name = name
         self.model = model
         self.api_key = api_key
         self.endpoint = endpoint
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens

         self.aclient = instructor.from_openai(
             OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
@@ -159,7 +161,7 @@ class OllamaAPIAdapter(LLMInterface):
                     ],
                 }
             ],
-            max_tokens=300,
+            max_completion_tokens=300,
         )

        # Ensure response is valid before accessing .choices[0].message.content
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py

@@ -7,12 +7,14 @@ from openai import ContentFilterFinishReasonError
 from litellm.exceptions import ContentPolicyViolationError
 from instructor.exceptions import InstructorRetryException

-from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
-from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
+from cognee.infrastructure.llm.exceptions import (
+    ContentPolicyFilterError,
+    MissingSystemPromptPathError,
+)
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
@@ -62,7 +64,7 @@ class OpenAIAdapter(LLMInterface):
         api_version: str,
         model: str,
         transcription_model: str,
-        max_tokens: int,
+        max_completion_tokens: int,
         streaming: bool = False,
         fallback_model: str = None,
         fallback_api_key: str = None,
@@ -75,7 +77,7 @@ class OpenAIAdapter(LLMInterface):
         self.api_key = api_key
         self.endpoint = endpoint
         self.api_version = api_version
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens
         self.streaming = streaming

         self.fallback_model = fallback_model
@@ -299,7 +301,7 @@ class OpenAIAdapter(LLMInterface):
             api_key=self.api_key,
             api_base=self.endpoint,
             api_version=self.api_version,
-            max_tokens=300,
+            max_completion_tokens=300,
             max_retries=self.MAX_RETRIES,
         )

@@ -308,7 +310,7 @@ class OpenAIAdapter(LLMInterface):
         Format and display the prompt for a user query.

         This method formats the prompt using the provided user input and system prompt,
-        returning a string representation. Raises InvalidValueError if the system prompt is not
+        returning a string representation. Raises MissingSystemPromptPathError if the system prompt is not
         provided.

         Parameters:
@@ -325,7 +327,7 @@ class OpenAIAdapter(LLMInterface):
         if not text_input:
             text_input = "No user input provided."
         if not system_prompt:
-            raise InvalidValueError(message="No system prompt path provided.")
+            raise MissingSystemPromptPathError()
         system_prompt = LLMGateway.read_query_prompt(system_prompt)

         formatted_prompt = (
cognee/infrastructure/llm/tokenizer/Gemini/adapter.py

@@ -17,10 +17,10 @@ class GeminiTokenizer(TokenizerInterface):
     def __init__(
         self,
         model: str,
-        max_tokens: int = 3072,
+        max_completion_tokens: int = 3072,
     ):
         self.model = model
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens

         # Get LLM API key from config
         from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py

@@ -14,17 +14,17 @@ class HuggingFaceTokenizer(TokenizerInterface):

     Instance variables include:
     - model: str
-    - max_tokens: int
+    - max_completion_tokens: int
     - tokenizer: AutoTokenizer
     """

     def __init__(
         self,
         model: str,
-        max_tokens: int = 512,
+        max_completion_tokens: int = 512,
     ):
         self.model = model
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens

         # Import here to make it an optional dependency
         from transformers import AutoTokenizer
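Keyword callers of the tokenizers must update along with the rename. Constructing the adapter under the new signature (the model name is illustrative, and the import inside __init__ requires the optional transformers dependency):

    from cognee.infrastructure.llm.tokenizer.HuggingFace.adapter import HuggingFaceTokenizer

    tokenizer = HuggingFaceTokenizer(
        model="sentence-transformers/all-MiniLM-L6-v2",
        max_completion_tokens=512,  # was max_tokens in 0.2.3.dev0
    )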