cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (131)
  1. cognee/api/client.py +5 -1
  2. cognee/api/v1/add/add.py +1 -2
  3. cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
  4. cognee/api/v1/cognify/cognify.py +16 -24
  5. cognee/api/v1/cognify/routers/__init__.py +1 -0
  6. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/ontologies.py +37 -12
  10. cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
  11. cognee/api/v1/search/search.py +0 -4
  12. cognee/api/v1/ui/ui.py +68 -38
  13. cognee/context_global_variables.py +16 -61
  14. cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
  15. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  16. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
  17. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  18. cognee/eval_framework/eval_config.py +2 -2
  19. cognee/eval_framework/modal_run_eval.py +28 -16
  20. cognee/infrastructure/databases/graph/config.py +0 -3
  21. cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
  22. cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
  23. cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
  24. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
  25. cognee/infrastructure/databases/utils/__init__.py +0 -3
  26. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
  27. cognee/infrastructure/databases/vector/config.py +0 -2
  28. cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
  32. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
  33. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
  34. cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
  35. cognee/infrastructure/files/storage/s3_config.py +0 -2
  36. cognee/infrastructure/llm/LLMGateway.py +2 -5
  37. cognee/infrastructure/llm/config.py +0 -35
  38. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  39. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
  40. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
  41. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
  42. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
  43. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
  44. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
  45. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
  47. cognee/modules/cognify/config.py +0 -2
  48. cognee/modules/data/deletion/prune_system.py +2 -52
  49. cognee/modules/data/methods/delete_dataset.py +0 -26
  50. cognee/modules/engine/models/__init__.py +0 -1
  51. cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
  52. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
  53. cognee/modules/memify/memify.py +7 -1
  54. cognee/modules/pipelines/operations/pipeline.py +2 -18
  55. cognee/modules/retrieval/__init__.py +1 -1
  56. cognee/modules/retrieval/code_retriever.py +232 -0
  57. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
  58. cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
  59. cognee/modules/retrieval/graph_completion_retriever.py +0 -10
  60. cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
  61. cognee/modules/retrieval/temporal_retriever.py +0 -4
  62. cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
  63. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
  64. cognee/modules/search/methods/get_search_type_tools.py +8 -54
  65. cognee/modules/search/methods/no_access_control_search.py +0 -4
  66. cognee/modules/search/methods/search.py +0 -21
  67. cognee/modules/search/types/SearchType.py +1 -1
  68. cognee/modules/settings/get_settings.py +0 -19
  69. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  70. cognee/modules/users/models/DatasetDatabase.py +3 -15
  71. cognee/shared/logging_utils.py +0 -4
  72. cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
  73. cognee/tasks/code/get_local_dependencies_checker.py +20 -0
  74. cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
  75. cognee/tasks/documents/__init__.py +1 -0
  76. cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
  77. cognee/tasks/graph/extract_graph_from_data.py +10 -9
  78. cognee/tasks/repo_processor/__init__.py +2 -0
  79. cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
  80. cognee/tasks/repo_processor/get_non_code_files.py +158 -0
  81. cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
  82. cognee/tasks/storage/add_data_points.py +2 -142
  83. cognee/tests/test_cognee_server_start.py +4 -2
  84. cognee/tests/test_conversation_history.py +1 -23
  85. cognee/tests/test_delete_bmw_example.py +60 -0
  86. cognee/tests/test_search_db.py +1 -37
  87. cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
  88. cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
  89. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
  90. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  91. cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
  92. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
  93. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
  94. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  95. cognee/api/v1/ui/node_setup.py +0 -360
  96. cognee/api/v1/ui/npm_utils.py +0 -50
  97. cognee/eval_framework/Dockerfile +0 -29
  98. cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
  99. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
  100. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
  101. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
  102. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
  103. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
  104. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
  105. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
  106. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
  107. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
  108. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
  109. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
  110. cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
  111. cognee/modules/engine/models/Triplet.py +0 -9
  112. cognee/modules/retrieval/register_retriever.py +0 -10
  113. cognee/modules/retrieval/registered_community_retrievers.py +0 -1
  114. cognee/modules/retrieval/triplet_retriever.py +0 -182
  115. cognee/shared/rate_limiting.py +0 -30
  116. cognee/tasks/memify/get_triplet_datapoints.py +0 -289
  117. cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
  118. cognee/tests/integration/tasks/test_add_data_points.py +0 -139
  119. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
  120. cognee/tests/test_dataset_database_handler.py +0 -137
  121. cognee/tests/test_dataset_delete.py +0 -76
  122. cognee/tests/test_edge_centered_payload.py +0 -170
  123. cognee/tests/test_pipeline_cache.py +0 -164
  124. cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
  125. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
  126. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
  127. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
  128. cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
  129. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,168 +0,0 @@
-import os
-import asyncio
-import requests
-import base64
-import hashlib
-from uuid import UUID
-from typing import Optional
-from cryptography.fernet import Fernet
-
-from cognee.infrastructure.databases.graph import get_graph_config
-from cognee.modules.users.models import User, DatasetDatabase
-from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
-
-
-class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
-    """
-    Handler for a quick development PoC integration of Cognee multi-user and permission mode with Neo4j Aura databases.
-    This handler creates a new Neo4j Aura instance for each Cognee dataset created.
-
-    Improvements needed to be production ready:
-    - Secret management for client credentials: currently secrets are encrypted and stored in the Cognee relational database;
-      a secret manager or a similar system should be used instead.
-
-    Quality of life improvements:
-    - Allow configuration of different Neo4j Aura plans and regions.
-    - Requests should be made async; currently the blocking requests library is used.
-    """
-
-    @classmethod
-    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
-        """
-        Create a new Neo4j Aura instance for the dataset. Return connection info that will be mapped to the dataset.
-
-        Args:
-            dataset_id: Dataset UUID
-            user: User object who owns the dataset and is making the request
-
-        Returns:
-            dict: Connection details for the created Neo4j instance
-
-        """
-        graph_config = get_graph_config()
-
-        if graph_config.graph_database_provider != "neo4j":
-            raise ValueError(
-                "Neo4jAuraDevDatasetDatabaseHandler can only be used with Neo4j graph database provider."
-            )
-
-        graph_db_name = f"{dataset_id}"
-
-        # Client credentials and encryption
-        client_id = os.environ.get("NEO4J_CLIENT_ID", None)
-        client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None)
-        tenant_id = os.environ.get("NEO4J_TENANT_ID", None)
-        encryption_env_key = os.environ.get("NEO4J_ENCRYPTION_KEY", "test_key")
-        encryption_key = base64.urlsafe_b64encode(
-            hashlib.sha256(encryption_env_key.encode()).digest()
-        )
-        cipher = Fernet(encryption_key)
-
-        if client_id is None or client_secret is None or tenant_id is None:
-            raise ValueError(
-                "NEO4J_CLIENT_ID, NEO4J_CLIENT_SECRET, and NEO4J_TENANT_ID environment variables must be set to use Neo4j Aura DatasetDatabase Handling."
-            )
-
-        # Make the request with HTTP Basic Auth
-        def get_aura_token(client_id: str, client_secret: str) -> dict:
-            url = "https://api.neo4j.io/oauth/token"
-            data = {"grant_type": "client_credentials"}  # sent as application/x-www-form-urlencoded
-
-            resp = requests.post(url, data=data, auth=(client_id, client_secret))
-            resp.raise_for_status()  # raises if the request failed
-            return resp.json()
-
-        resp = get_aura_token(client_id, client_secret)
-
-        url = "https://api.neo4j.io/v1/instances"
-
-        headers = {
-            "accept": "application/json",
-            "Authorization": f"Bearer {resp['access_token']}",
-            "Content-Type": "application/json",
-        }
-
-        # TODO: Maybe we can allow **kwargs parameter forwarding for cases like these
-        # to allow different configurations between datasets
-        payload = {
-            "version": "5",
-            "region": "europe-west1",
-            "memory": "1GB",
-            "name": graph_db_name[
-                0:29
-            ],  # TODO: Find a better name for the Neo4j instance within the 30 character limit
-            "type": "professional-db",
-            "tenant_id": tenant_id,
-            "cloud_provider": "gcp",
-        }
-
-        response = requests.post(url, headers=headers, json=payload)
-
-        graph_db_name = "neo4j"  # Has to be 'neo4j' for Aura
-        graph_db_url = response.json()["data"]["connection_url"]
-        graph_db_key = resp["access_token"]
-        graph_db_username = response.json()["data"]["username"]
-        graph_db_password = response.json()["data"]["password"]
-
-        async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict):
-            # Poll until the instance is running
-            status_url = f"https://api.neo4j.io/v1/instances/{instance_id}"
-            status = ""
-            for attempt in range(30):  # Try for up to ~5 minutes
-                status_resp = requests.get(
-                    status_url, headers=headers
-                )  # TODO: Use async requests with httpx
-                status = status_resp.json()["data"]["status"]
-                if status.lower() == "running":
-                    return
-                await asyncio.sleep(10)
-            raise TimeoutError(
-                f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}"
-            )
-
-        instance_id = response.json()["data"]["id"]
-        await _wait_for_neo4j_instance_provisioning(instance_id, headers)
-
-        encrypted_db_password_bytes = cipher.encrypt(graph_db_password.encode())
-        encrypted_db_password_string = encrypted_db_password_bytes.decode()
-
-        return {
-            "graph_database_name": graph_db_name,
-            "graph_database_url": graph_db_url,
-            "graph_database_provider": "neo4j",
-            "graph_database_key": graph_db_key,
-            "graph_dataset_database_handler": "neo4j_aura_dev",
-            "graph_database_connection_info": {
-                "graph_database_username": graph_db_username,
-                "graph_database_password": encrypted_db_password_string,
-            },
-        }
-
-    @classmethod
-    async def resolve_dataset_connection_info(
-        cls, dataset_database: DatasetDatabase
-    ) -> DatasetDatabase:
-        """
-        Resolve and decrypt connection info for the Neo4j dataset database.
-        In this case, decrypt the password stored in the database.
-
-        Args:
-            dataset_database: DatasetDatabase instance containing encrypted connection info.
-        """
-        encryption_env_key = os.environ.get("NEO4J_ENCRYPTION_KEY", "test_key")
-        encryption_key = base64.urlsafe_b64encode(
-            hashlib.sha256(encryption_env_key.encode()).digest()
-        )
-        cipher = Fernet(encryption_key)
-        graph_db_password = cipher.decrypt(
-            dataset_database.graph_database_connection_info["graph_database_password"].encode()
-        ).decode()
-
-        dataset_database.graph_database_connection_info["graph_database_password"] = (
-            graph_db_password
-        )
-        return dataset_database
-
-    @classmethod
-    async def delete_dataset(cls, dataset_database: DatasetDatabase):
-        pass
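
The removed Aura handler never stores a raw Fernet key: it derives one by hashing the NEO4J_ENCRYPTION_KEY environment variable, since Fernet requires a 32-byte url-safe base64-encoded key. A minimal sketch of that derivation and the encrypt/decrypt round trip, assuming only the cryptography package:

import os
import base64
import hashlib

from cryptography.fernet import Fernet

# SHA-256 always yields 32 bytes, and urlsafe_b64encode turns them into
# the 44-character key format Fernet expects, so any passphrase works.
passphrase = os.environ.get("NEO4J_ENCRYPTION_KEY", "test_key")
key = base64.urlsafe_b64encode(hashlib.sha256(passphrase.encode()).digest())
cipher = Fernet(key)

token = cipher.encrypt(b"aura-password")  # what create_dataset stores
assert cipher.decrypt(token) == b"aura-password"  # what resolve_dataset_connection_info recovers
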
@@ -1,10 +0,0 @@
-from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
-
-
-def get_graph_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
-    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
-        supported_dataset_database_handlers,
-    )
-
-    handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler]
-    return handler
@@ -1,10 +0,0 @@
-from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
-
-
-def get_vector_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
-    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
-        supported_dataset_database_handlers,
-    )
-
-    handler = supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler]
-    return handler
@@ -1,30 +0,0 @@
-from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import (
-    get_graph_dataset_database_handler,
-)
-from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import (
-    get_vector_dataset_database_handler,
-)
-from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
-
-
-async def resolve_dataset_database_connection_info(
-    dataset_database: DatasetDatabase,
-) -> DatasetDatabase:
-    """
-    Resolve the connection info for the given DatasetDatabase instance.
-    Resolve both vector and graph database connection info and return the updated DatasetDatabase instance.
-
-    Args:
-        dataset_database: DatasetDatabase instance
-    Returns:
-        DatasetDatabase instance with resolved connection info
-    """
-    vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database)
-    graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database)
-    dataset_database = await vector_dataset_database_handler[
-        "handler_instance"
-    ].resolve_dataset_connection_info(dataset_database)
-    dataset_database = await graph_dataset_database_handler[
-        "handler_instance"
-    ].resolve_dataset_connection_info(dataset_database)
-    return dataset_database
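
Both lookups above return registry entries that the resolver then indexes with ["handler_instance"], so each entry is a dict wrapping the handler class rather than the class itself. A sketch of that dispatch under that assumption, with a stub standing in for the real entries of supported_dataset_database_handlers.py (also removed in this diff):

# StubHandler and the registry contents are illustrative only.
class StubHandler:
    @classmethod
    async def resolve_dataset_connection_info(cls, dataset_database):
        return dataset_database  # a real handler would decrypt secrets here


supported_dataset_database_handlers = {
    "lancedb": {"handler_instance": StubHandler},
}

handler = supported_dataset_database_handlers["lancedb"]["handler_instance"]
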
@@ -1,50 +0,0 @@
-import os
-from uuid import UUID
-from typing import Optional
-
-from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
-from cognee.modules.users.models import User
-from cognee.modules.users.models import DatasetDatabase
-from cognee.base_config import get_base_config
-from cognee.infrastructure.databases.vector import get_vectordb_config
-from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
-
-
-class LanceDBDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
-    """
-    Handler for interacting with LanceDB Dataset databases.
-    """
-
-    @classmethod
-    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
-        vector_config = get_vectordb_config()
-        base_config = get_base_config()
-
-        if vector_config.vector_db_provider != "lancedb":
-            raise ValueError(
-                "LanceDBDatasetDatabaseHandler can only be used with LanceDB vector database provider."
-            )
-
-        databases_directory_path = os.path.join(
-            base_config.system_root_directory, "databases", str(user.id)
-        )
-
-        vector_db_name = f"{dataset_id}.lance.db"
-
-        return {
-            "vector_database_provider": vector_config.vector_db_provider,
-            "vector_database_url": os.path.join(databases_directory_path, vector_db_name),
-            "vector_database_key": vector_config.vector_db_key,
-            "vector_database_name": vector_db_name,
-            "vector_dataset_database_handler": "lancedb",
-        }
-
-    @classmethod
-    async def delete_dataset(cls, dataset_database: DatasetDatabase):
-        vector_engine = create_vector_engine(
-            vector_db_provider=dataset_database.vector_database_provider,
-            vector_db_url=dataset_database.vector_database_url,
-            vector_db_key=dataset_database.vector_database_key,
-            vector_db_name=dataset_database.vector_database_name,
-        )
-        await vector_engine.prune()
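
For LanceDB a per-dataset "database" is just a file path, so create_dataset reduces to computing {system_root}/databases/{user_id}/{dataset_id}.lance.db and returning it as the connection URL. A quick check of that layout (the root directory here is illustrative; the real one comes from get_base_config()):

import os
from uuid import uuid4

system_root_directory = "/var/lib/cognee"  # illustrative root
user_id, dataset_id = uuid4(), uuid4()

vector_db_name = f"{dataset_id}.lance.db"
vector_db_url = os.path.join(system_root_directory, "databases", str(user_id), vector_db_name)
# e.g. /var/lib/cognee/databases/<user_id>/<dataset_id>.lance.db
print(vector_db_url)
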
@@ -1,5 +0,0 @@
-"""Bedrock LLM adapter module."""
-
-from .adapter import BedrockAdapter
-
-__all__ = ["BedrockAdapter"]
@@ -1,153 +0,0 @@
-import litellm
-import instructor
-from typing import Type
-from pydantic import BaseModel
-from litellm.exceptions import ContentPolicyViolationError
-from instructor.exceptions import InstructorRetryException
-
-from cognee.infrastructure.llm.LLMGateway import LLMGateway
-from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
-    LLMInterface,
-)
-from cognee.infrastructure.llm.exceptions import (
-    ContentPolicyFilterError,
-    MissingSystemPromptPathError,
-)
-from cognee.infrastructure.files.storage.s3_config import get_s3_config
-from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
-    rate_limit_async,
-    rate_limit_sync,
-    sleep_and_retry_async,
-    sleep_and_retry_sync,
-)
-from cognee.modules.observability.get_observe import get_observe
-
-observe = get_observe()
-
-
-class BedrockAdapter(LLMInterface):
-    """
-    Adapter for AWS Bedrock API with support for three authentication methods:
-    1. API Key (Bearer Token)
-    2. AWS Credentials (access key + secret key)
-    3. AWS Profile (boto3 credential chain)
-    """
-
-    name = "Bedrock"
-    model: str
-    api_key: str
-    default_instructor_mode = "json_schema_mode"
-
-    MAX_RETRIES = 5
-
-    def __init__(
-        self,
-        model: str,
-        api_key: str = None,
-        max_completion_tokens: int = 16384,
-        streaming: bool = False,
-        instructor_mode: str = None,
-    ):
-        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
-
-        self.aclient = instructor.from_litellm(
-            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
-        )
-        self.client = instructor.from_litellm(litellm.completion)
-        self.model = model
-        self.api_key = api_key
-        self.max_completion_tokens = max_completion_tokens
-        self.streaming = streaming
-
-    def _create_bedrock_request(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
-    ) -> dict:
-        """Create Bedrock request with authentication."""
-
-        request_params = {
-            "model": self.model,
-            "custom_llm_provider": "bedrock",
-            "drop_params": True,
-            "messages": [
-                {"role": "user", "content": text_input},
-                {"role": "system", "content": system_prompt},
-            ],
-            "response_model": response_model,
-            "max_retries": self.MAX_RETRIES,
-            "max_completion_tokens": self.max_completion_tokens,
-            "stream": self.streaming,
-        }
-
-        s3_config = get_s3_config()
-
-        # Add authentication parameters
-        if self.api_key:
-            request_params["api_key"] = self.api_key
-        elif s3_config.aws_access_key_id and s3_config.aws_secret_access_key:
-            request_params["aws_access_key_id"] = s3_config.aws_access_key_id
-            request_params["aws_secret_access_key"] = s3_config.aws_secret_access_key
-            if s3_config.aws_session_token:
-                request_params["aws_session_token"] = s3_config.aws_session_token
-        elif s3_config.aws_profile_name:
-            request_params["aws_profile_name"] = s3_config.aws_profile_name
-
-        if s3_config.aws_region:
-            request_params["aws_region_name"] = s3_config.aws_region
-
-        # Add optional parameters
-        if s3_config.aws_bedrock_runtime_endpoint:
-            request_params["aws_bedrock_runtime_endpoint"] = s3_config.aws_bedrock_runtime_endpoint
-
-        return request_params
-
-    @observe(as_type="generation")
-    @sleep_and_retry_async()
-    @rate_limit_async
-    async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
-    ) -> BaseModel:
-        """Generate structured output from AWS Bedrock API."""
-
-        try:
-            request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
-            return await self.aclient.chat.completions.create(**request_params)
-
-        except (
-            ContentPolicyViolationError,
-            InstructorRetryException,
-        ) as error:
-            if (
-                isinstance(error, InstructorRetryException)
-                and "content management policy" not in str(error).lower()
-            ):
-                raise error
-
-            raise ContentPolicyFilterError(
-                f"The provided input contains content that is not aligned with our content policy: {text_input}"
-            )
-
-    @observe
-    @sleep_and_retry_sync()
-    @rate_limit_sync
-    def create_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
-    ) -> BaseModel:
-        """Generate structured output from AWS Bedrock API (synchronous)."""
-
-        request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
-        return self.client.chat.completions.create(**request_params)
-
-    def show_prompt(self, text_input: str, system_prompt: str) -> str:
-        """Format and display the prompt for a user query."""
-        if not text_input:
-            text_input = "No user input provided."
-        if not system_prompt:
-            raise MissingSystemPromptPathError()
-        system_prompt = LLMGateway.read_query_prompt(system_prompt)
-
-        formatted_prompt = (
-            f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
-            if system_prompt
-            else None
-        )
-        return formatted_prompt
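
The core of the removed adapter is the credential precedence in _create_bedrock_request: an explicit API key wins, then static AWS keys (with an optional session token), then a named profile, while the region applies regardless of auth mode. A reduced, self-contained sketch of that fallback chain, using plain keyword arguments in place of the s3_config object:

from typing import Optional


def bedrock_auth_params(
    api_key: Optional[str] = None,
    aws_access_key_id: Optional[str] = None,
    aws_secret_access_key: Optional[str] = None,
    aws_session_token: Optional[str] = None,
    aws_profile_name: Optional[str] = None,
    aws_region: Optional[str] = None,
) -> dict:
    """Mirror the adapter's precedence: API key > static keys > profile."""
    params: dict = {}
    if api_key:
        params["api_key"] = api_key
    elif aws_access_key_id and aws_secret_access_key:
        params["aws_access_key_id"] = aws_access_key_id
        params["aws_secret_access_key"] = aws_secret_access_key
        if aws_session_token:
            params["aws_session_token"] = aws_session_token
    elif aws_profile_name:
        params["aws_profile_name"] = aws_profile_name
    if aws_region:
        params["aws_region_name"] = aws_region  # region applies to every auth mode
    return params


# When both an API key and a profile are supplied, only the API key is forwarded.
assert bedrock_auth_params(api_key="k", aws_profile_name="dev") == {"api_key": "k"}
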
@@ -1,53 +0,0 @@
-from typing import Any
-
-from cognee import memify
-from cognee.context_global_variables import (
-    set_database_global_context_variables,
-)
-from cognee.exceptions import CogneeValidationError
-from cognee.modules.data.methods import get_authorized_existing_datasets
-from cognee.shared.logging_utils import get_logger
-from cognee.modules.pipelines.tasks.task import Task
-from cognee.modules.users.models import User
-from cognee.tasks.memify.get_triplet_datapoints import get_triplet_datapoints
-from cognee.tasks.storage import index_data_points
-
-logger = get_logger("create_triplet_embeddings")
-
-
-async def create_triplet_embeddings(
-    user: User,
-    dataset: str = "main_dataset",
-    run_in_background: bool = False,
-    triplets_batch_size: int = 100,
-) -> dict[str, Any]:
-    dataset_to_write = await get_authorized_existing_datasets(
-        user=user, datasets=[dataset], permission_type="write"
-    )
-
-    if not dataset_to_write:
-        raise CogneeValidationError(
-            message=f"User does not have write access to dataset: {dataset}",
-            log=False,
-        )
-
-    await set_database_global_context_variables(
-        dataset_to_write[0].id, dataset_to_write[0].owner_id
-    )
-
-    extraction_tasks = [Task(get_triplet_datapoints, triplets_batch_size=triplets_batch_size)]
-
-    enrichment_tasks = [
-        Task(index_data_points, task_config={"batch_size": triplets_batch_size}),
-    ]
-
-    result = await memify(
-        extraction_tasks=extraction_tasks,
-        enrichment_tasks=enrichment_tasks,
-        dataset=dataset_to_write[0].id,
-        data=[{}],
-        user=user,
-        run_in_background=run_in_background,
-    )
-
-    return result
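
create_triplet_embeddings was a thin two-stage memify pipeline: one extraction task that batches existing graph triplets into Triplet data points, and one enrichment task that indexes (embeds) them. In 0.5.0 it could be driven roughly as follows; the get_default_user lookup is illustrative, and any User with write access to the dataset works:

import asyncio

from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
from cognee.modules.users.methods import get_default_user  # illustrative user lookup


async def main():
    user = await get_default_user()
    # Embed triplets from "main_dataset" in batches of 100 (the defaults).
    await create_triplet_embeddings(user=user, dataset="main_dataset")


asyncio.run(main())
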
@@ -1,9 +0,0 @@
-from cognee.infrastructure.engine import DataPoint
-
-
-class Triplet(DataPoint):
-    text: str
-    from_node_id: str
-    to_node_id: str
-
-    metadata: dict = {"index_fields": ["text"]}
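
The metadata dict marks text as the only index field, so index_data_points embeds the rendered triplet string while the node ids travel as plain payload. A hypothetical instantiation (field values are invented):

# Assumes the Triplet class defined above; values are illustrative.
triplet = Triplet(
    text="Berlin -- is_capital_of --> Germany",  # the only embedded field
    from_node_id="node-berlin",                  # stored, never embedded
    to_node_id="node-germany",
)
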
@@ -1,10 +0,0 @@
-from typing import Type
-
-from .base_retriever import BaseRetriever
-from .registered_community_retrievers import registered_community_retrievers
-from ..search.types import SearchType
-
-
-def use_retriever(search_type: SearchType, retriever: Type[BaseRetriever]):
-    """Register a retriever class for a given search type."""
-    registered_community_retrievers[search_type] = retriever
@@ -1 +0,0 @@
-registered_community_retrievers = {}
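
Together, these last two modules formed a small plugin hook: a module-level dict plus use_retriever let community code bind a custom retriever class to a SearchType before searching. Usage looked roughly like the sketch below; the retriever subclass and the chosen SearchType member are illustrative:

from cognee.modules.retrieval.base_retriever import BaseRetriever
from cognee.modules.retrieval.register_retriever import use_retriever
from cognee.modules.search.types import SearchType


class MyCommunityRetriever(BaseRetriever):  # illustrative subclass
    async def get_context(self, query: str):
        return []

    async def get_completion(self, query: str, context=None):
        return []


# Later searches with this SearchType dispatch to the registered class.
use_retriever(SearchType.GRAPH_COMPLETION, MyCommunityRetriever)
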