cognee 0.2.4__py3-none-any.whl → 0.3.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
- cognee/api/v1/cognify/cognify.py +50 -3
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
- cognee/api/v1/memify/__init__.py +0 -0
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +11 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/llm/LLMGateway.py +18 -0
- cognee/infrastructure/llm/config.py +4 -2
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +1 -1
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
- cognee/modules/pipelines/operations/pipeline.py +1 -0
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
- cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
- cognee/modules/retrieval/graph_completion_retriever.py +32 -65
- cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +219 -139
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +2 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/logging_utils.py +1 -1
- cognee/tasks/codingagents/__init__.py +0 -0
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/test_kuzu.py +4 -4
- cognee/tests/test_neo4j.py +4 -4
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +18 -24
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/METADATA +8 -6
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/RECORD +162 -89
- cognee/tests/unit/modules/search/search_methods_test.py +0 -225
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/users/routers/get_auth_router.py CHANGED

@@ -1,7 +1,19 @@
+from fastapi import Depends
+
 from cognee.modules.users.get_fastapi_users import get_fastapi_users
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
 from cognee.modules.users.authentication.get_client_auth_backend import get_client_auth_backend
 
 
 def get_auth_router():
     auth_backend = get_client_auth_backend()
-    return get_fastapi_users().get_auth_router(auth_backend)
+    auth_router = get_fastapi_users().get_auth_router(auth_backend)
+
+    @auth_router.get("/me")
+    async def get_me(user: User = Depends(get_authenticated_user)):
+        return {
+            "email": user.email,
+        }
+
+    return auth_router
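The new `/me` route returns the authenticated user's email. A minimal client-side sketch of exercising it, assuming the router is mounted under an `/api/v1/auth` prefix and bearer-token auth; the base URL, prefix, and token here are illustrative, not taken from the diff:

```python
import httpx

# Hypothetical mount point and credentials; the real prefix depends on
# how the FastAPI app in cognee/api/client.py includes this router.
response = httpx.get(
    "http://localhost:8000/api/v1/auth/me",
    headers={"Authorization": "Bearer <access-token>"},
)
print(response.json())  # e.g. {"email": "user@example.com"}
```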
cognee/base_config.py CHANGED

@@ -1,15 +1,24 @@
 import os
 from typing import Optional
 from functools import lru_cache
-from cognee.root_dir import get_absolute_path
+from cognee.root_dir import get_absolute_path, ensure_absolute_path
 from cognee.modules.observability.observers import Observer
 from pydantic_settings import BaseSettings, SettingsConfigDict
+import pydantic
 
 
 class BaseConfig(BaseSettings):
     data_root_directory: str = get_absolute_path(".data_storage")
     system_root_directory: str = get_absolute_path(".cognee_system")
     monitoring_tool: object = Observer.LANGFUSE
+
+    @pydantic.model_validator(mode="after")
+    def validate_paths(self):
+        # Require absolute paths for root directories
+        self.data_root_directory = ensure_absolute_path(self.data_root_directory)
+        self.system_root_directory = ensure_absolute_path(self.system_root_directory)
+        return self
+
     langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
     langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY")
     langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST")
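The validator normalizes both root directories after model construction. A small sketch of the intended effect, assuming `ensure_absolute_path` (added to cognee/root_dir.py in this release, implementation not shown in this hunk) either resolves a relative path to an absolute one or rejects it:

```python
from cognee.base_config import BaseConfig

# Hypothetical override; an absolute path passes through unchanged.
config = BaseConfig(data_root_directory="/var/lib/cognee/data")
print(config.data_root_directory)  # absolute path after validation

# A relative override would be handled by ensure_absolute_path,
# whose exact behavior (resolve vs. raise) lives in cognee/root_dir.py.
```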
cognee/infrastructure/databases/graph/config.py CHANGED

@@ -6,6 +6,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 import pydantic
 from pydantic import Field
 from cognee.base_config import get_base_config
+from cognee.root_dir import ensure_absolute_path
 from cognee.shared.data_models import KnowledgeGraph
 
 
@@ -51,15 +52,20 @@ class GraphConfig(BaseSettings):
     @pydantic.model_validator(mode="after")
     def fill_derived(cls, values):
         provider = values.graph_database_provider.lower()
+        base_config = get_base_config()
 
         # Set default filename if no filename is provided
         if not values.graph_filename:
             values.graph_filename = f"cognee_graph_{provider}"
 
-        #
-        if
-
-
+        # Handle graph file path
+        if values.graph_file_path:
+            # Check if absolute path is provided
+            values.graph_file_path = ensure_absolute_path(
+                os.path.join(values.graph_file_path, values.graph_filename)
+            )
+        else:
+            # Default path
             databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
             values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename)
 
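Under the new logic an explicit `graph_file_path` is treated as a directory, joined with the derived filename, and normalized, while the default falls back under the system root. A rough sketch of the two outcomes; all paths below are illustrative:

```python
import os

# Case 1: user supplies a directory; the derived filename is appended,
# then ensure_absolute_path(...) normalizes the result.
graph_file_path = "graphs"            # hypothetical user setting
graph_filename = "cognee_graph_kuzu"  # derived from the provider
joined = os.path.join(graph_file_path, graph_filename)
# -> "graphs/cognee_graph_kuzu", then made absolute

# Case 2: nothing supplied; default lives under the system root.
system_root = "/home/user/.cognee_system"  # illustrative
default = os.path.join(system_root, "databases", graph_filename)
# -> "/home/user/.cognee_system/databases/cognee_graph_kuzu"
```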
cognee/infrastructure/databases/graph/kuzu/adapter.py CHANGED

@@ -21,6 +21,8 @@ from cognee.infrastructure.databases.graph.graph_db_interface import (
 )
 from cognee.infrastructure.engine import DataPoint
 from cognee.modules.storage.utils import JSONEncoder
+from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
+from cognee.tasks.temporal_graph.models import Timestamp
 
 logger = get_logger()
 
@@ -106,6 +108,18 @@ class KuzuAdapter(GraphDBInterface):
 
         self.db.init_database()
         self.connection = Connection(self.db)
+
+        try:
+            self.connection.execute("INSTALL JSON;")
+        except Exception as e:
+            logger.info(f"JSON extension already installed or not needed: {e}")
+
+        try:
+            self.connection.execute("LOAD EXTENSION JSON;")
+            logger.info("Loaded JSON extension")
+        except Exception as e:
+            logger.info(f"JSON extension already loaded or unavailable: {e}")
+
         # Create node table with essential fields and timestamp
         self.connection.execute("""
             CREATE NODE TABLE IF NOT EXISTS Node(
@@ -1693,3 +1707,124 @@ class KuzuAdapter(GraphDBInterface):
             SET r.properties = $props
         """
         await self.query(update_query, {"node_id": node_id, "props": new_props})
+
+    async def collect_events(self, ids: List[str]) -> Any:
+        """
+        Collect all Event-type nodes reachable within 1..2 hops
+        from the given node IDs.
+
+        Args:
+            graph_engine: Object exposing an async .query(str) -> Any
+            ids: List of node IDs (strings)
+
+        Returns:
+            List of events
+        """
+
+        event_collection_cypher = """UNWIND [{quoted}] AS uid
+        MATCH (start {{id: uid}})
+        MATCH (start)-[*1..2]-(event)
+        WHERE event.type = 'Event'
+        WITH DISTINCT event
+        RETURN collect(event) AS events;
+        """
+
+        query = event_collection_cypher.format(quoted=ids)
+        result = await self.query(query)
+        events = []
+        for node in result[0][0]:
+            props = json.loads(node["properties"])
+
+            event = {
+                "id": node["id"],
+                "name": node["name"],
+                "description": props.get("description"),
+            }
+
+            if props.get("location"):
+                event["location"] = props["location"]
+
+            events.append(event)
+
+        return [{"events": events}]
+
+    async def collect_time_ids(
+        self,
+        time_from: Optional[Timestamp] = None,
+        time_to: Optional[Timestamp] = None,
+    ) -> str:
+        """
+        Collect IDs of Timestamp nodes between time_from and time_to.
+
+        Args:
+            graph_engine: Object exposing an async .query(query, params) -> list[dict]
+            time_from: Lower bound int (inclusive), optional
+            time_to: Upper bound int (inclusive), optional
+
+        Returns:
+            A string of quoted IDs: "'id1', 'id2', 'id3'"
+            (ready for use in a Cypher UNWIND clause).
+        """
+
+        ids: List[str] = []
+
+        if time_from and time_to:
+            time_from = date_to_int(time_from)
+            time_to = date_to_int(time_to)
+
+            cypher = f"""
+            MATCH (n:Node)
+            WHERE n.type = 'Timestamp'
+            // Extract time_at from the JSON string and cast to INT64
+            WITH n, json_extract(n.properties, '$.time_at') AS t_str
+            WITH n,
+                 CASE
+                     WHEN t_str IS NULL OR t_str = '' THEN NULL
+                     ELSE CAST(t_str AS INT64)
+                 END AS t
+            WHERE t >= {time_from}
+              AND t <= {time_to}
+            RETURN n.id as id
+            """
+
+        elif time_from:
+            time_from = date_to_int(time_from)
+
+            cypher = f"""
+            MATCH (n:Node)
+            WHERE n.type = 'Timestamp'
+            // Extract time_at from the JSON string and cast to INT64
+            WITH n, json_extract(n.properties, '$.time_at') AS t_str
+            WITH n,
+                 CASE
+                     WHEN t_str IS NULL OR t_str = '' THEN NULL
+                     ELSE CAST(t_str AS INT64)
+                 END AS t
+            WHERE t >= {time_from}
+            RETURN n.id as id
+            """
+
+        elif time_to:
+            time_to = date_to_int(time_to)
+
+            cypher = f"""
+            MATCH (n:Node)
+            WHERE n.type = 'Timestamp'
+            // Extract time_at from the JSON string and cast to INT64
+            WITH n, json_extract(n.properties, '$.time_at') AS t_str
+            WITH n,
+                 CASE
+                     WHEN t_str IS NULL OR t_str = '' THEN NULL
+                     ELSE CAST(t_str AS INT64)
+                 END AS t
+            WHERE t <= {time_to}
+            RETURN n.id as id
+            """
+
+        else:
+            return ids
+
+        time_nodes = await self.query(cypher)
+        time_ids_list = [item[0] for item in time_nodes]
+
+        return ", ".join(f"'{uid}'" for uid in time_ids_list)
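These two methods back the new temporal retrieval path (see `cognee/modules/retrieval/temporal_retriever.py` in the file list): Timestamp nodes inside a window are collapsed into a quoted-ID string, which then seeds the event lookup. A rough sketch of the intended call sequence, not taken from the diff; `adapter` is an initialized Kuzu or Neo4j adapter and the bounds are `Timestamp` values extracted from the query:

```python
# Hypothetical flow chaining the two new adapter methods.
async def events_in_window(adapter, time_from=None, time_to=None):
    # Returns "'id1', 'id2', ..." ready for the UNWIND in collect_events,
    # or an empty list when no bound was given.
    time_ids = await adapter.collect_time_ids(time_from=time_from, time_to=time_to)
    if not time_ids:
        return []
    # Event nodes within 1..2 hops of the matched Timestamp nodes.
    return await adapter.collect_events(time_ids)
```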
cognee/infrastructure/databases/graph/neo4j_driver/adapter.py CHANGED

@@ -11,6 +11,8 @@ from contextlib import asynccontextmanager
 from typing import Optional, Any, List, Dict, Type, Tuple
 
 from cognee.infrastructure.engine import DataPoint
+from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
+from cognee.tasks.temporal_graph.models import Timestamp
 from cognee.shared.logging_utils import get_logger, ERROR
 from cognee.infrastructure.databases.graph.graph_db_interface import (
     GraphDBInterface,
@@ -1371,3 +1373,90 @@ class Neo4jAdapter(GraphDBInterface):
             query,
             params={"weight": float(weight), "node_ids": list(node_ids)},
         )
+
+    async def collect_events(self, ids: List[str]) -> Any:
+        """
+        Collect all Event-type nodes reachable within 1..2 hops
+        from the given node IDs.
+
+        Args:
+            graph_engine: Object exposing an async .query(str) -> Any
+            ids: List of node IDs (strings)
+
+        Returns:
+            List of events
+        """
+
+        event_collection_cypher = """UNWIND [{quoted}] AS uid
+        MATCH (start {{id: uid}})
+        MATCH (start)-[*1..2]-(event)
+        WHERE event.type = 'Event'
+        WITH DISTINCT event
+        RETURN collect(event) AS events;
+        """
+
+        query = event_collection_cypher.format(quoted=ids)
+        return await self.query(query)
+
+    async def collect_time_ids(
+        self,
+        time_from: Optional[Timestamp] = None,
+        time_to: Optional[Timestamp] = None,
+    ) -> str:
+        """
+        Collect IDs of Timestamp nodes between time_from and time_to.
+
+        Args:
+            graph_engine: Object exposing an async .query(query, params) -> list[dict]
+            time_from: Lower bound int (inclusive), optional
+            time_to: Upper bound int (inclusive), optional
+
+        Returns:
+            A string of quoted IDs: "'id1', 'id2', 'id3'"
+            (ready for use in a Cypher UNWIND clause).
+        """
+
+        ids: List[str] = []
+
+        if time_from and time_to:
+            time_from = date_to_int(time_from)
+            time_to = date_to_int(time_to)
+
+            cypher = """
+            MATCH (n)
+            WHERE n.type = 'Timestamp'
+              AND n.time_at >= $time_from
+              AND n.time_at <= $time_to
+            RETURN n.id AS id
+            """
+            params = {"time_from": time_from, "time_to": time_to}
+
+        elif time_from:
+            time_from = date_to_int(time_from)
+
+            cypher = """
+            MATCH (n)
+            WHERE n.type = 'Timestamp'
+              AND n.time_at >= $time_from
+            RETURN n.id AS id
+            """
+            params = {"time_from": time_from}
+
+        elif time_to:
+            time_to = date_to_int(time_to)
+
+            cypher = """
+            MATCH (n)
+            WHERE n.type = 'Timestamp'
+              AND n.time_at <= $time_to
+            RETURN n.id AS id
+            """
+            params = {"time_to": time_to}
+
+        else:
+            return ids
+
+        time_nodes = await self.query(cypher, params)
+        time_ids_list = [item["id"] for item in time_nodes if "id" in item]
+
+        return ", ".join(f"'{uid}'" for uid in time_ids_list)
cognee/infrastructure/databases/relational/__init__.py CHANGED

@@ -1,6 +1,8 @@
 from .ModelBase import Base
 from .config import get_relational_config
 from .config import get_migration_config
+from .get_async_session import get_async_session
+from .with_async_session import with_async_session
 from .create_db_and_tables import create_db_and_tables
 from .get_relational_engine import get_relational_engine
 from .get_migration_relational_engine import get_migration_relational_engine
cognee/infrastructure/databases/relational/get_async_session.py ADDED

@@ -0,0 +1,15 @@
+from typing import AsyncGenerator
+from contextlib import asynccontextmanager
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from .get_relational_engine import get_relational_engine
+
+
+@asynccontextmanager
+async def get_async_session(auto_commit=False) -> AsyncGenerator[AsyncSession, None]:
+    db_engine = get_relational_engine()
+    async with db_engine.get_async_session() as session:
+        yield session
+
+        if auto_commit:
+            await session.commit()
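A minimal usage sketch of the new context manager; the query below is illustrative, and the import path relies on the re-export added to `cognee/infrastructure/databases/relational/__init__.py` above:

```python
from sqlalchemy import text

from cognee.infrastructure.databases.relational import get_async_session


async def ping_database():
    # With auto_commit=True the session is committed on clean exit.
    async with get_async_session(auto_commit=True) as session:
        result = await session.execute(text("SELECT 1"))  # illustrative query
        return result.scalar_one()
```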
cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py CHANGED

@@ -57,7 +57,12 @@ class SQLAlchemyAdapter:
             )
         else:
             self.engine = create_async_engine(
-                connection_string,
+                connection_string,
+                pool_size=5,
+                max_overflow=10,
+                pool_recycle=280,
+                pool_pre_ping=True,
+                pool_timeout=280,
             )
 
         self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)
cognee/infrastructure/databases/relational/with_async_session.py ADDED

@@ -0,0 +1,25 @@
+from typing import Any, Callable, Optional
+from sqlalchemy.ext.asyncio import AsyncSession
+from .get_async_session import get_async_session
+
+
+def get_session_from_args(args):
+    last_arg = args[-1]
+    if isinstance(last_arg, AsyncSession):
+        return last_arg
+    return None
+
+
+def with_async_session(func: Callable[..., Any]) -> Callable[..., Any]:
+    async def wrapper(*args, **kwargs):
+        session = kwargs.get("session") or get_session_from_args(args)  # type: Optional[AsyncSession]
+
+        if session is None:
+            async with get_async_session() as session:
+                result = await func(*args, **kwargs, session=session)
+                await session.commit()
+                return result
+        else:
+            return await func(*args, **kwargs)
+
+    return wrapper
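A small sketch of the decorator's two call paths; the decorated function is hypothetical. Callers may pass an existing `AsyncSession`, or omit it and let the wrapper open one and commit on success:

```python
import asyncio

from sqlalchemy.ext.asyncio import AsyncSession

from cognee.infrastructure.databases.relational import with_async_session


@with_async_session
async def touch_user(user_id: str, session: AsyncSession = None):
    # Hypothetical body; the wrapper guarantees `session` is set.
    print(f"using {session} for {user_id}")


async def main():
    # No session passed: the wrapper opens one and commits afterwards.
    await touch_user("user-123")
    # Passing session=existing_session instead would reuse it and skip
    # the commit, leaving transaction control to the caller.

asyncio.run(main())
```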
cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py CHANGED

@@ -538,7 +538,7 @@ class ChromaDBAdapter(VectorDBInterface):
         Returns True upon successful deletion of all collections.
         """
         client = await self.get_connection()
-        collections = await
+        collections = await client.list_collections()
         for collection_name in collections:
             await client.delete_collection(collection_name)
         return True
cognee/infrastructure/databases/vector/config.py CHANGED

@@ -1,9 +1,11 @@
 import os
 import pydantic
+from pathlib import Path
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 from cognee.base_config import get_base_config
+from cognee.root_dir import ensure_absolute_path
 
 
 class VectorConfig(BaseSettings):
@@ -11,11 +13,9 @@ class VectorConfig(BaseSettings):
     Manage the configuration settings for the vector database.
 
     Public methods:
-
     - to_dict: Convert the configuration to a dictionary.
 
     Instance variables:
-
     - vector_db_url: The URL of the vector database.
     - vector_db_port: The port for the vector database.
     - vector_db_key: The key for accessing the vector database.
@@ -30,10 +30,17 @@ class VectorConfig(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
     @pydantic.model_validator(mode="after")
-    def
-
-
-
+    def validate_paths(cls, values):
+        base_config = get_base_config()
+
+        # If vector_db_url is provided and is not a path skip checking if path is absolute (as it can also be a url)
+        if values.vector_db_url and Path(values.vector_db_url).exists():
+            # Relative path to absolute
+            values.vector_db_url = ensure_absolute_path(
+                values.vector_db_url,
+            )
+        else:
+            # Default path
             databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
             values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
 
cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py CHANGED

@@ -4,7 +4,7 @@ from fastembed import TextEmbedding
 import litellm
 import os
 from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
-from cognee.infrastructure.databases.exceptions
+from cognee.infrastructure.databases.exceptions import EmbeddingException
 from cognee.infrastructure.llm.tokenizer.TikToken import (
     TikTokenTokenizer,
 )
cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py CHANGED

@@ -250,9 +250,7 @@ def embedding_rate_limit_sync(func):
             logger.warning(error_msg)
 
             # Create a custom embedding rate limit exception
-            from cognee.infrastructure.databases.exceptions
-                EmbeddingException,
-            )
+            from cognee.infrastructure.databases.exceptions import EmbeddingException
 
             raise EmbeddingException(error_msg)
 
@@ -307,9 +305,7 @@ def embedding_rate_limit_async(func):
             logger.warning(error_msg)
 
             # Create a custom embedding rate limit exception
-            from cognee.infrastructure.databases.exceptions
-                EmbeddingException,
-            )
+            from cognee.infrastructure.databases.exceptions import EmbeddingException
 
             raise EmbeddingException(error_msg)
 
cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py CHANGED

@@ -33,6 +33,7 @@ def get_embedding_engine() -> EmbeddingEngine:
         config.embedding_api_version,
         config.huggingface_tokenizer,
         llm_config.llm_api_key,
+        llm_config.llm_provider,
     )
 
 
@@ -47,6 +48,7 @@ def create_embedding_engine(
     embedding_api_version,
     huggingface_tokenizer,
     llm_api_key,
+    llm_provider,
 ):
     """
     Create and return an embedding engine based on the specified provider.
@@ -99,7 +101,8 @@ def create_embedding_engine(
 
     return LiteLLMEmbeddingEngine(
         provider=embedding_provider,
-        api_key=embedding_api_key
+        api_key=embedding_api_key
+        or (embedding_api_key if llm_provider == "custom" else llm_api_key),
         endpoint=embedding_endpoint,
         api_version=embedding_api_version,
         model=embedding_model,
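The API-key fallback now depends on the LLM provider: with a `custom` provider the embedding engine no longer borrows `llm_api_key`. A condensed restatement of the expression in the hunk above, written as a hypothetical helper for readability:

```python
def resolve_embedding_api_key(embedding_api_key, llm_api_key, llm_provider):
    # An explicit embedding key always wins.
    if embedding_api_key:
        return embedding_api_key
    # For a "custom" LLM provider, do not borrow the LLM key; this
    # evaluates to the still-unset embedding key, i.e. None.
    if llm_provider == "custom":
        return embedding_api_key
    # Otherwise fall back to the LLM API key.
    return llm_api_key
```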
cognee/infrastructure/files/storage/LocalFileStorage.py CHANGED

@@ -189,6 +189,15 @@ class LocalFileStorage(Storage):
 
         return os.path.isfile(os.path.join(parsed_storage_path, file_path))
 
+    def get_size(self, file_path: str) -> int:
+        parsed_storage_path = get_parsed_path(self.storage_path)
+
+        return (
+            os.path.getsize(os.path.join(parsed_storage_path, file_path))
+            if self.file_exists(file_path)
+            else 0
+        )
+
     def ensure_directory_exists(self, directory_path: str = ""):
         """
         Ensure that the specified directory exists, creating it if necessary.
cognee/infrastructure/files/storage/S3FileStorage.py CHANGED

@@ -146,6 +146,11 @@ class S3FileStorage(Storage):
             self.s3.isfile, os.path.join(self.storage_path.replace("s3://", ""), file_path)
         )
 
+    async def get_size(self, file_path: str) -> int:
+        return await run_async(
+            self.s3.size, os.path.join(self.storage_path.replace("s3://", ""), file_path)
+        )
+
     async def ensure_directory_exists(self, directory_path: str = ""):
         """
         Ensure that the specified directory exists, creating it if necessary.
cognee/infrastructure/files/storage/StorageManager.py CHANGED

@@ -46,6 +46,12 @@ class StorageManager:
         else:
             return self.storage.is_file(file_path)
 
+    async def get_size(self, file_path: str) -> int:
+        if inspect.iscoroutinefunction(self.storage.get_size):
+            return await self.storage.get_size(file_path)
+        else:
+            return self.storage.get_size(file_path)
+
     async def store(self, file_path: str, data: BinaryIO, overwrite: bool = False) -> str:
         """
         Store data at the specified file path.
@@ -84,7 +90,7 @@ class StorageManager:
         """
         # Check the actual storage type by class name to determine if open() is async or sync
 
-        if self.storage.__class__.__name__ == "S3FileStorage"
+        if self.storage.__class__.__name__ == "S3FileStorage":
             # S3FileStorage.open() is async
             async with self.storage.open(file_path, *args, **kwargs) as file:
                 yield file
cognee/infrastructure/files/storage/storage.py CHANGED

@@ -40,6 +40,22 @@ class Storage(Protocol):
         """
         pass
 
+    def get_size(self, file_path: str) -> int:
+        """
+        Get the size of a specified file in bytes.
+
+        Parameters:
+        -----------
+
+            - file_path (str): The path of the file to get the size of.
+
+        Returns:
+        --------
+
+            - int: The size of the file in bytes.
+        """
+        pass
+
     def store(self, file_path: str, data: Union[BinaryIO, str], overwrite: bool):
         """
         Store data at the specified file path.
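Note that `get_size` is synchronous on `LocalFileStorage` but asynchronous on `S3FileStorage`; `StorageManager.get_size` bridges the two with `inspect.iscoroutinefunction`, as the hunks above show. A minimal sketch of that pattern in isolation; the backend classes here are stand-ins, not the cognee implementations:

```python
import asyncio
import inspect


class SyncBackend:
    def get_size(self, path: str) -> int:
        return 42  # stand-in for os.path.getsize(...)


class AsyncBackend:
    async def get_size(self, path: str) -> int:
        return 42  # stand-in for an awaited S3 call


async def get_size(storage, path: str) -> int:
    # Await only when the backend method is a coroutine function.
    if inspect.iscoroutinefunction(storage.get_size):
        return await storage.get_size(path)
    return storage.get_size(path)


async def main():
    print(await get_size(SyncBackend(), "a.txt"))   # 42
    print(await get_size(AsyncBackend(), "a.txt"))  # 42

asyncio.run(main())
```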
cognee/infrastructure/llm/LLMGateway.py CHANGED

@@ -144,3 +144,21 @@ class LLMGateway:
         )
 
         return extract_summary(content=content, response_model=response_model)
+
+    @staticmethod
+    def extract_event_graph(content: str, response_model: Type[BaseModel]) -> Coroutine:
+        # TODO: Add BAML version of category and extraction and update function (consulted with Igor)
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
+            extract_event_graph,
+        )
+
+        return extract_event_graph(content=content, response_model=response_model)
+
+    @staticmethod
+    def extract_event_entities(content: str, response_model: Type[BaseModel]) -> Coroutine:
+        # TODO: Add BAML version of category and extraction and update function (consulted with Igor)
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
+            extract_event_entities,
+        )
+
+        return extract_event_entities(content=content, response_model=response_model)
cognee/infrastructure/llm/config.py CHANGED

@@ -35,7 +35,7 @@ class LLMConfig(BaseSettings):
 
     structured_output_framework: str = "instructor"
     llm_provider: str = "openai"
-    llm_model: str = "gpt-
+    llm_model: str = "openai/gpt-4o-mini"
     llm_endpoint: str = ""
     llm_api_key: Optional[str] = None
     llm_api_version: Optional[str] = None
@@ -44,7 +44,7 @@ class LLMConfig(BaseSettings):
     llm_max_completion_tokens: int = 16384
 
     baml_llm_provider: str = "openai"
-    baml_llm_model: str = "gpt-
+    baml_llm_model: str = "gpt-4o-mini"
     baml_llm_endpoint: str = ""
     baml_llm_api_key: Optional[str] = None
     baml_llm_temperature: float = 0.0
@@ -52,6 +52,8 @@ class LLMConfig(BaseSettings):
 
     transcription_model: str = "whisper-1"
     graph_prompt_path: str = "generate_graph_prompt.txt"
+    temporal_graph_prompt_path: str = "generate_event_graph_prompt.txt"
+    event_entity_prompt_path: str = "generate_event_entity_prompt.txt"
     llm_rate_limit_enabled: bool = False
     llm_rate_limit_requests: int = 60
     llm_rate_limit_interval: int = 60  # in seconds (default is 60 requests per minute)
cognee/infrastructure/llm/prompts/extract_query_time.txt ADDED

@@ -0,0 +1,15 @@
+For the purposes of identifying timestamps in a query, you are tasked with extracting relevant timestamps from the query.
+## Timestamp requirements
+  - If the query contains interval extrack both starts_at and ends_at properties
+  - If the query contains an instantaneous timestamp, starts_at and ends_at should be the same
+  - If the query its open-ended (before 2009 or after 2009), the corresponding non defined end of the time should be none
+    -For example: "before 2009" -- starts_at: None, ends_at: 2009 or "after 2009" -- starts_at: 2009, ends_at: None
+  - Put always the data that comes first in time as starts_at and the timestamps that comes second in time as ends_at
+  - If starts_at or ends_at cannot be extracted both of them has to be None
+## Output Format
+Your reply should be a JSON: list of dictionaries with the following structure:
+```python
+class QueryInterval(BaseModel):
+    starts_at: Optional[Timestamp] = None
+    ends_at: Optional[Timestamp] = None
+```