cognee 0.2.2.dev1__py3-none-any.whl → 0.2.3.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/add/add.py +0 -5
- cognee/api/v1/cognify/cognify.py +0 -8
- cognee/api/v1/config/config.py +5 -13
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -1
- cognee/api/v1/delete/delete.py +1 -1
- cognee/api/v1/exceptions/__init__.py +13 -0
- cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
- cognee/api/v1/search/search.py +0 -7
- cognee/exceptions/__init__.py +5 -5
- cognee/exceptions/exceptions.py +37 -17
- cognee/infrastructure/data/exceptions/__init__.py +7 -0
- cognee/infrastructure/data/exceptions/exceptions.py +22 -0
- cognee/infrastructure/data/utils/extract_keywords.py +3 -3
- cognee/infrastructure/databases/exceptions/__init__.py +3 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +1 -1
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
- cognee/infrastructure/llm/exceptions.py +30 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +10 -7
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +6 -4
- cognee/infrastructure/loaders/LoaderInterface.py +1 -1
- cognee/modules/data/exceptions/exceptions.py +18 -5
- cognee/modules/data/methods/delete_data.py +2 -4
- cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
- cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
- cognee/modules/graph/exceptions/__init__.py +2 -0
- cognee/modules/graph/exceptions/exceptions.py +25 -3
- cognee/modules/ingestion/exceptions/exceptions.py +2 -2
- cognee/modules/ontology/exceptions/exceptions.py +4 -4
- cognee/modules/pipelines/exceptions/exceptions.py +2 -2
- cognee/modules/retrieval/exceptions/exceptions.py +12 -6
- cognee/modules/search/exceptions/__init__.py +7 -0
- cognee/modules/search/exceptions/exceptions.py +15 -0
- cognee/modules/search/methods/search.py +2 -3
- cognee/modules/users/exceptions/exceptions.py +6 -6
- cognee/shared/exceptions/exceptions.py +2 -2
- cognee/tasks/completion/exceptions/exceptions.py +3 -3
- cognee/tasks/documents/classify_documents.py +4 -0
- cognee/tasks/documents/exceptions/__init__.py +11 -0
- cognee/tasks/documents/exceptions/exceptions.py +36 -0
- cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
- cognee/tasks/graph/exceptions/__init__.py +12 -0
- cognee/tasks/graph/exceptions/exceptions.py +41 -0
- cognee/tasks/graph/extract_graph_from_data.py +28 -0
- cognee/tasks/ingestion/exceptions/__init__.py +8 -0
- cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
- cognee/tasks/ingestion/resolve_data_directories.py +5 -0
- cognee/tasks/storage/add_data_points.py +8 -0
- cognee/tasks/storage/exceptions/__init__.py +9 -0
- cognee/tasks/storage/exceptions/exceptions.py +13 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/summarization/exceptions/__init__.py +9 -0
- cognee/tasks/summarization/exceptions/exceptions.py +14 -0
- cognee/tasks/summarization/summarize_text.py +8 -1
- cognee/tests/test_delete_by_id.py +1 -1
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
- cognee/tests/unit/modules/search/search_methods_test.py +2 -2
- {cognee-0.2.2.dev1.dist-info → cognee-0.2.3.dev1.dist-info}/METADATA +4 -4
- {cognee-0.2.2.dev1.dist-info → cognee-0.2.3.dev1.dist-info}/RECORD +88 -57
- distributed/Dockerfile +34 -0
- distributed/__init__.py +0 -0
- distributed/app.py +4 -0
- distributed/entrypoint.py +71 -0
- distributed/entrypoint.sh +5 -0
- distributed/modal_image.py +11 -0
- distributed/poetry.lock +12238 -0
- distributed/pyproject.toml +186 -0
- distributed/queues.py +5 -0
- distributed/tasks/__init__.py +0 -0
- distributed/tasks/queued_add_data_points.py +13 -0
- distributed/tasks/queued_add_edges.py +13 -0
- distributed/tasks/queued_add_nodes.py +13 -0
- distributed/test.py +28 -0
- distributed/utils.py +19 -0
- distributed/workers/data_point_saving_worker.py +93 -0
- distributed/workers/graph_saving_worker.py +104 -0
- cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
- {cognee-0.2.2.dev1.dist-info → cognee-0.2.3.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.2.dev1.dist-info → cognee-0.2.3.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.2.dev1.dist-info → cognee-0.2.3.dev1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "cognee"
|
|
3
|
+
version = "0.2.2.dev0"
|
|
4
|
+
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Vasilije Markovic" },
|
|
7
|
+
{ name = "Boris Arzentar" },
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.10,<=3.13"
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"License :: OSI Approved :: Apache Software License",
|
|
16
|
+
"Topic :: Software Development :: Libraries",
|
|
17
|
+
"Operating System :: MacOS :: MacOS X",
|
|
18
|
+
"Operating System :: POSIX :: Linux",
|
|
19
|
+
"Operating System :: Microsoft :: Windows",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"openai>=1.80.1,<2",
|
|
23
|
+
"python-dotenv>=1.0.1,<2.0.0",
|
|
24
|
+
"pydantic>=2.11.7,<3.0.0",
|
|
25
|
+
"pydantic-settings>=2.10.1,<3",
|
|
26
|
+
"typing_extensions>=4.12.2,<5.0.0",
|
|
27
|
+
"nltk>=3.9.1,<4.0.0",
|
|
28
|
+
"numpy>=1.26.4, <=4.0.0",
|
|
29
|
+
"pandas>=2.2.2,<3.0.0",
|
|
30
|
+
# Note: New s3fs and boto3 versions don't work well together
|
|
31
|
+
# Always use compatible fixed versions of these two dependencies
|
|
32
|
+
"s3fs[boto3]==2025.3.2",
|
|
33
|
+
"sqlalchemy>=2.0.39,<3.0.0",
|
|
34
|
+
"aiosqlite>=0.20.0,<1.0.0",
|
|
35
|
+
"tiktoken>=0.8.0,<1.0.0",
|
|
36
|
+
"litellm>=1.57.4, <1.71.0",
|
|
37
|
+
"instructor>=1.9.1,<2.0.0",
|
|
38
|
+
"langfuse>=2.32.0,<3",
|
|
39
|
+
"filetype>=1.2.0,<2.0.0",
|
|
40
|
+
"aiohttp>=3.11.14,<4.0.0",
|
|
41
|
+
"aiofiles>=23.2.1,<24.0.0",
|
|
42
|
+
"rdflib>=7.1.4,<7.2.0",
|
|
43
|
+
"pypdf>=4.1.0,<6.0.0",
|
|
44
|
+
"jinja2>=3.1.3,<4",
|
|
45
|
+
"matplotlib>=3.8.3,<4",
|
|
46
|
+
"networkx>=3.4.2,<4",
|
|
47
|
+
"lancedb>=0.24.0,<1.0.0",
|
|
48
|
+
"alembic>=1.13.3,<2",
|
|
49
|
+
"pre-commit>=4.0.1,<5",
|
|
50
|
+
"scikit-learn>=1.6.1,<2",
|
|
51
|
+
"limits>=4.4.1,<5",
|
|
52
|
+
"fastapi>=0.115.7,<1.0.0",
|
|
53
|
+
"python-multipart>=0.0.20,<1.0.0",
|
|
54
|
+
"fastapi-users[sqlalchemy]>=14.0.1,<15.0.0",
|
|
55
|
+
"dlt[sqlalchemy]>=1.9.0,<2",
|
|
56
|
+
"sentry-sdk[fastapi]>=2.9.0,<3",
|
|
57
|
+
"structlog>=25.2.0,<26",
|
|
58
|
+
"pympler>=1.1,<2.0.0",
|
|
59
|
+
"onnxruntime>=1.0.0,<2.0.0",
|
|
60
|
+
"pylance>=0.22.0,<1.0.0",
|
|
61
|
+
"kuzu (==0.11.0)"
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
[project.optional-dependencies]
|
|
65
|
+
api = [
|
|
66
|
+
"uvicorn>=0.34.0,<1.0.0",
|
|
67
|
+
"gunicorn>=20.1.0,<24",
|
|
68
|
+
"websockets>=15.0.1,<16.0.0"
|
|
69
|
+
]
|
|
70
|
+
distributed = [
|
|
71
|
+
"modal>=1.0.5,<2.0.0",
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
neo4j = ["neo4j>=5.28.0,<6"]
|
|
75
|
+
postgres = [
|
|
76
|
+
"psycopg2>=2.9.10,<3",
|
|
77
|
+
"pgvector>=0.3.5,<0.4",
|
|
78
|
+
"asyncpg>=0.30.0,<1.0.0",
|
|
79
|
+
]
|
|
80
|
+
postgres-binary = [
|
|
81
|
+
"psycopg2-binary>=2.9.10,<3.0.0",
|
|
82
|
+
"pgvector>=0.3.5,<0.4",
|
|
83
|
+
"asyncpg>=0.30.0,<1.0.0",
|
|
84
|
+
]
|
|
85
|
+
notebook = ["notebook>=7.1.0,<8"]
|
|
86
|
+
langchain = [
|
|
87
|
+
"langsmith>=0.2.3,<1.0.0",
|
|
88
|
+
"langchain_text_splitters>=0.3.2,<1.0.0",
|
|
89
|
+
]
|
|
90
|
+
llama-index = ["llama-index-core>=0.12.11,<0.13"]
|
|
91
|
+
gemini = ["google-generativeai>=0.8.4,<0.9"]
|
|
92
|
+
huggingface = ["transformers>=4.46.3,<5"]
|
|
93
|
+
ollama = ["transformers>=4.46.3,<5"]
|
|
94
|
+
mistral = ["mistral-common>=1.5.2,<2"]
|
|
95
|
+
anthropic = ["anthropic>=0.26.1,<0.27"]
|
|
96
|
+
deepeval = ["deepeval>=2.0.1,<3"]
|
|
97
|
+
posthog = ["posthog>=3.5.0,<4"]
|
|
98
|
+
falkordb = ["falkordb>=1.0.9,<2.0.0"]
|
|
99
|
+
groq = ["groq>=0.8.0,<1.0.0"]
|
|
100
|
+
chromadb = [
|
|
101
|
+
"chromadb>=0.3.0,<0.7",
|
|
102
|
+
"pypika==0.48.8",
|
|
103
|
+
]
|
|
104
|
+
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.18.1,<19"]
|
|
105
|
+
codegraph = [
|
|
106
|
+
"fastembed<=0.6.0 ; python_version < '3.13'",
|
|
107
|
+
"transformers>=4.46.3,<5",
|
|
108
|
+
"tree-sitter>=0.24.0,<0.25",
|
|
109
|
+
"tree-sitter-python>=0.23.6,<0.24",
|
|
110
|
+
]
|
|
111
|
+
evals = [
|
|
112
|
+
"plotly>=6.0.0,<7",
|
|
113
|
+
"gdown>=5.2.0,<6",
|
|
114
|
+
]
|
|
115
|
+
gui = [
|
|
116
|
+
"pyside6>=6.8.3,<7",
|
|
117
|
+
"qasync>=0.27.1,<0.28",
|
|
118
|
+
]
|
|
119
|
+
graphiti = ["graphiti-core>=0.7.0,<0.8"]
|
|
120
|
+
# Note: New s3fs and boto3 versions don't work well together
|
|
121
|
+
# Always use compatible fixed versions of these two dependencies
|
|
122
|
+
aws = ["s3fs[boto3]==2025.3.2"]
|
|
123
|
+
dev = [
|
|
124
|
+
"pytest>=7.4.0,<8",
|
|
125
|
+
"pytest-cov>=6.1.1,<7.0.0",
|
|
126
|
+
"pytest-asyncio>=0.21.1,<0.22",
|
|
127
|
+
"coverage>=7.3.2,<8",
|
|
128
|
+
"mypy>=1.7.1,<2",
|
|
129
|
+
"notebook>=7.1.0,<8",
|
|
130
|
+
"deptry>=0.20.0,<0.21",
|
|
131
|
+
"pylint>=3.0.3,<4",
|
|
132
|
+
"ruff>=0.9.2,<1.0.0",
|
|
133
|
+
"tweepy>=4.14.0,<5.0.0",
|
|
134
|
+
"gitpython>=3.1.43,<4",
|
|
135
|
+
"mkdocs-material>=9.5.42,<10",
|
|
136
|
+
"mkdocs-minify-plugin>=0.8.0,<0.9",
|
|
137
|
+
"mkdocstrings[python]>=0.26.2,<0.27",
|
|
138
|
+
]
|
|
139
|
+
debug = ["debugpy>=1.8.9,<2.0.0"]
|
|
140
|
+
|
|
141
|
+
[project.urls]
|
|
142
|
+
Homepage = "https://www.cognee.ai"
|
|
143
|
+
Repository = "https://github.com/topoteretes/cognee"
|
|
144
|
+
|
|
145
|
+
[build-system]
|
|
146
|
+
requires = ["hatchling"]
|
|
147
|
+
build-backend = "hatchling.build"
|
|
148
|
+
|
|
149
|
+
[tool.hatch.build]
|
|
150
|
+
exclude = [
|
|
151
|
+
"/bin",
|
|
152
|
+
"/dist",
|
|
153
|
+
"/.data",
|
|
154
|
+
"/.github",
|
|
155
|
+
"/alembic",
|
|
156
|
+
"/deployment",
|
|
157
|
+
"/cognee-mcp",
|
|
158
|
+
"/cognee-frontend",
|
|
159
|
+
"/examples",
|
|
160
|
+
"/helm",
|
|
161
|
+
"/licenses",
|
|
162
|
+
"/logs",
|
|
163
|
+
"/notebooks",
|
|
164
|
+
"/profiling",
|
|
165
|
+
"/tests",
|
|
166
|
+
"/tools",
|
|
167
|
+
]
|
|
168
|
+
|
|
169
|
+
[tool.hatch.build.targets.wheel]
|
|
170
|
+
packages = ["cognee", "distributed"]
|
|
171
|
+
|
|
172
|
+
[tool.ruff]
|
|
173
|
+
line-length = 100
|
|
174
|
+
exclude = [
|
|
175
|
+
"migrations/", # Ignore migrations directory
|
|
176
|
+
"notebooks/", # Ignore notebook files
|
|
177
|
+
"build/", # Ignore build directory
|
|
178
|
+
"cognee/pipelines.py",
|
|
179
|
+
"cognee/modules/users/models/Group.py",
|
|
180
|
+
"cognee/modules/users/models/ACL.py",
|
|
181
|
+
"cognee/modules/pipelines/models/Task.py",
|
|
182
|
+
"cognee/modules/data/models/Dataset.py"
|
|
183
|
+
]
|
|
184
|
+
|
|
185
|
+
[tool.ruff.lint]
|
|
186
|
+
ignore = ["F401"]
|
distributed/queues.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
async def queued_add_data_points(collection_name, data_points_batch):
    """Put a batch of data points on the distributed data-point queue.

    The batch is sent as one ``(collection_name, data_points_batch)`` queue item.
    When the put fails with ``GRPCError`` the batch is cut in half and each half
    is enqueued recursively.

    Args:
        collection_name: Collection name forwarded unchanged inside the queue item.
        data_points_batch: Sliceable, sized sequence of data points to enqueue.

    Raises:
        GRPCError: If a batch that cannot be split any further (zero or one
            item) still fails to enqueue.
    """
    from grpclib import GRPCError
    from ..queues import add_data_points_queue

    try:
        await add_data_points_queue.put.aio((collection_name, data_points_batch))
    except GRPCError:
        # A batch of 0 or 1 items cannot shrink: halving it would resend the very
        # same batch and recurse without end, so surface the error instead.
        if len(data_points_batch) <= 1:
            raise

        middle = len(data_points_batch) // 2
        await queued_add_data_points(collection_name, data_points_batch[:middle])
        await queued_add_data_points(collection_name, data_points_batch[middle:])
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
async def queued_add_edges(edge_batch):
    """Put a batch of edges on the distributed nodes-and-edges queue.

    The batch is sent as one ``([], edge_batch)`` queue item (empty node list,
    then the edges). When the put fails with ``GRPCError`` the batch is cut in
    half and each half is enqueued recursively.

    Args:
        edge_batch: Sliceable, sized sequence of edges to enqueue.

    Raises:
        GRPCError: If a batch that cannot be split any further (zero or one
            item) still fails to enqueue.
    """
    from grpclib import GRPCError
    from ..queues import add_nodes_and_edges_queue

    try:
        await add_nodes_and_edges_queue.put.aio(([], edge_batch))
    except GRPCError:
        # A batch of 0 or 1 items cannot shrink: halving it would resend the very
        # same batch and recurse without end, so surface the error instead.
        if len(edge_batch) <= 1:
            raise

        middle = len(edge_batch) // 2
        await queued_add_edges(edge_batch[:middle])
        await queued_add_edges(edge_batch[middle:])
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
async def queued_add_nodes(node_batch):
    """Put a batch of nodes on the distributed nodes-and-edges queue.

    The batch is sent as one ``(node_batch, [])`` queue item (the nodes, then an
    empty edge list). When the put fails with ``GRPCError`` the batch is cut in
    half and each half is enqueued recursively.

    Args:
        node_batch: Sliceable, sized sequence of nodes to enqueue.

    Raises:
        GRPCError: If a batch that cannot be split any further (zero or one
            item) still fails to enqueue.
    """
    from grpclib import GRPCError
    from ..queues import add_nodes_and_edges_queue

    try:
        await add_nodes_and_edges_queue.put.aio((node_batch, []))
    except GRPCError:
        # A batch of 0 or 1 items cannot shrink: halving it would resend the very
        # same batch and recurse without end, so surface the error instead.
        if len(node_batch) <= 1:
            raise

        middle = len(node_batch) // 2
        await queued_add_nodes(node_batch[:middle])
        await queued_add_nodes(node_batch[middle:])
|
distributed/test.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from modal import App
|
|
2
|
+
|
|
3
|
+
# Modal app that the function and the local entrypoint below are registered on.
app = App("cognee_distributed_test")
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@app.function()
def sum_distributed(numbers: list):
    """Return the sum of ``numbers``; registered as a function on ``app``."""
    return sum(numbers)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@app.local_entrypoint()
def main():
    """Sum 0..99 once locally and once in batches through ``sum_distributed``.

    Prints the result of ``sum_distributed.local`` over the whole range, then
    splits the range into batches of ten, maps ``sum_distributed`` over the
    batches and prints the total of the per-batch results.
    """
    numbers = range(100)
    batch_size = 10

    local_sum = sum_distributed.local(numbers=numbers)

    print(f"Local sum: {local_sum}")

    batches = [list(numbers[i : i + batch_size]) for i in range(0, len(numbers), batch_size)]

    # The accumulator used to be a local called `sum`, which shadowed the builtin;
    # with the builtin left intact it can add up the mapped results directly.
    distributed_sum = sum(sum_distributed.map(batches))

    print(f"Distributed sum: {distributed_sum}")
|
distributed/utils.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from functools import wraps
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def override_distributed(new_func):
    """Build a decorator that can reroute an async method to ``new_func``.

    The decorated method gains a keyword-only ``distributed`` argument:

    * ``None`` (the default) falls back to the ``COGNEE_DISTRIBUTED`` environment
      variable, which counts as enabled only when it equals ``"true"``
      case-insensitively;
    * a truthy value awaits ``new_func(*args, **kwargs)``;
    * a falsy value awaits the original method with ``self`` and the same arguments.

    Args:
        new_func: Async callable awaited instead of the method in distributed mode.

    Returns:
        A decorator for async methods.
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(self, *args, distributed=None, **kwargs):
            if distributed is None:
                env_flag = os.getenv("COGNEE_DISTRIBUTED", "False")
                distributed = env_flag.lower() == "true"

            if not distributed:
                return await func(self, *args, **kwargs)

            # Note: `self` is not forwarded to the replacement callable.
            return await new_func(*args, **kwargs)

        return wrapper

    return decorator
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import modal
|
|
2
|
+
import asyncio
|
|
3
|
+
from sqlalchemy.exc import OperationalError, DBAPIError
|
|
4
|
+
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
|
5
|
+
|
|
6
|
+
from distributed.app import app
|
|
7
|
+
from distributed.modal_image import image
|
|
8
|
+
from distributed.queues import add_data_points_queue
|
|
9
|
+
|
|
10
|
+
from cognee.shared.logging_utils import get_logger
|
|
11
|
+
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Module-level logger for this worker.
logger = get_logger("data_point_saving_worker")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VectorDatabaseDeadlockError(Exception):
    """Raised when adding data points to the vector database hits a deadlock.

    Args:
        message: Optional text overriding the class-level default ``message``.
    """

    # Default text, kept as a class attribute for code that reads `.message`.
    message = "A deadlock occurred while trying to add data points to the vector database."

    def __init__(self, message=None):
        if message is not None:
            self.message = message
        # Hand the text to Exception so str(error) and tracebacks are not empty.
        super().__init__(self.message)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def is_deadlock_error(error):
    """Tell whether the driver error wrapped in ``error`` looks like a deadlock.

    ``error.orig`` is checked, in order, for:

    * being an ``asyncpg`` ``DeadlockDetectedError`` (skipped when asyncpg is not installed);
    * carrying ``pgcode == "40P01"`` (PostgreSQL SQLSTATE deadlock_detected);
    * mentioning "database is locked" in its text (SQLite has no real deadlocks
      but may report lock contention this way).

    Args:
        error: Exception exposing the underlying driver error as ``.orig``.

    Returns:
        bool: ``True`` if any of the checks matches, otherwise ``False``.
    """
    try:
        import asyncpg
    except ImportError:
        asyncpg = None

    driver_error = error.orig

    # SQLAlchemy-wrapped asyncpg
    if asyncpg is not None and isinstance(
        driver_error, asyncpg.exceptions.DeadlockDetectedError
    ):
        return True

    # PostgreSQL: SQLSTATE 40P01 = deadlock_detected
    if getattr(driver_error, "pgcode", None) == "40P01":
        return True

    # SQLite: lock contention surfaces as a "database is locked" message
    return "database is locked" in str(driver_error).lower()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@app.function(
    retries=3,
    image=image,
    timeout=86400,
    max_containers=5,
    secrets=[modal.Secret.from_name("distributed_cognee")],
)
async def data_point_saving_worker():
    """Drain ``add_data_points_queue`` into the vector engine until told to stop.

    The worker polls the queue in an endless loop:

    * an empty item (``len(item) == 0``) is the stop signal and ends the worker;
    * a two-element item is unpacked as ``(collection_name, data_points)`` and written
      with ``vector_engine.create_data_points``; a deadlock is retried for up to three
      attempts with exponential backoff;
    * an item that cannot be deserialized is logged and skipped;
    * when the queue is empty the worker sleeps five seconds before polling again.

    Returns:
        bool: ``True`` once the stop signal has been received.
    """
    print("Started processing of data points; starting vector engine queue.")
    vector_engine = get_vector_engine()

    while True:
        if await add_data_points_queue.len.aio() != 0:
            try:
                add_data_points_request = await add_data_points_queue.get.aio(block=False)
            except modal.exception.DeserializationError as error:
                logger.error(f"Deserialization error: {str(error)}")
                continue

            # Up to five containers poll this queue, so another worker may take the item
            # between the length check and this non-blocking get, which then yields None.
            if add_data_points_request is None:
                continue

            if len(add_data_points_request) == 0:
                print("Finished processing all data points; stopping vector engine queue.")
                return True

            if len(add_data_points_request) == 2:
                (collection_name, data_points) = add_data_points_request

                print(f"Adding {len(data_points)} data points to '{collection_name}' collection.")

                @retry(
                    retry=retry_if_exception_type(VectorDatabaseDeadlockError),
                    stop=stop_after_attempt(3),
                    wait=wait_exponential(multiplier=2, min=1, max=6),
                )
                async def add_data_points():
                    try:
                        await vector_engine.create_data_points(
                            collection_name, data_points, distributed=False
                        )
                    except DBAPIError as error:
                        # OperationalError is a DBAPIError subclass, so this single clause
                        # covers both; a separate clause after it could never run.
                        if is_deadlock_error(error):
                            raise VectorDatabaseDeadlockError() from error

                        # Not a deadlock: the batch is dropped exactly as before, but the
                        # failure is logged instead of disappearing silently.
                        logger.error(
                            f"Failed to add data points to '{collection_name}': {str(error)}"
                        )

                await add_data_points()

                print("Finished adding data points.")

        else:
            print("No jobs, go to sleep.")
            await asyncio.sleep(5)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import modal
|
|
2
|
+
import asyncio
|
|
3
|
+
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
|
4
|
+
|
|
5
|
+
from distributed.app import app
|
|
6
|
+
from distributed.modal_image import image
|
|
7
|
+
from distributed.queues import add_nodes_and_edges_queue
|
|
8
|
+
|
|
9
|
+
from cognee.shared.logging_utils import get_logger
|
|
10
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
11
|
+
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Module-level logger for this worker.
logger = get_logger("graph_saving_worker")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class GraphDatabaseDeadlockError(Exception):
    """Raised when adding nodes or edges to the graph database hits a deadlock.

    Args:
        message: Optional text overriding the class-level default ``message``.
    """

    # The previous text was copied from the vector worker and named the wrong database.
    message = "A deadlock occurred while trying to add nodes and edges to the graph database."

    def __init__(self, message=None):
        if message is not None:
            self.message = message
        # Hand the text to Exception so str(error) and tracebacks are not empty.
        super().__init__(self.message)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def is_deadlock_error(error):
    """Tell whether ``error`` looks like a graph-database deadlock.

    When the configured graph provider is ``"neo4j"``, a neo4j ``TransientError``
    carrying the ``DeadlockDetected`` code counts as a deadlock. For every provider,
    an error whose text mentions "deadlock" or "cannot acquire lock" (the Kuzu
    check) counts as well.

    Args:
        error: Exception raised by the graph engine.

    Returns:
        bool: ``True`` if one of the checks matches, otherwise ``False``.
    """
    provider = get_graph_config().graph_database_provider

    if provider == "neo4j":
        # Neo4j
        from neo4j.exceptions import TransientError

        deadlock_code = "Neo.TransientError.Transaction.DeadlockDetected"
        if isinstance(error, TransientError) and error.code == deadlock_code:
            return True

    # Kuzu
    error_text = str(error).lower()
    if "deadlock" in error_text or "cannot acquire lock" in error_text:
        return True

    return False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@app.function(
    retries=3,
    image=image,
    timeout=86400,
    max_containers=5,
    secrets=[modal.Secret.from_name("distributed_cognee")],
)
async def graph_saving_worker():
    """Drain ``add_nodes_and_edges_queue`` into the graph engine until told to stop.

    The worker polls the queue in an endless loop:

    * an empty item (``len(item) == 0``) is the stop signal and ends the worker;
    * a two-element item is read as ``(nodes, edges)``; non-empty nodes are saved
      first, then non-empty edges, each with up to three attempts and exponential
      backoff when a deadlock is detected;
    * an item that cannot be deserialized is logged and skipped;
    * when the queue is empty the worker sleeps five seconds before polling again.

    Returns:
        bool: ``True`` once the stop signal has been received.
    """
    print("Started processing of nodes and edges; starting graph engine queue.")
    graph_engine = await get_graph_engine()

    while True:
        if await add_nodes_and_edges_queue.len.aio() != 0:
            try:
                nodes_and_edges = await add_nodes_and_edges_queue.get.aio(block=False)
            except modal.exception.DeserializationError as error:
                logger.error(f"Deserialization error: {str(error)}")
                continue

            # Up to five containers poll this queue, so another worker may take the item
            # between the length check and this non-blocking get, which then yields None.
            if nodes_and_edges is None:
                continue

            if len(nodes_and_edges) == 0:
                print("Finished processing all nodes and edges; stopping graph engine queue.")
                return True

            if len(nodes_and_edges) == 2:
                print(
                    f"Adding {len(nodes_and_edges[0])} nodes and {len(nodes_and_edges[1])} edges."
                )
                nodes = nodes_and_edges[0]
                edges = nodes_and_edges[1]

                @retry(
                    retry=retry_if_exception_type(GraphDatabaseDeadlockError),
                    stop=stop_after_attempt(3),
                    wait=wait_exponential(multiplier=2, min=1, max=6),
                )
                async def save_graph_nodes(new_nodes):
                    try:
                        await graph_engine.add_nodes(new_nodes, distributed=False)
                    except Exception as error:
                        if is_deadlock_error(error):
                            raise GraphDatabaseDeadlockError() from error

                        # Not a deadlock: the batch is dropped exactly as before, but the
                        # failure is logged instead of disappearing silently.
                        logger.error(f"Failed to add nodes to the graph: {str(error)}")

                @retry(
                    retry=retry_if_exception_type(GraphDatabaseDeadlockError),
                    stop=stop_after_attempt(3),
                    wait=wait_exponential(multiplier=2, min=1, max=6),
                )
                async def save_graph_edges(new_edges):
                    try:
                        await graph_engine.add_edges(new_edges, distributed=False)
                    except Exception as error:
                        if is_deadlock_error(error):
                            raise GraphDatabaseDeadlockError() from error

                        # Same policy as for nodes: log non-deadlock failures and carry on.
                        logger.error(f"Failed to add edges to the graph: {str(error)}")

                if nodes:
                    await save_graph_nodes(nodes)

                if edges:
                    await save_graph_edges(edges)

                print("Finished adding nodes and edges.")

        else:
            print("No jobs, go to sleep.")
            await asyncio.sleep(5)
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
from cognee.exceptions import CogneeApiError
|
|
2
|
-
from fastapi import status
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class EmbeddingException(CogneeApiError):
    """Error type for embedding-related failures.

    A ``CogneeApiError`` subclass whose constructor only supplies embedding-specific
    defaults for the message, the name and the status code (HTTP 422) and forwards
    them to the base class.
    """

    def __init__(
        self,
        message: str = "Embedding Exception.",
        name: str = "EmbeddingException",
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
    ):
        # All three values are passed positionally, in this order, to CogneeApiError.
        super().__init__(message, name, status_code)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|