cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +5 -1
- cognee/api/v1/add/add.py +1 -2
- cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
- cognee/api/v1/cognify/cognify.py +16 -24
- cognee/api/v1/cognify/routers/__init__.py +1 -0
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +37 -12
- cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
- cognee/api/v1/search/search.py +0 -4
- cognee/api/v1/ui/ui.py +68 -38
- cognee/context_global_variables.py +16 -61
- cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +28 -16
- cognee/infrastructure/databases/graph/config.py +0 -3
- cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
- cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
- cognee/infrastructure/databases/utils/__init__.py +0 -3
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
- cognee/infrastructure/databases/vector/config.py +0 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
- cognee/infrastructure/files/storage/s3_config.py +0 -2
- cognee/infrastructure/llm/LLMGateway.py +2 -5
- cognee/infrastructure/llm/config.py +0 -35
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
- cognee/modules/cognify/config.py +0 -2
- cognee/modules/data/deletion/prune_system.py +2 -52
- cognee/modules/data/methods/delete_dataset.py +0 -26
- cognee/modules/engine/models/__init__.py +0 -1
- cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
- cognee/modules/memify/memify.py +7 -1
- cognee/modules/pipelines/operations/pipeline.py +2 -18
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/code_retriever.py +232 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_retriever.py +0 -10
- cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
- cognee/modules/retrieval/temporal_retriever.py +0 -4
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
- cognee/modules/search/methods/get_search_type_tools.py +8 -54
- cognee/modules/search/methods/no_access_control_search.py +0 -4
- cognee/modules/search/methods/search.py +0 -21
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +0 -19
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +3 -15
- cognee/shared/logging_utils.py +0 -4
- cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
- cognee/tasks/code/get_local_dependencies_checker.py +20 -0
- cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
- cognee/tasks/documents/__init__.py +1 -0
- cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +10 -9
- cognee/tasks/repo_processor/__init__.py +2 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
- cognee/tasks/repo_processor/get_non_code_files.py +158 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
- cognee/tasks/storage/add_data_points.py +2 -142
- cognee/tests/test_cognee_server_start.py +4 -2
- cognee/tests/test_conversation_history.py +1 -23
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_search_db.py +1 -37
- cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- cognee/api/v1/ui/node_setup.py +0 -360
- cognee/api/v1/ui/npm_utils.py +0 -50
- cognee/eval_framework/Dockerfile +0 -29
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
- cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
- cognee/modules/engine/models/Triplet.py +0 -9
- cognee/modules/retrieval/register_retriever.py +0 -10
- cognee/modules/retrieval/registered_community_retrievers.py +0 -1
- cognee/modules/retrieval/triplet_retriever.py +0 -182
- cognee/shared/rate_limiting.py +0 -30
- cognee/tasks/memify/get_triplet_datapoints.py +0 -289
- cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
- cognee/tests/integration/tasks/test_add_data_points.py +0 -139
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
- cognee/tests/test_dataset_database_handler.py +0 -137
- cognee/tests/test_dataset_delete.py +0 -76
- cognee/tests/test_edge_centered_payload.py +0 -170
- cognee/tests/test_pipeline_cache.py +0 -164
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
- cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/client.py
CHANGED
@@ -21,7 +21,7 @@ from cognee.api.v1.notebooks.routers import get_notebooks_router
 from cognee.api.v1.permissions.routers import get_permissions_router
 from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
-from cognee.api.v1.cognify.routers import get_cognify_router
+from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
 from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
 from cognee.api.v1.memify.routers import get_memify_router
@@ -278,6 +278,10 @@ app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["re
 
 app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
 
+codegraph_routes = get_code_pipeline_router()
+if codegraph_routes:
+    app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
+
 app.include_router(
     get_users_router(),
     prefix="/api/v1/users",
cognee/api/v1/add/add.py
CHANGED
@@ -155,7 +155,7 @@ async def add(
         - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)
 
     Optional:
-        - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
+        - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
         - LLM_MODEL: Model name (default: "gpt-5-mini")
         - DEFAULT_USER_EMAIL: Custom default user email
         - DEFAULT_USER_PASSWORD: Custom default user password
@@ -205,7 +205,6 @@ async def add(
         pipeline_name="add_pipeline",
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
-        use_pipeline_cache=True,
         incremental_loading=incremental_loading,
         data_per_batch=data_per_batch,
     ):
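
For reference, the environment variables named in the add() docstring above can be set before calling the public API. A minimal sketch, assuming cognee.add is the standard entry point; the key, model, and ingested text are placeholders:

    import os
    import asyncio

    # Environment knobs documented in the add() docstring; values are placeholders.
    os.environ["LLM_API_KEY"] = "sk-..."
    os.environ["LLM_PROVIDER"] = "openai"   # or "anthropic", "gemini", "ollama", "mistral"
    os.environ["LLM_MODEL"] = "gpt-5-mini"

    import cognee


    async def main():
        # Ingest a small piece of text with the default settings.
        await cognee.add("Some text to ingest.")


    asyncio.run(main())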
cognee/api/v1/cognify/code_graph_pipeline.py
ADDED
@@ -0,0 +1,119 @@
+import os
+import pathlib
+import asyncio
+from typing import Optional
+from cognee.shared.logging_utils import get_logger, setup_logging
+from cognee.modules.observability.get_observe import get_observe
+
+from cognee.api.v1.search import SearchType, search
+from cognee.api.v1.visualize.visualize import visualize_graph
+from cognee.modules.cognify.config import get_cognify_config
+from cognee.modules.pipelines import run_tasks
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.users.methods import get_default_user
+from cognee.shared.data_models import KnowledgeGraph
+from cognee.modules.data.methods import create_dataset
+from cognee.tasks.documents import classify_documents, extract_chunks_from_documents
+from cognee.tasks.graph import extract_graph_from_data
+from cognee.tasks.ingestion import ingest_data
+from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies
+
+from cognee.tasks.storage import add_data_points
+from cognee.tasks.summarization import summarize_text
+from cognee.infrastructure.llm import get_max_chunk_tokens
+from cognee.infrastructure.databases.relational import get_relational_engine
+
+observe = get_observe()
+
+logger = get_logger("code_graph_pipeline")
+
+
+@observe
+async def run_code_graph_pipeline(
+    repo_path,
+    include_docs=False,
+    excluded_paths: Optional[list[str]] = None,
+    supported_languages: Optional[list[str]] = None,
+):
+    import cognee
+    from cognee.low_level import setup
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+
+    cognee_config = get_cognify_config()
+    user = await get_default_user()
+    detailed_extraction = True
+
+    tasks = [
+        Task(
+            get_repo_file_dependencies,
+            detailed_extraction=detailed_extraction,
+            supported_languages=supported_languages,
+            excluded_paths=excluded_paths,
+        ),
+        # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
+        Task(add_data_points, task_config={"batch_size": 30}),
+    ]
+
+    if include_docs:
+        # This tasks take a long time to complete
+        non_code_tasks = [
+            Task(get_non_py_files, task_config={"batch_size": 50}),
+            Task(ingest_data, dataset_name="repo_docs", user=user),
+            Task(classify_documents),
+            Task(extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()),
+            Task(
+                extract_graph_from_data,
+                graph_model=KnowledgeGraph,
+                task_config={"batch_size": 50},
+            ),
+            Task(
+                summarize_text,
+                summarization_model=cognee_config.summarization_model,
+                task_config={"batch_size": 50},
+            ),
+        ]
+
+    dataset_name = "codebase"
+
+    # Save dataset to database
+    db_engine = get_relational_engine()
+    async with db_engine.get_async_session() as session:
+        dataset = await create_dataset(dataset_name, user, session)
+
+    if include_docs:
+        non_code_pipeline_run = run_tasks(
+            non_code_tasks, dataset.id, repo_path, user, "cognify_pipeline"
+        )
+        async for run_status in non_code_pipeline_run:
+            yield run_status
+
+    async for run_status in run_tasks(
+        tasks, dataset.id, repo_path, user, "cognify_code_pipeline", incremental_loading=False
+    ):
+        yield run_status
+
+
+if __name__ == "__main__":
+
+    async def main():
+        async for run_status in run_code_graph_pipeline("REPO_PATH"):
+            print(f"{run_status.pipeline_run_id}: {run_status.status}")
+
+        file_path = os.path.join(
+            pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
+        )
+        await visualize_graph(file_path)
+
+        search_results = await search(
+            query_type=SearchType.CODE,
+            query_text="How is Relationship weight calculated?",
+        )
+
+        for file in search_results:
+            print(file["name"])
+
+    logger = setup_logging(name="code_graph_pipeline")
+    asyncio.run(main())
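
Since run_code_graph_pipeline is an async generator, callers iterate over the pipeline-run statuses it yields. A minimal consumption sketch based on the __main__ block above; the repository path is a placeholder:

    import asyncio

    from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline


    async def index_repo(repo_path: str) -> None:
        # Each yielded item is a pipeline run status, as printed in the __main__ block above.
        async for run_status in run_code_graph_pipeline(repo_path, include_docs=False):
            print(f"{run_status.pipeline_run_id}: {run_status.status}")


    asyncio.run(index_repo("/path/to/your/repo"))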
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -3,7 +3,6 @@ from pydantic import BaseModel
 from typing import Union, Optional
 from uuid import UUID
 
-from cognee.modules.cognify.config import get_cognify_config
 from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
@@ -20,6 +19,7 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
 from cognee.modules.users.models import User
 
 from cognee.tasks.documents import (
+    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -53,7 +53,6 @@ async def cognify(
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
     data_per_batch: int = 20,
-    **kwargs,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -79,11 +78,12 @@
 
     Processing Pipeline:
     1. **Document Classification**: Identifies document types and structures
-    2. **
-    3. **
-    4. **
-    5. **
-    6. **
+    2. **Permission Validation**: Ensures user has processing rights
+    3. **Text Chunking**: Breaks content into semantically meaningful segments
+    4. **Entity Extraction**: Identifies key concepts, people, places, organizations
+    5. **Relationship Detection**: Discovers connections between entities
+    6. **Graph Construction**: Builds semantic knowledge graph with embeddings
+    7. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Graph Model Customization:
     The `graph_model` parameter allows custom knowledge structures:
@@ -224,7 +224,6 @@ async def cognify(
         config=config,
         custom_prompt=custom_prompt,
         chunks_per_batch=chunks_per_batch,
-        **kwargs,
     )
 
     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -239,7 +238,6 @@ async def cognify(
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
-        use_pipeline_cache=True,
         pipeline_name="cognify_pipeline",
         data_per_batch=data_per_batch,
     )
@@ -253,7 +251,6 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     config: Config = None,
     custom_prompt: Optional[str] = None,
     chunks_per_batch: int = 100,
-    **kwargs,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -275,11 +272,9 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     if chunks_per_batch is None:
         chunks_per_batch = 100
 
-    cognify_config = get_cognify_config()
-    embed_triplets = cognify_config.triplet_embedding
-
     default_tasks = [
         Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -291,17 +286,12 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             config=config,
             custom_prompt=custom_prompt,
             task_config={"batch_size": chunks_per_batch},
-            **kwargs,
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
             task_config={"batch_size": chunks_per_batch},
         ),
-        Task(
-            add_data_points,
-            embed_triplets=embed_triplets,
-            task_config={"batch_size": chunks_per_batch},
-        ),
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]
 
     return default_tasks
@@ -315,13 +305,14 @@ async def get_temporal_tasks(
 
     The pipeline includes:
     1. Document classification.
-    2.
-    3.
-    4.
-    5.
+    2. Dataset permission checks (requires "write" access).
+    3. Document chunking with a specified or default chunk size.
+    4. Event and timestamp extraction from chunks.
+    5. Knowledge graph extraction from events.
+    6. Batched insertion of data points.
 
     Args:
-        user (User, optional): The user requesting task execution.
+        user (User, optional): The user requesting task execution, used for permission checks.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
         chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -334,6 +325,7 @@ async def get_temporal_tasks(
 
     temporal_tasks = [
        Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
        Task(
            extract_chunks_from_documents,
            max_chunk_size=chunk_size or get_max_chunk_tokens(),
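
The removal of the **kwargs catch-all means unrecognized keyword arguments to cognify() now fail fast instead of being silently forwarded to the task builders. A rough usage sketch, assuming the standard cognee.add / cognee.cognify entry points; the dataset name and text are placeholders, and the datasets= keyword is assumed here rather than taken from this diff:

    import asyncio

    import cognee


    async def main():
        # Ingest into a placeholder dataset.
        await cognee.add(
            "Knowledge graphs connect entities through typed relationships.",
            dataset_name="demo",
        )

        # cognify() no longer accepts a **kwargs catch-all, so only its
        # documented parameters (e.g. temporal_cognify, data_per_batch) are valid.
        await cognee.cognify(datasets=["demo"], temporal_cognify=False)


    asyncio.run(main())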
cognee/api/v1/cognify/routers/get_code_pipeline_router.py
ADDED
@@ -0,0 +1,90 @@
+import json
+from cognee.shared.logging_utils import get_logger
+from fastapi import APIRouter
+from fastapi.responses import JSONResponse
+from cognee.api.DTO import InDTO
+from cognee.modules.retrieval.code_retriever import CodeRetriever
+from cognee.modules.storage.utils import JSONEncoder
+
+
+logger = get_logger()
+
+
+class CodePipelineIndexPayloadDTO(InDTO):
+    repo_path: str
+    include_docs: bool = False
+
+
+class CodePipelineRetrievePayloadDTO(InDTO):
+    query: str
+    full_input: str
+
+
+def get_code_pipeline_router() -> APIRouter:
+    try:
+        import cognee.api.v1.cognify.code_graph_pipeline
+    except ModuleNotFoundError:
+        logger.error("codegraph dependencies not found. Skipping codegraph API routes.")
+        return None
+
+    router = APIRouter()
+
+    @router.post("/index", response_model=None)
+    async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO):
+        """
+        Run indexation on a code repository.
+
+        This endpoint processes a code repository to create a knowledge graph
+        of the codebase structure, dependencies, and relationships.
+
+        ## Request Parameters
+        - **repo_path** (str): Path to the code repository
+        - **include_docs** (bool): Whether to include documentation files (default: false)
+
+        ## Response
+        No content returned. Processing results are logged.
+
+        ## Error Codes
+        - **409 Conflict**: Error during indexation process
+        """
+        from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
+
+        try:
+            async for result in run_code_graph_pipeline(payload.repo_path, payload.include_docs):
+                logger.info(result)
+        except Exception as error:
+            return JSONResponse(status_code=409, content={"error": str(error)})
+
+    @router.post("/retrieve", response_model=list[dict])
+    async def code_pipeline_retrieve(payload: CodePipelineRetrievePayloadDTO):
+        """
+        Retrieve context from the code knowledge graph.
+
+        This endpoint searches the indexed code repository to find relevant
+        context based on the provided query.
+
+        ## Request Parameters
+        - **query** (str): Search query for code context
+        - **full_input** (str): Full input text for processing
+
+        ## Response
+        Returns a list of relevant code files and context as JSON.
+
+        ## Error Codes
+        - **409 Conflict**: Error during retrieval process
+        """
+        try:
+            query = (
+                payload.full_input.replace("cognee ", "")
+                if payload.full_input.startswith("cognee ")
+                else payload.full_input
+            )
+
+            retriever = CodeRetriever()
+            retrieved_files = await retriever.get_context(query)
+
+            return json.dumps(retrieved_files, cls=JSONEncoder)
+        except Exception as error:
+            return JSONResponse(status_code=409, content={"error": str(error)})
+
+    return router
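
Assuming the server mounts these routes under /api/v1/code-pipeline (as in the client.py change earlier in this diff), a client could call them roughly as below. httpx, the base URL, and the example queries are illustrative only, and the snake_case field names follow the DTO definitions above; the InDTO layer may also accept other casings:

    import httpx

    BASE_URL = "http://localhost:8000"  # assumed local cognee server

    with httpx.Client(base_url=BASE_URL, timeout=None) as client:
        # Kick off repository indexation (long-running; results are logged server-side).
        client.post(
            "/api/v1/code-pipeline/index",
            json={"repo_path": "/path/to/your/repo", "include_docs": False},
        )

        # Retrieve code context for a query.
        response = client.post(
            "/api/v1/code-pipeline/retrieve",
            json={
                "query": "how are embeddings configured",
                "full_input": "cognee how are embeddings configured",
            },
        )
        print(response.json())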
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED
@@ -42,9 +42,7 @@ class CognifyPayloadDTO(InDTO):
         default="", description="Custom prompt for entity extraction and graph generation"
     )
     ontology_key: Optional[List[str]] = Field(
-        default=None,
-        examples=[[]],
-        description="Reference to one or more previously uploaded ontologies",
+        default=None, description="Reference to one or more previously uploaded ontologies"
     )
 
 
cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED
@@ -208,14 +208,14 @@ def get_datasets_router() -> APIRouter:
             },
         )
 
-        from cognee.modules.data.methods import delete_dataset
+        from cognee.modules.data.methods import get_dataset, delete_dataset
 
-        dataset = await
+        dataset = await get_dataset(user.id, dataset_id)
 
         if dataset is None:
             raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
 
-        await delete_dataset(dataset
+        await delete_dataset(dataset)
 
     @router.delete(
         "/{dataset_id}/data/{data_id}",
cognee/api/v1/ontologies/ontologies.py
CHANGED
@@ -5,7 +5,6 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import Optional, List
 from dataclasses import dataclass
-from fastapi import UploadFile
 
 
 @dataclass
@@ -46,10 +45,8 @@ class OntologyService:
             json.dump(metadata, f, indent=2)
 
     async def upload_ontology(
-        self, ontology_key: str, file
+        self, ontology_key: str, file, user, description: Optional[str] = None
     ) -> OntologyMetadata:
-        if not file.filename:
-            raise ValueError("File must have a filename")
         if not file.filename.lower().endswith(".owl"):
             raise ValueError("File must be in .owl format")
 
@@ -60,6 +57,8 @@ class OntologyService:
             raise ValueError(f"Ontology key '{ontology_key}' already exists")
 
         content = await file.read()
+        if len(content) > 10 * 1024 * 1024:
+            raise ValueError("File size exceeds 10MB limit")
 
         file_path = user_dir / f"{ontology_key}.owl"
         with open(file_path, "wb") as f:
@@ -83,11 +82,7 @@ class OntologyService:
         )
 
     async def upload_ontologies(
-        self,
-        ontology_key: List[str],
-        files: List[UploadFile],
-        user,
-        descriptions: Optional[List[str]] = None,
+        self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
     ) -> List[OntologyMetadata]:
         """
         Upload ontology files with their respective keys.
@@ -110,17 +105,47 @@ class OntologyService:
         if len(set(ontology_key)) != len(ontology_key):
             raise ValueError("Duplicate ontology keys not allowed")
 
+        if descriptions and len(descriptions) != len(files):
+            raise ValueError("Number of descriptions must match number of files")
+
         results = []
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
 
         for i, (key, file) in enumerate(zip(ontology_key, files)):
+            if key in metadata:
+                raise ValueError(f"Ontology key '{key}' already exists")
+
+            if not file.filename.lower().endswith(".owl"):
+                raise ValueError(f"File '{file.filename}' must be in .owl format")
+
+            content = await file.read()
+            if len(content) > 10 * 1024 * 1024:
+                raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
+
+            file_path = user_dir / f"{key}.owl"
+            with open(file_path, "wb") as f:
+                f.write(content)
+
+            ontology_metadata = {
+                "filename": file.filename,
+                "size_bytes": len(content),
+                "uploaded_at": datetime.now(timezone.utc).isoformat(),
+                "description": descriptions[i] if descriptions else None,
+            }
+            metadata[key] = ontology_metadata
+
             results.append(
-
+                OntologyMetadata(
                     ontology_key=key,
-
-
+                    filename=file.filename,
+                    size_bytes=len(content),
+                    uploaded_at=ontology_metadata["uploaded_at"],
                     description=descriptions[i] if descriptions else None,
                 )
             )
+
+        self._save_metadata(user_dir, metadata)
         return results
 
     def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
cognee/api/v1/ontologies/routers/get_ontology_router.py
CHANGED
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, File, Form, UploadFile, Depends,
+from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
 from fastapi.responses import JSONResponse
 from typing import Optional, List
 
@@ -15,25 +15,28 @@ def get_ontology_router() -> APIRouter:
 
     @router.post("", response_model=dict)
     async def upload_ontology(
-        request: Request,
         ontology_key: str = Form(...),
-        ontology_file: UploadFile = File(...),
-
+        ontology_file: List[UploadFile] = File(...),
+        descriptions: Optional[str] = Form(None),
         user: User = Depends(get_authenticated_user),
     ):
         """
-        Upload
+        Upload ontology files with their respective keys for later use in cognify operations.
+
+        Supports both single and multiple file uploads:
+        - Single file: ontology_key=["key"], ontology_file=[file]
+        - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
 
         ## Request Parameters
-        - **ontology_key** (str):
-        - **ontology_file** (UploadFile):
-        - **
+        - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
+        - **ontology_file** (List[UploadFile]): OWL format ontology files
+        - **descriptions** (Optional[str]): JSON array string of optional descriptions
 
         ## Response
-        Returns metadata about
+        Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
 
         ## Error Codes
-        - **400 Bad Request**: Invalid file format, duplicate
+        - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
         - **500 Internal Server Error**: File system or processing errors
         """
         send_telemetry(
@@ -46,22 +49,16 @@ def get_ontology_router() -> APIRouter:
         )
 
         try:
-
-
-
-
-
-
-
-
-
-
-
-            result = await ontology_service.upload_ontology(
-                ontology_key=ontology_key,
-                file=ontology_file,
-                user=user,
-                description=description,
+            import json
+
+            ontology_keys = json.loads(ontology_key)
+            description_list = json.loads(descriptions) if descriptions else None
+
+            if not isinstance(ontology_keys, list):
+                raise ValueError("ontology_key must be a JSON array")
+
+            results = await ontology_service.upload_ontologies(
+                ontology_keys, ontology_file, user, description_list
             )
 
            return {
@@ -73,9 +70,10 @@ def get_ontology_router() -> APIRouter:
                        "uploaded_at": result.uploaded_at,
                        "description": result.description,
                    }
+                    for result in results
                ]
            }
-        except ValueError as e:
+        except (json.JSONDecodeError, ValueError) as e:
            return JSONResponse(status_code=400, content={"error": str(e)})
        except Exception as e:
            return JSONResponse(status_code=500, content={"error": str(e)})
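
On the client side, the reworked endpoint expects ontology_key (and optional descriptions) as JSON-array strings in the multipart form, with one or more .owl files under the repeated ontology_file field. A rough sketch using requests; the mount point, server URL, file names, and the absence of authentication headers are assumptions for illustration:

    import json

    import requests

    # Two .owl files uploaded under the same repeated form field name.
    files = [
        ("ontology_file", ("animals.owl", open("animals.owl", "rb"), "application/rdf+xml")),
        ("ontology_file", ("vehicles.owl", open("vehicles.owl", "rb"), "application/rdf+xml")),
    ]

    # Keys and descriptions are sent as JSON-array strings, matching the router's parsing.
    data = {
        "ontology_key": json.dumps(["animals", "vehicles"]),
        "descriptions": json.dumps(["Animal taxonomy", "Vehicle taxonomy"]),
    }

    # Assumed mount point; adjust to wherever the ontology router is included in your deployment.
    response = requests.post("http://localhost:8000/api/v1/ontologies", data=data, files=files)
    print(response.status_code, response.json())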
cognee/api/v1/search/search.py
CHANGED
@@ -31,8 +31,6 @@ async def search(
     only_context: bool = False,
     use_combined_context: bool = False,
     session_id: Optional[str] = None,
-    wide_search_top_k: Optional[int] = 100,
-    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -202,8 +200,6 @@
         only_context=only_context,
         use_combined_context=use_combined_context,
         session_id=session_id,
-        wide_search_top_k=wide_search_top_k,
-        triplet_distance_penalty=triplet_distance_penalty,
     )
 
     return filtered_search_results