cognee 0.5.0.dev0__py3-none-any.whl → 0.5.0.dev1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- cognee/api/client.py +1 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +12 -37
- cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
- cognee/api/v1/search/search.py +4 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/context_global_variables.py +61 -16
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/graph/config.py +3 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +2 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +35 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_retriever.py +10 -0
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +4 -0
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +21 -0
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +15 -3
- cognee/shared/logging_utils.py +4 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/test_cognee_server_start.py +2 -4
- cognee/tests/test_conversation_history.py +23 -1
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_search_db.py +37 -1
- cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/METADATA +76 -89
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/RECORD +118 -97
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/WHEEL +1 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- cognee/tests/test_delete_bmw_example.py +0 -60
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/client.py
CHANGED

```diff
@@ -21,7 +21,7 @@ from cognee.api.v1.notebooks.routers import get_notebooks_router
 from cognee.api.v1.permissions.routers import get_permissions_router
 from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
-from cognee.api.v1.cognify.routers import
+from cognee.api.v1.cognify.routers import get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
 from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
 from cognee.api.v1.memify.routers import get_memify_router
@@ -278,10 +278,6 @@ app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["re
 
 app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
 
-codegraph_routes = get_code_pipeline_router()
-if codegraph_routes:
-    app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
-
 app.include_router(
     get_users_router(),
     prefix="/api/v1/users",
```
cognee/api/v1/add/add.py
CHANGED

```diff
@@ -155,7 +155,7 @@ async def add(
     - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)
 
     Optional:
-    - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
+    - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral", "bedrock"
     - LLM_MODEL: Model name (default: "gpt-5-mini")
     - DEFAULT_USER_EMAIL: Custom default user email
     - DEFAULT_USER_PASSWORD: Custom default user password
@@ -205,6 +205,7 @@ async def add(
         pipeline_name="add_pipeline",
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
+        use_pipeline_cache=True,
         incremental_loading=incremental_loading,
         data_per_batch=data_per_batch,
     ):
```
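Two user-visible effects of this change: Bedrock joins the supported LLM providers, and the add pipeline now always opts into the pipeline cache. A minimal sketch of selecting the new provider via environment variables, as the docstring above describes; the Bedrock model identifier is a hypothetical example, not taken from this diff:

```python
import os
import asyncio
import cognee

# Select the newly supported Bedrock provider via environment variables.
# The model identifier below is a hypothetical example; use one your
# AWS account can actually invoke.
os.environ["LLM_PROVIDER"] = "bedrock"
os.environ["LLM_MODEL"] = "anthropic.claude-3-haiku-20240307-v1:0"

async def main():
    # add() now runs with use_pipeline_cache=True internally, so
    # re-adding identical data should hit the cache instead of re-ingesting.
    await cognee.add("Natural language processing is a subfield of AI.")

asyncio.run(main())
```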
cognee/api/v1/cognify/cognify.py
CHANGED

```diff
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from typing import Union, Optional
 from uuid import UUID
 
+from cognee.modules.cognify.config import get_cognify_config
 from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
@@ -19,7 +20,6 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
 from cognee.modules.users.models import User
 
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -53,6 +53,7 @@ async def cognify(
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
     data_per_batch: int = 20,
+    **kwargs,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -78,12 +79,11 @@
 
     Processing Pipeline:
     1. **Document Classification**: Identifies document types and structures
-    2. **
-    3. **
-    4. **
-    5. **
-    6. **
-    7. **Content Summarization**: Creates hierarchical summaries for navigation
+    2. **Text Chunking**: Breaks content into semantically meaningful segments
+    3. **Entity Extraction**: Identifies key concepts, people, places, organizations
+    4. **Relationship Detection**: Discovers connections between entities
+    5. **Graph Construction**: Builds semantic knowledge graph with embeddings
+    6. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Graph Model Customization:
     The `graph_model` parameter allows custom knowledge structures:
@@ -224,6 +224,7 @@
         config=config,
         custom_prompt=custom_prompt,
         chunks_per_batch=chunks_per_batch,
+        **kwargs,
     )
 
     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -238,6 +239,7 @@
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
+        use_pipeline_cache=True,
         pipeline_name="cognify_pipeline",
         data_per_batch=data_per_batch,
     )
@@ -251,6 +253,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     config: Config = None,
     custom_prompt: Optional[str] = None,
     chunks_per_batch: int = 100,
+    **kwargs,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -272,9 +275,11 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     if chunks_per_batch is None:
         chunks_per_batch = 100
 
+    cognify_config = get_cognify_config()
+    embed_triplets = cognify_config.triplet_embedding
+
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -286,12 +291,17 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             config=config,
             custom_prompt=custom_prompt,
             task_config={"batch_size": chunks_per_batch},
+            **kwargs,
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
             task_config={"batch_size": chunks_per_batch},
         ),
-        Task(
+        Task(
+            add_data_points,
+            embed_triplets=embed_triplets,
+            task_config={"batch_size": chunks_per_batch},
+        ),
     ]
 
     return default_tasks
@@ -305,14 +315,13 @@ async def get_temporal_tasks(
 
     The pipeline includes:
     1. Document classification.
-    2.
-    3.
-    4.
-    5.
-    6. Batched insertion of data points.
+    2. Document chunking with a specified or default chunk size.
+    3. Event and timestamp extraction from chunks.
+    4. Knowledge graph extraction from events.
+    5. Batched insertion of data points.
 
     Args:
-        user (User, optional): The user requesting task execution
+        user (User, optional): The user requesting task execution.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
         chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -325,7 +334,6 @@ async def get_temporal_tasks(
 
     temporal_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
```
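The new `triplet_embedding` flag flows from the cognify config into the final `add_data_points` task, and `cognify()` now forwards `**kwargs` down to the graph-extraction task. A hedged sketch of inspecting the flag and running the pipeline; which extra kwargs the extraction task accepts is version-specific and not fully shown in this diff:

```python
import asyncio
import cognee
from cognee.modules.cognify.config import get_cognify_config

async def main():
    # The default task list reads this switch and passes it to
    # add_data_points as embed_triplets (see the diff above).
    print("Triplet embedding enabled:", get_cognify_config().triplet_embedding)

    await cognee.add("Alan Turing proposed the Turing test in 1950.")
    # Extra keyword arguments given to cognify() are now forwarded
    # through get_default_tasks() into the graph-extraction task.
    await cognee.cognify()

asyncio.run(main())
```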
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED

```diff
@@ -42,7 +42,9 @@ class CognifyPayloadDTO(InDTO):
         default="", description="Custom prompt for entity extraction and graph generation"
     )
     ontology_key: Optional[List[str]] = Field(
-        default=None,
+        default=None,
+        examples=[[]],
+        description="Reference to one or more previously uploaded ontologies",
     )
 
 
```
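With `ontology_key` now documented on the payload DTO, a cognify request can reference previously uploaded ontologies by key. A hedged sketch using httpx; the endpoint path, the `datasets` field, and the bearer token follow cognee's usual API layout but are assumptions here, not shown in this diff:

```python
import httpx

# Hypothetical request body: /api/v1/cognify, "datasets", and the token
# are placeholders assumed from cognee's API conventions.
payload = {
    "datasets": ["my_dataset"],
    "ontology_key": ["medical_terms", "org_chart"],  # keys from prior uploads
}

response = httpx.post(
    "http://localhost:8000/api/v1/cognify",
    json=payload,
    headers={"Authorization": "Bearer <token>"},
)
print(response.status_code, response.json())
```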
cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED

```diff
@@ -208,14 +208,14 @@ def get_datasets_router() -> APIRouter:
             },
         )
 
-        from cognee.modules.data.methods import
+        from cognee.modules.data.methods import delete_dataset
 
-        dataset = await
+        dataset = await get_authorized_existing_datasets([dataset_id], "delete", user)
 
         if dataset is None:
             raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
 
-        await delete_dataset(dataset)
+        await delete_dataset(dataset[0])
 
     @router.delete(
         "/{dataset_id}/data/{data_id}",
```
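The fix unwraps the first element because `get_authorized_existing_datasets` returns a list of authorized datasets rather than a single object. From a client's point of view the endpoint is unchanged; a hedged sketch of the call, with the base URL, UUID, and token as placeholders:

```python
import httpx

dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUID

# The server now authorizes via get_authorized_existing_datasets() and
# deletes dataset[0]; the HTTP contract stays the same.
response = httpx.delete(
    f"http://localhost:8000/api/v1/datasets/{dataset_id}",
    headers={"Authorization": "Bearer <token>"},
)
print(response.status_code)
```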
cognee/api/v1/ontologies/ontologies.py
CHANGED

```diff
@@ -5,6 +5,7 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import Optional, List
 from dataclasses import dataclass
+from fastapi import UploadFile
 
 
 @dataclass
@@ -45,8 +46,10 @@ class OntologyService:
             json.dump(metadata, f, indent=2)
 
     async def upload_ontology(
-        self, ontology_key: str, file, user, description: Optional[str] = None
+        self, ontology_key: str, file: UploadFile, user, description: Optional[str] = None
     ) -> OntologyMetadata:
+        if not file.filename:
+            raise ValueError("File must have a filename")
         if not file.filename.lower().endswith(".owl"):
             raise ValueError("File must be in .owl format")
 
@@ -57,8 +60,6 @@ class OntologyService:
             raise ValueError(f"Ontology key '{ontology_key}' already exists")
 
         content = await file.read()
-        if len(content) > 10 * 1024 * 1024:
-            raise ValueError("File size exceeds 10MB limit")
 
         file_path = user_dir / f"{ontology_key}.owl"
         with open(file_path, "wb") as f:
@@ -82,7 +83,11 @@ class OntologyService:
         )
 
     async def upload_ontologies(
-        self,
+        self,
+        ontology_key: List[str],
+        files: List[UploadFile],
+        user,
+        descriptions: Optional[List[str]] = None,
     ) -> List[OntologyMetadata]:
         """
         Upload ontology files with their respective keys.
@@ -105,47 +110,17 @@ class OntologyService:
         if len(set(ontology_key)) != len(ontology_key):
             raise ValueError("Duplicate ontology keys not allowed")
 
-        if descriptions and len(descriptions) != len(files):
-            raise ValueError("Number of descriptions must match number of files")
-
         results = []
-        user_dir = self._get_user_dir(str(user.id))
-        metadata = self._load_metadata(user_dir)
 
         for i, (key, file) in enumerate(zip(ontology_key, files)):
-            if key in metadata:
-                raise ValueError(f"Ontology key '{key}' already exists")
-
-            if not file.filename.lower().endswith(".owl"):
-                raise ValueError(f"File '{file.filename}' must be in .owl format")
-
-            content = await file.read()
-            if len(content) > 10 * 1024 * 1024:
-                raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
-
-            file_path = user_dir / f"{key}.owl"
-            with open(file_path, "wb") as f:
-                f.write(content)
-
-            ontology_metadata = {
-                "filename": file.filename,
-                "size_bytes": len(content),
-                "uploaded_at": datetime.now(timezone.utc).isoformat(),
-                "description": descriptions[i] if descriptions else None,
-            }
-            metadata[key] = ontology_metadata
-
             results.append(
-
+                await self.upload_ontology(
                     ontology_key=key,
-
-
-                    uploaded_at=ontology_metadata["uploaded_at"],
+                    file=file,
+                    user=user,
                     description=descriptions[i] if descriptions else None,
                 )
             )
-
-        self._save_metadata(user_dir, metadata)
         return results
 
     def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
```
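`upload_ontologies` now delegates per-file validation and persistence to `upload_ontology` instead of duplicating that logic inline. A hedged sketch of calling the batch method directly; constructing `UploadFile` objects by hand is only for illustration, since in the running API FastAPI builds them from the multipart request:

```python
import io
from fastapi import UploadFile

async def demo(ontology_service, user):
    # Illustrative only: "ontology_service" and "user" are assumed to
    # exist in scope; the OWL payloads here are stand-in bytes.
    files = [
        UploadFile(file=io.BytesIO(b"<owl/>"), filename="medical_terms.owl"),
        UploadFile(file=io.BytesIO(b"<owl/>"), filename="org_chart.owl"),
    ]
    results = await ontology_service.upload_ontologies(
        ontology_key=["medical_terms", "org_chart"],
        files=files,
        user=user,
        descriptions=["Medical vocabulary", "Company structure"],
    )
    # Each entry is an OntologyMetadata produced by one upload_ontology call.
    return results
```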
cognee/api/v1/ontologies/routers/get_ontology_router.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, File, Form, UploadFile, Depends,
+from fastapi import APIRouter, File, Form, UploadFile, Depends, Request
 from fastapi.responses import JSONResponse
 from typing import Optional, List
 
@@ -15,28 +15,25 @@ def get_ontology_router() -> APIRouter:
 
     @router.post("", response_model=dict)
     async def upload_ontology(
+        request: Request,
         ontology_key: str = Form(...),
-        ontology_file:
-
+        ontology_file: UploadFile = File(...),
+        description: Optional[str] = Form(None),
         user: User = Depends(get_authenticated_user),
     ):
         """
-        Upload
-
-        Supports both single and multiple file uploads:
-        - Single file: ontology_key=["key"], ontology_file=[file]
-        - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
+        Upload a single ontology file for later use in cognify operations.
 
         ## Request Parameters
-        - **ontology_key** (str):
-        - **ontology_file** (
-        - **
+        - **ontology_key** (str): User-defined identifier for the ontology.
+        - **ontology_file** (UploadFile): Single OWL format ontology file
+        - **description** (Optional[str]): Optional description for the ontology.
 
         ## Response
-        Returns metadata about uploaded
+        Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp.
 
         ## Error Codes
-        - **400 Bad Request**: Invalid file format, duplicate
+        - **400 Bad Request**: Invalid file format, duplicate key, multiple files uploaded
         - **500 Internal Server Error**: File system or processing errors
         """
         send_telemetry(
@@ -49,16 +46,22 @@ def get_ontology_router() -> APIRouter:
         )
 
         try:
-
-
-
-
-
-
-
-
-
+            # Enforce: exactly one uploaded file for "ontology_file"
+            form = await request.form()
+            uploaded_files = form.getlist("ontology_file")
+            if len(uploaded_files) != 1:
+                raise ValueError("Only one ontology_file is allowed")
+
+            if ontology_key.strip().startswith(("[", "{")):
+                raise ValueError("ontology_key must be a string")
+            if description is not None and description.strip().startswith(("[", "{")):
+                raise ValueError("description must be a string")
+
+            result = await ontology_service.upload_ontology(
+                ontology_key=ontology_key,
+                file=ontology_file,
+                user=user,
+                description=description,
             )
 
             return {
@@ -70,10 +73,9 @@ def get_ontology_router() -> APIRouter:
                 "uploaded_at": result.uploaded_at,
                 "description": result.description,
             }
-            for result in results
             ]
         }
-        except
+        except ValueError as e:
             return JSONResponse(status_code=400, content={"error": str(e)})
         except Exception as e:
             return JSONResponse(status_code=500, content={"error": str(e)})
```
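Given the stricter handler (exactly one file, plain-string key and description), a hedged multipart upload sketch with httpx; the base URL, route prefix, content type, and token are assumptions rather than values shown in this diff:

```python
import httpx

# Hypothetical client call; base URL, route prefix, and token are placeholders.
with open("medical_terms.owl", "rb") as owl_file:
    response = httpx.post(
        "http://localhost:8000/api/v1/ontologies",
        data={"ontology_key": "medical_terms", "description": "Medical vocabulary"},
        files={"ontology_file": ("medical_terms.owl", owl_file, "application/rdf+xml")},
        headers={"Authorization": "Bearer <token>"},
    )
print(response.status_code, response.json())
```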
cognee/api/v1/search/search.py
CHANGED

```diff
@@ -31,6 +31,8 @@ async def search(
     only_context: bool = False,
     use_combined_context: bool = False,
     session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -200,6 +202,8 @@ async def search(
         only_context=only_context,
         use_combined_context=use_combined_context,
         session_id=session_id,
+        wide_search_top_k=wide_search_top_k,
+        triplet_distance_penalty=triplet_distance_penalty,
     )
 
     return filtered_search_results
```