cognee 0.3.4.dev3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +16 -7
- cognee/api/health.py +5 -9
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
- cognee/api/v1/search/search.py +3 -0
- cognee/api/v1/ui/__init__.py +1 -1
- cognee/api/v1/ui/ui.py +215 -150
- cognee/api/v1/update/__init__.py +1 -0
- cognee/api/v1/update/routers/__init__.py +1 -0
- cognee/api/v1/update/routers/get_update_router.py +90 -0
- cognee/api/v1/update/update.py +100 -0
- cognee/base_config.py +5 -2
- cognee/cli/_cognee.py +28 -10
- cognee/cli/commands/delete_command.py +34 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
- cognee/eval_framework/modal_eval_dashboard.py +9 -1
- cognee/infrastructure/databases/graph/config.py +9 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
- cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
- cognee/infrastructure/databases/relational/config.py +4 -4
- cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
- cognee/infrastructure/databases/vector/config.py +7 -7
- cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
- cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
- cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
- cognee/infrastructure/files/storage/s3_config.py +1 -0
- cognee/infrastructure/files/utils/open_data_file.py +7 -14
- cognee/infrastructure/llm/LLMGateway.py +19 -117
- cognee/infrastructure/llm/config.py +28 -13
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
- cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
- cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
- cognee/infrastructure/llm/prompts/test.txt +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
- cognee/infrastructure/llm/utils.py +4 -4
- cognee/infrastructure/loaders/LoaderEngine.py +5 -2
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/data/methods/get_deletion_counts.py +92 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/ingestion/data_types/TextData.py +0 -1
- cognee/modules/notebooks/methods/create_notebook.py +3 -1
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
- cognee/modules/retrieval/code_retriever.py +2 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
- cognee/modules/retrieval/graph_completion_retriever.py +0 -3
- cognee/modules/retrieval/insights_retriever.py +1 -1
- cognee/modules/retrieval/jaccard_retrival.py +60 -0
- cognee/modules/retrieval/lexical_retriever.py +123 -0
- cognee/modules/retrieval/natural_language_retriever.py +2 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
- cognee/modules/retrieval/utils/completion.py +4 -7
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/no_access_control_search.py +1 -1
- cognee/modules/search/methods/search.py +32 -13
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/users/methods/create_user.py +0 -2
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +12 -1
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +12 -1
- cognee/modules/visualization/cognee_network_visualization.py +13 -9
- cognee/shared/data_models.py +0 -1
- cognee/shared/utils.py +0 -32
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/codingagents/coding_rule_associations.py +3 -2
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
- cognee/tasks/graph/extract_graph_from_code.py +2 -2
- cognee/tasks/graph/extract_graph_from_data.py +55 -12
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/migrate_relational_database.py +132 -41
- cognee/tasks/ingestion/resolve_data_directories.py +4 -1
- cognee/tasks/schema/ingest_database_schema.py +134 -0
- cognee/tasks/schema/models.py +40 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +3 -1
- cognee/tasks/summarization/summarize_code.py +2 -2
- cognee/tasks/summarization/summarize_text.py +2 -2
- cognee/tasks/temporal_graph/enrich_events.py +2 -2
- cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
- cognee/tests/test_advanced_pdf_loader.py +141 -0
- cognee/tests/test_chromadb.py +40 -0
- cognee/tests/test_cognee_server_start.py +6 -1
- cognee/tests/test_data/Quantum_computers.txt +9 -0
- cognee/tests/test_lancedb.py +211 -0
- cognee/tests/test_pgvector.py +40 -0
- cognee/tests/test_relational_db_migration.py +76 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/METADATA +92 -96
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/RECORD +176 -162
- distributed/pyproject.toml +0 -1
- cognee/infrastructure/data/utils/extract_keywords.py +0 -48
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
- cognee/tasks/graph/infer_data_ontology.py +0 -309
- cognee/tests/test_falkordb.py +0 -174
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/WHEEL +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -89,7 +89,6 @@ async def get_memory_fragment(
|
|
|
89
89
|
|
|
90
90
|
async def brute_force_triplet_search(
|
|
91
91
|
query: str,
|
|
92
|
-
user: User,
|
|
93
92
|
top_k: int = 5,
|
|
94
93
|
collections: Optional[List[str]] = None,
|
|
95
94
|
properties_to_project: Optional[List[str]] = None,
|
|
@@ -102,7 +101,6 @@ async def brute_force_triplet_search(
|
|
|
102
101
|
|
|
103
102
|
Args:
|
|
104
103
|
query (str): The search query.
|
|
105
|
-
user (User): The user performing the search.
|
|
106
104
|
top_k (int): The number of top results to retrieve.
|
|
107
105
|
collections (Optional[List[str]]): List of collections to query.
|
|
108
106
|
properties_to_project (Optional[List[str]]): List of properties to project.
|
|
@@ -139,12 +137,10 @@ async def brute_force_triplet_search(
|
|
|
139
137
|
|
|
140
138
|
query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
|
|
141
139
|
|
|
142
|
-
send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
|
|
143
|
-
|
|
144
140
|
async def search_in_collection(collection_name: str):
|
|
145
141
|
try:
|
|
146
142
|
return await vector_engine.search(
|
|
147
|
-
collection_name=collection_name, query_vector=query_vector, limit=
|
|
143
|
+
collection_name=collection_name, query_vector=query_vector, limit=None
|
|
148
144
|
)
|
|
149
145
|
except CollectionNotFoundError:
|
|
150
146
|
return []
|
|
@@ -176,20 +172,14 @@ async def brute_force_triplet_search(
|
|
|
176
172
|
|
|
177
173
|
results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
|
|
178
174
|
|
|
179
|
-
send_telemetry("cognee.brute_force_triplet_search EXECUTION COMPLETED", user.id)
|
|
180
|
-
|
|
181
175
|
return results
|
|
182
176
|
|
|
183
177
|
except CollectionNotFoundError:
|
|
184
178
|
return []
|
|
185
179
|
except Exception as error:
|
|
186
180
|
logger.error(
|
|
187
|
-
"Error during brute force search for
|
|
188
|
-
user.id,
|
|
181
|
+
"Error during brute force search for query: %s. Error: %s",
|
|
189
182
|
query,
|
|
190
183
|
error,
|
|
191
184
|
)
|
|
192
|
-
send_telemetry(
|
|
193
|
-
"cognee.brute_force_triplet_search EXECUTION FAILED", user.id, {"error": str(error)}
|
|
194
|
-
)
|
|
195
185
|
raise error
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
3
|
+
from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
async def generate_completion(
|
|
@@ -11,10 +12,8 @@ async def generate_completion(
|
|
|
11
12
|
) -> str:
|
|
12
13
|
"""Generates a completion using LLM with given context and prompts."""
|
|
13
14
|
args = {"question": query, "context": context}
|
|
14
|
-
user_prompt =
|
|
15
|
-
system_prompt = (
|
|
16
|
-
system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path)
|
|
17
|
-
)
|
|
15
|
+
user_prompt = render_prompt(user_prompt_path, args)
|
|
16
|
+
system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
|
|
18
17
|
|
|
19
18
|
return await LLMGateway.acreate_structured_output(
|
|
20
19
|
text_input=user_prompt,
|
|
@@ -29,9 +28,7 @@ async def summarize_text(
|
|
|
29
28
|
system_prompt: str = None,
|
|
30
29
|
) -> str:
|
|
31
30
|
"""Summarizes text using LLM with the specified prompt."""
|
|
32
|
-
system_prompt = (
|
|
33
|
-
system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path)
|
|
34
|
-
)
|
|
31
|
+
system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
|
|
35
32
|
|
|
36
33
|
return await LLMGateway.acreate_structured_output(
|
|
37
34
|
text_input=text,
|
|
@@ -15,6 +15,7 @@ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
|
|
|
15
15
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
16
16
|
from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
|
|
17
17
|
from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever
|
|
18
|
+
from cognee.modules.retrieval.jaccard_retrival import JaccardChunksRetriever
|
|
18
19
|
from cognee.modules.retrieval.graph_summary_completion_retriever import (
|
|
19
20
|
GraphSummaryCompletionRetriever,
|
|
20
21
|
)
|
|
@@ -152,6 +153,12 @@ async def get_search_type_tools(
|
|
|
152
153
|
TemporalRetriever(top_k=top_k).get_completion,
|
|
153
154
|
TemporalRetriever(top_k=top_k).get_context,
|
|
154
155
|
],
|
|
156
|
+
SearchType.CHUNKS_LEXICAL: (
|
|
157
|
+
lambda _r=JaccardChunksRetriever(top_k=top_k): [
|
|
158
|
+
_r.get_completion,
|
|
159
|
+
_r.get_context,
|
|
160
|
+
]
|
|
161
|
+
)(),
|
|
155
162
|
SearchType.CODING_RULES: [
|
|
156
163
|
CodingRulesRetriever(rules_nodeset_name=node_name).get_existing_rules,
|
|
157
164
|
],
|
|
@@ -35,7 +35,7 @@ async def no_access_control_search(
|
|
|
35
35
|
[get_completion, get_context] = search_tools
|
|
36
36
|
|
|
37
37
|
if only_context:
|
|
38
|
-
return await get_context(query_text)
|
|
38
|
+
return None, await get_context(query_text), []
|
|
39
39
|
|
|
40
40
|
context = await get_context(query_text)
|
|
41
41
|
result = await get_completion(query_text, context)
|
|
@@ -19,7 +19,9 @@ from cognee.modules.search.types import (
|
|
|
19
19
|
from cognee.modules.search.operations import log_query, log_result
|
|
20
20
|
from cognee.modules.users.models import User
|
|
21
21
|
from cognee.modules.data.models import Dataset
|
|
22
|
-
from cognee.modules.
|
|
22
|
+
from cognee.modules.data.methods.get_authorized_existing_datasets import (
|
|
23
|
+
get_authorized_existing_datasets,
|
|
24
|
+
)
|
|
23
25
|
|
|
24
26
|
from .get_search_type_tools import get_search_type_tools
|
|
25
27
|
from .no_access_control_search import no_access_control_search
|
|
@@ -143,20 +145,35 @@ async def search(
|
|
|
143
145
|
context = prepared_search_results["context"]
|
|
144
146
|
datasets = prepared_search_results["datasets"]
|
|
145
147
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
148
|
+
if only_context:
|
|
149
|
+
return_value.append(
|
|
150
|
+
{
|
|
151
|
+
"search_result": [context] if context else None,
|
|
152
|
+
"dataset_id": datasets[0].id,
|
|
153
|
+
"dataset_name": datasets[0].name,
|
|
154
|
+
"graphs": graphs,
|
|
155
|
+
}
|
|
156
|
+
)
|
|
157
|
+
else:
|
|
158
|
+
return_value.append(
|
|
159
|
+
{
|
|
160
|
+
"search_result": [result] if result else None,
|
|
161
|
+
"dataset_id": datasets[0].id,
|
|
162
|
+
"dataset_name": datasets[0].name,
|
|
163
|
+
"graphs": graphs,
|
|
164
|
+
}
|
|
165
|
+
)
|
|
154
166
|
return return_value
|
|
155
167
|
else:
|
|
156
168
|
return_value = []
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
169
|
+
if only_context:
|
|
170
|
+
for search_result in search_results:
|
|
171
|
+
prepared_search_results = await prepare_search_result(search_result)
|
|
172
|
+
return_value.append(prepared_search_results["context"])
|
|
173
|
+
else:
|
|
174
|
+
for search_result in search_results:
|
|
175
|
+
result, context, datasets = search_result
|
|
176
|
+
return_value.append(result)
|
|
160
177
|
# For maintaining backwards compatibility
|
|
161
178
|
if len(return_value) == 1 and isinstance(return_value[0], list):
|
|
162
179
|
return return_value[0]
|
|
@@ -187,7 +204,9 @@ async def authorized_search(
|
|
|
187
204
|
Not to be used outside of active access control mode.
|
|
188
205
|
"""
|
|
189
206
|
# Find datasets user has read access for (if datasets are provided only return them. Provided user has read access)
|
|
190
|
-
search_datasets = await
|
|
207
|
+
search_datasets = await get_authorized_existing_datasets(
|
|
208
|
+
datasets=dataset_ids, permission_type="read", user=user
|
|
209
|
+
)
|
|
191
210
|
|
|
192
211
|
if use_combined_context:
|
|
193
212
|
search_responses = await search_in_datasets_context(
|
|
@@ -9,6 +9,18 @@ from uuid import UUID
|
|
|
9
9
|
async def authorized_give_permission_on_datasets(
|
|
10
10
|
principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
|
|
11
11
|
):
|
|
12
|
+
"""
|
|
13
|
+
Give permission to certain datasets to a user.
|
|
14
|
+
The request owner must have the necessary permission to share the datasets.
|
|
15
|
+
Args:
|
|
16
|
+
principal_id: Id of user to whom datasets are shared
|
|
17
|
+
dataset_ids: Ids of datasets to share
|
|
18
|
+
permission_name: Name of permission to give
|
|
19
|
+
owner_id: Id of the request owner
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
None
|
|
23
|
+
"""
|
|
12
24
|
# If only a single dataset UUID is provided transform it to a list
|
|
13
25
|
if not isinstance(dataset_ids, list):
|
|
14
26
|
dataset_ids = [dataset_ids]
|
|
@@ -10,6 +10,17 @@ logger = get_logger()
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
|
|
13
|
+
"""
|
|
14
|
+
Check if a user has a specific permission on a dataset.
|
|
15
|
+
Args:
|
|
16
|
+
user: User whose permission is checked
|
|
17
|
+
permission_type: Type of permission to check
|
|
18
|
+
dataset_id: Id of the dataset
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
None
|
|
22
|
+
|
|
23
|
+
"""
|
|
13
24
|
if user is None:
|
|
14
25
|
user = await get_default_user()
|
|
15
26
|
|
|
@@ -11,6 +11,16 @@ logger = get_logger()
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
|
|
14
|
+
"""
|
|
15
|
+
Return a list of datasets the user has permission for.
|
|
16
|
+
If the user is part of a tenant, return datasets his roles have permission for.
|
|
17
|
+
Args:
|
|
18
|
+
user
|
|
19
|
+
permission_type
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
list[Dataset]: List of datasets user has permission for
|
|
23
|
+
"""
|
|
14
24
|
datasets = list()
|
|
15
25
|
# Get all datasets User has explicit access to
|
|
16
26
|
datasets.extend(await get_principal_datasets(user, permission_type))
|
|
@@ -8,6 +8,16 @@ from ...models import ACL, Permission
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -> list[str]:
|
|
11
|
+
"""
|
|
12
|
+
Return a list of documents ids for which the user has read permission.
|
|
13
|
+
If datasets are specified, return only documents from those datasets.
|
|
14
|
+
Args:
|
|
15
|
+
user_id: Id of the user
|
|
16
|
+
datasets: List of datasets
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
list[str]: List of documents for which the user has read permission
|
|
20
|
+
"""
|
|
11
21
|
db_engine = get_relational_engine()
|
|
12
22
|
|
|
13
23
|
async with db_engine.get_async_session() as session:
|
|
@@ -6,6 +6,15 @@ from ...models.Principal import Principal
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
async def get_principal(principal_id: UUID):
|
|
9
|
+
"""
|
|
10
|
+
Return information about a user based on their id
|
|
11
|
+
Args:
|
|
12
|
+
principal_id: Id of the user
|
|
13
|
+
|
|
14
|
+
Returns:
|
|
15
|
+
principal: Information about the user (principal)
|
|
16
|
+
|
|
17
|
+
"""
|
|
9
18
|
db_engine = get_relational_engine()
|
|
10
19
|
|
|
11
20
|
async with db_engine.get_async_session() as session:
|
|
@@ -9,6 +9,17 @@ from ...models.ACL import ACL
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
|
|
12
|
+
"""
|
|
13
|
+
Return a list of datasets for which the user (principal) has a certain permission.
|
|
14
|
+
Args:
|
|
15
|
+
principal: Information about the user
|
|
16
|
+
permission_type: Type of permission
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
list[Dataset]: List of datasets for which the user (principal)
|
|
20
|
+
has the permission (permission_type).
|
|
21
|
+
|
|
22
|
+
"""
|
|
12
23
|
db_engine = get_relational_engine()
|
|
13
24
|
|
|
14
25
|
async with db_engine.get_async_session() as session:
|
|
@@ -9,6 +9,16 @@ from ...models.Role import Role
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
async def get_role(tenant_id: UUID, role_name: str):
|
|
12
|
+
"""
|
|
13
|
+
Return the role with the name role_name of the given tenant.
|
|
14
|
+
Args:
|
|
15
|
+
tenant_id: Id of the given tenant
|
|
16
|
+
role_name: Name of the role
|
|
17
|
+
|
|
18
|
+
Returns
|
|
19
|
+
The role for the given tenant.
|
|
20
|
+
|
|
21
|
+
"""
|
|
12
22
|
db_engine = get_relational_engine()
|
|
13
23
|
|
|
14
24
|
async with db_engine.get_async_session() as session:
|
|
@@ -15,9 +15,9 @@ async def get_specific_user_permission_datasets(
|
|
|
15
15
|
Return a list of datasets user has given permission for. If a list of datasets is provided,
|
|
16
16
|
verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
|
|
17
17
|
Args:
|
|
18
|
-
user_id:
|
|
19
|
-
permission_type:
|
|
20
|
-
dataset_ids:
|
|
18
|
+
user_id: Id of the user.
|
|
19
|
+
permission_type: Type of the permission.
|
|
20
|
+
dataset_ids: Ids of the provided datasets
|
|
21
21
|
|
|
22
22
|
Returns:
|
|
23
23
|
list[Dataset]: List of datasets user has permission for
|
|
@@ -8,6 +8,15 @@ from ...models.Tenant import Tenant
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
async def get_tenant(tenant_id: UUID):
|
|
11
|
+
"""
|
|
12
|
+
Return information about the tenant based on the given id.
|
|
13
|
+
Args:
|
|
14
|
+
tenant_id: Id of the given tenant
|
|
15
|
+
|
|
16
|
+
Returns
|
|
17
|
+
Information about the given tenant.
|
|
18
|
+
|
|
19
|
+
"""
|
|
11
20
|
db_engine = get_relational_engine()
|
|
12
21
|
|
|
13
22
|
async with db_engine.get_async_session() as session:
|
|
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
async def give_default_permission_to_role(role_id: UUID, permission_name: str):
|
|
19
|
+
"""
|
|
20
|
+
Give the permission with given name to the role with the given id as a default permission.
|
|
21
|
+
Args:
|
|
22
|
+
role_id: Id of the role
|
|
23
|
+
permission_name: Name of the permission
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
None
|
|
27
|
+
"""
|
|
19
28
|
db_engine = get_relational_engine()
|
|
20
29
|
|
|
21
30
|
async with db_engine.get_async_session() as session:
|
|
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
|
|
19
|
+
"""
|
|
20
|
+
Give the permission with given name to the tenant with the given id as a default permission.
|
|
21
|
+
Args:
|
|
22
|
+
tenant_id: Id of the tenant
|
|
23
|
+
permission_name: Name of the permission
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
None
|
|
27
|
+
"""
|
|
19
28
|
db_engine = get_relational_engine()
|
|
20
29
|
async with db_engine.get_async_session() as session:
|
|
21
30
|
tenant = (
|
|
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
async def give_default_permission_to_user(user_id: UUID, permission_name: str):
|
|
19
|
+
"""
|
|
20
|
+
Give the permission with given name to the user with the given id as a default permission.
|
|
21
|
+
Args:
|
|
22
|
+
user_id: Id of the tenant
|
|
23
|
+
permission_name: Name of the permission
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
None
|
|
27
|
+
"""
|
|
19
28
|
db_engine = get_relational_engine()
|
|
20
29
|
async with db_engine.get_async_session() as session:
|
|
21
30
|
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
|
|
@@ -24,6 +24,16 @@ async def give_permission_on_dataset(
|
|
|
24
24
|
dataset_id: UUID,
|
|
25
25
|
permission_name: str,
|
|
26
26
|
):
|
|
27
|
+
"""
|
|
28
|
+
Give a specific permission on a dataset to a user.
|
|
29
|
+
Args:
|
|
30
|
+
principal: User who is being given the permission on the dataset
|
|
31
|
+
dataset_id: Id of the dataset
|
|
32
|
+
permission_name: Name of permission to give
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
None
|
|
36
|
+
"""
|
|
27
37
|
db_engine = get_relational_engine()
|
|
28
38
|
|
|
29
39
|
async with db_engine.get_async_session() as session:
|
|
@@ -21,6 +21,17 @@ from cognee.modules.users.models import (
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
|
|
24
|
+
"""
|
|
25
|
+
Add a user with the given id to the role with the given id.
|
|
26
|
+
Args:
|
|
27
|
+
user_id: Id of the user.
|
|
28
|
+
role_id: Id of the role.
|
|
29
|
+
owner_id: Id of the request owner.
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
None
|
|
33
|
+
|
|
34
|
+
"""
|
|
24
35
|
db_engine = get_relational_engine()
|
|
25
36
|
async with db_engine.get_async_session() as session:
|
|
26
37
|
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
|
|
@@ -15,7 +15,17 @@ from cognee.modules.users.models import (
|
|
|
15
15
|
async def create_role(
|
|
16
16
|
role_name: str,
|
|
17
17
|
owner_id: UUID,
|
|
18
|
-
):
|
|
18
|
+
) -> UUID:
|
|
19
|
+
"""
|
|
20
|
+
Create a new role with the given name, if the request owner with the given id
|
|
21
|
+
has the necessary permission.
|
|
22
|
+
Args:
|
|
23
|
+
role_name: Name of the new role.
|
|
24
|
+
owner_id: Id of the request owner.
|
|
25
|
+
|
|
26
|
+
Returns:
|
|
27
|
+
None
|
|
28
|
+
"""
|
|
19
29
|
db_engine = get_relational_engine()
|
|
20
30
|
async with db_engine.get_async_session() as session:
|
|
21
31
|
user = await get_user(owner_id)
|
|
@@ -35,3 +45,4 @@ async def create_role(
|
|
|
35
45
|
|
|
36
46
|
await session.commit()
|
|
37
47
|
await session.refresh(role)
|
|
48
|
+
return role.id
|
|
@@ -13,6 +13,18 @@ from cognee.modules.users.exceptions import (
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
|
|
16
|
+
"""
|
|
17
|
+
Add a user with the given id to the tenant with the given id.
|
|
18
|
+
This can only be successful if the request owner with the given id is the tenant owner.
|
|
19
|
+
Args:
|
|
20
|
+
user_id: Id of the user.
|
|
21
|
+
tenant_id: Id of the tenant.
|
|
22
|
+
owner_id: Id of the request owner.
|
|
23
|
+
|
|
24
|
+
Returns:
|
|
25
|
+
None
|
|
26
|
+
|
|
27
|
+
"""
|
|
16
28
|
db_engine = get_relational_engine()
|
|
17
29
|
async with db_engine.get_async_session() as session:
|
|
18
30
|
user = await get_user(user_id)
|
|
@@ -7,7 +7,17 @@ from cognee.modules.users.models import Tenant
|
|
|
7
7
|
from cognee.modules.users.methods import get_user
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
async def create_tenant(tenant_name: str, user_id: UUID):
|
|
10
|
+
async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
|
|
11
|
+
"""
|
|
12
|
+
Create a new tenant with the given name, for the user with the given id.
|
|
13
|
+
This user is the owner of the tenant.
|
|
14
|
+
Args:
|
|
15
|
+
tenant_name: Name of the new tenant.
|
|
16
|
+
user_id: Id of the user.
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
UUID: Id of the newly created tenant.
|
|
20
|
+
"""
|
|
11
21
|
db_engine = get_relational_engine()
|
|
12
22
|
async with db_engine.get_async_session() as session:
|
|
13
23
|
try:
|
|
@@ -24,5 +34,6 @@ async def create_tenant(tenant_name: str, user_id: UUID):
|
|
|
24
34
|
user.tenant_id = tenant.id
|
|
25
35
|
await session.merge(user)
|
|
26
36
|
await session.commit()
|
|
37
|
+
return tenant.id
|
|
27
38
|
except IntegrityError:
|
|
28
39
|
raise EntityAlreadyExistsError(message="Tenant already exists.")
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
|
-
import networkx
|
|
4
3
|
|
|
5
4
|
from cognee.shared.logging_utils import get_logger
|
|
6
5
|
from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage
|
|
@@ -9,6 +8,8 @@ logger = get_logger()
|
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
async def cognee_network_visualization(graph_data, destination_file_path: str = None):
|
|
11
|
+
import networkx
|
|
12
|
+
|
|
12
13
|
nodes_data, edges_data = graph_data
|
|
13
14
|
|
|
14
15
|
G = networkx.DiGraph()
|
|
@@ -22,6 +23,9 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|
|
22
23
|
"TableRow": "#f47710",
|
|
23
24
|
"TableType": "#6510f4",
|
|
24
25
|
"ColumnValue": "#13613a",
|
|
26
|
+
"SchemaTable": "#f47710",
|
|
27
|
+
"DatabaseSchema": "#6510f4",
|
|
28
|
+
"SchemaRelationship": "#13613a",
|
|
25
29
|
"default": "#D3D3D3",
|
|
26
30
|
}
|
|
27
31
|
|
|
@@ -104,7 +108,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|
|
104
108
|
.nodes circle { stroke: white; stroke-width: 0.5px; filter: drop-shadow(0 0 5px rgba(255,255,255,0.3)); }
|
|
105
109
|
.node-label { font-size: 5px; font-weight: bold; fill: white; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
|
106
110
|
.edge-label { font-size: 3px; fill: rgba(255, 255, 255, 0.7); text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
|
107
|
-
|
|
111
|
+
|
|
108
112
|
.tooltip {
|
|
109
113
|
position: absolute;
|
|
110
114
|
text-align: left;
|
|
@@ -166,7 +170,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|
|
166
170
|
// Create tooltip content for edge
|
|
167
171
|
var content = "<strong>Edge Information</strong><br/>";
|
|
168
172
|
content += "Relationship: " + d.relation + "<br/>";
|
|
169
|
-
|
|
173
|
+
|
|
170
174
|
// Show all weights
|
|
171
175
|
if (d.all_weights && Object.keys(d.all_weights).length > 0) {
|
|
172
176
|
content += "<strong>Weights:</strong><br/>";
|
|
@@ -176,23 +180,23 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|
|
176
180
|
} else if (d.weight !== null && d.weight !== undefined) {
|
|
177
181
|
content += "Weight: " + d.weight + "<br/>";
|
|
178
182
|
}
|
|
179
|
-
|
|
183
|
+
|
|
180
184
|
if (d.relationship_type) {
|
|
181
185
|
content += "Type: " + d.relationship_type + "<br/>";
|
|
182
186
|
}
|
|
183
|
-
|
|
187
|
+
|
|
184
188
|
// Add other edge properties
|
|
185
189
|
if (d.edge_info) {
|
|
186
190
|
Object.keys(d.edge_info).forEach(function(key) {
|
|
187
|
-
if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' &&
|
|
188
|
-
key !== 'source_node_id' && key !== 'target_node_id' &&
|
|
189
|
-
key !== 'relationship_name' && key !== 'updated_at' &&
|
|
191
|
+
if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' &&
|
|
192
|
+
key !== 'source_node_id' && key !== 'target_node_id' &&
|
|
193
|
+
key !== 'relationship_name' && key !== 'updated_at' &&
|
|
190
194
|
!key.startsWith('weight_')) {
|
|
191
195
|
content += key + ": " + d.edge_info[key] + "<br/>";
|
|
192
196
|
}
|
|
193
197
|
});
|
|
194
198
|
}
|
|
195
|
-
|
|
199
|
+
|
|
196
200
|
tooltip.html(content)
|
|
197
201
|
.style("left", (d3.event.pageX + 10) + "px")
|
|
198
202
|
.style("top", (d3.event.pageY - 10) + "px")
|
cognee/shared/data_models.py
CHANGED
cognee/shared/utils.py
CHANGED
|
@@ -4,7 +4,6 @@ import os
|
|
|
4
4
|
import ssl
|
|
5
5
|
import requests
|
|
6
6
|
from datetime import datetime, timezone
|
|
7
|
-
import matplotlib.pyplot as plt
|
|
8
7
|
import http.server
|
|
9
8
|
import socketserver
|
|
10
9
|
from threading import Thread
|
|
@@ -30,37 +29,6 @@ def create_secure_ssl_context() -> ssl.SSLContext:
|
|
|
30
29
|
return ssl.create_default_context()
|
|
31
30
|
|
|
32
31
|
|
|
33
|
-
def get_entities(tagged_tokens):
|
|
34
|
-
import nltk
|
|
35
|
-
|
|
36
|
-
nltk.download("maxent_ne_chunker", quiet=True)
|
|
37
|
-
|
|
38
|
-
from nltk.chunk import ne_chunk
|
|
39
|
-
|
|
40
|
-
return ne_chunk(tagged_tokens)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def extract_pos_tags(sentence):
|
|
44
|
-
"""Extract Part-of-Speech (POS) tags for words in a sentence."""
|
|
45
|
-
import nltk
|
|
46
|
-
|
|
47
|
-
# Ensure that the necessary NLTK resources are downloaded
|
|
48
|
-
nltk.download("words", quiet=True)
|
|
49
|
-
nltk.download("punkt", quiet=True)
|
|
50
|
-
nltk.download("averaged_perceptron_tagger", quiet=True)
|
|
51
|
-
|
|
52
|
-
from nltk.tag import pos_tag
|
|
53
|
-
from nltk.tokenize import word_tokenize
|
|
54
|
-
|
|
55
|
-
# Tokenize the sentence into words
|
|
56
|
-
tokens = word_tokenize(sentence)
|
|
57
|
-
|
|
58
|
-
# Tag each word with its corresponding POS tag
|
|
59
|
-
pos_tags = pos_tag(tokens)
|
|
60
|
-
|
|
61
|
-
return pos_tags
|
|
62
|
-
|
|
63
|
-
|
|
64
32
|
def get_anonymous_id():
|
|
65
33
|
"""Creates or reads a anonymous user id"""
|
|
66
34
|
tracking_id = os.getenv("TRACKING_ID", None)
|
|
@@ -7,7 +7,7 @@ from pydantic import BaseModel
|
|
|
7
7
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
8
8
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
9
9
|
from cognee.infrastructure.engine.models import DataPoint
|
|
10
|
-
from cognee.infrastructure.llm.
|
|
10
|
+
from cognee.infrastructure.llm.extraction import extract_categories
|
|
11
11
|
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
|
12
12
|
|
|
13
13
|
|
|
@@ -40,7 +40,7 @@ async def chunk_naive_llm_classifier(
|
|
|
40
40
|
return data_chunks
|
|
41
41
|
|
|
42
42
|
chunk_classifications = await asyncio.gather(
|
|
43
|
-
*[
|
|
43
|
+
*[extract_categories(chunk.text, classification_model) for chunk in data_chunks],
|
|
44
44
|
)
|
|
45
45
|
|
|
46
46
|
classification_data_points = []
|