cognee 0.3.4.dev4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +16 -7
- cognee/api/health.py +5 -9
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
- cognee/api/v1/search/search.py +3 -0
- cognee/api/v1/ui/__init__.py +1 -1
- cognee/api/v1/ui/ui.py +215 -150
- cognee/api/v1/update/__init__.py +1 -0
- cognee/api/v1/update/routers/__init__.py +1 -0
- cognee/api/v1/update/routers/get_update_router.py +90 -0
- cognee/api/v1/update/update.py +100 -0
- cognee/base_config.py +5 -2
- cognee/cli/_cognee.py +28 -10
- cognee/cli/commands/delete_command.py +34 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
- cognee/eval_framework/modal_eval_dashboard.py +9 -1
- cognee/infrastructure/databases/graph/config.py +9 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
- cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
- cognee/infrastructure/databases/relational/config.py +4 -4
- cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
- cognee/infrastructure/databases/vector/config.py +7 -7
- cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
- cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
- cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
- cognee/infrastructure/files/storage/s3_config.py +1 -0
- cognee/infrastructure/files/utils/open_data_file.py +7 -14
- cognee/infrastructure/llm/LLMGateway.py +19 -117
- cognee/infrastructure/llm/config.py +28 -13
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
- cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
- cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
- cognee/infrastructure/llm/prompts/test.txt +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
- cognee/infrastructure/llm/utils.py +4 -4
- cognee/infrastructure/loaders/LoaderEngine.py +5 -2
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/data/methods/get_deletion_counts.py +92 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/ingestion/data_types/TextData.py +0 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
- cognee/modules/retrieval/code_retriever.py +2 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
- cognee/modules/retrieval/graph_completion_retriever.py +0 -3
- cognee/modules/retrieval/insights_retriever.py +1 -1
- cognee/modules/retrieval/jaccard_retrival.py +60 -0
- cognee/modules/retrieval/lexical_retriever.py +123 -0
- cognee/modules/retrieval/natural_language_retriever.py +2 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
- cognee/modules/retrieval/utils/completion.py +4 -7
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/no_access_control_search.py +1 -1
- cognee/modules/search/methods/search.py +32 -13
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +12 -1
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +12 -1
- cognee/modules/visualization/cognee_network_visualization.py +13 -9
- cognee/shared/data_models.py +0 -1
- cognee/shared/utils.py +0 -32
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/codingagents/coding_rule_associations.py +3 -2
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
- cognee/tasks/graph/extract_graph_from_code.py +2 -2
- cognee/tasks/graph/extract_graph_from_data.py +55 -12
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/migrate_relational_database.py +132 -41
- cognee/tasks/ingestion/resolve_data_directories.py +4 -1
- cognee/tasks/schema/ingest_database_schema.py +134 -0
- cognee/tasks/schema/models.py +40 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +3 -1
- cognee/tasks/summarization/summarize_code.py +2 -2
- cognee/tasks/summarization/summarize_text.py +2 -2
- cognee/tasks/temporal_graph/enrich_events.py +2 -2
- cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
- cognee/tests/test_advanced_pdf_loader.py +141 -0
- cognee/tests/test_chromadb.py +40 -0
- cognee/tests/test_cognee_server_start.py +6 -1
- cognee/tests/test_data/Quantum_computers.txt +9 -0
- cognee/tests/test_lancedb.py +211 -0
- cognee/tests/test_pgvector.py +40 -0
- cognee/tests/test_relational_db_migration.py +76 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/METADATA +92 -96
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/RECORD +172 -160
- cognee/infrastructure/data/utils/extract_keywords.py +0 -48
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
- cognee/tasks/graph/infer_data_ontology.py +0 -309
- cognee/tests/test_falkordb.py +0 -174
- distributed/poetry.lock +0 -12238
- distributed/pyproject.toml +0 -186
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/WHEEL +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/NOTICE.md +0 -0
cognee/modules/ontology/models.py (new file)
@@ -0,0 +1,20 @@
+from typing import Any
+
+
+class AttachedOntologyNode:
+    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+
+    def __init__(self, uri: Any, category: str):
+        self.uri = uri
+        self.name = self._extract_name(uri)
+        self.category = category
+
+    @staticmethod
+    def _extract_name(uri: Any) -> str:
+        uri_str = str(uri)
+        if "#" in uri_str:
+            return uri_str.split("#")[-1]
+        return uri_str.rstrip("/").split("/")[-1]
+
+    def __repr__(self):
+        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
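Note: `_extract_name` prefers the URI fragment and falls back to the last path segment. A minimal illustration (the URIs are made up):

    node_a = AttachedOntologyNode("http://example.org/ontology#Car", "classes")
    node_b = AttachedOntologyNode("http://example.org/ontology/Car/", "classes")
    print(node_a)  # AttachedOntologyNode(name=Car, category=classes)
    print(node_b)  # AttachedOntologyNode(name=Car, category=classes)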
cognee/modules/ontology/ontology_config.py (new file)
@@ -0,0 +1,24 @@
+from typing import TypedDict, Optional
+
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+
+class OntologyConfig(TypedDict, total=False):
+    """Configuration containing ontology resolver.
+
+    Attributes:
+        ontology_resolver: The ontology resolver instance to use
+    """
+
+    ontology_resolver: Optional[BaseOntologyResolver]
+
+
+class Config(TypedDict, total=False):
+    """Top-level configuration dictionary.
+
+    Attributes:
+        ontology_config: Configuration containing ontology resolver
+    """
+
+    ontology_config: Optional[OntologyConfig]
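A hedged sketch of how a caller might populate these dictionaries (import paths follow the file locations in this diff; the resolver arguments are illustrative):

    from cognee.modules.ontology.ontology_config import Config, OntologyConfig
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver

    ontology_config: OntologyConfig = {"ontology_resolver": RDFLibOntologyResolver()}
    config: Config = {"ontology_config": ontology_config}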
cognee/modules/ontology/ontology_env_config.py (new file)
@@ -0,0 +1,45 @@
+"""This module contains the configuration for ontology handling."""
+
+from functools import lru_cache
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class OntologyEnvConfig(BaseSettings):
+    """
+    Represents the configuration for ontology handling, including parameters for
+    ontology file storage and resolution/matching strategies.
+
+    Public methods:
+    - to_dict
+
+    Instance variables:
+    - ontology_resolver
+    - ontology_matching
+    - ontology_file_path
+    - model_config
+    """
+
+    ontology_resolver: str = "rdflib"
+    matching_strategy: str = "fuzzy"
+    ontology_file_path: str = ""
+
+    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
+
+    def to_dict(self) -> dict:
+        """
+        Return the configuration as a dictionary.
+        """
+        return {
+            "ontology_resolver": self.ontology_resolver,
+            "matching_strategy": self.matching_strategy,
+            "ontology_file_path": self.ontology_file_path,
+        }
+
+
+@lru_cache
+def get_ontology_env_config():
+    """
+    Retrieve the ontology configuration. This function utilizes caching to return a
+    singleton instance of the OntologyConfig class for efficiency.
+    """
+    return OntologyEnvConfig()
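Because OntologyEnvConfig is a pydantic BaseSettings, its fields can come from environment variables or a .env file, and the lru_cache means they must be set before the first get_ontology_env_config() call. A small sketch (the path is made up):

    import os

    os.environ["MATCHING_STRATEGY"] = "fuzzy"
    os.environ["ONTOLOGY_FILE_PATH"] = "/data/ontology.owl"

    from cognee.modules.ontology.ontology_env_config import get_ontology_env_config

    config = get_ontology_env_config()
    print(config.to_dict())
    # {'ontology_resolver': 'rdflib', 'matching_strategy': 'fuzzy', 'ontology_file_path': '/data/ontology.owl'}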
cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py}
@@ -10,31 +10,26 @@ from cognee.modules.ontology.exceptions import (
     FindClosestMatchError,
     GetSubgraphError,
 )
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
 
 logger = get_logger("OntologyAdapter")
 
 
-class AttachedOntologyNode:
-    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+class RDFLibOntologyResolver(BaseOntologyResolver):
+    """RDFLib-based ontology resolver implementation.
 
-    def __init__(self, uri: Any, category: str):
-        self.uri = uri
-        self.name = self._extract_name(uri)
-        self.category = category
+    This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
+    It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
+    """
 
-    @staticmethod
-    def _extract_name(uri: Any) -> str:
-        uri_str = str(uri)
-        if "#" in uri_str:
-            return uri_str.split("#")[-1]
-        return uri_str.rstrip("/").split("/")[-1]
-
-    def __repr__(self):
-        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
-
-
-class OntologyResolver:
-    def __init__(self, ontology_file: Optional[str] = None):
+    def __init__(
+        self,
+        ontology_file: Optional[str] = None,
+        matching_strategy: Optional[MatchingStrategy] = None,
+    ) -> None:
+        super().__init__(matching_strategy)
         self.ontology_file = ontology_file
         try:
             if ontology_file and os.path.exists(ontology_file):
@@ -60,7 +55,7 @@ class OntologyResolver:
         name = uri_str.rstrip("/").split("/")[-1]
         return name.lower().replace(" ", "_").strip()
 
-    def build_lookup(self):
+    def build_lookup(self) -> None:
         try:
             classes: Dict[str, URIRef] = {}
             individuals: Dict[str, URIRef] = {}
@@ -97,7 +92,7 @@
             logger.error("Failed to build lookup dictionary: %s", str(e))
             raise RuntimeError("Lookup build failed") from e
 
-    def refresh_lookup(self):
+    def refresh_lookup(self) -> None:
         self.build_lookup()
         logger.info("Ontology lookup refreshed.")
 
@@ -105,13 +100,8 @@
         try:
             normalized_name = name.lower().replace(" ", "_").strip()
             possible_matches = list(self.lookup.get(category, {}).keys())
-            if normalized_name in possible_matches:
-                return normalized_name
 
-            best_match = difflib.get_close_matches(
-                normalized_name, possible_matches, n=1, cutoff=0.8
-            )
-            return best_match[0] if best_match else None
+            return self.matching_strategy.find_match(normalized_name, possible_matches)
         except Exception as e:
             logger.error("Error in find_closest_match: %s", str(e))
             raise FindClosestMatchError() from e
@@ -125,7 +115,9 @@
 
     def get_subgraph(
         self, node_name: str, node_type: str = "individuals", directed: bool = True
-    ) -> Tuple[List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]]:
+    ) -> Tuple[
+        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+    ]:
         nodes_set = set()
         edges: List[Tuple[str, str, str]] = []
         visited = set()
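The new matching_strategies.py (+53) is not shown in this diff. Judging from the import above and the difflib call removed from find_closest_match, a plausible sketch of the strategy interface the resolver now delegates to (only the class and method names appear in this diff; the internals are an assumption):

    from abc import ABC, abstractmethod
    from difflib import get_close_matches
    from typing import List, Optional

    class MatchingStrategy(ABC):
        """Pluggable name-matching policy used by ontology resolvers."""

        @abstractmethod
        def find_match(self, name: str, possible_matches: List[str]) -> Optional[str]: ...

    class FuzzyMatchingStrategy(MatchingStrategy):
        """Mirrors the removed inline logic: exact hit first, then difflib with cutoff 0.8."""

        def find_match(self, name: str, possible_matches: List[str]) -> Optional[str]:
            if name in possible_matches:
                return name
            best = get_close_matches(name, possible_matches, n=1, cutoff=0.8)
            return best[0] if best else None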
cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
@@ -1,34 +1,31 @@
 from uuid import UUID
+from typing import Optional
 
-from cognee.api.v1.exceptions import DatasetNotFoundError
 from cognee.modules.users.models import User
-from cognee.modules.data.methods import (
-    …
-    create_authorized_dataset,
-    get_authorized_dataset,
-    get_authorized_dataset_by_name,
+from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import (
+    resolve_authorized_user_datasets,
 )
 
 
-async def resolve_authorized_user_dataset(
-    …
+async def resolve_authorized_user_dataset(
+    dataset_name: str, dataset_id: Optional[UUID] = None, user: Optional[User] = None
+):
+    """
+    Function handles creation and dataset authorization if dataset already exist for Cognee.
+    Verifies that provided user has necessary permission for provided Dataset.
+    If Dataset does not exist creates the Dataset and gives permission for the user creating the dataset.
 
-    …
+    Args:
+        dataset_name: Name of the dataset.
+        dataset_id: Id of the dataset.
+        user: Cognee User request is being processed for, if None default user will be used.
 
-    …
-    )
-    else:
-        raise ValueError("Either dataset_id or dataset_name must be provided.")
+    Returns:
+        Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
+    """
 
-    …
-    )
+    user, authorized_datasets = await resolve_authorized_user_datasets(
+        datasets=dataset_id if dataset_id else dataset_name, user=user
+    )
 
-    return user, …
+    return user, authorized_datasets[0]
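The single-dataset helper is now a thin wrapper over resolve_authorized_user_datasets: it forwards dataset_id when given (otherwise the name) and unpacks the first authorized dataset. An illustrative call (the dataset name is made up):

    user, dataset = await resolve_authorized_user_dataset(dataset_name="docs")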
cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
@@ -1,5 +1,5 @@
 from uuid import UUID
-from typing import Union, Tuple, List
+from typing import Union, Tuple, List, Optional
 
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.models import User
@@ -13,7 +13,7 @@ from cognee.modules.data.methods import (
 
 
 async def resolve_authorized_user_datasets(
-    datasets: Union[str, UUID, list[str], list[UUID]], user: User = None
+    datasets: Union[str, UUID, list[str], list[UUID]], user: Optional[User] = None
 ) -> Tuple[User, List[Dataset]]:
     """
     Function handles creation and dataset authorization if datasets already exist for Cognee.
@@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets(
         datasets: Dataset names or Dataset UUID (in case Datasets already exist)
 
     Returns:
-        …
+        Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets.
     """
     # If no user is provided use default user
     if user is None:
cognee/modules/retrieval/code_retriever.py
@@ -7,6 +7,7 @@ from cognee.shared.logging_utils import get_logger
 from cognee.modules.retrieval.base_retriever import BaseRetriever
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.infrastructure.llm.prompts import read_query_prompt
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 
 logger = get_logger("CodeRetriever")
@@ -41,7 +42,7 @@ class CodeRetriever(BaseRetriever):
             f"Processing query with LLM: '{query[:100]}{'...' if len(query) > 100 else ''}'"
         )
 
-        system_prompt = LLMGateway.read_query_prompt("codegraph_retriever_system.txt")
+        system_prompt = read_query_prompt("codegraph_retriever_system.txt")
 
         try:
             result = await LLMGateway.acreate_structured_output(
cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py
@@ -42,14 +42,12 @@ class TripletSearchContextProvider(BaseContextProvider):
         self,
         entities: List[DataPoint],
         query: str,
-        user: User,
         memory_fragment: CogneeGraph,
     ) -> List:
         """Creates search tasks for valid entities."""
         tasks = [
             brute_force_triplet_search(
                 query=f"{entity_text} {query}",
-                user=user,
                 top_k=self.top_k,
                 collections=self.collections,
                 properties_to_project=self.properties_to_project,
@@ -84,9 +82,8 @@ class TripletSearchContextProvider(BaseContextProvider):
         if not entities:
             return "No entities provided for context search."
 
-        user = await get_default_user()
         memory_fragment = await get_memory_fragment(self.properties_to_project)
-        search_tasks = self._get_search_tasks(entities, query, user, memory_fragment)
+        search_tasks = self._get_search_tasks(entities, query, memory_fragment)
 
         if not search_tasks:
             return "No valid entities found for context search."
cognee/modules/retrieval/graph_completion_cot_retriever.py
@@ -1,10 +1,11 @@
-from typing import Optional, List, Type
+from typing import Optional, List, Type, Any
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.shared.logging_utils import get_logger
 
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.modules.retrieval.utils.completion import generate_completion
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 
 logger = get_logger()
 
@@ -106,10 +107,10 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
             logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}")
             if round_idx < max_iter:
                 valid_args = {"query": query, "answer": completion, "context": context_text}
-                valid_user_prompt = LLMGateway.render_prompt(
+                valid_user_prompt = render_prompt(
                     filename=self.validation_user_prompt_path, context=valid_args
                 )
-                valid_system_prompt = LLMGateway.read_query_prompt(
+                valid_system_prompt = read_query_prompt(
                     prompt_file_name=self.validation_system_prompt_path
                 )
 
@@ -119,10 +120,10 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                     response_model=str,
                 )
                 followup_args = {"query": query, "answer": completion, "reasoning": reasoning}
-                followup_prompt = LLMGateway.render_prompt(
+                followup_prompt = render_prompt(
                     filename=self.followup_user_prompt_path, context=followup_args
                 )
-                followup_system = LLMGateway.read_query_prompt(
+                followup_system = read_query_prompt(
                     prompt_file_name=self.followup_system_prompt_path
                 )
cognee/modules/retrieval/graph_completion_retriever.py
@@ -93,11 +93,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
             for field_name in index_fields:
                 vector_index_collections.append(f"{subclass.__name__}_{field_name}")
 
-        user = await get_default_user()
-
         found_triplets = await brute_force_triplet_search(
             query,
-            user=user,
             top_k=self.top_k,
             collections=vector_index_collections or None,
             node_type=self.node_type,
cognee/modules/retrieval/insights_retriever.py
@@ -25,7 +25,7 @@ class InsightsRetriever(BaseGraphRetriever):
     - top_k
     """
 
-    def __init__(self, exploration_levels: int = 1, top_k: int = 5):
+    def __init__(self, exploration_levels: int = 1, top_k: Optional[int] = 5):
         """Initialize retriever with exploration levels and search parameters."""
         self.exploration_levels = exploration_levels
         self.top_k = top_k
cognee/modules/retrieval/jaccard_retrival.py (new file)
@@ -0,0 +1,60 @@
+from cognee.modules.retrieval.lexical_retriever import LexicalRetriever
+import re
+from collections import Counter
+from typing import Optional
+
+
+class JaccardChunksRetriever(LexicalRetriever):
+    """
+    Retriever that specializes LexicalRetriever to use Jaccard similarity.
+    """
+
+    def __init__(
+        self,
+        top_k: int = 10,
+        with_scores: bool = False,
+        stop_words: Optional[list[str]] = None,
+        multiset_jaccard: bool = False,
+    ):
+        """
+        Parameters
+        ----------
+        top_k : int
+            Number of top results to return.
+        with_scores : bool
+            If True, return (payload, score) pairs. Otherwise, only payloads.
+        stop_words : list[str], optional
+            List of tokens to filter out.
+        multiset_jaccard : bool
+            If True, use multiset Jaccard (frequency aware).
+        """
+        self.stop_words = {t.lower() for t in stop_words} if stop_words else set()
+        self.multiset_jaccard = multiset_jaccard
+
+        super().__init__(
+            tokenizer=self._tokenizer, scorer=self._scorer, top_k=top_k, with_scores=with_scores
+        )
+
+    def _tokenizer(self, text: str) -> list[str]:
+        """
+        Tokenizer: lowercases, splits on word characters (\w+), filters stopwords.
+        """
+        tokens = re.findall(r"\w+", text.lower())
+        return [t for t in tokens if t not in self.stop_words]
+
+    def _scorer(self, query_tokens: list[str], chunk_tokens: list[str]) -> float:
+        """
+        Jaccard similarity scorer.
+        - If multiset_jaccard=True, uses frequency-aware Jaccard.
+        - Otherwise, normal set Jaccard.
+        """
+        if self.multiset_jaccard:
+            q_counts, c_counts = Counter(query_tokens), Counter(chunk_tokens)
+            numerator = sum(min(q_counts[t], c_counts[t]) for t in set(q_counts) | set(c_counts))
+            denominator = sum(max(q_counts[t], c_counts[t]) for t in set(q_counts) | set(c_counts))
+            return numerator / denominator if denominator else 0.0
+        else:
+            q_set, c_set = set(query_tokens), set(chunk_tokens)
+            if not q_set or not c_set:
+                return 0.0
+            return len(q_set & c_set) / len(q_set | c_set)
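A worked example of the set-based scorer (values follow directly from the code above; the strings are made up):

    retriever = JaccardChunksRetriever(top_k=3, with_scores=True, stop_words=["the", "a"])
    q = retriever._tokenizer("the quick fox")      # ['quick', 'fox']
    c = retriever._tokenizer("a quick brown fox")  # ['quick', 'brown', 'fox']
    retriever._scorer(q, c)                        # 2 shared / 3 distinct tokens = 0.666...

With multiset_jaccard=True, frequencies matter: for query tokens ['fox', 'fox'] against chunk tokens ['fox'], the score is min(2, 1) / max(2, 1) = 0.5 instead of 1.0.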
cognee/modules/retrieval/lexical_retriever.py (new file)
@@ -0,0 +1,123 @@
+import asyncio
+from typing import Any, Callable, Optional
+from heapq import nlargest
+
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.modules.retrieval.base_retriever import BaseRetriever
+from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from cognee.shared.logging_utils import get_logger
+
+
+logger = get_logger("LexicalRetriever")
+
+
+class LexicalRetriever(BaseRetriever):
+    def __init__(
+        self, tokenizer: Callable, scorer: Callable, top_k: int = 10, with_scores: bool = False
+    ):
+        if not callable(tokenizer) or not callable(scorer):
+            raise TypeError("tokenizer and scorer must be callables")
+        if not isinstance(top_k, int) or top_k <= 0:
+            raise ValueError("top_k must be a positive integer")
+
+        self.tokenizer = tokenizer
+        self.scorer = scorer
+        self.top_k = top_k
+        self.with_scores = bool(with_scores)
+
+        # Cache keyed by dataset context
+        self.chunks: dict[str, Any] = {}  # {chunk_id: tokens}
+        self.payloads: dict[str, Any] = {}  # {chunk_id: original_document}
+        self._initialized = False
+        self._init_lock = asyncio.Lock()
+
+    async def initialize(self):
+        """Initialize retriever by reading all DocumentChunks from graph_engine."""
+        async with self._init_lock:
+            if self._initialized:
+                return
+
+            logger.info("Initializing LexicalRetriever by loading DocumentChunks from graph engine")
+
+            try:
+                graph_engine = await get_graph_engine()
+                nodes, _ = await graph_engine.get_filtered_graph_data([{"type": ["DocumentChunk"]}])
+            except Exception as e:
+                logger.error("Graph engine initialization failed")
+                raise NoDataError("Graph engine initialization failed") from e
+
+            chunk_count = 0
+            for node in nodes:
+                try:
+                    chunk_id, document = node
+                except Exception:
+                    logger.warning("Skipping node with unexpected shape: %r", node)
+                    continue
+
+                if document.get("type") == "DocumentChunk" and document.get("text"):
+                    try:
+                        tokens = self.tokenizer(document["text"])
+                        if not tokens:
+                            continue
+                        self.chunks[str(document.get("id", chunk_id))] = tokens
+                        self.payloads[str(document.get("id", chunk_id))] = document
+                        chunk_count += 1
+                    except Exception as e:
+                        logger.error("Tokenizer failed for chunk %s: %s", chunk_id, str(e))
+
+            if chunk_count == 0:
+                logger.error("Initialization completed but no valid chunks were loaded.")
+                raise NoDataError("No valid chunks loaded during initialization.")
+
+            self._initialized = True
+            logger.info("Initialized with %d document chunks", len(self.chunks))
+
+    async def get_context(self, query: str) -> Any:
+        """Retrieves relevant chunks for the given query."""
+        if not self._initialized:
+            await self.initialize()
+
+        if not self.chunks:
+            logger.warning("No chunks available in retriever")
+            return []
+
+        try:
+            query_tokens = self.tokenizer(query)
+        except Exception as e:
+            logger.error("Failed to tokenize query: %s", str(e))
+            return []
+
+        if not query_tokens:
+            logger.warning("Query produced no tokens")
+            return []
+
+        results = []
+        for chunk_id, chunk_tokens in self.chunks.items():
+            try:
+                score = self.scorer(query_tokens, chunk_tokens)
+                if not isinstance(score, (int, float)):
+                    logger.warning("Non-numeric score for chunk %s → treated as 0.0", chunk_id)
+                    score = 0.0
+            except Exception as e:
+                logger.error("Scorer failed for chunk %s: %s", chunk_id, str(e))
+                score = 0.0
+            results.append((chunk_id, score))
+
+        top_results = nlargest(self.top_k, results, key=lambda x: x[1])
+        logger.info(
+            "Retrieved %d/%d chunks for query (len=%d)",
+            len(top_results),
+            len(results),
+            len(query_tokens),
+        )
+
+        if self.with_scores:
+            return [(self.payloads[chunk_id], score) for chunk_id, score in top_results]
+        else:
+            return [self.payloads[chunk_id] for chunk_id, _ in top_results]
+
+    async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
+        """Returns context for the given query (retrieves if not provided)."""
+        if context is None:
+            context = await self.get_context(query)
+        return context
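LexicalRetriever is deliberately generic: any tokenizer/scorer pair defines a ranking, so simple term-overlap retrieval needs no subclass. A hedged sketch (the two functions are made up; the retriever API is as above):

    def tokenize(text: str) -> list[str]:
        return text.lower().split()

    def overlap(query_tokens: list[str], chunk_tokens: list[str]) -> float:
        # Count distinct query terms that also appear in the chunk.
        return float(len(set(query_tokens) & set(chunk_tokens)))

    retriever = LexicalRetriever(tokenizer=tokenize, scorer=overlap, top_k=5)
    # Inside an async context; the first call lazily loads DocumentChunk nodes:
    chunks = await retriever.get_completion("quantum computers")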
cognee/modules/retrieval/natural_language_retriever.py
@@ -2,6 +2,7 @@ from typing import Any, Optional
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.prompts import render_prompt
 from cognee.modules.retrieval.base_retriever import BaseRetriever
 from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
 from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
@@ -49,7 +50,7 @@ class NaturalLanguageRetriever(BaseRetriever):
 
     async def _generate_cypher_query(self, query: str, edge_schemas, previous_attempts=None) -> str:
         """Generate a Cypher query using LLM based on natural language query and schema information."""
-        system_prompt = LLMGateway.render_prompt(
+        system_prompt = render_prompt(
             self.system_prompt_path,
             context={
                 "edge_schemas": edge_schemas,
cognee/modules/retrieval/temporal_retriever.py
@@ -6,6 +6,7 @@ from operator import itemgetter
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.modules.retrieval.utils.completion import generate_completion
 from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.llm.prompts import render_prompt
 from cognee.infrastructure.llm import LLMGateway
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.shared.logging_utils import get_logger
@@ -72,7 +73,7 @@ class TemporalRetriever(GraphCompletionRetriever):
         else:
             base_directory = None
 
-        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
+        system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
 
         interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval)
 
@@ -129,7 +130,7 @@ class TemporalRetriever(GraphCompletionRetriever):
         query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
 
         vector_search_results = await vector_engine.search(
-            collection_name="Event_name", query_vector=query_vector, limit=…
+            collection_name="Event_name", query_vector=query_vector, limit=None
         )
 
         top_k_events = await self.filter_top_k_events(relevant_events, vector_search_results)
cognee/modules/retrieval/utils/brute_force_triplet_search.py
@@ -89,7 +89,6 @@ async def get_memory_fragment(
 
 async def brute_force_triplet_search(
     query: str,
-    user: User,
     top_k: int = 5,
     collections: Optional[List[str]] = None,
     properties_to_project: Optional[List[str]] = None,
@@ -102,7 +101,6 @@ async def brute_force_triplet_search(
 
     Args:
         query (str): The search query.
-        user (User): The user performing the search.
        top_k (int): The number of top results to retrieve.
        collections (Optional[List[str]]): List of collections to query.
        properties_to_project (Optional[List[str]]): List of properties to project.
@@ -139,12 +137,10 @@ async def brute_force_triplet_search(
 
     query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
 
-    send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
-
     async def search_in_collection(collection_name: str):
         try:
             return await vector_engine.search(
-                collection_name=collection_name, query_vector=query_vector, limit=…
+                collection_name=collection_name, query_vector=query_vector, limit=None
             )
         except CollectionNotFoundError:
             return []
@@ -176,20 +172,14 @@ async def brute_force_triplet_search(
 
        results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
 
-        send_telemetry("cognee.brute_force_triplet_search EXECUTION COMPLETED", user.id)
-
        return results
 
    except CollectionNotFoundError:
        return []
    except Exception as error:
        logger.error(
-            "Error during brute force search for …",
-            user.id,
+            "Error during brute force search for query: %s. Error: %s",
            query,
            error,
        )
-        send_telemetry(
-            "cognee.brute_force_triplet_search EXECUTION FAILED", user.id, {"error": str(error)}
-        )
        raise error
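After this change brute_force_triplet_search is user-agnostic: the user parameter and the per-user telemetry events are gone, which is why the callers above (GraphCompletionRetriever, TripletSearchContextProvider) drop their get_default_user() lookups. An illustrative call under the new signature (the query string is made up):

    triplets = await brute_force_triplet_search("launch vehicles", top_k=5)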