cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +8 -0
- cognee/api/v1/add/routers/get_add_router.py +3 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +28 -1
- cognee/api/v1/ontologies/__init__.py +4 -0
- cognee/api/v1/ontologies/ontologies.py +183 -0
- cognee/api/v1/ontologies/routers/__init__.py +0 -0
- cognee/api/v1/ontologies/routers/get_ontology_router.py +107 -0
- cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
- cognee/cli/commands/cognify_command.py +8 -1
- cognee/cli/config.py +1 -1
- cognee/context_global_variables.py +41 -9
- cognee/infrastructure/databases/cache/config.py +3 -1
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
- cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
- cognee/infrastructure/databases/graph/config.py +4 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +2 -0
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +37 -3
- cognee/infrastructure/databases/vector/config.py +3 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +5 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +1 -4
- cognee/infrastructure/engine/models/Edge.py +13 -1
- cognee/infrastructure/files/utils/guess_file_type.py +4 -0
- cognee/infrastructure/llm/config.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +7 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +7 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +8 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +12 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +13 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +5 -2
- cognee/infrastructure/loaders/LoaderEngine.py +1 -0
- cognee/infrastructure/loaders/core/__init__.py +2 -1
- cognee/infrastructure/loaders/core/csv_loader.py +93 -0
- cognee/infrastructure/loaders/core/text_loader.py +1 -2
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
- cognee/infrastructure/loaders/supported_loaders.py +2 -1
- cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
- cognee/modules/chunking/CsvChunker.py +35 -0
- cognee/modules/chunking/models/DocumentChunk.py +2 -1
- cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/create_dataset.py +4 -2
- cognee/modules/data/methods/get_dataset_ids.py +5 -1
- cognee/modules/data/methods/get_unique_data_id.py +68 -0
- cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
- cognee/modules/data/models/Dataset.py +2 -0
- cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
- cognee/modules/data/processing/document_types/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +4 -2
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
- cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
- cognee/modules/ingestion/identify.py +4 -4
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
- cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
- cognee/modules/retrieval/base_graph_retriever.py +7 -3
- cognee/modules/retrieval/base_retriever.py +7 -3
- cognee/modules/retrieval/completion_retriever.py +11 -4
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +6 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +14 -51
- cognee/modules/retrieval/graph_completion_retriever.py +4 -1
- cognee/modules/retrieval/temporal_retriever.py +9 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +1 -1
- cognee/modules/retrieval/utils/completion.py +2 -22
- cognee/modules/run_custom_pipeline/__init__.py +1 -0
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +69 -0
- cognee/modules/search/methods/search.py +5 -3
- cognee/modules/users/methods/create_user.py +12 -27
- cognee/modules/users/methods/get_authenticated_user.py +2 -1
- cognee/modules/users/methods/get_default_user.py +4 -2
- cognee/modules/users/methods/get_user.py +1 -1
- cognee/modules/users/methods/get_user_by_email.py +1 -1
- cognee/modules/users/models/DatasetDatabase.py +9 -0
- cognee/modules/users/models/Tenant.py +6 -7
- cognee/modules/users/models/User.py +6 -5
- cognee/modules/users/models/UserTenant.py +12 -0
- cognee/modules/users/models/__init__.py +1 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
- cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
- cognee/modules/users/tenants/methods/__init__.py +1 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
- cognee/modules/users/tenants/methods/create_tenant.py +22 -8
- cognee/modules/users/tenants/methods/select_tenant.py +62 -0
- cognee/shared/logging_utils.py +2 -0
- cognee/tasks/chunks/__init__.py +1 -0
- cognee/tasks/chunks/chunk_by_row.py +94 -0
- cognee/tasks/documents/classify_documents.py +2 -0
- cognee/tasks/feedback/generate_improved_answers.py +3 -3
- cognee/tasks/ingestion/ingest_data.py +1 -1
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/cognify_session.py +41 -0
- cognee/tasks/memify/extract_user_sessions.py +73 -0
- cognee/tasks/storage/index_data_points.py +33 -22
- cognee/tasks/storage/index_graph_edges.py +37 -57
- cognee/tests/integration/documents/CsvDocument_test.py +70 -0
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
- cognee/tests/test_add_docling_document.py +2 -2
- cognee/tests/test_cognee_server_start.py +84 -1
- cognee/tests/test_conversation_history.py +45 -4
- cognee/tests/test_data/example_with_header.csv +3 -0
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_edge_ingestion.py +27 -0
- cognee/tests/test_feedback_enrichment.py +1 -1
- cognee/tests/test_library.py +6 -4
- cognee/tests/test_load.py +62 -0
- cognee/tests/test_multi_tenancy.py +165 -0
- cognee/tests/test_parallel_databases.py +2 -0
- cognee/tests/test_relational_db_migration.py +54 -2
- cognee/tests/test_search_db.py +7 -1
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
- cognee/tests/unit/api/test_ontology_endpoint.py +264 -0
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
- cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
- cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
- cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
- cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
- cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
- cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
- cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
- cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +88 -71
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +135 -104
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -1
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import csv
|
|
3
|
+
from typing import Type
|
|
4
|
+
|
|
5
|
+
from cognee.modules.chunking.Chunker import Chunker
|
|
6
|
+
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
|
7
|
+
from .Document import Document
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CsvDocument(Document):
|
|
11
|
+
type: str = "csv"
|
|
12
|
+
mime_type: str = "text/csv"
|
|
13
|
+
|
|
14
|
+
async def read(self, chunker_cls: Type[Chunker], max_chunk_size: int):
|
|
15
|
+
async def get_text():
|
|
16
|
+
async with open_data_file(
|
|
17
|
+
self.raw_data_location, mode="r", encoding="utf-8", newline=""
|
|
18
|
+
) as file:
|
|
19
|
+
content = file.read()
|
|
20
|
+
file_like_obj = io.StringIO(content)
|
|
21
|
+
reader = csv.DictReader(file_like_obj)
|
|
22
|
+
|
|
23
|
+
for row in reader:
|
|
24
|
+
pairs = [f"{str(k)}: {str(v)}" for k, v in row.items()]
|
|
25
|
+
row_text = ", ".join(pairs)
|
|
26
|
+
if not row_text.strip():
|
|
27
|
+
break
|
|
28
|
+
yield row_text
|
|
29
|
+
|
|
30
|
+
chunker = chunker_cls(self, max_chunk_size=max_chunk_size, get_text=get_text)
|
|
31
|
+
|
|
32
|
+
async for chunk in chunker.read():
|
|
33
|
+
yield chunk
|
|
@@ -171,8 +171,10 @@ class CogneeGraph(CogneeAbstractGraph):
|
|
|
171
171
|
embedding_map = {result.payload["text"]: result.score for result in edge_distances}
|
|
172
172
|
|
|
173
173
|
for edge in self.edges:
|
|
174
|
-
|
|
175
|
-
|
|
174
|
+
edge_key = edge.attributes.get("edge_text") or edge.attributes.get(
|
|
175
|
+
"relationship_type"
|
|
176
|
+
)
|
|
177
|
+
distance = embedding_map.get(edge_key, None)
|
|
176
178
|
if distance is not None:
|
|
177
179
|
edge.attributes["vector_distance"] = distance
|
|
178
180
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
+
from cognee.infrastructure.engine.models.Edge import Edge
|
|
3
4
|
from cognee.modules.chunking.models import DocumentChunk
|
|
4
5
|
from cognee.modules.engine.models import Entity, EntityType
|
|
5
6
|
from cognee.modules.engine.utils import (
|
|
@@ -243,10 +244,26 @@ def _process_graph_nodes(
|
|
|
243
244
|
ontology_relationships,
|
|
244
245
|
)
|
|
245
246
|
|
|
246
|
-
# Add entity to data chunk
|
|
247
247
|
if data_chunk.contains is None:
|
|
248
248
|
data_chunk.contains = []
|
|
249
|
-
|
|
249
|
+
|
|
250
|
+
edge_text = "; ".join(
|
|
251
|
+
[
|
|
252
|
+
"relationship_name: contains",
|
|
253
|
+
f"entity_name: {entity_node.name}",
|
|
254
|
+
f"entity_description: {entity_node.description}",
|
|
255
|
+
]
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
data_chunk.contains.append(
|
|
259
|
+
(
|
|
260
|
+
Edge(
|
|
261
|
+
relationship_type="contains",
|
|
262
|
+
edge_text=edge_text,
|
|
263
|
+
),
|
|
264
|
+
entity_node,
|
|
265
|
+
)
|
|
266
|
+
)
|
|
250
267
|
|
|
251
268
|
|
|
252
269
|
def _process_graph_edges(
|
|
@@ -1,71 +1,70 @@
|
|
|
1
|
+
import string
|
|
1
2
|
from typing import List
|
|
2
|
-
from
|
|
3
|
-
|
|
3
|
+
from collections import Counter
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Converts retrieved graph edges into a human-readable string format.
|
|
5
|
+
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
6
|
+
from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
|
|
8
7
|
|
|
9
|
-
Parameters:
|
|
10
|
-
-----------
|
|
11
8
|
|
|
12
|
-
|
|
9
|
+
def _get_top_n_frequent_words(
|
|
10
|
+
text: str, stop_words: set = None, top_n: int = 3, separator: str = ", "
|
|
11
|
+
) -> str:
|
|
12
|
+
"""Concatenates the top N frequent words in text."""
|
|
13
|
+
if stop_words is None:
|
|
14
|
+
stop_words = DEFAULT_STOP_WORDS
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
+
words = [word.lower().strip(string.punctuation) for word in text.split()]
|
|
17
|
+
words = [word for word in words if word and word not in stop_words]
|
|
16
18
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
+
top_words = [word for word, freq in Counter(words).most_common(top_n)]
|
|
20
|
+
return separator.join(top_words)
|
|
19
21
|
|
|
20
|
-
def _get_nodes(retrieved_edges: List[Edge]) -> dict:
|
|
21
|
-
def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
|
|
22
|
-
def _top_n_words(text, stop_words=None, top_n=3, separator=", "):
|
|
23
|
-
"""Concatenates the top N frequent words in text."""
|
|
24
|
-
if stop_words is None:
|
|
25
|
-
from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
|
|
26
22
|
|
|
27
|
-
|
|
23
|
+
def _create_title_from_text(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
|
|
24
|
+
"""Creates a title by combining first words with most frequent words from the text."""
|
|
25
|
+
first_words = text.split()[:first_n_words]
|
|
26
|
+
top_words = _get_top_n_frequent_words(text, top_n=top_n_words)
|
|
27
|
+
return f"{' '.join(first_words)}... [{top_words}]"
|
|
28
28
|
|
|
29
|
-
import string
|
|
30
29
|
|
|
31
|
-
|
|
30
|
+
def _extract_nodes_from_edges(retrieved_edges: List[Edge]) -> dict:
|
|
31
|
+
"""Creates a dictionary of nodes with their names and content."""
|
|
32
|
+
nodes = {}
|
|
32
33
|
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
for edge in retrieved_edges:
|
|
35
|
+
for node in (edge.node1, edge.node2):
|
|
36
|
+
if node.id in nodes:
|
|
37
|
+
continue
|
|
35
38
|
|
|
36
|
-
|
|
39
|
+
text = node.attributes.get("text")
|
|
40
|
+
if text:
|
|
41
|
+
name = _create_title_from_text(text)
|
|
42
|
+
content = text
|
|
43
|
+
else:
|
|
44
|
+
name = node.attributes.get("name", "Unnamed Node")
|
|
45
|
+
content = node.attributes.get("description", name)
|
|
37
46
|
|
|
38
|
-
|
|
47
|
+
nodes[node.id] = {"node": node, "name": name, "content": content}
|
|
39
48
|
|
|
40
|
-
|
|
49
|
+
return nodes
|
|
41
50
|
|
|
42
|
-
"""Creates a title, by combining first words with most frequent words from the text."""
|
|
43
|
-
first_words = text.split()[:first_n_words]
|
|
44
|
-
top_words = _top_n_words(text, top_n=first_n_words)
|
|
45
|
-
return f"{' '.join(first_words)}... [{top_words}]"
|
|
46
51
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
for node in (edge.node1, edge.node2):
|
|
51
|
-
if node.id not in nodes:
|
|
52
|
-
text = node.attributes.get("text")
|
|
53
|
-
if text:
|
|
54
|
-
name = _get_title(text)
|
|
55
|
-
content = text
|
|
56
|
-
else:
|
|
57
|
-
name = node.attributes.get("name", "Unnamed Node")
|
|
58
|
-
content = node.attributes.get("description", name)
|
|
59
|
-
nodes[node.id] = {"node": node, "name": name, "content": content}
|
|
60
|
-
return nodes
|
|
52
|
+
async def resolve_edges_to_text(retrieved_edges: List[Edge]) -> str:
|
|
53
|
+
"""Converts retrieved graph edges into a human-readable string format."""
|
|
54
|
+
nodes = _extract_nodes_from_edges(retrieved_edges)
|
|
61
55
|
|
|
62
|
-
nodes = _get_nodes(retrieved_edges)
|
|
63
56
|
node_section = "\n".join(
|
|
64
57
|
f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n"
|
|
65
58
|
for info in nodes.values()
|
|
66
59
|
)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
60
|
+
|
|
61
|
+
connections = []
|
|
62
|
+
for edge in retrieved_edges:
|
|
63
|
+
source_name = nodes[edge.node1.id]["name"]
|
|
64
|
+
target_name = nodes[edge.node2.id]["name"]
|
|
65
|
+
edge_label = edge.attributes.get("edge_text") or edge.attributes.get("relationship_type")
|
|
66
|
+
connections.append(f"{source_name} --[{edge_label}]--> {target_name}")
|
|
67
|
+
|
|
68
|
+
connection_section = "\n".join(connections)
|
|
69
|
+
|
|
71
70
|
return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}"
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from uuid import
|
|
1
|
+
from uuid import UUID
|
|
2
2
|
from .data_types import IngestionData
|
|
3
3
|
|
|
4
4
|
from cognee.modules.users.models import User
|
|
5
|
+
from cognee.modules.data.methods import get_unique_data_id
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
def identify(data: IngestionData, user: User) ->
|
|
8
|
+
async def identify(data: IngestionData, user: User) -> UUID:
|
|
8
9
|
data_content_hash: str = data.get_identifier()
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
return uuid5(NAMESPACE_OID, f"{data_content_hash}{user.id}")
|
|
11
|
+
return await get_unique_data_id(data_identifier=data_content_hash, user=user)
|
|
@@ -2,6 +2,8 @@ import io
|
|
|
2
2
|
import sys
|
|
3
3
|
import traceback
|
|
4
4
|
|
|
5
|
+
import cognee
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
def wrap_in_async_handler(user_code: str) -> str:
|
|
7
9
|
return (
|
|
@@ -34,6 +36,7 @@ def run_in_local_sandbox(code, environment=None, loop=None):
|
|
|
34
36
|
|
|
35
37
|
environment["print"] = customPrintFunction
|
|
36
38
|
environment["running_loop"] = loop
|
|
39
|
+
environment["cognee"] = cognee
|
|
37
40
|
|
|
38
41
|
try:
|
|
39
42
|
exec(code, environment)
|
|
@@ -2,7 +2,7 @@ import os
|
|
|
2
2
|
import difflib
|
|
3
3
|
from cognee.shared.logging_utils import get_logger
|
|
4
4
|
from collections import deque
|
|
5
|
-
from typing import List, Tuple, Dict, Optional, Any, Union
|
|
5
|
+
from typing import List, Tuple, Dict, Optional, Any, Union, IO
|
|
6
6
|
from rdflib import Graph, URIRef, RDF, RDFS, OWL
|
|
7
7
|
|
|
8
8
|
from cognee.modules.ontology.exceptions import (
|
|
@@ -26,44 +26,76 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
|
|
|
26
26
|
|
|
27
27
|
def __init__(
|
|
28
28
|
self,
|
|
29
|
-
ontology_file: Optional[Union[str, List[str]]] = None,
|
|
29
|
+
ontology_file: Optional[Union[str, List[str], IO, List[IO]]] = None,
|
|
30
30
|
matching_strategy: Optional[MatchingStrategy] = None,
|
|
31
31
|
) -> None:
|
|
32
32
|
super().__init__(matching_strategy)
|
|
33
33
|
self.ontology_file = ontology_file
|
|
34
34
|
try:
|
|
35
|
-
|
|
35
|
+
self.graph = None
|
|
36
36
|
if ontology_file is not None:
|
|
37
|
-
|
|
37
|
+
files_to_load = []
|
|
38
|
+
file_objects = []
|
|
39
|
+
|
|
40
|
+
if hasattr(ontology_file, "read"):
|
|
41
|
+
file_objects = [ontology_file]
|
|
42
|
+
elif isinstance(ontology_file, str):
|
|
38
43
|
files_to_load = [ontology_file]
|
|
39
44
|
elif isinstance(ontology_file, list):
|
|
40
|
-
|
|
45
|
+
if all(hasattr(item, "read") for item in ontology_file):
|
|
46
|
+
file_objects = ontology_file
|
|
47
|
+
else:
|
|
48
|
+
files_to_load = ontology_file
|
|
41
49
|
else:
|
|
42
50
|
raise ValueError(
|
|
43
|
-
f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}"
|
|
51
|
+
f"ontology_file must be a string, list of strings, file-like object, list of file-like objects, or None. Got: {type(ontology_file)}"
|
|
44
52
|
)
|
|
45
53
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
+
if file_objects:
|
|
55
|
+
self.graph = Graph()
|
|
56
|
+
loaded_objects = []
|
|
57
|
+
for file_obj in file_objects:
|
|
58
|
+
try:
|
|
59
|
+
content = file_obj.read()
|
|
60
|
+
self.graph.parse(data=content, format="xml")
|
|
61
|
+
loaded_objects.append(file_obj)
|
|
62
|
+
logger.info("Ontology loaded successfully from file object")
|
|
63
|
+
except Exception as e:
|
|
64
|
+
logger.warning("Failed to parse ontology file object: %s", str(e))
|
|
65
|
+
|
|
66
|
+
if not loaded_objects:
|
|
67
|
+
logger.info(
|
|
68
|
+
"No valid ontology file objects found. No owl ontology will be attached to the graph."
|
|
69
|
+
)
|
|
70
|
+
self.graph = None
|
|
54
71
|
else:
|
|
55
|
-
logger.
|
|
56
|
-
|
|
57
|
-
|
|
72
|
+
logger.info("Total ontology file objects loaded: %d", len(loaded_objects))
|
|
73
|
+
|
|
74
|
+
elif files_to_load:
|
|
75
|
+
self.graph = Graph()
|
|
76
|
+
loaded_files = []
|
|
77
|
+
for file_path in files_to_load:
|
|
78
|
+
if os.path.exists(file_path):
|
|
79
|
+
self.graph.parse(file_path)
|
|
80
|
+
loaded_files.append(file_path)
|
|
81
|
+
logger.info("Ontology loaded successfully from file: %s", file_path)
|
|
82
|
+
else:
|
|
83
|
+
logger.warning(
|
|
84
|
+
"Ontology file '%s' not found. Skipping this file.",
|
|
85
|
+
file_path,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
if not loaded_files:
|
|
89
|
+
logger.info(
|
|
90
|
+
"No valid ontology files found. No owl ontology will be attached to the graph."
|
|
58
91
|
)
|
|
59
|
-
|
|
60
|
-
|
|
92
|
+
self.graph = None
|
|
93
|
+
else:
|
|
94
|
+
logger.info("Total ontology files loaded: %d", len(loaded_files))
|
|
95
|
+
else:
|
|
61
96
|
logger.info(
|
|
62
|
-
"No
|
|
97
|
+
"No ontology file provided. No owl ontology will be attached to the graph."
|
|
63
98
|
)
|
|
64
|
-
self.graph = None
|
|
65
|
-
else:
|
|
66
|
-
logger.info("Total ontology files loaded: %d", len(loaded_files))
|
|
67
99
|
else:
|
|
68
100
|
logger.info(
|
|
69
101
|
"No ontology file provided. No owl ontology will be attached to the graph."
|
|
@@ -69,7 +69,7 @@ async def run_tasks_data_item_incremental(
|
|
|
69
69
|
async with open_data_file(file_path) as file:
|
|
70
70
|
classified_data = ingestion.classify(file)
|
|
71
71
|
# data_id is the hash of file contents + owner id to avoid duplicate data
|
|
72
|
-
data_id = ingestion.identify(classified_data, user)
|
|
72
|
+
data_id = await ingestion.identify(classified_data, user)
|
|
73
73
|
else:
|
|
74
74
|
# If data was already processed by Cognee get data id
|
|
75
75
|
data_id = data_item.id
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Any, Optional, List
|
|
2
|
+
from typing import Any, Optional, List, Type
|
|
3
3
|
from cognee.shared.logging_utils import get_logger
|
|
4
4
|
|
|
5
5
|
from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
|
|
@@ -85,8 +85,12 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
85
85
|
return None
|
|
86
86
|
|
|
87
87
|
async def get_completion(
|
|
88
|
-
self,
|
|
89
|
-
|
|
88
|
+
self,
|
|
89
|
+
query: str,
|
|
90
|
+
context: Optional[Any] = None,
|
|
91
|
+
session_id: Optional[str] = None,
|
|
92
|
+
response_model: Type = str,
|
|
93
|
+
) -> List[Any]:
|
|
90
94
|
"""
|
|
91
95
|
Generate completion using provided context or fetch new context.
|
|
92
96
|
|
|
@@ -102,6 +106,7 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
102
106
|
fetched if not provided. (default None)
|
|
103
107
|
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
104
108
|
defaults to 'default_session'. (default None)
|
|
109
|
+
- response_model (Type): The Pydantic model type for structured output. (default str)
|
|
105
110
|
|
|
106
111
|
Returns:
|
|
107
112
|
--------
|
|
@@ -133,6 +138,7 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
133
138
|
user_prompt_path=self.user_prompt_path,
|
|
134
139
|
system_prompt_path=self.system_prompt_path,
|
|
135
140
|
conversation_history=conversation_history,
|
|
141
|
+
response_model=response_model,
|
|
136
142
|
),
|
|
137
143
|
)
|
|
138
144
|
else:
|
|
@@ -141,6 +147,7 @@ class EntityCompletionRetriever(BaseRetriever):
|
|
|
141
147
|
context=context,
|
|
142
148
|
user_prompt_path=self.user_prompt_path,
|
|
143
149
|
system_prompt_path=self.system_prompt_path,
|
|
150
|
+
response_model=response_model,
|
|
144
151
|
)
|
|
145
152
|
|
|
146
153
|
if session_save:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Optional
|
|
1
|
+
from typing import Any, List, Optional, Type
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
3
|
|
|
4
4
|
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
@@ -14,7 +14,11 @@ class BaseGraphRetriever(ABC):
|
|
|
14
14
|
|
|
15
15
|
@abstractmethod
|
|
16
16
|
async def get_completion(
|
|
17
|
-
self,
|
|
18
|
-
|
|
17
|
+
self,
|
|
18
|
+
query: str,
|
|
19
|
+
context: Optional[List[Edge]] = None,
|
|
20
|
+
session_id: Optional[str] = None,
|
|
21
|
+
response_model: Type = str,
|
|
22
|
+
) -> List[Any]:
|
|
19
23
|
"""Generates a response using the query and optional context (triplets)."""
|
|
20
24
|
pass
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import Any, Optional
|
|
2
|
+
from typing import Any, Optional, Type, List
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class BaseRetriever(ABC):
|
|
@@ -12,7 +12,11 @@ class BaseRetriever(ABC):
|
|
|
12
12
|
|
|
13
13
|
@abstractmethod
|
|
14
14
|
async def get_completion(
|
|
15
|
-
self,
|
|
16
|
-
|
|
15
|
+
self,
|
|
16
|
+
query: str,
|
|
17
|
+
context: Optional[Any] = None,
|
|
18
|
+
session_id: Optional[str] = None,
|
|
19
|
+
response_model: Type = str,
|
|
20
|
+
) -> List[Any]:
|
|
17
21
|
"""Generates a response using the query and optional context."""
|
|
18
22
|
pass
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Any, Optional
|
|
2
|
+
from typing import Any, Optional, Type, List
|
|
3
3
|
|
|
4
4
|
from cognee.shared.logging_utils import get_logger
|
|
5
5
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
@@ -75,8 +75,12 @@ class CompletionRetriever(BaseRetriever):
|
|
|
75
75
|
raise NoDataError("No data found in the system, please add data first.") from error
|
|
76
76
|
|
|
77
77
|
async def get_completion(
|
|
78
|
-
self,
|
|
79
|
-
|
|
78
|
+
self,
|
|
79
|
+
query: str,
|
|
80
|
+
context: Optional[Any] = None,
|
|
81
|
+
session_id: Optional[str] = None,
|
|
82
|
+
response_model: Type = str,
|
|
83
|
+
) -> List[Any]:
|
|
80
84
|
"""
|
|
81
85
|
Generates an LLM completion using the context.
|
|
82
86
|
|
|
@@ -91,6 +95,7 @@ class CompletionRetriever(BaseRetriever):
|
|
|
91
95
|
completion; if None, it retrieves the context for the query. (default None)
|
|
92
96
|
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
93
97
|
defaults to 'default_session'. (default None)
|
|
98
|
+
- response_model (Type): The Pydantic model type for structured output. (default str)
|
|
94
99
|
|
|
95
100
|
Returns:
|
|
96
101
|
--------
|
|
@@ -118,6 +123,7 @@ class CompletionRetriever(BaseRetriever):
|
|
|
118
123
|
system_prompt_path=self.system_prompt_path,
|
|
119
124
|
system_prompt=self.system_prompt,
|
|
120
125
|
conversation_history=conversation_history,
|
|
126
|
+
response_model=response_model,
|
|
121
127
|
),
|
|
122
128
|
)
|
|
123
129
|
else:
|
|
@@ -127,6 +133,7 @@ class CompletionRetriever(BaseRetriever):
|
|
|
127
133
|
user_prompt_path=self.user_prompt_path,
|
|
128
134
|
system_prompt_path=self.system_prompt_path,
|
|
129
135
|
system_prompt=self.system_prompt,
|
|
136
|
+
response_model=response_model,
|
|
130
137
|
)
|
|
131
138
|
|
|
132
139
|
if session_save:
|
|
@@ -137,4 +144,4 @@ class CompletionRetriever(BaseRetriever):
|
|
|
137
144
|
session_id=session_id,
|
|
138
145
|
)
|
|
139
146
|
|
|
140
|
-
return completion
|
|
147
|
+
return [completion]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Optional, List, Type
|
|
2
|
+
from typing import Optional, List, Type, Any
|
|
3
3
|
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
4
4
|
from cognee.shared.logging_utils import get_logger
|
|
5
5
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
@@ -56,7 +56,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
|
|
|
56
56
|
context: Optional[List[Edge]] = None,
|
|
57
57
|
session_id: Optional[str] = None,
|
|
58
58
|
context_extension_rounds=4,
|
|
59
|
-
|
|
59
|
+
response_model: Type = str,
|
|
60
|
+
) -> List[Any]:
|
|
60
61
|
"""
|
|
61
62
|
Extends the context for a given query by retrieving related triplets and generating new
|
|
62
63
|
completions based on them.
|
|
@@ -76,6 +77,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
|
|
|
76
77
|
defaults to 'default_session'. (default None)
|
|
77
78
|
- context_extension_rounds: The maximum number of rounds to extend the context with
|
|
78
79
|
new triplets before halting. (default 4)
|
|
80
|
+
- response_model (Type): The Pydantic model type for structured output. (default str)
|
|
79
81
|
|
|
80
82
|
Returns:
|
|
81
83
|
--------
|
|
@@ -143,6 +145,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
|
|
|
143
145
|
system_prompt_path=self.system_prompt_path,
|
|
144
146
|
system_prompt=self.system_prompt,
|
|
145
147
|
conversation_history=conversation_history,
|
|
148
|
+
response_model=response_model,
|
|
146
149
|
),
|
|
147
150
|
)
|
|
148
151
|
else:
|
|
@@ -152,6 +155,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
|
|
|
152
155
|
user_prompt_path=self.user_prompt_path,
|
|
153
156
|
system_prompt_path=self.system_prompt_path,
|
|
154
157
|
system_prompt=self.system_prompt,
|
|
158
|
+
response_model=response_model,
|
|
155
159
|
)
|
|
156
160
|
|
|
157
161
|
if self.save_interaction and context_text and triplets and completion:
|
|
@@ -7,7 +7,7 @@ from cognee.shared.logging_utils import get_logger
|
|
|
7
7
|
|
|
8
8
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
9
9
|
from cognee.modules.retrieval.utils.completion import (
|
|
10
|
-
|
|
10
|
+
generate_completion,
|
|
11
11
|
summarize_text,
|
|
12
12
|
)
|
|
13
13
|
from cognee.modules.retrieval.utils.session_cache import (
|
|
@@ -44,7 +44,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
44
44
|
questions based on reasoning. The public methods are:
|
|
45
45
|
|
|
46
46
|
- get_completion
|
|
47
|
-
- get_structured_completion
|
|
48
47
|
|
|
49
48
|
Instance variables include:
|
|
50
49
|
- validation_system_prompt_path
|
|
@@ -121,7 +120,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
121
120
|
triplets += await self.get_context(followup_question)
|
|
122
121
|
context_text = await self.resolve_edges_to_text(list(set(triplets)))
|
|
123
122
|
|
|
124
|
-
completion = await
|
|
123
|
+
completion = await generate_completion(
|
|
125
124
|
query=query,
|
|
126
125
|
context=context_text,
|
|
127
126
|
user_prompt_path=self.user_prompt_path,
|
|
@@ -165,24 +164,28 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
165
164
|
|
|
166
165
|
return completion, context_text, triplets
|
|
167
166
|
|
|
168
|
-
async def
|
|
167
|
+
async def get_completion(
|
|
169
168
|
self,
|
|
170
169
|
query: str,
|
|
171
170
|
context: Optional[List[Edge]] = None,
|
|
172
171
|
session_id: Optional[str] = None,
|
|
173
|
-
max_iter
|
|
172
|
+
max_iter=4,
|
|
174
173
|
response_model: Type = str,
|
|
175
|
-
) -> Any:
|
|
174
|
+
) -> List[Any]:
|
|
176
175
|
"""
|
|
177
|
-
Generate
|
|
176
|
+
Generate completion responses based on a user query and contextual information.
|
|
178
177
|
|
|
179
|
-
This method
|
|
178
|
+
This method interacts with a language model client to retrieve a structured response,
|
|
179
|
+
using a series of iterations to refine the answers and generate follow-up questions
|
|
180
|
+
based on reasoning derived from previous outputs. It raises exceptions if the context
|
|
181
|
+
retrieval fails or if the model encounters issues in generating outputs. It returns
|
|
180
182
|
structured output using the provided response model.
|
|
181
183
|
|
|
182
184
|
Parameters:
|
|
183
185
|
-----------
|
|
186
|
+
|
|
184
187
|
- query (str): The user's query to be processed and answered.
|
|
185
|
-
- context (Optional[
|
|
188
|
+
- context (Optional[Any]): Optional context that may assist in answering the query.
|
|
186
189
|
If not provided, it will be fetched based on the query. (default None)
|
|
187
190
|
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
188
191
|
defaults to 'default_session'. (default None)
|
|
@@ -192,7 +195,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
192
195
|
|
|
193
196
|
Returns:
|
|
194
197
|
--------
|
|
195
|
-
|
|
198
|
+
|
|
199
|
+
- List[str]: A list containing the generated answer to the user's query.
|
|
196
200
|
"""
|
|
197
201
|
# Check if session saving is enabled
|
|
198
202
|
cache_config = CacheConfig()
|
|
@@ -228,45 +232,4 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
228
232
|
session_id=session_id,
|
|
229
233
|
)
|
|
230
234
|
|
|
231
|
-
return completion
|
|
232
|
-
|
|
233
|
-
async def get_completion(
|
|
234
|
-
self,
|
|
235
|
-
query: str,
|
|
236
|
-
context: Optional[List[Edge]] = None,
|
|
237
|
-
session_id: Optional[str] = None,
|
|
238
|
-
max_iter=4,
|
|
239
|
-
) -> List[str]:
|
|
240
|
-
"""
|
|
241
|
-
Generate completion responses based on a user query and contextual information.
|
|
242
|
-
|
|
243
|
-
This method interacts with a language model client to retrieve a structured response,
|
|
244
|
-
using a series of iterations to refine the answers and generate follow-up questions
|
|
245
|
-
based on reasoning derived from previous outputs. It raises exceptions if the context
|
|
246
|
-
retrieval fails or if the model encounters issues in generating outputs.
|
|
247
|
-
|
|
248
|
-
Parameters:
|
|
249
|
-
-----------
|
|
250
|
-
|
|
251
|
-
- query (str): The user's query to be processed and answered.
|
|
252
|
-
- context (Optional[Any]): Optional context that may assist in answering the query.
|
|
253
|
-
If not provided, it will be fetched based on the query. (default None)
|
|
254
|
-
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
255
|
-
defaults to 'default_session'. (default None)
|
|
256
|
-
- max_iter: The maximum number of iterations to refine the answer and generate
|
|
257
|
-
follow-up questions. (default 4)
|
|
258
|
-
|
|
259
|
-
Returns:
|
|
260
|
-
--------
|
|
261
|
-
|
|
262
|
-
- List[str]: A list containing the generated answer to the user's query.
|
|
263
|
-
"""
|
|
264
|
-
completion = await self.get_structured_completion(
|
|
265
|
-
query=query,
|
|
266
|
-
context=context,
|
|
267
|
-
session_id=session_id,
|
|
268
|
-
max_iter=max_iter,
|
|
269
|
-
response_model=str,
|
|
270
|
-
)
|
|
271
|
-
|
|
272
235
|
return [completion]
|