cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +8 -0
  3. cognee/api/v1/add/routers/get_add_router.py +3 -1
  4. cognee/api/v1/cognify/routers/get_cognify_router.py +28 -1
  5. cognee/api/v1/ontologies/__init__.py +4 -0
  6. cognee/api/v1/ontologies/ontologies.py +183 -0
  7. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  8. cognee/api/v1/ontologies/routers/get_ontology_router.py +107 -0
  9. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  10. cognee/cli/commands/cognify_command.py +8 -1
  11. cognee/cli/config.py +1 -1
  12. cognee/context_global_variables.py +41 -9
  13. cognee/infrastructure/databases/cache/config.py +3 -1
  14. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  15. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  16. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  17. cognee/infrastructure/databases/graph/config.py +4 -0
  18. cognee/infrastructure/databases/graph/get_graph_engine.py +2 -0
  19. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  20. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +37 -3
  21. cognee/infrastructure/databases/vector/config.py +3 -0
  22. cognee/infrastructure/databases/vector/create_vector_engine.py +5 -1
  23. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +1 -4
  24. cognee/infrastructure/engine/models/Edge.py +13 -1
  25. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  26. cognee/infrastructure/llm/config.py +2 -0
  27. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -2
  28. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +7 -1
  29. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +7 -1
  30. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +8 -16
  31. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +12 -2
  32. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +13 -2
  33. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +5 -2
  34. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  35. cognee/infrastructure/loaders/core/__init__.py +2 -1
  36. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  37. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  38. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  39. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  40. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  41. cognee/modules/chunking/CsvChunker.py +35 -0
  42. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  43. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  44. cognee/modules/data/methods/__init__.py +1 -0
  45. cognee/modules/data/methods/create_dataset.py +4 -2
  46. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  47. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  48. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  49. cognee/modules/data/models/Dataset.py +2 -0
  50. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  51. cognee/modules/data/processing/document_types/__init__.py +1 -0
  52. cognee/modules/graph/cognee_graph/CogneeGraph.py +4 -2
  53. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  54. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  55. cognee/modules/ingestion/identify.py +4 -4
  56. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  57. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  58. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  59. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  60. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  61. cognee/modules/retrieval/base_retriever.py +7 -3
  62. cognee/modules/retrieval/completion_retriever.py +11 -4
  63. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +6 -2
  64. cognee/modules/retrieval/graph_completion_cot_retriever.py +14 -51
  65. cognee/modules/retrieval/graph_completion_retriever.py +4 -1
  66. cognee/modules/retrieval/temporal_retriever.py +9 -2
  67. cognee/modules/retrieval/utils/brute_force_triplet_search.py +1 -1
  68. cognee/modules/retrieval/utils/completion.py +2 -22
  69. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  70. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +69 -0
  71. cognee/modules/search/methods/search.py +5 -3
  72. cognee/modules/users/methods/create_user.py +12 -27
  73. cognee/modules/users/methods/get_authenticated_user.py +2 -1
  74. cognee/modules/users/methods/get_default_user.py +4 -2
  75. cognee/modules/users/methods/get_user.py +1 -1
  76. cognee/modules/users/methods/get_user_by_email.py +1 -1
  77. cognee/modules/users/models/DatasetDatabase.py +9 -0
  78. cognee/modules/users/models/Tenant.py +6 -7
  79. cognee/modules/users/models/User.py +6 -5
  80. cognee/modules/users/models/UserTenant.py +12 -0
  81. cognee/modules/users/models/__init__.py +1 -0
  82. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  83. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  84. cognee/modules/users/tenants/methods/__init__.py +1 -0
  85. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  86. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  87. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  88. cognee/shared/logging_utils.py +2 -0
  89. cognee/tasks/chunks/__init__.py +1 -0
  90. cognee/tasks/chunks/chunk_by_row.py +94 -0
  91. cognee/tasks/documents/classify_documents.py +2 -0
  92. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  93. cognee/tasks/ingestion/ingest_data.py +1 -1
  94. cognee/tasks/memify/__init__.py +2 -0
  95. cognee/tasks/memify/cognify_session.py +41 -0
  96. cognee/tasks/memify/extract_user_sessions.py +73 -0
  97. cognee/tasks/storage/index_data_points.py +33 -22
  98. cognee/tasks/storage/index_graph_edges.py +37 -57
  99. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  100. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  101. cognee/tests/test_add_docling_document.py +2 -2
  102. cognee/tests/test_cognee_server_start.py +84 -1
  103. cognee/tests/test_conversation_history.py +45 -4
  104. cognee/tests/test_data/example_with_header.csv +3 -0
  105. cognee/tests/test_delete_bmw_example.py +60 -0
  106. cognee/tests/test_edge_ingestion.py +27 -0
  107. cognee/tests/test_feedback_enrichment.py +1 -1
  108. cognee/tests/test_library.py +6 -4
  109. cognee/tests/test_load.py +62 -0
  110. cognee/tests/test_multi_tenancy.py +165 -0
  111. cognee/tests/test_parallel_databases.py +2 -0
  112. cognee/tests/test_relational_db_migration.py +54 -2
  113. cognee/tests/test_search_db.py +7 -1
  114. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  115. cognee/tests/unit/api/test_ontology_endpoint.py +264 -0
  116. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  117. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  118. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  119. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  120. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  121. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  122. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  123. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  124. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  125. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  126. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  127. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  128. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  129. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  130. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +88 -71
  131. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +135 -104
  132. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  133. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -1
  134. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  135. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,33 @@
+ import io
+ import csv
+ from typing import Type
+
+ from cognee.modules.chunking.Chunker import Chunker
+ from cognee.infrastructure.files.utils.open_data_file import open_data_file
+ from .Document import Document
+
+
+ class CsvDocument(Document):
+     type: str = "csv"
+     mime_type: str = "text/csv"
+
+     async def read(self, chunker_cls: Type[Chunker], max_chunk_size: int):
+         async def get_text():
+             async with open_data_file(
+                 self.raw_data_location, mode="r", encoding="utf-8", newline=""
+             ) as file:
+                 content = file.read()
+                 file_like_obj = io.StringIO(content)
+                 reader = csv.DictReader(file_like_obj)
+
+                 for row in reader:
+                     pairs = [f"{str(k)}: {str(v)}" for k, v in row.items()]
+                     row_text = ", ".join(pairs)
+                     if not row_text.strip():
+                         break
+                     yield row_text
+
+         chunker = chunker_cls(self, max_chunk_size=max_chunk_size, get_text=get_text)
+
+         async for chunk in chunker.read():
+             yield chunk
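The new CsvDocument reads the file with csv.DictReader and flattens every row into a "column: value" string before handing the rows to the chunker. A minimal standalone sketch of that row-to-text step (plain csv module only; the header and values below are illustrative, not taken from the package):

import csv
import io

raw = "name,role\nAda,engineer\nGrace,admiral\n"  # illustrative CSV content

reader = csv.DictReader(io.StringIO(raw))
for row in reader:
    # Mirrors CsvDocument.read: "key: value" pairs joined with ", "
    row_text = ", ".join(f"{k}: {v}" for k, v in row.items())
    print(row_text)  # first row prints: name: Ada, role: engineer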
@@ -4,3 +4,4 @@ from .TextDocument import TextDocument
  from .ImageDocument import ImageDocument
  from .AudioDocument import AudioDocument
  from .UnstructuredDocument import UnstructuredDocument
+ from .CsvDocument import CsvDocument
@@ -171,8 +171,10 @@ class CogneeGraph(CogneeAbstractGraph):
          embedding_map = {result.payload["text"]: result.score for result in edge_distances}

          for edge in self.edges:
-             relationship_type = edge.attributes.get("relationship_type")
-             distance = embedding_map.get(relationship_type, None)
+             edge_key = edge.attributes.get("edge_text") or edge.attributes.get(
+                 "relationship_type"
+             )
+             distance = embedding_map.get(edge_key, None)

              if distance is not None:
                  edge.attributes["vector_distance"] = distance

@@ -1,5 +1,6 @@
  from typing import Optional

+ from cognee.infrastructure.engine.models.Edge import Edge
  from cognee.modules.chunking.models import DocumentChunk
  from cognee.modules.engine.models import Entity, EntityType
  from cognee.modules.engine.utils import (
@@ -243,10 +244,26 @@ def _process_graph_nodes(
              ontology_relationships,
          )

-         # Add entity to data chunk
          if data_chunk.contains is None:
              data_chunk.contains = []
-         data_chunk.contains.append(entity_node)
+
+         edge_text = "; ".join(
+             [
+                 "relationship_name: contains",
+                 f"entity_name: {entity_node.name}",
+                 f"entity_description: {entity_node.description}",
+             ]
+         )
+
+         data_chunk.contains.append(
+             (
+                 Edge(
+                     relationship_type="contains",
+                     edge_text=edge_text,
+                 ),
+                 entity_node,
+             )
+         )


  def _process_graph_edges(
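The practical effect of this hunk is that DocumentChunk.contains now stores (Edge, entity) tuples rather than bare entity nodes, and each Edge carries a pre-rendered edge_text that downstream indexing can embed (see the CogneeGraph and index_graph_edges changes in this release). A rough illustration of the string being assembled, with made-up entity values:

entity_name = "Berlin"  # hypothetical entity, for illustration only
entity_description = "Capital city of Germany"

edge_text = "; ".join(
    [
        "relationship_name: contains",
        f"entity_name: {entity_name}",
        f"entity_description: {entity_description}",
    ]
)
print(edge_text)
# relationship_name: contains; entity_name: Berlin; entity_description: Capital city of Germany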
@@ -1,71 +1,70 @@
+ import string
  from typing import List
- from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
-
+ from collections import Counter

- async def resolve_edges_to_text(retrieved_edges: List[Edge]) -> str:
-     """
-     Converts retrieved graph edges into a human-readable string format.
+ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
+ from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS

-     Parameters:
-     -----------

-         - retrieved_edges (list): A list of edges retrieved from the graph.
+ def _get_top_n_frequent_words(
+     text: str, stop_words: set = None, top_n: int = 3, separator: str = ", "
+ ) -> str:
+     """Concatenates the top N frequent words in text."""
+     if stop_words is None:
+         stop_words = DEFAULT_STOP_WORDS

-     Returns:
-     --------
+     words = [word.lower().strip(string.punctuation) for word in text.split()]
+     words = [word for word in words if word and word not in stop_words]

-         - str: A formatted string representation of the nodes and their connections.
-     """
+     top_words = [word for word, freq in Counter(words).most_common(top_n)]
+     return separator.join(top_words)

-     def _get_nodes(retrieved_edges: List[Edge]) -> dict:
-         def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
-             def _top_n_words(text, stop_words=None, top_n=3, separator=", "):
-                 """Concatenates the top N frequent words in text."""
-                 if stop_words is None:
-                     from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS

-                     stop_words = DEFAULT_STOP_WORDS
+ def _create_title_from_text(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
+     """Creates a title by combining first words with most frequent words from the text."""
+     first_words = text.split()[:first_n_words]
+     top_words = _get_top_n_frequent_words(text, top_n=top_n_words)
+     return f"{' '.join(first_words)}... [{top_words}]"

-                 import string

-                 words = [word.lower().strip(string.punctuation) for word in text.split()]
+ def _extract_nodes_from_edges(retrieved_edges: List[Edge]) -> dict:
+     """Creates a dictionary of nodes with their names and content."""
+     nodes = {}

-                 if stop_words:
-                     words = [word for word in words if word and word not in stop_words]
+     for edge in retrieved_edges:
+         for node in (edge.node1, edge.node2):
+             if node.id in nodes:
+                 continue

-                 from collections import Counter
+             text = node.attributes.get("text")
+             if text:
+                 name = _create_title_from_text(text)
+                 content = text
+             else:
+                 name = node.attributes.get("name", "Unnamed Node")
+                 content = node.attributes.get("description", name)

-                 top_words = [word for word, freq in Counter(words).most_common(top_n)]
+             nodes[node.id] = {"node": node, "name": name, "content": content}

-                 return separator.join(top_words)
+     return nodes

-             """Creates a title, by combining first words with most frequent words from the text."""
-             first_words = text.split()[:first_n_words]
-             top_words = _top_n_words(text, top_n=first_n_words)
-             return f"{' '.join(first_words)}... [{top_words}]"

-         """Creates a dictionary of nodes with their names and content."""
-         nodes = {}
-         for edge in retrieved_edges:
-             for node in (edge.node1, edge.node2):
-                 if node.id not in nodes:
-                     text = node.attributes.get("text")
-                     if text:
-                         name = _get_title(text)
-                         content = text
-                     else:
-                         name = node.attributes.get("name", "Unnamed Node")
-                         content = node.attributes.get("description", name)
-                     nodes[node.id] = {"node": node, "name": name, "content": content}
-         return nodes
+ async def resolve_edges_to_text(retrieved_edges: List[Edge]) -> str:
+     """Converts retrieved graph edges into a human-readable string format."""
+     nodes = _extract_nodes_from_edges(retrieved_edges)

-     nodes = _get_nodes(retrieved_edges)
      node_section = "\n".join(
          f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n"
          for info in nodes.values()
      )
-     connection_section = "\n".join(
-         f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}"
-         for edge in retrieved_edges
-     )
+
+     connections = []
+     for edge in retrieved_edges:
+         source_name = nodes[edge.node1.id]["name"]
+         target_name = nodes[edge.node2.id]["name"]
+         edge_label = edge.attributes.get("edge_text") or edge.attributes.get("relationship_type")
+         connections.append(f"{source_name} --[{edge_label}]--> {target_name}")
+
+     connection_section = "\n".join(connections)
+
      return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}"
@@ -1,11 +1,11 @@
- from uuid import uuid5, NAMESPACE_OID
+ from uuid import UUID
  from .data_types import IngestionData

  from cognee.modules.users.models import User
+ from cognee.modules.data.methods import get_unique_data_id


- def identify(data: IngestionData, user: User) -> str:
+ async def identify(data: IngestionData, user: User) -> UUID:
      data_content_hash: str = data.get_identifier()

-     # return UUID hash of file contents + owner id
-     return uuid5(NAMESPACE_OID, f"{data_content_hash}{user.id}")
+     return await get_unique_data_id(data_identifier=data_content_hash, user=user)
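Data ids are no longer a straight uuid5 of the content hash plus the owner id; identify() is now async and delegates to the new get_unique_data_id helper added under cognee/modules/data/methods, so callers have to await it. A minimal sketch, assuming the same ingestion module that run_tasks_data_item uses:

from cognee.modules import ingestion  # assumed import path, mirroring run_tasks_data_item

async def resolve_data_id(file, user):
    classified_data = ingestion.classify(file)
    # identify() now returns a UUID and must be awaited
    return await ingestion.identify(classified_data, user)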
@@ -2,6 +2,8 @@ import io
  import sys
  import traceback

+ import cognee
+

  def wrap_in_async_handler(user_code: str) -> str:
      return (
@@ -34,6 +36,7 @@ def run_in_local_sandbox(code, environment=None, loop=None):

      environment["print"] = customPrintFunction
      environment["running_loop"] = loop
+     environment["cognee"] = cognee

      try:
          exec(code, environment)
@@ -2,7 +2,7 @@ import os
  import difflib
  from cognee.shared.logging_utils import get_logger
  from collections import deque
- from typing import List, Tuple, Dict, Optional, Any, Union
+ from typing import List, Tuple, Dict, Optional, Any, Union, IO
  from rdflib import Graph, URIRef, RDF, RDFS, OWL

  from cognee.modules.ontology.exceptions import (
@@ -26,44 +26,76 @@ class RDFLibOntologyResolver(BaseOntologyResolver):

      def __init__(
          self,
-         ontology_file: Optional[Union[str, List[str]]] = None,
+         ontology_file: Optional[Union[str, List[str], IO, List[IO]]] = None,
          matching_strategy: Optional[MatchingStrategy] = None,
      ) -> None:
          super().__init__(matching_strategy)
          self.ontology_file = ontology_file
          try:
-             files_to_load = []
+             self.graph = None
              if ontology_file is not None:
-                 if isinstance(ontology_file, str):
+                 files_to_load = []
+                 file_objects = []
+
+                 if hasattr(ontology_file, "read"):
+                     file_objects = [ontology_file]
+                 elif isinstance(ontology_file, str):
                      files_to_load = [ontology_file]
                  elif isinstance(ontology_file, list):
-                     files_to_load = ontology_file
+                     if all(hasattr(item, "read") for item in ontology_file):
+                         file_objects = ontology_file
+                     else:
+                         files_to_load = ontology_file
                  else:
                      raise ValueError(
-                         f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}"
+                         f"ontology_file must be a string, list of strings, file-like object, list of file-like objects, or None. Got: {type(ontology_file)}"
                      )

-             if files_to_load:
-                 self.graph = Graph()
-                 loaded_files = []
-                 for file_path in files_to_load:
-                     if os.path.exists(file_path):
-                         self.graph.parse(file_path)
-                         loaded_files.append(file_path)
-                         logger.info("Ontology loaded successfully from file: %s", file_path)
+                 if file_objects:
+                     self.graph = Graph()
+                     loaded_objects = []
+                     for file_obj in file_objects:
+                         try:
+                             content = file_obj.read()
+                             self.graph.parse(data=content, format="xml")
+                             loaded_objects.append(file_obj)
+                             logger.info("Ontology loaded successfully from file object")
+                         except Exception as e:
+                             logger.warning("Failed to parse ontology file object: %s", str(e))
+
+                     if not loaded_objects:
+                         logger.info(
+                             "No valid ontology file objects found. No owl ontology will be attached to the graph."
+                         )
+                         self.graph = None
                      else:
-                         logger.warning(
-                             "Ontology file '%s' not found. Skipping this file.",
-                             file_path,
+                         logger.info("Total ontology file objects loaded: %d", len(loaded_objects))
+
+                 elif files_to_load:
+                     self.graph = Graph()
+                     loaded_files = []
+                     for file_path in files_to_load:
+                         if os.path.exists(file_path):
+                             self.graph.parse(file_path)
+                             loaded_files.append(file_path)
+                             logger.info("Ontology loaded successfully from file: %s", file_path)
+                         else:
+                             logger.warning(
+                                 "Ontology file '%s' not found. Skipping this file.",
+                                 file_path,
+                             )
+
+                     if not loaded_files:
+                         logger.info(
+                             "No valid ontology files found. No owl ontology will be attached to the graph."
                          )
-
-                 if not loaded_files:
+                         self.graph = None
+                     else:
+                         logger.info("Total ontology files loaded: %d", len(loaded_files))
+                 else:
                      logger.info(
-                         "No valid ontology files found. No owl ontology will be attached to the graph."
+                         "No ontology file provided. No owl ontology will be attached to the graph."
                      )
-                     self.graph = None
-                 else:
-                     logger.info("Total ontology files loaded: %d", len(loaded_files))
              else:
                  logger.info(
                      "No ontology file provided. No owl ontology will be attached to the graph."
@@ -69,7 +69,7 @@ async def run_tasks_data_item_incremental(
          async with open_data_file(file_path) as file:
              classified_data = ingestion.classify(file)
              # data_id is the hash of file contents + owner id to avoid duplicate data
-             data_id = ingestion.identify(classified_data, user)
+             data_id = await ingestion.identify(classified_data, user)
      else:
          # If data was already processed by Cognee get data id
          data_id = data_item.id
@@ -1,5 +1,5 @@
  import asyncio
- from typing import Any, Optional, List
+ from typing import Any, Optional, List, Type
  from cognee.shared.logging_utils import get_logger

  from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
@@ -85,8 +85,12 @@ class EntityCompletionRetriever(BaseRetriever):
              return None

      async def get_completion(
-         self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
-     ) -> List[str]:
+         self,
+         query: str,
+         context: Optional[Any] = None,
+         session_id: Optional[str] = None,
+         response_model: Type = str,
+     ) -> List[Any]:
          """
          Generate completion using provided context or fetch new context.

@@ -102,6 +106,7 @@ class EntityCompletionRetriever(BaseRetriever):
                fetched if not provided. (default None)
              - session_id (Optional[str]): Optional session identifier for caching. If None,
                defaults to 'default_session'. (default None)
+             - response_model (Type): The Pydantic model type for structured output. (default str)

          Returns:
          --------
@@ -133,6 +138,7 @@ class EntityCompletionRetriever(BaseRetriever):
                      user_prompt_path=self.user_prompt_path,
                      system_prompt_path=self.system_prompt_path,
                      conversation_history=conversation_history,
+                     response_model=response_model,
                  ),
              )
          else:
@@ -141,6 +147,7 @@ class EntityCompletionRetriever(BaseRetriever):
                  context=context,
                  user_prompt_path=self.user_prompt_path,
                  system_prompt_path=self.system_prompt_path,
+                 response_model=response_model,
              )

          if session_save:
@@ -1,4 +1,4 @@
- from typing import List, Optional
+ from typing import Any, List, Optional, Type
  from abc import ABC, abstractmethod

  from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
@@ -14,7 +14,11 @@ class BaseGraphRetriever(ABC):

      @abstractmethod
      async def get_completion(
-         self, query: str, context: Optional[List[Edge]] = None, session_id: Optional[str] = None
-     ) -> str:
+         self,
+         query: str,
+         context: Optional[List[Edge]] = None,
+         session_id: Optional[str] = None,
+         response_model: Type = str,
+     ) -> List[Any]:
          """Generates a response using the query and optional context (triplets)."""
          pass
@@ -1,5 +1,5 @@
  from abc import ABC, abstractmethod
- from typing import Any, Optional
+ from typing import Any, Optional, Type, List


  class BaseRetriever(ABC):
@@ -12,7 +12,11 @@ class BaseRetriever(ABC):

      @abstractmethod
      async def get_completion(
-         self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
-     ) -> Any:
+         self,
+         query: str,
+         context: Optional[Any] = None,
+         session_id: Optional[str] = None,
+         response_model: Type = str,
+     ) -> List[Any]:
          """Generates a response using the query and optional context."""
          pass
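Across the retriever family the get_completion signature is now uniform: it accepts an optional response_model and always returns a list. A hedged sketch of how a caller might request structured output from one of the concrete retrievers (the Pydantic model and the default construction are illustrative assumptions, and whether arbitrary models are honored depends on the configured LLM provider):

from typing import List

from pydantic import BaseModel

from cognee.modules.retrieval.completion_retriever import CompletionRetriever


class CityFacts(BaseModel):
    # Illustrative schema; any Pydantic model can be passed as response_model.
    name: str
    notable_landmarks: List[str]


async def structured_answer(query: str):
    retriever = CompletionRetriever()
    # New in 0.5.0: response_model is forwarded to the structured-output call,
    # and the completion comes back wrapped in a list.
    results = await retriever.get_completion(query, response_model=CityFacts)
    return results[0]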
@@ -1,5 +1,5 @@
  import asyncio
- from typing import Any, Optional
+ from typing import Any, Optional, Type, List

  from cognee.shared.logging_utils import get_logger
  from cognee.infrastructure.databases.vector import get_vector_engine
@@ -75,8 +75,12 @@ class CompletionRetriever(BaseRetriever):
              raise NoDataError("No data found in the system, please add data first.") from error

      async def get_completion(
-         self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
-     ) -> str:
+         self,
+         query: str,
+         context: Optional[Any] = None,
+         session_id: Optional[str] = None,
+         response_model: Type = str,
+     ) -> List[Any]:
          """
          Generates an LLM completion using the context.

@@ -91,6 +95,7 @@ class CompletionRetriever(BaseRetriever):
                completion; if None, it retrieves the context for the query. (default None)
              - session_id (Optional[str]): Optional session identifier for caching. If None,
                defaults to 'default_session'. (default None)
+             - response_model (Type): The Pydantic model type for structured output. (default str)

          Returns:
          --------
@@ -118,6 +123,7 @@ class CompletionRetriever(BaseRetriever):
                      system_prompt_path=self.system_prompt_path,
                      system_prompt=self.system_prompt,
                      conversation_history=conversation_history,
+                     response_model=response_model,
                  ),
              )
          else:
@@ -127,6 +133,7 @@ class CompletionRetriever(BaseRetriever):
                  context=context,
                  user_prompt_path=self.user_prompt_path,
                  system_prompt_path=self.system_prompt_path,
                  system_prompt=self.system_prompt,
+                 response_model=response_model,
              )

@@ -137,4 +144,4 @@ class CompletionRetriever(BaseRetriever):
                  session_id=session_id,
              )

-         return completion
+         return [completion]
@@ -1,5 +1,5 @@
  import asyncio
- from typing import Optional, List, Type
+ from typing import Optional, List, Type, Any
  from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
  from cognee.shared.logging_utils import get_logger
  from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
@@ -56,7 +56,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
          context: Optional[List[Edge]] = None,
          session_id: Optional[str] = None,
          context_extension_rounds=4,
-     ) -> List[str]:
+         response_model: Type = str,
+     ) -> List[Any]:
          """
          Extends the context for a given query by retrieving related triplets and generating new
          completions based on them.
@@ -76,6 +77,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
                defaults to 'default_session'. (default None)
              - context_extension_rounds: The maximum number of rounds to extend the context with
                new triplets before halting. (default 4)
+             - response_model (Type): The Pydantic model type for structured output. (default str)

          Returns:
          --------
@@ -143,6 +145,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
                      system_prompt_path=self.system_prompt_path,
                      system_prompt=self.system_prompt,
                      conversation_history=conversation_history,
+                     response_model=response_model,
                  ),
              )
          else:
@@ -152,6 +155,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
                  context=context,
                  user_prompt_path=self.user_prompt_path,
                  system_prompt_path=self.system_prompt_path,
                  system_prompt=self.system_prompt,
+                 response_model=response_model,
              )

@@ -7,7 +7,7 @@ from cognee.shared.logging_utils import get_logger

  from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
  from cognee.modules.retrieval.utils.completion import (
-     generate_structured_completion,
+     generate_completion,
      summarize_text,
  )
  from cognee.modules.retrieval.utils.session_cache import (
@@ -44,7 +44,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
      questions based on reasoning. The public methods are:

      - get_completion
-     - get_structured_completion

      Instance variables include:
      - validation_system_prompt_path
@@ -121,7 +120,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
              triplets += await self.get_context(followup_question)
              context_text = await self.resolve_edges_to_text(list(set(triplets)))

-         completion = await generate_structured_completion(
+         completion = await generate_completion(
              query=query,
              context=context_text,
              user_prompt_path=self.user_prompt_path,
@@ -165,24 +164,28 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):

          return completion, context_text, triplets

-     async def get_structured_completion(
+     async def get_completion(
          self,
          query: str,
          context: Optional[List[Edge]] = None,
          session_id: Optional[str] = None,
-         max_iter: int = 4,
+         max_iter=4,
          response_model: Type = str,
-     ) -> Any:
+     ) -> List[Any]:
          """
-         Generate structured completion responses based on a user query and contextual information.
+         Generate completion responses based on a user query and contextual information.

-         This method applies the same chain-of-thought logic as get_completion but returns
+         This method interacts with a language model client to retrieve a structured response,
+         using a series of iterations to refine the answers and generate follow-up questions
+         based on reasoning derived from previous outputs. It raises exceptions if the context
+         retrieval fails or if the model encounters issues in generating outputs. It returns
          structured output using the provided response model.

          Parameters:
          -----------
+
              - query (str): The user's query to be processed and answered.
-             - context (Optional[List[Edge]]): Optional context that may assist in answering the query.
+             - context (Optional[Any]): Optional context that may assist in answering the query.
                If not provided, it will be fetched based on the query. (default None)
              - session_id (Optional[str]): Optional session identifier for caching. If None,
                defaults to 'default_session'. (default None)
@@ -192,7 +195,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):

          Returns:
          --------
-             - Any: The generated structured completion based on the response model.
+
+             - List[str]: A list containing the generated answer to the user's query.
          """
          # Check if session saving is enabled
          cache_config = CacheConfig()
@@ -228,45 +232,4 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                  session_id=session_id,
              )

-         return completion
-
-     async def get_completion(
-         self,
-         query: str,
-         context: Optional[List[Edge]] = None,
-         session_id: Optional[str] = None,
-         max_iter=4,
-     ) -> List[str]:
-         """
-         Generate completion responses based on a user query and contextual information.
-
-         This method interacts with a language model client to retrieve a structured response,
-         using a series of iterations to refine the answers and generate follow-up questions
-         based on reasoning derived from previous outputs. It raises exceptions if the context
-         retrieval fails or if the model encounters issues in generating outputs.
-
-         Parameters:
-         -----------
-
-             - query (str): The user's query to be processed and answered.
-             - context (Optional[Any]): Optional context that may assist in answering the query.
-               If not provided, it will be fetched based on the query. (default None)
-             - session_id (Optional[str]): Optional session identifier for caching. If None,
-               defaults to 'default_session'. (default None)
-             - max_iter: The maximum number of iterations to refine the answer and generate
-               follow-up questions. (default 4)
-
-         Returns:
-         --------
-
-             - List[str]: A list containing the generated answer to the user's query.
-         """
-         completion = await self.get_structured_completion(
-             query=query,
-             context=context,
-             session_id=session_id,
-             max_iter=max_iter,
-             response_model=str,
-         )
-
          return [completion]