cognee 0.3.4.dev4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. cognee/api/client.py +16 -7
  2. cognee/api/health.py +5 -9
  3. cognee/api/v1/add/add.py +3 -1
  4. cognee/api/v1/cognify/cognify.py +44 -7
  5. cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
  6. cognee/api/v1/search/search.py +3 -0
  7. cognee/api/v1/ui/__init__.py +1 -1
  8. cognee/api/v1/ui/ui.py +215 -150
  9. cognee/api/v1/update/__init__.py +1 -0
  10. cognee/api/v1/update/routers/__init__.py +1 -0
  11. cognee/api/v1/update/routers/get_update_router.py +90 -0
  12. cognee/api/v1/update/update.py +100 -0
  13. cognee/base_config.py +5 -2
  14. cognee/cli/_cognee.py +28 -10
  15. cognee/cli/commands/delete_command.py +34 -2
  16. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  17. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
  18. cognee/eval_framework/modal_eval_dashboard.py +9 -1
  19. cognee/infrastructure/databases/graph/config.py +9 -9
  20. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
  21. cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
  22. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
  23. cognee/infrastructure/databases/relational/config.py +4 -4
  24. cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
  25. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
  26. cognee/infrastructure/databases/vector/config.py +7 -7
  27. cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
  28. cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
  32. cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
  33. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
  34. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
  35. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
  36. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
  37. cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
  38. cognee/infrastructure/files/storage/s3_config.py +1 -0
  39. cognee/infrastructure/files/utils/open_data_file.py +7 -14
  40. cognee/infrastructure/llm/LLMGateway.py +19 -117
  41. cognee/infrastructure/llm/config.py +28 -13
  42. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
  43. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
  44. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
  45. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
  46. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
  47. cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
  48. cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
  49. cognee/infrastructure/llm/prompts/test.txt +1 -0
  50. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
  51. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
  52. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
  53. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
  54. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
  55. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
  71. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
  72. cognee/infrastructure/llm/utils.py +4 -4
  73. cognee/infrastructure/loaders/LoaderEngine.py +5 -2
  74. cognee/infrastructure/loaders/external/__init__.py +7 -0
  75. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
  76. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  77. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  78. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  79. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  80. cognee/modules/data/methods/get_deletion_counts.py +92 -0
  81. cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
  82. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  83. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  84. cognee/modules/ingestion/data_types/TextData.py +0 -1
  85. cognee/modules/observability/get_observe.py +14 -0
  86. cognee/modules/observability/observers.py +1 -0
  87. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  88. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  89. cognee/modules/ontology/matching_strategies.py +53 -0
  90. cognee/modules/ontology/models.py +20 -0
  91. cognee/modules/ontology/ontology_config.py +24 -0
  92. cognee/modules/ontology/ontology_env_config.py +45 -0
  93. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  94. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
  95. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
  96. cognee/modules/retrieval/code_retriever.py +2 -1
  97. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
  98. cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
  99. cognee/modules/retrieval/graph_completion_retriever.py +0 -3
  100. cognee/modules/retrieval/insights_retriever.py +1 -1
  101. cognee/modules/retrieval/jaccard_retrival.py +60 -0
  102. cognee/modules/retrieval/lexical_retriever.py +123 -0
  103. cognee/modules/retrieval/natural_language_retriever.py +2 -1
  104. cognee/modules/retrieval/temporal_retriever.py +3 -2
  105. cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
  106. cognee/modules/retrieval/utils/completion.py +4 -7
  107. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  108. cognee/modules/search/methods/no_access_control_search.py +1 -1
  109. cognee/modules/search/methods/search.py +32 -13
  110. cognee/modules/search/types/SearchType.py +1 -0
  111. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  112. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  113. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
  114. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  115. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  116. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  117. cognee/modules/users/permissions/methods/get_role.py +10 -0
  118. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  119. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  120. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  121. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  122. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  123. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  124. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  125. cognee/modules/users/roles/methods/create_role.py +12 -1
  126. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  127. cognee/modules/users/tenants/methods/create_tenant.py +12 -1
  128. cognee/modules/visualization/cognee_network_visualization.py +13 -9
  129. cognee/shared/data_models.py +0 -1
  130. cognee/shared/utils.py +0 -32
  131. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  132. cognee/tasks/codingagents/coding_rule_associations.py +3 -2
  133. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
  134. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
  135. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
  136. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
  137. cognee/tasks/graph/extract_graph_from_code.py +2 -2
  138. cognee/tasks/graph/extract_graph_from_data.py +55 -12
  139. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  140. cognee/tasks/ingestion/migrate_relational_database.py +132 -41
  141. cognee/tasks/ingestion/resolve_data_directories.py +4 -1
  142. cognee/tasks/schema/ingest_database_schema.py +134 -0
  143. cognee/tasks/schema/models.py +40 -0
  144. cognee/tasks/storage/index_data_points.py +1 -1
  145. cognee/tasks/storage/index_graph_edges.py +3 -1
  146. cognee/tasks/summarization/summarize_code.py +2 -2
  147. cognee/tasks/summarization/summarize_text.py +2 -2
  148. cognee/tasks/temporal_graph/enrich_events.py +2 -2
  149. cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
  150. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
  151. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
  152. cognee/tests/test_advanced_pdf_loader.py +141 -0
  153. cognee/tests/test_chromadb.py +40 -0
  154. cognee/tests/test_cognee_server_start.py +6 -1
  155. cognee/tests/test_data/Quantum_computers.txt +9 -0
  156. cognee/tests/test_lancedb.py +211 -0
  157. cognee/tests/test_pgvector.py +40 -0
  158. cognee/tests/test_relational_db_migration.py +76 -0
  159. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
  160. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  161. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
  162. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
  163. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
  164. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/METADATA +92 -96
  165. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/RECORD +172 -160
  166. cognee/infrastructure/data/utils/extract_keywords.py +0 -48
  167. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
  168. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
  169. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
  170. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
  171. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
  172. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
  173. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
  174. cognee/tasks/graph/infer_data_ontology.py +0 -309
  175. cognee/tests/test_falkordb.py +0 -174
  176. distributed/poetry.lock +0 -12238
  177. distributed/pyproject.toml +0 -186
  178. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
  179. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
  180. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
  181. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/WHEEL +0 -0
  182. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/entry_points.txt +0 -0
  183. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/LICENSE +0 -0
  184. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/NOTICE.md +0 -0
cognee/tasks/graph/infer_data_ontology.py (deleted)
@@ -1,309 +0,0 @@
- # PROPOSED TO BE DEPRECATED
-
- """This module contains the OntologyEngine class which is responsible for adding graph ontology from a JSON or CSV file."""
-
- import csv
- import json
- from cognee.shared.logging_utils import get_logger
- from datetime import datetime, timezone
- from fastapi import status
- from typing import Any, Dict, List, Optional, Union, Type
-
- import aiofiles
- import pandas as pd
- from pydantic import BaseModel
-
- from cognee.modules.graph.exceptions import EntityNotFoundError
- from cognee.modules.ingestion.exceptions import IngestionError
-
- from cognee.infrastructure.data.chunking.config import get_chunk_config
- from cognee.infrastructure.data.chunking.get_chunking_engine import get_chunk_engine
- from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
- from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
- from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
- from cognee.modules.data.methods.add_model_class_to_graph import (
-     add_model_class_to_graph,
- )
- from cognee.tasks.graph.models import NodeModel, GraphOntology
- from cognee.shared.data_models import KnowledgeGraph
- from cognee.modules.engine.utils import generate_node_id, generate_node_name
- from cognee.infrastructure.llm.LLMGateway import LLMGateway
-
- logger = get_logger("task:infer_data_ontology")
-
-
- async def extract_ontology(content: str, response_model: Type[BaseModel]):
-     """
-     Extracts structured ontology from the provided content using a pre-defined LLM client.
-
-     This asynchronous function retrieves a system prompt from a file and utilizes an LLM
-     client to create a structured output based on the input content and specified response
-     model.
-
-     Parameters:
-     -----------
-
-         - content (str): The content from which to extract the ontology.
-         - response_model (Type[BaseModel]): The model that defines the structure of the
-           output ontology.
-
-     Returns:
-     --------
-
-         The structured ontology extracted from the content.
-     """
-
-     system_prompt = LLMGateway.read_query_prompt("extract_ontology.txt")
-
-     ontology = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
-
-     return ontology
-
-
- class OntologyEngine:
-     """
-     Manage ontology data and operations for graph structures, providing methods for data
-     loading, flattening models, and adding ontological relationships to a graph database.
-
-     Public methods:
-
-     - flatten_model
-     - recursive_flatten
-     - load_data
-     - add_graph_ontology
-     """
-
-     async def flatten_model(
-         self, model: NodeModel, parent_id: Optional[str] = None
-     ) -> Dict[str, Any]:
-         """
-         Flatten the model to a dictionary including optional parent ID and relationship details
-         if available.
-
-         Parameters:
-         -----------
-
-             - model (NodeModel): The NodeModel instance to flatten.
-             - parent_id (Optional[str]): An optional ID of the parent node for hierarchical
-               purposes. (default None)
-
-         Returns:
-         --------
-
-             - Dict[str, Any]: A dictionary representation of the model with flattened
-               attributes.
-         """
-         result = model.dict()
-         result["parent_id"] = parent_id
-         if model.default_relationship:
-             result.update(
-                 {
-                     "relationship_type": model.default_relationship.type,
-                     "relationship_source": model.default_relationship.source,
-                     "relationship_target": model.default_relationship.target,
-                 }
-             )
-         return result
-
-     async def recursive_flatten(
-         self, items: Union[List[Dict[str, Any]], Dict[str, Any]], parent_id: Optional[str] = None
-     ) -> List[Dict[str, Any]]:
-         """
-         Recursively flatten a hierarchical structure of models into a flat list of dictionaries.
-
-         Parameters:
-         -----------
-
-             - items (Union[List[Dict[str, Any]], Dict[str, Any]]): A list or dictionary
-               containing models to flatten.
-             - parent_id (Optional[str]): An optional ID of the parent node to maintain hierarchy
-               during flattening. (default None)
-
-         Returns:
-         --------
-
-             - List[Dict[str, Any]]: A flat list of dictionaries representing the hierarchical
-               model structure.
-         """
-         flat_list = []
-
-         if isinstance(items, list):
-             for item in items:
-                 flat_list.extend(await self.recursive_flatten(item, parent_id))
-         elif isinstance(items, dict):
-             model = NodeModel.model_validate(items)
-             flat_list.append(await self.flatten_model(model, parent_id))
-             for child in model.children:
-                 flat_list.extend(await self.recursive_flatten(child, model.node_id))
-         return flat_list
-
-     async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
-         """
-         Load data from a specified JSON or CSV file and return it in a structured format.
-
-         Parameters:
-         -----------
-
-             - file_path (str): The path to the file to load data from.
-
-         Returns:
-         --------
-
-             - Union[List[Dict[str, Any]], Dict[str, Any]]: Parsed data from the file as either a
-               list of dictionaries or a single dictionary depending on content type.
-         """
-         try:
-             if file_path.endswith(".json"):
-                 async with aiofiles.open(file_path, mode="r") as f:
-                     data = await f.read()
-                     return json.loads(data)
-             elif file_path.endswith(".csv"):
-                 async with aiofiles.open(file_path, mode="r") as f:
-                     content = await f.read()
-                     reader = csv.DictReader(content.splitlines())
-                     return list(reader)
-             else:
-                 raise IngestionError(message="Unsupported file format")
-         except Exception as e:
-             raise IngestionError(
-                 message=f"Failed to load data from {file_path}: {e}",
-                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-             )
-
-     async def add_graph_ontology(self, file_path: str = None, documents: list = None):
-         """
-         Add graph ontology from a JSON or CSV file, or infer relationships from provided
-         document content. Raise exceptions for invalid file types or missing entities.
-
-         Parameters:
-         -----------
-
-             - file_path (str): Optional path to a file containing data to be loaded. (default
-               None)
-             - documents (list): Optional list of document objects for content extraction if no
-               file path is provided. (default None)
-         """
-         if file_path is None:
-             initial_chunks_and_ids = []
-
-             chunk_config = get_chunk_config()
-             chunk_engine = get_chunk_engine()
-             chunk_strategy = chunk_config.chunk_strategy
-
-             for base_file in documents:
-                 with open(base_file.raw_data_location, "rb") as file:
-                     try:
-                         file_type = guess_file_type(file)
-                         text = extract_text_from_file(file, file_type)
-
-                         subchunks, chunks_with_ids = chunk_engine.chunk_data(
-                             chunk_strategy,
-                             text,
-                             chunk_config.chunk_size,
-                             chunk_config.chunk_overlap,
-                         )
-
-                         if chunks_with_ids[0][0] == 1:
-                             initial_chunks_and_ids.append({base_file.id: chunks_with_ids})
-
-                     except FileTypeException:
-                         logger.warning(
-                             "File (%s) has an unknown file type. We are skipping it.", file["id"]
-                         )
-
-             ontology = await extract_ontology(str(initial_chunks_and_ids), GraphOntology)
-             graph_client = await get_graph_engine()
-
-             await graph_client.add_nodes(
-                 [
-                     (
-                         node.id,
-                         dict(
-                             uuid=generate_node_id(node.id),
-                             name=generate_node_name(node.name),
-                             type=generate_node_id(node.id),
-                             description=node.description,
-                             updated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
-                         ),
-                     )
-                     for node in ontology.nodes
-                 ]
-             )
-
-             await graph_client.add_edges(
-                 (
-                     generate_node_id(edge.source_id),
-                     generate_node_id(edge.target_id),
-                     edge.relationship_type,
-                     dict(
-                         source_node_id=generate_node_id(edge.source_id),
-                         target_node_id=generate_node_id(edge.target_id),
-                         relationship_name=edge.relationship_type,
-                         updated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
-                     ),
-                 )
-                 for edge in ontology.edges
-             )
-
-         else:
-             dataset_level_information = documents[0][1]
-
-             # Extract the list of valid IDs from the explanations
-             valid_ids = {item["id"] for item in dataset_level_information}
-             try:
-                 data = await self.load_data(file_path)
-                 flt_ontology = await self.recursive_flatten(data)
-                 df = pd.DataFrame(flt_ontology)
-                 graph_client = await get_graph_engine()
-
-                 for _, row in df.iterrows():
-                     node_data = row.to_dict()
-                     node_id = node_data.pop("node_id", None)
-                     if node_id in valid_ids:
-                         await graph_client.add_node(node_id, node_data)
-                     if node_id not in valid_ids:
-                         raise EntityNotFoundError(
-                             message=f"Node ID {node_id} not found in the dataset"
-                         )
-                     if pd.notna(row.get("relationship_source")) and pd.notna(
-                         row.get("relationship_target")
-                     ):
-                         await graph_client.add_edge(
-                             row["relationship_source"],
-                             row["relationship_target"],
-                             relationship_name=row["relationship_type"],
-                             edge_properties={
-                                 "source_node_id": row["relationship_source"],
-                                 "target_node_id": row["relationship_target"],
-                                 "relationship_name": row["relationship_type"],
-                                 "updated_at": datetime.now(timezone.utc).strftime(
-                                     "%Y-%m-%d %H:%M:%S"
-                                 ),
-                             },
-                         )
-
-                 return
-             except Exception as e:
-                 raise RuntimeError(f"Failed to add graph ontology from {file_path}: {e}") from e
-
-
- async def infer_data_ontology(documents, ontology_model=KnowledgeGraph, root_node_id=None):
-     """
-     Infer data ontology from provided documents and optionally add it to a graph.
-
-     Parameters:
-     -----------
-
-         - documents: The documents from which to infer the ontology.
-         - ontology_model: The ontology model to use for the inference, defaults to
-           KnowledgeGraph. (default KnowledgeGraph)
-         - root_node_id: An optional root node identifier for the ontology. (default None)
-     """
-     if ontology_model == KnowledgeGraph:
-         ontology_engine = OntologyEngine()
-         root_node_id = await ontology_engine.add_graph_ontology(documents=documents)
-     else:
-         graph_engine = await get_graph_engine()
-         await add_model_class_to_graph(ontology_model, graph_engine)
-
-     yield (documents, root_node_id)
cognee/tests/test_falkordb.py (deleted)
@@ -1,174 +0,0 @@
- import os
- import cognee
- import pathlib
- from cognee.infrastructure.files.storage import get_storage_config
- from cognee.modules.search.operations import get_history
- from cognee.modules.users.methods import get_default_user
- from cognee.shared.logging_utils import get_logger
- from cognee.modules.search.types import SearchType
-
- logger = get_logger()
-
-
- async def check_falkordb_connection():
-     """Check if FalkorDB is available at localhost:6379"""
-     try:
-         from falkordb import FalkorDB
-
-         client = FalkorDB(host="localhost", port=6379)
-         # Try to list graphs to check connection
-         client.list_graphs()
-         return True
-     except Exception as e:
-         logger.warning(f"FalkorDB not available at localhost:6379: {e}")
-         return False
-
-
- async def main():
-     # Check if FalkorDB is available
-     if not await check_falkordb_connection():
-         print("⚠️ FalkorDB is not available at localhost:6379")
-         print("   To run this test, start FalkorDB server:")
-         print("   docker run -p 6379:6379 falkordb/falkordb:latest")
-         print("   Skipping FalkorDB test...")
-         return
-
-     print("✅ FalkorDB connection successful, running test...")
-
-     # Configure FalkorDB as the graph database provider
-     cognee.config.set_graph_db_config(
-         {
-             "graph_database_url": "localhost",  # FalkorDB URL (using Redis protocol)
-             "graph_database_port": 6379,
-             "graph_database_provider": "falkordb",
-         }
-     )
-
-     # Configure FalkorDB as the vector database provider too since it's a hybrid adapter
-     cognee.config.set_vector_db_config(
-         {
-             "vector_db_url": "localhost",
-             "vector_db_port": 6379,
-             "vector_db_provider": "falkordb",
-         }
-     )
-
-     data_directory_path = str(
-         pathlib.Path(
-             os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_falkordb")
-         ).resolve()
-     )
-     cognee.config.data_root_directory(data_directory_path)
-     cognee_directory_path = str(
-         pathlib.Path(
-             os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_falkordb")
-         ).resolve()
-     )
-     cognee.config.system_root_directory(cognee_directory_path)
-
-     await cognee.prune.prune_data()
-     await cognee.prune.prune_system(metadata=True)
-
-     dataset_name = "artificial_intelligence"
-
-     ai_text_file_path = os.path.join(
-         pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
-     )
-     await cognee.add([ai_text_file_path], dataset_name)
-
-     text = """A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process. LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word.
-     LLMs are artificial neural networks. The largest and most capable, as of March 2024, are built with a decoder-only transformer-based architecture while some recent implementations are based on other architectures, such as recurrent neural network variants and Mamba (a state space model).
-     Up to 2020, fine tuning was the only way a model could be adapted to be able to accomplish specific tasks. Larger sized models, such as GPT-3, however, can be prompt-engineered to achieve similar results.[6] They are thought to acquire knowledge about syntax, semantics and "ontology" inherent in human language corpora, but also inaccuracies and biases present in the corpora.
-     Some notable LLMs are OpenAI's GPT series of models (e.g., GPT-3.5 and GPT-4, used in ChatGPT and Microsoft Copilot), Google's PaLM and Gemini (the latter of which is currently used in the chatbot of the same name), xAI's Grok, Meta's LLaMA family of open-source models, Anthropic's Claude models, Mistral AI's open source models, and Databricks' open source DBRX.
-     """
-
-     await cognee.add([text], dataset_name)
-
-     await cognee.cognify([dataset_name])
-
-     from cognee.infrastructure.databases.vector import get_vector_engine
-
-     vector_engine = get_vector_engine()
-     random_node = (await vector_engine.search("entity.name", "AI"))[0]
-     random_node_name = random_node.payload["text"]
-
-     search_results = await cognee.search(
-         query_type=SearchType.INSIGHTS, query_text=random_node_name
-     )
-     assert len(search_results) != 0, "The search results list is empty."
-     print("\n\nExtracted sentences are:\n")
-     for result in search_results:
-         print(f"{result}\n")
-
-     search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
-     assert len(search_results) != 0, "The search results list is empty."
-     print("\n\nExtracted chunks are:\n")
-     for result in search_results:
-         print(f"{result}\n")
-
-     search_results = await cognee.search(
-         query_type=SearchType.SUMMARIES, query_text=random_node_name
-     )
-     assert len(search_results) != 0, "Query related summaries don't exist."
-     print("\nExtracted summaries are:\n")
-     for result in search_results:
-         print(f"{result}\n")
-
-     user = await get_default_user()
-     history = await get_history(user.id)
-
-     assert len(history) == 6, "Search history is not correct."
-
-     # Assert local data files are cleaned properly
-     await cognee.prune.prune_data()
-     data_root_directory = get_storage_config()["data_root_directory"]
-     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
-
-     # Assert relational, vector and graph databases have been cleaned properly
-     await cognee.prune.prune_system(metadata=True)
-
-     # For FalkorDB vector engine, check if collections are empty
-     # Since FalkorDB is a hybrid adapter, we can check if the graph is empty
-     # as the vector data is stored in the same graph
-     if hasattr(vector_engine, "driver"):
-         # This is FalkorDB - check if graphs exist
-         collections = vector_engine.driver.list_graphs()
-         # The graph should be deleted, so either no graphs or empty graph
-         if vector_engine.graph_name in collections:
-             # Graph exists but should be empty
-             vector_graph_data = await vector_engine.get_graph_data()
-             vector_nodes, vector_edges = vector_graph_data
-             assert len(vector_nodes) == 0 and len(vector_edges) == 0, (
-                 "FalkorDB vector database is not empty"
-             )
-     else:
-         # Fallback for other vector engines like LanceDB
-         connection = await vector_engine.get_connection()
-         collection_names = await connection.table_names()
-         assert len(collection_names) == 0, "Vector database is not empty"
-
-     from cognee.infrastructure.databases.relational import get_relational_engine
-
-     assert not os.path.exists(get_relational_engine().db_path), (
-         "SQLite relational database is not empty"
-     )
-
-     # For FalkorDB, check if the graph database is empty
-     from cognee.infrastructure.databases.graph import get_graph_engine
-
-     graph_engine = get_graph_engine()
-     graph_data = await graph_engine.get_graph_data()
-     nodes, edges = graph_data
-     assert len(nodes) == 0 and len(edges) == 0, "FalkorDB graph database is not empty"
-
-     print("🎉 FalkorDB test completed successfully!")
-     print("   ✓ Data ingestion worked")
-     print("   ✓ Cognify processing worked")
-     print("   ✓ Search operations worked")
-     print("   ✓ Cleanup worked")
-
-
- if __name__ == "__main__":
-     import asyncio
-
-     asyncio.run(main(), debug=True)