cognee 0.3.4.dev4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. cognee/api/client.py +16 -7
  2. cognee/api/health.py +5 -9
  3. cognee/api/v1/add/add.py +3 -1
  4. cognee/api/v1/cognify/cognify.py +44 -7
  5. cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
  6. cognee/api/v1/search/search.py +3 -0
  7. cognee/api/v1/ui/__init__.py +1 -1
  8. cognee/api/v1/ui/ui.py +215 -150
  9. cognee/api/v1/update/__init__.py +1 -0
  10. cognee/api/v1/update/routers/__init__.py +1 -0
  11. cognee/api/v1/update/routers/get_update_router.py +90 -0
  12. cognee/api/v1/update/update.py +100 -0
  13. cognee/base_config.py +5 -2
  14. cognee/cli/_cognee.py +28 -10
  15. cognee/cli/commands/delete_command.py +34 -2
  16. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  17. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
  18. cognee/eval_framework/modal_eval_dashboard.py +9 -1
  19. cognee/infrastructure/databases/graph/config.py +9 -9
  20. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
  21. cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
  22. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
  23. cognee/infrastructure/databases/relational/config.py +4 -4
  24. cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
  25. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
  26. cognee/infrastructure/databases/vector/config.py +7 -7
  27. cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
  28. cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
  32. cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
  33. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
  34. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
  35. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
  36. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
  37. cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
  38. cognee/infrastructure/files/storage/s3_config.py +1 -0
  39. cognee/infrastructure/files/utils/open_data_file.py +7 -14
  40. cognee/infrastructure/llm/LLMGateway.py +19 -117
  41. cognee/infrastructure/llm/config.py +28 -13
  42. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
  43. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
  44. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
  45. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
  46. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
  47. cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
  48. cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
  49. cognee/infrastructure/llm/prompts/test.txt +1 -0
  50. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
  51. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
  52. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
  53. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
  54. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
  55. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
  71. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
  72. cognee/infrastructure/llm/utils.py +4 -4
  73. cognee/infrastructure/loaders/LoaderEngine.py +5 -2
  74. cognee/infrastructure/loaders/external/__init__.py +7 -0
  75. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
  76. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  77. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  78. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  79. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  80. cognee/modules/data/methods/get_deletion_counts.py +92 -0
  81. cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
  82. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  83. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  84. cognee/modules/ingestion/data_types/TextData.py +0 -1
  85. cognee/modules/observability/get_observe.py +14 -0
  86. cognee/modules/observability/observers.py +1 -0
  87. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  88. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  89. cognee/modules/ontology/matching_strategies.py +53 -0
  90. cognee/modules/ontology/models.py +20 -0
  91. cognee/modules/ontology/ontology_config.py +24 -0
  92. cognee/modules/ontology/ontology_env_config.py +45 -0
  93. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  94. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
  95. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
  96. cognee/modules/retrieval/code_retriever.py +2 -1
  97. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
  98. cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
  99. cognee/modules/retrieval/graph_completion_retriever.py +0 -3
  100. cognee/modules/retrieval/insights_retriever.py +1 -1
  101. cognee/modules/retrieval/jaccard_retrival.py +60 -0
  102. cognee/modules/retrieval/lexical_retriever.py +123 -0
  103. cognee/modules/retrieval/natural_language_retriever.py +2 -1
  104. cognee/modules/retrieval/temporal_retriever.py +3 -2
  105. cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
  106. cognee/modules/retrieval/utils/completion.py +4 -7
  107. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  108. cognee/modules/search/methods/no_access_control_search.py +1 -1
  109. cognee/modules/search/methods/search.py +32 -13
  110. cognee/modules/search/types/SearchType.py +1 -0
  111. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  112. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  113. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
  114. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  115. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  116. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  117. cognee/modules/users/permissions/methods/get_role.py +10 -0
  118. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  119. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  120. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  121. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  122. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  123. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  124. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  125. cognee/modules/users/roles/methods/create_role.py +12 -1
  126. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  127. cognee/modules/users/tenants/methods/create_tenant.py +12 -1
  128. cognee/modules/visualization/cognee_network_visualization.py +13 -9
  129. cognee/shared/data_models.py +0 -1
  130. cognee/shared/utils.py +0 -32
  131. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  132. cognee/tasks/codingagents/coding_rule_associations.py +3 -2
  133. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
  134. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
  135. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
  136. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
  137. cognee/tasks/graph/extract_graph_from_code.py +2 -2
  138. cognee/tasks/graph/extract_graph_from_data.py +55 -12
  139. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  140. cognee/tasks/ingestion/migrate_relational_database.py +132 -41
  141. cognee/tasks/ingestion/resolve_data_directories.py +4 -1
  142. cognee/tasks/schema/ingest_database_schema.py +134 -0
  143. cognee/tasks/schema/models.py +40 -0
  144. cognee/tasks/storage/index_data_points.py +1 -1
  145. cognee/tasks/storage/index_graph_edges.py +3 -1
  146. cognee/tasks/summarization/summarize_code.py +2 -2
  147. cognee/tasks/summarization/summarize_text.py +2 -2
  148. cognee/tasks/temporal_graph/enrich_events.py +2 -2
  149. cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
  150. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
  151. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
  152. cognee/tests/test_advanced_pdf_loader.py +141 -0
  153. cognee/tests/test_chromadb.py +40 -0
  154. cognee/tests/test_cognee_server_start.py +6 -1
  155. cognee/tests/test_data/Quantum_computers.txt +9 -0
  156. cognee/tests/test_lancedb.py +211 -0
  157. cognee/tests/test_pgvector.py +40 -0
  158. cognee/tests/test_relational_db_migration.py +76 -0
  159. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
  160. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  161. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
  162. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
  163. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
  164. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/METADATA +92 -96
  165. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/RECORD +172 -160
  166. cognee/infrastructure/data/utils/extract_keywords.py +0 -48
  167. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
  168. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
  169. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
  170. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
  171. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
  172. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
  173. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
  174. cognee/tasks/graph/infer_data_ontology.py +0 -309
  175. cognee/tests/test_falkordb.py +0 -174
  176. distributed/poetry.lock +0 -12238
  177. distributed/pyproject.toml +0 -186
  178. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
  179. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
  180. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
  181. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/WHEEL +0 -0
  182. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/entry_points.txt +0 -0
  183. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/LICENSE +0 -0
  184. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py (deleted)
@@ -1,1227 +0,0 @@
- import asyncio
-
- # from datetime import datetime
- import json
- from textwrap import dedent
- from uuid import UUID
- from webbrowser import Error
- from typing import List, Dict, Any, Optional, Tuple, Type, Union
-
- from falkordb import FalkorDB
-
- from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
- from cognee.infrastructure.databases.graph.graph_db_interface import (
-     GraphDBInterface,
-     record_graph_changes,
-     NodeData,
-     EdgeData,
-     Node,
- )
- from cognee.infrastructure.databases.vector.embeddings import EmbeddingEngine
- from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
- from cognee.infrastructure.engine import DataPoint
-
-
- class IndexSchema(DataPoint):
-     """
-     Define a schema for indexing that includes text data and associated metadata.
-
-     This class inherits from the DataPoint class. It contains a string attribute 'text' and
-     a dictionary 'metadata' that specifies the index fields for this schema.
-     """
-
-     text: str
-
-     metadata: dict = {"index_fields": ["text"]}
-
-
- class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
-     """
-     Manage and interact with a graph database using vector embeddings.
-
-     Public methods include:
-     - query
-     - embed_data
-     - stringify_properties
-     - create_data_point_query
-     - create_edge_query
-     - create_collection
-     - has_collection
-     - create_data_points
-     - create_vector_index
-     - has_vector_index
-     - index_data_points
-     - add_node
-     - add_nodes
-     - add_edge
-     - add_edges
-     - has_edges
-     - retrieve
-     - extract_node
-     - extract_nodes
-     - get_connections
-     - search
-     - batch_search
-     - get_graph_data
-     - delete_data_points
-     - delete_node
-     - delete_nodes
-     - delete_graph
-     - prune
-     - get_node
-     - get_nodes
-     - get_neighbors
-     - get_graph_metrics
-     - get_document_subgraph
-     - get_degree_one_nodes
-     """
-
-     def __init__(
-         self,
-         database_url: str,
-         database_port: int,
-         embedding_engine=EmbeddingEngine,
-     ):
-         self.driver = FalkorDB(
-             host=database_url,
-             port=database_port,
-         )
-         self.embedding_engine = embedding_engine
-         self.graph_name = "cognee_graph"
-
-     def query(self, query: str, params: dict = {}):
-         """
-         Execute a query against the graph database.
-
-         Handles exceptions during the query execution by logging errors and re-raising the
-         exception.
-
-         The method can be called only if a valid query string and parameters are provided.
-
-         Parameters:
-         -----------
-
-         - query (str): The query string to be executed against the graph database.
-         - params (dict): A dictionary of parameters to be used in the query. (default {})
-
-         Returns:
-         --------
-
-         The result of the query execution, returned by the graph database.
-         """
-         graph = self.driver.select_graph(self.graph_name)
-
-         try:
-             result = graph.query(query, params)
-             return result
-         except Exception as e:
-             print(f"Error executing query: {e}")
-             raise e
-
-     async def embed_data(self, data: list[str]) -> list[list[float]]:
-         """
-         Embed a list of text data into vector representations using the embedding engine.
-
-         Parameters:
-         -----------
-
-         - data (list[str]): A list of strings that should be embedded into vectors.
-
-         Returns:
-         --------
-
-         - list[list[float]]: A list of lists, where each inner list contains float values
-           representing the embedded vectors.
-         """
-         return await self.embedding_engine.embed_text(data)
-
-     async def stringify_properties(self, properties: dict) -> str:
-         """
-         Convert properties dictionary to a string format suitable for database queries.
-
-         Parameters:
-         -----------
-
-         - properties (dict): A dictionary containing properties to be converted to string
-           format.
-
-         Returns:
-         --------
-
-         - str: A string representation of the properties in the appropriate format.
-         """
-
-         def parse_value(value):
-             """
-             Convert a value to its string representation based on type for database queries.
-
-             Parameters:
-             -----------
-
-             - value: The value to parse into a string representation.
-
-             Returns:
-             --------
-
-             Returns the string representation of the value in the appropriate format.
-             """
-             if type(value) is UUID:
-                 return f"'{str(value)}'"
-             if type(value) is int or type(value) is float:
-                 return value
-             if (
-                 type(value) is list
-                 and len(value) > 0
-                 and type(value[0]) is float
-                 and len(value) == self.embedding_engine.get_vector_size()
-             ):
-                 return f"'vecf32({value})'"
-             # if type(value) is datetime:
-             #     return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f%z")
-             if type(value) is dict:
-                 return f"'{json.dumps(value).replace(chr(39), chr(34))}'"
-             if type(value) is str:
-                 # Escape single quotes and handle special characters
-                 escaped_value = (
-                     str(value)
-                     .replace("'", "\\'")
-                     .replace('"', '\\"')
-                     .replace("\n", "\\n")
-                     .replace("\r", "\\r")
-                     .replace("\t", "\\t")
-                 )
-                 return f"'{escaped_value}'"
-             return f"'{str(value)}'"
-
-         return ",".join([f"{key}:{parse_value(value)}" for key, value in properties.items()])
-
-     async def create_data_point_query(self, data_point: DataPoint, vectorized_values: dict):
-         """
-         Compose a query to create or update a data point in the database.
-
-         Parameters:
-         -----------
-
-         - data_point (DataPoint): An instance of DataPoint containing information about the
-           entity.
-         - vectorized_values (dict): A dictionary of vectorized values related to the data
-           point.
-
-         Returns:
-         --------
-
-         A tuple containing the query string and parameters dictionary.
-         """
-         node_label = type(data_point).__name__
-         property_names = DataPoint.get_embeddable_property_names(data_point)
-
-         properties = {
-             **data_point.model_dump(),
-             **(
-                 {
-                     property_names[index]: (
-                         vectorized_values[index]
-                         if index < len(vectorized_values)
-                         else getattr(data_point, property_name, None)
-                     )
-                     for index, property_name in enumerate(property_names)
-                 }
-             ),
-         }
-
-         # Clean the properties - remove None values and handle special types
-         clean_properties = {}
-         for key, value in properties.items():
-             if value is not None:
-                 if isinstance(value, UUID):
-                     clean_properties[key] = str(value)
-                 elif isinstance(value, dict):
-                     clean_properties[key] = json.dumps(value)
-                 elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], float):
-                     # This is likely a vector - convert to string representation
-                     clean_properties[key] = f"vecf32({value})"
-                 else:
-                     clean_properties[key] = value
-
-         query = dedent(
-             f"""
-             MERGE (node:{node_label} {{id: $node_id}})
-             SET node += $properties, node.updated_at = timestamp()
-             """
-         ).strip()
-
-         params = {"node_id": str(data_point.id), "properties": clean_properties}
-
-         return query, params
-
-     def sanitize_relationship_name(self, relationship_name: str) -> str:
-         """
-         Sanitize relationship name to be valid for Cypher queries.
-
-         Parameters:
-         -----------
-         - relationship_name (str): The original relationship name
-
-         Returns:
-         --------
-         - str: A sanitized relationship name valid for Cypher
-         """
-         # Replace hyphens, spaces, and other special characters with underscores
-         import re
-
-         sanitized = re.sub(r"[^\w]", "_", relationship_name)
-         # Remove consecutive underscores
-         sanitized = re.sub(r"_+", "_", sanitized)
-         # Remove leading/trailing underscores
-         sanitized = sanitized.strip("_")
-         # Ensure it starts with a letter or underscore
-         if sanitized and not sanitized[0].isalpha() and sanitized[0] != "_":
-             sanitized = "_" + sanitized
-         return sanitized or "RELATIONSHIP"
-
-     async def create_edge_query(self, edge: tuple[str, str, str, dict]) -> str:
-         """
-         Generate a query to create or update an edge between two nodes in the graph.
-
-         Parameters:
-         -----------
-
-         - edge (tuple[str, str, str, dict]): A tuple consisting of source and target node
-           IDs, edge type, and edge properties.
-
-         Returns:
-         --------
-
-         - str: A string containing the query to be executed for creating the edge.
-         """
-         # Sanitize the relationship name for Cypher compatibility
-         sanitized_relationship = self.sanitize_relationship_name(edge[2])
-
-         # Add the original relationship name to properties
-         edge_properties = {**edge[3], "relationship_name": edge[2]}
-         properties = await self.stringify_properties(edge_properties)
-         properties = f"{{{properties}}}"
-
-         return dedent(
-             f"""
-             MERGE (source {{id:'{edge[0]}'}})
-             MERGE (target {{id: '{edge[1]}'}})
-             MERGE (source)-[edge:{sanitized_relationship} {properties}]->(target)
-             ON MATCH SET edge.updated_at = timestamp()
-             ON CREATE SET edge.updated_at = timestamp()
-             """
-         ).strip()
-
-     async def create_collection(self, collection_name: str):
-         """
-         Create a collection in the graph database with the specified name.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection to be created.
-         """
-         pass
-
-     async def has_collection(self, collection_name: str) -> bool:
-         """
-         Check if a collection with the specified name exists in the graph database.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection to check for existence.
-
-         Returns:
-         --------
-
-         - bool: Returns true if the collection exists, otherwise false.
-         """
-         collections = self.driver.list_graphs()
-
-         return collection_name in collections
-
-     async def create_data_points(self, data_points: list[DataPoint]):
-         """
-         Add a list of data points to the graph database via batching.
-
-         Can raise exceptions if there are issues during the database operations.
-
-         Parameters:
-         -----------
-
-         - data_points (list[DataPoint]): A list of DataPoint instances to be inserted into
-           the database.
-         """
-         embeddable_values = []
-         vector_map = {}
-
-         for data_point in data_points:
-             property_names = DataPoint.get_embeddable_property_names(data_point)
-             key = str(data_point.id)
-             vector_map[key] = {}
-
-             for property_name in property_names:
-                 property_value = getattr(data_point, property_name, None)
-
-                 if property_value is not None:
-                     vector_map[key][property_name] = len(embeddable_values)
-                     embeddable_values.append(property_value)
-                 else:
-                     vector_map[key][property_name] = None
-
-         vectorized_values = await self.embed_data(embeddable_values)
-
-         for data_point in data_points:
-             vectorized_data = [
-                 vectorized_values[vector_map[str(data_point.id)][property_name]]
-                 if vector_map[str(data_point.id)][property_name] is not None
-                 else None
-                 for property_name in DataPoint.get_embeddable_property_names(data_point)
-             ]
-
-             query, params = await self.create_data_point_query(data_point, vectorized_data)
-             self.query(query, params)
-
-     async def create_vector_index(self, index_name: str, index_property_name: str):
-         """
-         Create a vector index in the specified graph for a given property if it does not already
-         exist.
-
-         Parameters:
-         -----------
-
-         - index_name (str): The name of the vector index to be created.
-         - index_property_name (str): The name of the property on which the vector index will
-           be created.
-         """
-         graph = self.driver.select_graph(self.graph_name)
-
-         if not self.has_vector_index(graph, index_name, index_property_name):
-             graph.create_node_vector_index(
-                 index_name, index_property_name, dim=self.embedding_engine.get_vector_size()
-             )
-
-     def has_vector_index(self, graph, index_name: str, index_property_name: str) -> bool:
-         """
-         Determine if a vector index exists on the specified property of the given graph.
-
-         Parameters:
-         -----------
-
-         - graph: The graph instance to check for the vector index.
-         - index_name (str): The name of the index to check for existence.
-         - index_property_name (str): The property name associated with the index.
-
-         Returns:
-         --------
-
-         - bool: Returns true if the vector index exists, otherwise false.
-         """
-         try:
-             indices = graph.list_indices()
-
-             return any(
-                 [
-                     (index[0] == index_name and index_property_name in index[1])
-                     for index in indices.result_set
-                 ]
-             )
-         except Error as e:
-             print(e)
-             return False
-
-     async def index_data_points(
-         self, index_name: str, index_property_name: str, data_points: list[DataPoint]
-     ):
-         """
-         Index a list of data points in the specified graph database based on properties.
-
-         To be implemented: does not yet have a defined behavior.
-
-         Parameters:
-         -----------
-
-         - index_name (str): The name of the index to be created for the data points.
-         - index_property_name (str): The property name on which to index the data points.
-         - data_points (list[DataPoint]): A list of DataPoint instances to be indexed.
-         """
-         pass
-
-     async def add_node(self, node_id: str, properties: Dict[str, Any]) -> None:
-         """
-         Add a single node with specified properties to the graph.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier for the node being added.
-         - properties (Dict[str, Any]): A dictionary of properties associated with the node.
-         """
-         # Clean the properties - remove None values and handle special types
-         clean_properties = {"id": node_id}
-         for key, value in properties.items():
-             if value is not None:
-                 if isinstance(value, UUID):
-                     clean_properties[key] = str(value)
-                 elif isinstance(value, dict):
-                     clean_properties[key] = json.dumps(value)
-                 elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], float):
-                     # This is likely a vector - convert to string representation
-                     clean_properties[key] = f"vecf32({value})"
-                 else:
-                     clean_properties[key] = value
-
-         query = "MERGE (node {id: $node_id}) SET node += $properties, node.updated_at = timestamp()"
-         params = {"node_id": node_id, "properties": clean_properties}
-
-         self.query(query, params)
-
-     # Helper methods for DataPoint compatibility
-     async def add_data_point_node(self, node: DataPoint):
-         """
-         Add a single data point as a node in the graph.
-
-         Parameters:
-         -----------
-
-         - node (DataPoint): An instance of DataPoint to be added to the graph.
-         """
-         await self.create_data_points([node])
-
-     async def add_data_point_nodes(self, nodes: list[DataPoint]):
-         """
-         Add multiple data points as nodes in the graph.
-
-         Parameters:
-         -----------
-
-         - nodes (list[DataPoint]): A list of DataPoint instances to be added to the graph.
-         """
-         await self.create_data_points(nodes)
-
-     @record_graph_changes
-     async def add_nodes(self, nodes: Union[List[Node], List[DataPoint]]) -> None:
-         """
-         Add multiple nodes to the graph in a single operation.
-
-         Parameters:
-         -----------
-
-         - nodes (Union[List[Node], List[DataPoint]]): A list of Node tuples or DataPoint objects to be added to the graph.
-         """
-         for node in nodes:
-             if isinstance(node, tuple) and len(node) == 2:
-                 # Node is in (node_id, properties) format
-                 node_id, properties = node
-                 await self.add_node(node_id, properties)
-             elif hasattr(node, "id") and hasattr(node, "model_dump"):
-                 # Node is a DataPoint object
-                 await self.add_node(str(node.id), node.model_dump())
-             else:
-                 raise ValueError(
-                     f"Invalid node format: {node}. Expected tuple (node_id, properties) or DataPoint object."
-                 )
-
-     async def add_edge(
-         self,
-         source_id: str,
-         target_id: str,
-         relationship_name: str,
-         properties: Optional[Dict[str, Any]] = None,
-     ) -> None:
-         """
-         Create a new edge between two nodes in the graph.
-
-         Parameters:
-         -----------
-
-         - source_id (str): The unique identifier of the source node.
-         - target_id (str): The unique identifier of the target node.
-         - relationship_name (str): The name of the relationship to be established by the
-           edge.
-         - properties (Optional[Dict[str, Any]]): Optional dictionary of properties
-           associated with the edge. (default None)
-         """
-         if properties is None:
-             properties = {}
-
-         edge_tuple = (source_id, target_id, relationship_name, properties)
-         query = await self.create_edge_query(edge_tuple)
-         self.query(query)
-
-     @record_graph_changes
-     async def add_edges(self, edges: List[EdgeData]) -> None:
-         """
-         Add multiple edges to the graph in a single operation.
-
-         Parameters:
-         -----------
-
-         - edges (List[EdgeData]): A list of EdgeData objects representing edges to be added.
-         """
-         for edge in edges:
-             if isinstance(edge, tuple) and len(edge) == 4:
-                 # Edge is in (source_id, target_id, relationship_name, properties) format
-                 source_id, target_id, relationship_name, properties = edge
-                 await self.add_edge(source_id, target_id, relationship_name, properties)
-             else:
-                 raise ValueError(
-                     f"Invalid edge format: {edge}. Expected tuple (source_id, target_id, relationship_name, properties)."
-                 )
-
-     async def has_edges(self, edges):
-         """
-         Check if the specified edges exist in the graph based on their attributes.
-
-         Parameters:
-         -----------
-
-         - edges: A list of edges to check for existence in the graph.
-
-         Returns:
-         --------
-
-         Returns a list of edge tuples that exist in the graph.
-         """
-         existing_edges = []
-         for edge in edges:
-             exists = await self.has_edge(str(edge[0]), str(edge[1]), edge[2])
-             if exists:
-                 existing_edges.append(edge)
-         return existing_edges
-
-     async def retrieve(self, data_point_ids: list[UUID]):
-         """
-         Retrieve data points from the graph based on their IDs.
-
-         Parameters:
-         -----------
-
-         - data_point_ids (list[UUID]): A list of UUIDs representing the data points to
-           retrieve.
-
-         Returns:
-         --------
-
-         Returns the result set containing the retrieved nodes or an empty list if not found.
-         """
-         result = self.query(
-             "MATCH (node) WHERE node.id IN $node_ids RETURN node",
-             {
-                 "node_ids": [str(data_point) for data_point in data_point_ids],
-             },
-         )
-         return result.result_set
-
-     async def extract_node(self, data_point_id: UUID):
-         """
-         Extract the properties of a single node identified by its data point ID.
-
-         Parameters:
-         -----------
-
-         - data_point_id (UUID): The UUID of the data point to extract.
-
-         Returns:
-         --------
-
-         Returns the properties of the node if found, otherwise None.
-         """
-         result = await self.retrieve([data_point_id])
-         result = result[0][0] if len(result[0]) > 0 else None
-         return result.properties if result else None
-
-     async def extract_nodes(self, data_point_ids: list[UUID]):
-         """
-         Extract properties of multiple nodes identified by their data point IDs.
-
-         Parameters:
-         -----------
-
-         - data_point_ids (list[UUID]): A list of UUIDs representing the data points to
-           extract.
-
-         Returns:
-         --------
-
-         Returns the properties of the nodes in a list.
-         """
-         return await self.retrieve(data_point_ids)
-
-     async def get_connections(self, node_id: UUID) -> list:
-         """
-         Retrieve connection details (predecessors and successors) for a given node ID.
-
-         Parameters:
-         -----------
-
-         - node_id (UUID): The UUID of the node whose connections are to be retrieved.
-
-         Returns:
-         --------
-
-         - list: Returns a list of tuples representing the connections of the node.
-         """
-         predecessors_query = """
-         MATCH (node)<-[relation]-(neighbour)
-         WHERE node.id = $node_id
-         RETURN neighbour, relation, node
-         """
-         successors_query = """
-         MATCH (node)-[relation]->(neighbour)
-         WHERE node.id = $node_id
-         RETURN node, relation, neighbour
-         """
-
-         predecessors, successors = await asyncio.gather(
-             self.query(predecessors_query, dict(node_id=node_id)),
-             self.query(successors_query, dict(node_id=node_id)),
-         )
-
-         connections = []
-
-         for neighbour in predecessors:
-             neighbour = neighbour["relation"]
-             connections.append((neighbour[0], {"relationship_name": neighbour[1]}, neighbour[2]))
-
-         for neighbour in successors:
-             neighbour = neighbour["relation"]
-             connections.append((neighbour[0], {"relationship_name": neighbour[1]}, neighbour[2]))
-
-         return connections
-
-     async def search(
-         self,
-         collection_name: str,
-         query_text: str = None,
-         query_vector: list[float] = None,
-         limit: int = 10,
-         with_vector: bool = False,
-     ):
-         """
-         Search for nodes in a collection based on text or vector query, with optional limitation
-         on results.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection in which to search.
-         - query_text (str): The text to search for (if using text-based query). (default
-           None)
-         - query_vector (list[float]): The vector representation of the query if using
-           vector-based search. (default None)
-         - limit (int): Maximum number of results to return from the search. (default 10)
-         - with_vector (bool): Flag indicating whether to return vectors with the search
-           results. (default False)
-
-         Returns:
-         --------
-
-         Returns the search results as a result set from the graph database.
-         """
-         if query_text is None and query_vector is None:
-             raise MissingQueryParameterError()
-
-         if query_text and not query_vector:
-             query_vector = (await self.embed_data([query_text]))[0]
-
-         # For FalkorDB, let's do a simple property-based search instead of vector search for now
-         # since the vector index might not be set up correctly
-         if "." in collection_name:
-             [label, attribute_name] = collection_name.split(".")
-         else:
-             # If no dot, treat the whole thing as a property search
-             label = ""
-             attribute_name = collection_name
-
-         # Simple text-based search if we have query_text
-         if query_text:
-             if label:
-                 query = f"""
-                 MATCH (n:{label})
-                 WHERE toLower(toString(n.{attribute_name})) CONTAINS toLower($query_text)
-                 RETURN n, 1.0 as score
-                 LIMIT $limit
-                 """
-             else:
-                 query = f"""
-                 MATCH (n)
-                 WHERE toLower(toString(n.{attribute_name})) CONTAINS toLower($query_text)
-                 RETURN n, 1.0 as score
-                 LIMIT $limit
-                 """
-
-             params = {"query_text": query_text, "limit": limit}
-             result = self.query(query, params)
-             return result.result_set
-         else:
-             # For vector search, return empty for now since vector indexing needs proper setup
-             return []
-
-     async def batch_search(
-         self,
-         collection_name: str,
-         query_texts: list[str],
-         limit: int = None,
-         with_vectors: bool = False,
-     ):
-         """
-         Perform batch search across multiple queries based on text inputs and return results
-         asynchronously.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection in which to perform the
-           searches.
-         - query_texts (list[str]): A list of text queries to search for.
-         - limit (int): Optional limit for the search results for each query. (default None)
-         - with_vectors (bool): Flag indicating whether to return vectors with the results.
-           (default False)
-
-         Returns:
-         --------
-
-         Returns a list of results for each search query executed in parallel.
-         """
-         query_vectors = await self.embedding_engine.embed_text(query_texts)
-
-         return await asyncio.gather(
-             *[
-                 self.search(
-                     collection_name=collection_name,
-                     query_vector=query_vector,
-                     limit=limit,
-                     with_vector=with_vectors,
-                 )
-                 for query_vector in query_vectors
-             ]
-         )
-
-     async def get_graph_data(self):
-         """
-         Retrieve all nodes and edges from the graph along with their properties.
-
-         Returns:
-         --------
-
-         Returns a tuple containing lists of nodes and edges data retrieved from the graph.
-         """
-         query = "MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties"
-
-         result = self.query(query)
-
-         nodes = [
-             (
-                 record[2]["id"],
-                 record[2],
-             )
-             for record in result.result_set
-         ]
-
-         query = """
-         MATCH (n)-[r]->(m)
-         RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
-         """
-         result = self.query(query)
-         edges = [
-             (
-                 record[3]["source_node_id"],
-                 record[3]["target_node_id"],
-                 record[2],
-                 record[3],
-             )
-             for record in result.result_set
-         ]
-
-         return (nodes, edges)
-
-     async def delete_data_points(self, collection_name: str, data_point_ids: list[UUID]):
-         """
-         Remove specified data points from the graph database based on their IDs.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection from which to delete the data
-           points.
-         - data_point_ids (list[UUID]): A list of UUIDs representing the data points to
-           delete.
-
-         Returns:
-         --------
-
-         Returns the result of the deletion operation from the database.
-         """
-         return self.query(
-             "MATCH (node) WHERE node.id IN $node_ids DETACH DELETE node",
-             {
-                 "node_ids": [str(data_point) for data_point in data_point_ids],
-             },
-         )
-
-     async def delete_node(self, node_id: str) -> None:
-         """
-         Delete a specified node from the graph by its ID.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier for the node to delete.
-         """
-         query = f"MATCH (node {{id: '{node_id}'}}) DETACH DELETE node"
-         self.query(query)
-
-     async def delete_nodes(self, node_ids: List[str]) -> None:
-         """
-         Delete multiple nodes from the graph by their identifiers.
-
-         Parameters:
-         -----------
-
-         - node_ids (List[str]): A list of unique identifiers for the nodes to delete.
-         """
-         for node_id in node_ids:
-             await self.delete_node(node_id)
-
-     async def delete_graph(self):
-         """
-         Delete the entire graph along with all its indices and nodes.
-         """
-         try:
-             graph = self.driver.select_graph(self.graph_name)
-
-             indices = graph.list_indices()
-             for index in indices.result_set:
-                 for field in index[1]:
-                     graph.drop_node_vector_index(index[0], field)
-
-             graph.delete()
-         except Exception as e:
-             print(f"Error deleting graph: {e}")
-
-     async def get_node(self, node_id: str) -> Optional[NodeData]:
-         """
-         Retrieve a single node from the graph using its ID.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier of the node to retrieve.
-         """
-         result = self.query(
-             "MATCH (node) WHERE node.id = $node_id RETURN node",
-             {"node_id": node_id},
-         )
-
-         if result.result_set and len(result.result_set) > 0:
-             # FalkorDB returns node objects as first element in the result list
-             return result.result_set[0][0].properties
-         return None
-
-     async def get_nodes(self, node_ids: List[str]) -> List[NodeData]:
-         """
-         Retrieve multiple nodes from the graph using their IDs.
-
-         Parameters:
-         -----------
-
-         - node_ids (List[str]): A list of unique identifiers for the nodes to retrieve.
-         """
-         result = self.query(
-             "MATCH (node) WHERE node.id IN $node_ids RETURN node",
-             {"node_ids": node_ids},
-         )
-
-         nodes = []
-         if result.result_set:
-             for record in result.result_set:
-                 # FalkorDB returns node objects as first element in each record
-                 nodes.append(record[0].properties)
-         return nodes
-
-     async def get_neighbors(self, node_id: str) -> List[NodeData]:
-         """
-         Get all neighboring nodes connected to the specified node.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier of the node for which to retrieve neighbors.
-         """
-         result = self.query(
-             "MATCH (node)-[]-(neighbor) WHERE node.id = $node_id RETURN DISTINCT neighbor",
-             {"node_id": node_id},
-         )
-
-         neighbors = []
-         if result.result_set:
-             for record in result.result_set:
-                 # FalkorDB returns neighbor objects as first element in each record
-                 neighbors.append(record[0].properties)
-         return neighbors
-
-     async def get_edges(self, node_id: str) -> List[EdgeData]:
-         """
-         Retrieve all edges that are connected to the specified node.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier of the node whose edges are to be retrieved.
-         """
-         result = self.query(
-             """
-             MATCH (n)-[r]-(m)
-             WHERE n.id = $node_id
-             RETURN n.id AS source_id, m.id AS target_id, type(r) AS relationship_name, properties(r) AS properties
-             """,
-             {"node_id": node_id},
-         )
-
-         edges = []
-         if result.result_set:
-             for record in result.result_set:
-                 # FalkorDB returns values by index: source_id, target_id, relationship_name, properties
-                 edges.append(
-                     (
-                         record[0],  # source_id
-                         record[1],  # target_id
-                         record[2],  # relationship_name
-                         record[3],  # properties
-                     )
-                 )
-         return edges
-
-     async def has_edge(self, source_id: str, target_id: str, relationship_name: str) -> bool:
-         """
-         Verify if an edge exists between two specified nodes.
-
-         Parameters:
-         -----------
-
-         - source_id (str): Unique identifier of the source node.
-         - target_id (str): Unique identifier of the target node.
-         - relationship_name (str): Name of the relationship to verify.
-         """
-         # Check both the sanitized relationship type and the original name in properties
-         sanitized_relationship = self.sanitize_relationship_name(relationship_name)
-
-         result = self.query(
-             f"""
-             MATCH (source)-[r:{sanitized_relationship}]->(target)
-             WHERE source.id = $source_id AND target.id = $target_id
-             AND (r.relationship_name = $relationship_name OR NOT EXISTS(r.relationship_name))
-             RETURN COUNT(r) > 0 AS edge_exists
-             """,
-             {
-                 "source_id": source_id,
-                 "target_id": target_id,
-                 "relationship_name": relationship_name,
-             },
-         )
-
-         if result.result_set and len(result.result_set) > 0:
-             # FalkorDB returns scalar results as a list, access by index instead of key
-             return result.result_set[0][0]
-         return False
-
-     async def get_graph_metrics(self, include_optional: bool = False) -> Dict[str, Any]:
-         """
-         Fetch metrics and statistics of the graph, possibly including optional details.
-
-         Parameters:
-         -----------
-
-         - include_optional (bool): Flag indicating whether to include optional metrics or
-           not. (default False)
-         """
-         # Get basic node and edge counts
-         node_result = self.query("MATCH (n) RETURN count(n) AS node_count")
-         edge_result = self.query("MATCH ()-[r]->() RETURN count(r) AS edge_count")
-
-         # FalkorDB returns scalar results as a list, access by index instead of key
-         num_nodes = node_result.result_set[0][0] if node_result.result_set else 0
-         num_edges = edge_result.result_set[0][0] if edge_result.result_set else 0
-
-         metrics = {
-             "num_nodes": num_nodes,
-             "num_edges": num_edges,
-             "mean_degree": (2 * num_edges) / num_nodes if num_nodes > 0 else 0,
-             "edge_density": num_edges / (num_nodes * (num_nodes - 1)) if num_nodes > 1 else 0,
-             "num_connected_components": 1,  # Simplified for now
-             "sizes_of_connected_components": [num_nodes] if num_nodes > 0 else [],
-         }
-
-         if include_optional:
-             # Add optional metrics - simplified implementation
-             metrics.update(
-                 {
-                     "num_selfloops": 0,  # Simplified
-                     "diameter": -1,  # Not implemented
-                     "avg_shortest_path_length": -1,  # Not implemented
-                     "avg_clustering": -1,  # Not implemented
-                 }
-             )
-         else:
-             metrics.update(
-                 {
-                     "num_selfloops": -1,
-                     "diameter": -1,
-                     "avg_shortest_path_length": -1,
-                     "avg_clustering": -1,
-                 }
-             )
-
-         return metrics
-
-     async def get_document_subgraph(self, content_hash: str):
-         """
-         Get a subgraph related to a specific document by content hash.
-
-         Parameters:
-         -----------
-
-         - content_hash (str): The content hash of the document to find.
-         """
-         query = """
-         MATCH (d) WHERE d.id CONTAINS $content_hash
-         OPTIONAL MATCH (d)<-[:CHUNK_OF]-(c)
-         OPTIONAL MATCH (c)-[:HAS_ENTITY]->(e)
-         OPTIONAL MATCH (e)-[:IS_INSTANCE_OF]->(et)
-         RETURN d AS document,
-                COLLECT(DISTINCT c) AS chunks,
-                COLLECT(DISTINCT e) AS orphan_entities,
-                COLLECT(DISTINCT c) AS made_from_nodes,
-                COLLECT(DISTINCT et) AS orphan_types
-         """
-
-         result = self.query(query, {"content_hash": f"text_{content_hash}"})
-
-         if not result.result_set or not result.result_set[0]:
-             return None
-
-         # Convert result to dictionary format
-         # FalkorDB returns values by index: document, chunks, orphan_entities, made_from_nodes, orphan_types
-         record = result.result_set[0]
-         return {
-             "document": record[0],
-             "chunks": record[1],
-             "orphan_entities": record[2],
-             "made_from_nodes": record[3],
-             "orphan_types": record[4],
-         }
-
-     async def get_degree_one_nodes(self, node_type: str):
-         """
-         Get all nodes that have only one connection.
-
-         Parameters:
-         -----------
-
-         - node_type (str): The type of nodes to filter by, must be 'Entity' or 'EntityType'.
-         """
-         if not node_type or node_type not in ["Entity", "EntityType"]:
-             raise ValueError("node_type must be either 'Entity' or 'EntityType'")
-
-         result = self.query(
-             f"""
-             MATCH (n:{node_type})
-             WITH n, COUNT {{ MATCH (n)--() }} as degree
-             WHERE degree = 1
-             RETURN n
-             """
-         )
-
-         # FalkorDB returns node objects as first element in each record
-         return [record[0] for record in result.result_set] if result.result_set else []
-
-     async def get_nodeset_subgraph(
-         self, node_type: Type[Any], node_name: List[str]
-     ) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]:
-         """
-         Fetch a subgraph consisting of a specific set of nodes and their relationships.
-
-         Parameters:
-         -----------
-
-         - node_type (Type[Any]): The type of nodes to include in the subgraph.
-         - node_name (List[str]): A list of names of the nodes to include in the subgraph.
-         """
-         label = node_type.__name__
-
-         # Find primary nodes of the specified type and names
-         primary_query = f"""
-         UNWIND $names AS wantedName
-         MATCH (n:{label})
-         WHERE n.name = wantedName
-         RETURN DISTINCT n.id, properties(n) AS properties
-         """
-
-         primary_result = self.query(primary_query, {"names": node_name})
-         if not primary_result.result_set:
-             return [], []
-
-         # FalkorDB returns values by index: id, properties
-         primary_ids = [record[0] for record in primary_result.result_set]
-
-         # Find neighbors of primary nodes
-         neighbor_query = """
-         MATCH (n)-[]-(neighbor)
-         WHERE n.id IN $ids
-         RETURN DISTINCT neighbor.id, properties(neighbor) AS properties
-         """
-
-         neighbor_result = self.query(neighbor_query, {"ids": primary_ids})
-         # FalkorDB returns values by index: id, properties
-         neighbor_ids = (
-             [record[0] for record in neighbor_result.result_set]
-             if neighbor_result.result_set
-             else []
-         )
-
-         all_ids = list(set(primary_ids + neighbor_ids))
-
-         # Get all nodes in the subgraph
-         nodes_query = """
-         MATCH (n)
-         WHERE n.id IN $ids
-         RETURN n.id, properties(n) AS properties
-         """
-
-         nodes_result = self.query(nodes_query, {"ids": all_ids})
-         nodes = []
-         if nodes_result.result_set:
-             for record in nodes_result.result_set:
-                 # FalkorDB returns values by index: id, properties
-                 nodes.append((record[0], record[1]))
-
-         # Get edges between these nodes
-         edges_query = """
-         MATCH (a)-[r]->(b)
-         WHERE a.id IN $ids AND b.id IN $ids
-         RETURN a.id AS source_id, b.id AS target_id, type(r) AS relationship_name, properties(r) AS properties
-         """
-
-         edges_result = self.query(edges_query, {"ids": all_ids})
-         edges = []
-         if edges_result.result_set:
-             for record in edges_result.result_set:
-                 # FalkorDB returns values by index: source_id, target_id, relationship_name, properties
-                 edges.append(
-                     (
-                         record[0],  # source_id
-                         record[1],  # target_id
-                         record[2],  # relationship_name
-                         record[3],  # properties
-                     )
-                 )
-
-         return nodes, edges
-
-     async def prune(self):
-         """
-         Prune the graph by deleting the entire graph structure.
-         """
-         await self.delete_graph()
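
Usage sketch (not part of the diff): the adapter deleted above was constructed directly from a host, a port, and an embedding engine, and exposed async node/edge methods on top of a synchronous query() call. The following minimal sketch, valid only against 0.3.4.dev4, shows how it could have been driven based on the signatures in the removed file; the host and port values and the get_embedding_engine() wiring (from the module listed as item 33 above) are assumptions for illustration, not taken from this diff.

import asyncio

from cognee.infrastructure.databases.hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter
from cognee.infrastructure.databases.vector.embeddings.get_embedding_engine import (
    get_embedding_engine,
)


async def main():
    # Hypothetical connection values; the adapter itself hard-codes graph_name="cognee_graph".
    adapter = FalkorDBAdapter(
        database_url="localhost",
        database_port=6379,
        embedding_engine=get_embedding_engine(),
    )

    # add_node(node_id, properties) cleans the properties and MERGEs the node.
    await adapter.add_node("node-1", {"name": "example"})

    # get_node(node_id) returns the node's properties dict, or None if absent.
    print(await adapter.get_node("node-1"))


asyncio.run(main())

In 0.3.6 this import path no longer exists; the FalkorDB adapter and its test (cognee/tests/test_falkordb.py, item 175) were removed from the package.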