cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. cognee/api/client.py +3 -1
  2. cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
  3. cognee/infrastructure/databases/graph/kuzu/adapter.py +31 -9
  4. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
  5. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +103 -64
  6. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +10 -3
  7. cognee/infrastructure/databases/vector/create_vector_engine.py +3 -11
  8. cognee/modules/data/models/Data.py +2 -2
  9. cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
  10. cognee/modules/graph/cognee_graph/CogneeGraph.py +39 -20
  11. cognee/modules/graph/methods/get_formatted_graph_data.py +1 -1
  12. cognee/modules/pipelines/operations/run_tasks.py +1 -1
  13. cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
  14. cognee/modules/retrieval/chunks_retriever.py +23 -1
  15. cognee/modules/retrieval/code_retriever.py +64 -5
  16. cognee/modules/retrieval/completion_retriever.py +12 -10
  17. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  18. cognee/modules/retrieval/insights_retriever.py +4 -0
  19. cognee/modules/retrieval/natural_language_retriever.py +6 -10
  20. cognee/modules/retrieval/summaries_retriever.py +23 -1
  21. cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
  22. cognee/modules/settings/get_settings.py +0 -4
  23. cognee/modules/settings/save_vector_db_config.py +1 -1
  24. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
  25. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev0.dist-info}/METADATA +5 -7
  26. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev0.dist-info}/RECORD +29 -29
  27. cognee/tests/test_weaviate.py +0 -94
  28. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev0.dist-info}/WHEEL +0 -0
  29. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev0.dist-info}/licenses/LICENSE +0 -0
  30. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/client.py CHANGED
@@ -74,7 +74,9 @@ if CORS_ALLOWED_ORIGINS:
74
74
  origin.strip() for origin in CORS_ALLOWED_ORIGINS.split(",") if origin.strip()
75
75
  ]
76
76
  else:
77
- allowed_origins = [] # Block all except explicitly set origins
77
+ allowed_origins = [
78
+ "http://localhost:3000",
79
+ ] # Block all except explicitly set origins
78
80
 
79
81
  app.add_middleware(
80
82
  CORSMiddleware,
@@ -290,7 +290,7 @@ def get_datasets_router() -> APIRouter:
290
290
  if dataset is None:
291
291
  raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
292
292
 
293
- graph_data = await get_formatted_graph_data(dataset)
293
+ graph_data = await get_formatted_graph_data(dataset.id, user.id)
294
294
 
295
295
  return graph_data
296
296
 
@@ -353,7 +353,7 @@ def get_datasets_router() -> APIRouter:
353
353
 
354
354
  @router.get("/status", response_model=dict[str, PipelineRunStatus])
355
355
  async def get_dataset_status(
356
- datasets: Annotated[List[UUID], Query(alias="dataset")] = None,
356
+ datasets: Annotated[List[UUID], Query(alias="dataset")] = [],
357
357
  user: User = Depends(get_authenticated_user),
358
358
  ):
359
359
  """
@@ -72,11 +72,36 @@ class KuzuAdapter(GraphDBInterface):
72
72
 
73
73
  run_sync(file_storage.ensure_directory_exists())
74
74
 
75
- self.db = Database(
76
- self.db_path,
77
- buffer_pool_size=256 * 1024 * 1024, # 256MB buffer pool
78
- max_db_size=1024 * 1024 * 1024,
79
- )
75
+ try:
76
+ self.db = Database(
77
+ self.db_path,
78
+ buffer_pool_size=2048 * 1024 * 1024, # 2048MB buffer pool
79
+ max_db_size=4096 * 1024 * 1024,
80
+ )
81
+ except RuntimeError:
82
+ from .kuzu_migrate import read_kuzu_storage_version
83
+ import kuzu
84
+
85
+ kuzu_db_version = read_kuzu_storage_version(self.db_path)
86
+ if (
87
+ kuzu_db_version == "0.9.0" or kuzu_db_version == "0.8.2"
88
+ ) and kuzu_db_version != kuzu.__version__:
89
+ # Try to migrate kuzu database to latest version
90
+ from .kuzu_migrate import kuzu_migration
91
+
92
+ kuzu_migration(
93
+ new_db=self.db_path + "_new",
94
+ old_db=self.db_path,
95
+ new_version=kuzu.__version__,
96
+ old_version=kuzu_db_version,
97
+ overwrite=True,
98
+ )
99
+
100
+ self.db = Database(
101
+ self.db_path,
102
+ buffer_pool_size=2048 * 1024 * 1024, # 2048MB buffer pool
103
+ max_db_size=4096 * 1024 * 1024,
104
+ )
80
105
 
81
106
  self.db.init_database()
82
107
  self.connection = Connection(self.db)
@@ -1438,11 +1463,8 @@ class KuzuAdapter(GraphDBInterface):
1438
1463
  It raises exceptions for failures occurring during deletion processes.
1439
1464
  """
1440
1465
  try:
1441
- # Use DETACH DELETE to remove both nodes and their relationships in one operation
1442
- await self.query("MATCH (n:Node) DETACH DELETE n")
1443
- logger.info("Cleared all data from graph while preserving structure")
1444
-
1445
1466
  if self.connection:
1467
+ self.connection.close()
1446
1468
  self.connection = None
1447
1469
  if self.db:
1448
1470
  self.db.close()
@@ -0,0 +1,281 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kuzu Database Migration Script
4
+
5
+ This script migrates Kuzu databases between different versions by:
6
+ 1. Setting up isolated Python environments for each Kuzu version
7
+ 2. Exporting data from the source database using the old version
8
+ 3. Importing data into the target database using the new version
9
+ 4. If overwrite is enabled target database will replace source database and source database will have the prefix _old
10
+ 5. If delete-old is enabled target database will be renamed to source database and source database will be deleted
11
+
12
+ The script automatically handles:
13
+ - Environment setup (creates virtual environments as needed)
14
+ - Export/import validation
15
+ - Error handling and reporting
16
+
17
+ Usage Examples:
18
+ # Basic migration from 0.9.0 to 0.11.0
19
+ python kuzu_migrate.py --old-version 0.9.0 --new-version 0.11.0 --old-db /path/to/old/database --new-db /path/to/new/database
20
+
21
+ Requirements:
22
+ - Python 3.7+
23
+ - Internet connection (to download Kuzu packages)
24
+ - Sufficient disk space for virtual environments and temporary exports
25
+
26
+ Notes:
27
+ - Can only be used to migrate to newer Kuzu versions, from 0.11.0 onwards
28
+ """
29
+
30
+ import tempfile
31
+ import sys
32
+ import struct
33
+ import shutil
34
+ import subprocess
35
+ import argparse
36
+ import os
37
+
38
+
39
+ kuzu_version_mapping = {
40
+ 34: "0.7.0",
41
+ 35: "0.7.1",
42
+ 36: "0.8.2",
43
+ 37: "0.9.0",
44
+ 38: "0.10.1",
45
+ 39: "0.11.0",
46
+ }
47
+
48
+
49
+ def read_kuzu_storage_version(kuzu_db_path: str) -> int:
50
+ """
51
+ Reads the Kùzu storage version code from the first catalog.bin file bytes.
52
+
53
+ :param kuzu_db_path: Path to the Kuzu database file/directory.
54
+ :return: Storage version code as an integer.
55
+ """
56
+ if os.path.isdir(kuzu_db_path):
57
+ kuzu_version_file_path = os.path.join(kuzu_db_path, "catalog.kz")
58
+ if not os.path.isfile(kuzu_version_file_path):
59
+ raise FileExistsError("Kuzu catalog.kz file does not exist")
60
+ else:
61
+ kuzu_version_file_path = kuzu_db_path
62
+
63
+ with open(kuzu_version_file_path, "rb") as f:
64
+ # Skip the 3-byte magic "KUZ" and one byte of padding
65
+ f.seek(4)
66
+ # Read the next 8 bytes as a little-endian unsigned 64-bit integer
67
+ data = f.read(8)
68
+ if len(data) < 8:
69
+ raise ValueError(
70
+ f"File '{kuzu_version_file_path}' does not contain a storage version code."
71
+ )
72
+ version_code = struct.unpack("<Q", data)[0]
73
+
74
+ if kuzu_version_mapping.get(version_code):
75
+ return kuzu_version_mapping[version_code]
76
+ else:
77
+ ValueError("Could not map version_code to proper Kuzu version.")
78
+
79
+
80
+ def ensure_env(version: str, export_dir) -> str:
81
+ """
82
+ Create (if needed) a venv at .kuzu_envs/{version} and install kuzu=={version}.
83
+ Returns the path to the venv's python executable.
84
+ """
85
+ # Use temp directory to create venv
86
+ kuzu_envs_dir = os.path.join(export_dir, ".kuzu_envs")
87
+
88
+ # venv base under the script directory
89
+ base = os.path.join(kuzu_envs_dir, version)
90
+ py_bin = os.path.join(base, "bin", "python")
91
+ # If environment already exists clean it
92
+ if os.path.isfile(py_bin):
93
+ shutil.rmtree(base)
94
+
95
+ print(f"→ Setting up venv for Kùzu {version}...", file=sys.stderr)
96
+ # Create venv
97
+ # NOTE: Running python in debug mode can cause issues with creating a virtual environment from that python instance
98
+ subprocess.run([sys.executable, "-m", "venv", base], check=True)
99
+ # Install the specific Kùzu version
100
+ subprocess.run([py_bin, "-m", "pip", "install", "--upgrade", "pip"], check=True)
101
+ subprocess.run([py_bin, "-m", "pip", "install", f"kuzu=={version}"], check=True)
102
+ return py_bin
103
+
104
+
105
+ def run_migration_step(python_exe: str, db_path: str, cypher: str):
106
+ """
107
+ Uses the given python_exe to execute a short snippet that
108
+ connects to the Kùzu database and runs a Cypher command.
109
+ """
110
+ snippet = f"""
111
+ import kuzu
112
+ db = kuzu.Database(r"{db_path}")
113
+ conn = kuzu.Connection(db)
114
+ conn.execute(r\"\"\"{cypher}\"\"\")
115
+ """
116
+ proc = subprocess.run([python_exe, "-c", snippet], capture_output=True, text=True)
117
+ if proc.returncode != 0:
118
+ print(f"[ERROR] {cypher} failed:\n{proc.stderr}", file=sys.stderr)
119
+ sys.exit(proc.returncode)
120
+
121
+
122
+ def kuzu_migration(new_db, old_db, new_version, old_version=None, overwrite=None, delete_old=None):
123
+ """
124
+ Main migration function that handles the complete migration process.
125
+ """
126
+ print(f"🔄 Migrating Kuzu database from {old_version} to {new_version}", file=sys.stderr)
127
+ print(f"📂 Source: {old_db}", file=sys.stderr)
128
+ print("", file=sys.stderr)
129
+
130
+ # If version of old kuzu db is not provided try to determine it based on file info
131
+ if not old_version:
132
+ old_version = read_kuzu_storage_version(old_db)
133
+
134
+ # Check if old database exists
135
+ if not os.path.exists(old_db):
136
+ print(f"Source database '{old_db}' does not exist.", file=sys.stderr)
137
+ sys.exit(1)
138
+
139
+ # Prepare target - ensure parent directory exists but remove target if it exists
140
+ parent_dir = os.path.dirname(new_db)
141
+ if parent_dir:
142
+ os.makedirs(parent_dir, exist_ok=True)
143
+
144
+ if os.path.exists(new_db):
145
+ raise FileExistsError(
146
+ "File already exists at new database location, remove file or change new database file path to continue"
147
+ )
148
+
149
+ # Use temp directory for all processing, it will be cleaned up after with statement
150
+ with tempfile.TemporaryDirectory() as export_dir:
151
+ # Set up environments
152
+ print(f"Setting up Kuzu {old_version} environment...", file=sys.stderr)
153
+ old_py = ensure_env(old_version, export_dir)
154
+ print(f"Setting up Kuzu {new_version} environment...", file=sys.stderr)
155
+ new_py = ensure_env(new_version, export_dir)
156
+
157
+ export_file = os.path.join(export_dir, "kuzu_export")
158
+ print(f"Exporting old DB → {export_dir}", file=sys.stderr)
159
+ run_migration_step(old_py, old_db, f"EXPORT DATABASE '{export_file}'")
160
+ print("Export complete.", file=sys.stderr)
161
+
162
+ # Check if export files were created and have content
163
+ schema_file = os.path.join(export_file, "schema.cypher")
164
+ if not os.path.exists(schema_file) or os.path.getsize(schema_file) == 0:
165
+ raise ValueError(f"Schema file not found: {schema_file}")
166
+
167
+ print(f"Importing into new DB at {new_db}", file=sys.stderr)
168
+ run_migration_step(new_py, new_db, f"IMPORT DATABASE '{export_file}'")
169
+ print("Import complete.", file=sys.stderr)
170
+
171
+ # Rename new kuzu database to old kuzu database name if enabled
172
+ if overwrite or delete_old:
173
+ # Remove kuzu lock from migrated DB
174
+ lock_file = new_db + ".lock"
175
+ if os.path.exists(lock_file):
176
+ os.remove(lock_file)
177
+ rename_databases(old_db, old_version, new_db, delete_old)
178
+
179
+ print("✅ Kuzu graph database migration finished successfully!")
180
+
181
+
182
+ def rename_databases(old_db: str, old_version: str, new_db: str, delete_old: bool):
183
+ """
184
+ When overwrite is enabled, back up the original old_db (file with .lock and .wal or directory)
185
+ by renaming it to *_old, and replace it with the newly imported new_db files.
186
+
187
+ When delete_old is enabled replace the old database with the new one and delete old database
188
+ """
189
+ base_dir = os.path.dirname(old_db)
190
+ name = os.path.basename(old_db.rstrip(os.sep))
191
+ # Add _old_ and version info to backup graph database
192
+ backup_database_name = f"{name}_old_" + old_version.replace(".", "_")
193
+ backup_base = os.path.join(base_dir, backup_database_name)
194
+
195
+ if os.path.isfile(old_db):
196
+ # File-based database: handle main file and accompanying lock/WAL
197
+ for ext in ["", ".wal"]:
198
+ src = old_db + ext
199
+ dst = backup_base + ext
200
+ if os.path.exists(src):
201
+ if delete_old:
202
+ os.remove(src)
203
+ else:
204
+ os.rename(src, dst)
205
+ print(f"Renamed '{src}' to '{dst}'", file=sys.stderr)
206
+ elif os.path.isdir(old_db):
207
+ # Directory-based Kuzu database
208
+ backup_dir = backup_base
209
+ if delete_old:
210
+ shutil.rmtree(old_db)
211
+ else:
212
+ os.rename(old_db, backup_dir)
213
+ print(f"Renamed directory '{old_db}' to '{backup_dir}'", file=sys.stderr)
214
+ else:
215
+ print(f"Original database path '{old_db}' not found for renaming.", file=sys.stderr)
216
+ sys.exit(1)
217
+
218
+ # Now move new files into place
219
+ for ext in ["", ".wal"]:
220
+ src_new = new_db + ext
221
+ dst_new = os.path.join(base_dir, name + ext)
222
+ if os.path.exists(src_new):
223
+ os.rename(src_new, dst_new)
224
+ print(f"Renamed '{src_new}' to '{dst_new}'", file=sys.stderr)
225
+
226
+
227
+ def main():
228
+ p = argparse.ArgumentParser(
229
+ description="Migrate Kùzu DB via PyPI versions",
230
+ epilog="""
231
+ Examples:
232
+ %(prog)s --old-version 0.9.0 --new-version 0.11.0 \\
233
+ --old-db /path/to/old/db --new-db /path/to/new/db --overwrite
234
+
235
+ Note: This script will create temporary virtual environments in .kuzu_envs/ directory
236
+ to isolate different Kuzu versions.
237
+ """,
238
+ formatter_class=argparse.RawDescriptionHelpFormatter,
239
+ )
240
+ p.add_argument(
241
+ "--old-version",
242
+ required=False,
243
+ default=None,
244
+ help="Source Kuzu version (e.g., 0.9.0). If not provided automatic kuzu version detection will be attempted.",
245
+ )
246
+ p.add_argument("--new-version", required=True, help="Target Kuzu version (e.g., 0.11.0)")
247
+ p.add_argument("--old-db", required=True, help="Path to source database directory")
248
+ p.add_argument(
249
+ "--new-db",
250
+ required=True,
251
+ help="Path to target database directory, it can't be the same path as the old database. Use the overwrite flag if you want to replace the old database with the new one.",
252
+ )
253
+ p.add_argument(
254
+ "--overwrite",
255
+ required=False,
256
+ action="store_true",
257
+ default=False,
258
+ help="Rename new-db to the old-db name and location, keeps old-db as backup if delete-old is not True",
259
+ )
260
+ p.add_argument(
261
+ "--delete-old",
262
+ required=False,
263
+ action="store_true",
264
+ default=False,
265
+ help="When overwrite and delete-old is True old-db will not be stored as backup",
266
+ )
267
+
268
+ args = p.parse_args()
269
+
270
+ kuzu_migration(
271
+ new_db=args.new_db,
272
+ old_db=args.old_db,
273
+ new_version=args.new_version,
274
+ old_version=args.old_version,
275
+ overwrite=args.overwrite,
276
+ delete_old=args.delete_old,
277
+ )
278
+
279
+
280
+ if __name__ == "__main__":
281
+ main()
@@ -33,7 +33,7 @@ from .neo4j_metrics_utils import (
33
33
  from .deadlock_retry import deadlock_retry
34
34
 
35
35
 
36
- logger = get_logger("Neo4jAdapter", level=ERROR)
36
+ logger = get_logger("Neo4jAdapter")
37
37
 
38
38
  BASE_LABEL = "__Node__"
39
39
 
@@ -870,34 +870,52 @@ class Neo4jAdapter(GraphDBInterface):
870
870
 
871
871
  A tuple containing two lists: nodes and edges with their properties.
872
872
  """
873
- query = "MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties"
873
+ import time
874
874
 
875
- result = await self.query(query)
875
+ start_time = time.time()
876
876
 
877
- nodes = [
878
- (
879
- record["properties"]["id"],
880
- record["properties"],
877
+ try:
878
+ # Retrieve nodes
879
+ query = "MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties"
880
+ result = await self.query(query)
881
+
882
+ nodes = []
883
+ for record in result:
884
+ nodes.append(
885
+ (
886
+ record["properties"]["id"],
887
+ record["properties"],
888
+ )
889
+ )
890
+
891
+ # Retrieve edges
892
+ query = """
893
+ MATCH (n)-[r]->(m)
894
+ RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
895
+ """
896
+ result = await self.query(query)
897
+
898
+ edges = []
899
+ for record in result:
900
+ edges.append(
901
+ (
902
+ record["properties"]["source_node_id"],
903
+ record["properties"]["target_node_id"],
904
+ record["type"],
905
+ record["properties"],
906
+ )
907
+ )
908
+
909
+ retrieval_time = time.time() - start_time
910
+ logger.info(
911
+ f"Retrieved {len(nodes)} nodes and {len(edges)} edges in {retrieval_time:.2f} seconds"
881
912
  )
882
- for record in result
883
- ]
884
913
 
885
- query = """
886
- MATCH (n)-[r]->(m)
887
- RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
888
- """
889
- result = await self.query(query)
890
- edges = [
891
- (
892
- record["properties"]["source_node_id"],
893
- record["properties"]["target_node_id"],
894
- record["type"],
895
- record["properties"],
896
- )
897
- for record in result
898
- ]
914
+ return (nodes, edges)
899
915
 
900
- return (nodes, edges)
916
+ except Exception as e:
917
+ logger.error(f"Error during graph data retrieval: {str(e)}")
918
+ raise
901
919
 
902
920
  async def get_nodeset_subgraph(
903
921
  self, node_type: Type[Any], node_name: List[str]
@@ -918,50 +936,71 @@ class Neo4jAdapter(GraphDBInterface):
918
936
  - Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]}: A tuple
919
937
  containing nodes and edges in the requested subgraph.
920
938
  """
921
- label = node_type.__name__
939
+ import time
922
940
 
923
- query = f"""
924
- UNWIND $names AS wantedName
925
- MATCH (n:`{label}`)
926
- WHERE n.name = wantedName
927
- WITH collect(DISTINCT n) AS primary
928
- UNWIND primary AS p
929
- OPTIONAL MATCH (p)--(nbr)
930
- WITH primary, collect(DISTINCT nbr) AS nbrs
931
- WITH primary + nbrs AS nodelist
932
- UNWIND nodelist AS node
933
- WITH collect(DISTINCT node) AS nodes
934
- MATCH (a)-[r]-(b)
935
- WHERE a IN nodes AND b IN nodes
936
- WITH nodes, collect(DISTINCT r) AS rels
937
- RETURN
938
- [n IN nodes |
939
- {{ id: n.id,
940
- properties: properties(n) }}] AS rawNodes,
941
- [r IN rels |
942
- {{ type: type(r),
943
- properties: properties(r) }}] AS rawRels
944
- """
941
+ start_time = time.time()
945
942
 
946
- result = await self.query(query, {"names": node_name})
947
- if not result:
948
- return [], []
943
+ try:
944
+ label = node_type.__name__
949
945
 
950
- raw_nodes = result[0]["rawNodes"]
951
- raw_rels = result[0]["rawRels"]
946
+ query = f"""
947
+ UNWIND $names AS wantedName
948
+ MATCH (n:`{label}`)
949
+ WHERE n.name = wantedName
950
+ WITH collect(DISTINCT n) AS primary
951
+ UNWIND primary AS p
952
+ OPTIONAL MATCH (p)--(nbr)
953
+ WITH primary, collect(DISTINCT nbr) AS nbrs
954
+ WITH primary + nbrs AS nodelist
955
+ UNWIND nodelist AS node
956
+ WITH collect(DISTINCT node) AS nodes
957
+ MATCH (a)-[r]-(b)
958
+ WHERE a IN nodes AND b IN nodes
959
+ WITH nodes, collect(DISTINCT r) AS rels
960
+ RETURN
961
+ [n IN nodes |
962
+ {{ id: n.id,
963
+ properties: properties(n) }}] AS rawNodes,
964
+ [r IN rels |
965
+ {{ type: type(r),
966
+ properties: properties(r) }}] AS rawRels
967
+ """
952
968
 
953
- nodes = [(n["properties"]["id"], n["properties"]) for n in raw_nodes]
954
- edges = [
955
- (
956
- r["properties"]["source_node_id"],
957
- r["properties"]["target_node_id"],
958
- r["type"],
959
- r["properties"],
969
+ result = await self.query(query, {"names": node_name})
970
+
971
+ if not result:
972
+ return [], []
973
+
974
+ raw_nodes = result[0]["rawNodes"]
975
+ raw_rels = result[0]["rawRels"]
976
+
977
+ # Process nodes
978
+ nodes = []
979
+ for n in raw_nodes:
980
+ nodes.append((n["properties"]["id"], n["properties"]))
981
+
982
+ # Process edges
983
+ edges = []
984
+ for r in raw_rels:
985
+ edges.append(
986
+ (
987
+ r["properties"]["source_node_id"],
988
+ r["properties"]["target_node_id"],
989
+ r["type"],
990
+ r["properties"],
991
+ )
992
+ )
993
+
994
+ retrieval_time = time.time() - start_time
995
+ logger.info(
996
+ f"Retrieved {len(nodes)} nodes and {len(edges)} edges for {node_type.__name__} in {retrieval_time:.2f} seconds"
960
997
  )
961
- for r in raw_rels
962
- ]
963
998
 
964
- return nodes, edges
999
+ return nodes, edges
1000
+
1001
+ except Exception as e:
1002
+ logger.error(f"Error during nodeset subgraph retrieval: {str(e)}")
1003
+ raise
965
1004
 
966
1005
  async def get_filtered_graph_data(self, attribute_filters):
967
1006
  """
@@ -1011,8 +1050,8 @@ class Neo4jAdapter(GraphDBInterface):
1011
1050
 
1012
1051
  edges = [
1013
1052
  (
1014
- record["source"],
1015
- record["target"],
1053
+ record["properties"]["source_node_id"],
1054
+ record["properties"]["target_node_id"],
1016
1055
  record["type"],
1017
1056
  record["properties"],
1018
1057
  )
@@ -49,9 +49,16 @@ class SQLAlchemyAdapter:
49
49
 
50
50
  run_sync(self.pull_from_s3())
51
51
 
52
- self.engine = create_async_engine(
53
- connection_string, poolclass=NullPool if "sqlite" in connection_string else None
54
- )
52
+ if "sqlite" in connection_string:
53
+ self.engine = create_async_engine(
54
+ connection_string,
55
+ poolclass=NullPool,
56
+ )
57
+ else:
58
+ self.engine = create_async_engine(
59
+ connection_string, pool_size=12, max_overflow=12, poolclass=None
60
+ )
61
+
55
62
  self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)
56
63
 
57
64
  async def push_to_s3(self) -> None:
@@ -19,7 +19,7 @@ def create_vector_engine(
19
19
  for each provider, raising an EnvironmentError if any are missing, or ImportError if the
20
20
  ChromaDB package is not installed.
21
21
 
22
- Supported providers include: Weaviate, Qdrant, pgvector, FalkorDB, ChromaDB, and
22
+ Supported providers include: Qdrant, pgvector, FalkorDB, ChromaDB, and
23
23
  LanceDB.
24
24
 
25
25
  Parameters:
@@ -30,7 +30,7 @@ def create_vector_engine(
30
30
  providers.
31
31
  - vector_db_key (str): The API key or access token for the vector database instance.
32
32
  - vector_db_provider (str): The name of the vector database provider to use (e.g.,
33
- 'weaviate', 'qdrant').
33
+ 'qdrant', 'pgvector').
34
34
 
35
35
  Returns:
36
36
  --------
@@ -48,15 +48,7 @@ def create_vector_engine(
48
48
  embedding_engine=embedding_engine,
49
49
  )
50
50
 
51
- if vector_db_provider == "weaviate":
52
- from .weaviate_db import WeaviateAdapter
53
-
54
- if not (vector_db_url and vector_db_key):
55
- raise EnvironmentError("Missing requred Weaviate credentials!")
56
-
57
- return WeaviateAdapter(vector_db_url, vector_db_key, embedding_engine=embedding_engine)
58
-
59
- elif vector_db_provider == "qdrant":
51
+ if vector_db_provider == "qdrant":
60
52
  if not (vector_db_url and vector_db_key):
61
53
  raise EnvironmentError("Missing requred Qdrant credentials!")
62
54
 
@@ -18,12 +18,12 @@ class Data(Base):
18
18
  mime_type = Column(String)
19
19
  raw_data_location = Column(String)
20
20
  owner_id = Column(UUID, index=True)
21
- tenant_id = Column(UUID, index=True, default=None)
21
+ tenant_id = Column(UUID, index=True, nullable=True)
22
22
  content_hash = Column(String)
23
23
  external_metadata = Column(JSON)
24
24
  node_set = Column(JSON, nullable=True) # Store NodeSet as JSON list of strings
25
25
  token_count = Column(Integer)
26
- data_size = Column(Integer) # File size in bytes
26
+ data_size = Column(Integer, nullable=True) # File size in bytes
27
27
  created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
28
28
  updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
29
29
 
@@ -18,11 +18,8 @@ class UnstructuredDocument(Document):
18
18
  except ModuleNotFoundError:
19
19
  raise UnstructuredLibraryImportError
20
20
 
21
- if self.raw_data_location.startswith("s3://"):
22
- async with open_data_file(self.raw_data_location, mode="rb") as f:
23
- elements = partition(file=f, content_type=self.mime_type)
24
- else:
25
- elements = partition(self.raw_data_location, content_type=self.mime_type)
21
+ async with open_data_file(self.raw_data_location, mode="rb") as f:
22
+ elements = partition(file=f, content_type=self.mime_type)
26
23
 
27
24
  in_memory_file = StringIO("\n\n".join([str(el) for el in elements]))
28
25
  in_memory_file.seek(0)