biocypher 0.9.2__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1317 @@
1
+ """
2
+ Neo4j connection management and Cypher interface.
3
+
4
+ A wrapper around the Neo4j driver which handles the DBMS connection and
5
+ provides basic management methods. This module is only used when BioCypher
6
+ is configured for online mode with Neo4j.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import contextlib
12
+ import itertools
13
+ import os
14
+ import re
15
+ import warnings
16
+
17
+ from typing import Literal
18
+
19
+ import appdirs
20
+ import yaml
21
+
22
+ from biocypher._logger import logger
23
+ from biocypher._misc import to_list
24
+
25
+ __all__ = ["CONFIG_FILES", "DEFAULT_CONFIG", "Neo4jDriver"]
26
+
27
+ # Try to import Neo4j driver, but don't fail if not available
28
+ try:
29
+ import neo4j
30
+ import neo4j.exceptions as neo4j_exc
31
+
32
+ NEO4J_AVAILABLE = True
33
+ except ImportError:
34
+ NEO4J_AVAILABLE = False
35
+ neo4j = None
36
+ neo4j_exc = None
37
+
38
# File names accepted for the YAML connection config.
CONFIG_FILES = Literal["neo4j.yaml", "neo4j.yml"]

# Fallback values for every setting that is still unset after the
# constructor arguments and the config file have been consulted.
DEFAULT_CONFIG = dict(
    user="neo4j",
    passwd="neo4j",
    db="neo4j",
    uri="neo4j://localhost:7687",
    fetch_size=1000,
    raise_errors=False,
    fallback_db=("system", "neo4j"),
    fallback_on=("TransientError",),
)
49
+
50
+
51
def _to_tuple(value):
    """Return ``value`` as a tuple, normalising it through ``to_list`` first."""
    as_list = to_list(value)
    return tuple(as_list)
54
+
55
+
56
def _to_set(value):
    """Return ``value`` as a set, normalising it through ``to_list`` first."""
    as_list = to_list(value)
    return set(as_list)
59
+
60
+
61
+ def _if_none(*values):
62
+ """Use the first item from values that is not None."""
63
+ for v in values:
64
+ if v is not None:
65
+ return v
66
+ return None
67
+
68
+
69
+ def _pretty_profile(d, lines=None, indent=0):
70
+ """
71
+ Pretty format a Neo4j profile dict.
72
+
73
+ Takes Neo4j profile dictionary and an optional header as
74
+ list and creates a list of strings to be printed.
75
+
76
+ Args:
77
+ d: Profile dictionary or list
78
+ lines: Optional list to append to
79
+ indent: Indentation level
80
+
81
+ Returns:
82
+ List of formatted strings
83
+ """
84
+ if lines is None:
85
+ lines = []
86
+
87
+ # ANSI color codes for terminal output
88
+ OKBLUE = "\033[94m"
89
+ WARNING = "\033[93m"
90
+ ENDC = "\033[0m"
91
+
92
+ # if more items, branch
93
+ if d:
94
+ if isinstance(d, list):
95
+ for sd in d:
96
+ _pretty_profile(sd, lines, indent)
97
+ elif isinstance(d, dict):
98
+ typ = d.pop("operatorType", None)
99
+ if typ:
100
+ lines.append(("\t" * indent) + "|" + "\t" + f"{OKBLUE}Step: {typ} {ENDC}")
101
+
102
+ # buffer children
103
+ chi = d.pop("children", None)
104
+
105
+ for key, value in d.items():
106
+ if key == "args":
107
+ _pretty_profile(value, lines, indent)
108
+ # both are there for some reason, sometimes
109
+ # both in the same process
110
+ elif key == "Time" or key == "time":
111
+ lines.append(
112
+ ("\t" * indent) + "|" + "\t" + str(key) + ": " + f"{WARNING}{value:,}{ENDC}".replace(",", " ")
113
+ )
114
+ else:
115
+ lines.append(("\t" * indent) + "|" + "\t" + str(key) + ": " + str(value))
116
+
117
+ # now the children
118
+ _pretty_profile(chi, lines, indent + 1)
119
+
120
+ return lines
121
+
122
+
123
def _get_neo4j_version(driver) -> str | None:
    """
    Ask the server which Neo4j version it runs.

    Runs ``CALL dbms.components()`` in a fresh session of ``driver`` and
    returns the first version reported. Any failure is logged as a warning
    and reported as "unknown".

    Args:
        driver: Neo4j driver instance

    Returns:
        Version string, or None if the driver package or the driver is
        missing, the query returns no rows, or an error occurs.
    """
    if not (NEO4J_AVAILABLE and driver):
        return None

    version_query = """
        CALL dbms.components()
        YIELD name, versions, edition
        UNWIND versions AS version
        RETURN version AS version
        """

    try:
        with driver.session() as session:
            rows = session.run(version_query).data()
            if rows:
                return rows[0]["version"]
    except Exception as e:
        # Best effort: version detection must never break the caller.
        logger.warning(f"Error detecting Neo4j version: {e}")
    return None
152
+
153
+
154
+ class Neo4jDriver:
155
+ """
156
+ Manage the connection to the Neo4j server.
157
+
158
+ A wrapper around the Neo4j driver that handles database connections
159
+ and provides convenient methods for querying and managing the database.
160
+ """
161
+
162
+ _connect_essential = ("uri", "user", "passwd")
163
+
164
def __init__(
    self,
    driver: neo4j.Driver | Neo4jDriver | None = None,
    db_name: str | None = None,
    db_uri: str | None = None,
    db_user: str | None = None,
    db_passwd: str | None = None,
    config: CONFIG_FILES | None = None,
    fetch_size: int = 1000,
    raise_errors: bool | None = None,
    wipe: bool = False,
    offline: bool = False,
    fallback_db: str | tuple[str] | None = None,
    fallback_on: str | set[str] | None = None,
    multi_db: bool | None = None,
    force_enterprise: bool = False,
    **kwargs,
):
    """
    Set up the wrapper: store the settings, connect, and prepare the database.

    Args:
        driver:
            A neo4j.Driver instance, created by neo4j.GraphDatabase.driver,
            or another object exposing such an instance as ``.driver``.
        db_name:
            Name of the database (Neo4j graph) to use.
        db_uri:
            Protocol, host and port to access the Neo4j server.
        db_user:
            Neo4j user name.
        db_passwd:
            Password of the Neo4j user.
        config:
            Path to a YAML config file which provides the URI, user
            name and password.
        fetch_size:
            Optional; the fetch size to use in database transactions.
        raise_errors:
            Raise the errors instead of turning them into log messages
            and returning None.
        wipe:
            Wipe the database after connection, ensuring the data is
            loaded into an empty database.
        offline:
            Disable any interaction to the server. Queries won't be
            executed. The config will be still stored in the object.
        fallback_db:
            Arbitrary number of fallback databases. If a query fails
            to run against the current database, it will be attempted
            against the fallback databases.
        fallback_on:
            Switch to the fallback databases upon these errors.
        multi_db:
            Whether to use multi-database mode (Neo4j 4.0+).
        force_enterprise:
            Skip the Community Edition detection and the Community
            Edition shortcuts taken when ensuring or wiping the database.
        kwargs:
            Ignored.

    Raises:
        ImportError: If the ``neo4j`` package could not be imported.
    """
    if not NEO4J_AVAILABLE:
        raise ImportError("Neo4j driver is not installed. Install it with: pip install neo4j>=5.0")

    # Accept a raw neo4j driver as well as a wrapper carrying one in `.driver`.
    self.driver = getattr(driver, "driver", driver)
    self._db_config = dict(
        uri=db_uri,
        user=db_user,
        passwd=db_passwd,
        db=db_name,
        fetch_size=fetch_size,
        raise_errors=raise_errors,
        fallback_db=fallback_db,
        fallback_on=fallback_on,
    )
    self._config_file = config
    self._drivers = {}
    self._queries = {}
    self._offline = offline
    self.multi_db = multi_db
    self._neo4j_version_cache = None
    self._force_enterprise = force_enterprise

    if not self.driver:
        logger.info("No driver provided, initialising it from local config.")
        self.db_connect()
    else:
        logger.info("Using the driver provided.")
        self._config_from_driver()
        self._register_current_driver()

    # Community Edition is assumed unless detection (or the caller) says otherwise.
    self._detect_and_handle_community_edition()

    self.ensure_db()

    if wipe:
        self.wipe_db()
260
+
261
def db_connect(self):
    """
    Open a driver for the configured server and register it.

    Reads the config file first when URI, user or password are missing.
    In offline mode no driver is created and ``self.driver`` is set to None.
    """
    if not self._connect_param_available:
        self.read_config()

    # The password is deliberately never written to the log.
    logger.info(f"Attempting to connect: uri={self.uri}, auth=(user, ***)")

    if not self.offline:
        self.driver = neo4j.GraphDatabase.driver(uri=self.uri, auth=self.auth)
        logger.info("Opened database connection.")
    else:
        self.driver = None
        logger.info("Offline mode, not connecting to database.")

    self._register_current_driver()
280
+
281
def _detect_and_handle_community_edition(self):
    """
    Detect Community Edition and adjust settings for compatibility.

    Does nothing without a driver, in offline mode, or when Enterprise
    Edition is forced. Otherwise the edition is queried through a
    temporary direct (``bolt``) connection; if the server is not
    Enterprise, or detection fails, the wrapper:

    1. Reconnects with ``bolt://`` / ``bolt+s://`` instead of
       ``neo4j://`` / ``neo4j+s://`` to avoid routing issues. The previous
       driver is only closed once the new connection is set up; if
       reconnecting fails, the original URI and driver are restored.
    2. Disables multi_db mode.
    3. Uses the default database 'neo4j' if a custom database was requested.
    """
    if not self.driver or self.offline:
        return

    # If Enterprise Edition is forced, skip detection
    if self._force_enterprise:
        logger.info("Enterprise Edition mode forced. Skipping Community Edition detection.")
        return

    # Map a routing scheme to its direct-connection counterpart; any other
    # scheme is used unchanged.
    original_uri = self.uri
    bolt_uri = original_uri
    for routing_scheme, direct_scheme in (("neo4j://", "bolt://"), ("neo4j+s://", "bolt+s://")):
        if original_uri.startswith(routing_scheme):
            bolt_uri = direct_scheme + original_uri[len(routing_scheme):]
            break

    # Detect the edition through a temporary direct connection.
    temp_driver = None
    supports_multi_db = False
    try:
        temp_driver = neo4j.GraphDatabase.driver(uri=bolt_uri, auth=self.auth)
        with temp_driver.session(database="neo4j") as session:
            result = session.run(
                """
                CALL dbms.components()
                YIELD edition
                RETURN edition CONTAINS 'enterprise' AS is_enterprise
                """
            )
            data = result.data()
            supports_multi_db = data[0].get("is_enterprise", False) if data else False
    except Exception as e:
        logger.debug(f"Error detecting Neo4j edition: {e}. Assuming Community Edition.")
        # If detection fails, assume Community Edition (safer)
        supports_multi_db = False
    finally:
        if temp_driver:
            temp_driver.close()

    if supports_multi_db:
        return

    logger.info(
        "Neo4j Community Edition detected (or detection failed). "
        "Multi-database features are not available. "
        "Adjusting configuration for compatibility."
    )

    if bolt_uri != original_uri:
        previous_driver = self.driver
        self._db_config["uri"] = bolt_uri
        try:
            self.db_connect()
        except Exception as e:
            # Roll back so the log message below is actually true: keep the
            # original URI and the driver that was working so far.
            failed_driver = self.driver
            self._db_config["uri"] = original_uri
            self.driver = previous_driver
            if failed_driver is not None and failed_driver is not previous_driver:
                with contextlib.suppress(Exception):
                    failed_driver.close()
            logger.warning(f"Failed to convert URI and reconnect: {e}. Continuing with original URI.")
        else:
            logger.info(f"Converted URI from {original_uri} to {bolt_uri} for Community Edition compatibility.")
            if previous_driver is not self.driver:
                # The replacement is in place; failing to close the old
                # driver must not undo that.
                with contextlib.suppress(Exception):
                    previous_driver.close()

    # Disable multi_db mode
    if self.multi_db:
        logger.info("Disabling multi-database mode for Community Edition.")
        self.multi_db = False

    # Use default database if a custom database was requested
    current_db = self.current_db
    if current_db and current_db.lower() != "neo4j":
        logger.warning(
            f"Requested database '{current_db}' is not supported in Community Edition. "
            f"Falling back to default database 'neo4j'."
        )
        self._db_config["db"] = "neo4j"
        self._register_current_driver()
372
+
373
+ @property
374
+ def _connect_param_available(self) -> bool:
375
+ """Check for essential connection parameters."""
376
+ return all(self._db_config.get(k, None) for k in self._connect_essential)
377
+
378
+ @property
379
+ def status(
380
+ self,
381
+ ) -> Literal[
382
+ "no driver",
383
+ "no connection",
384
+ "db offline",
385
+ "db online",
386
+ "offline",
387
+ ]:
388
+ """State of this driver object and its current database."""
389
+ if self.offline:
390
+ return "offline"
391
+
392
+ if not self.driver:
393
+ return "no driver"
394
+
395
+ db_status = self.db_status()
396
+ return f"db {db_status}" if db_status else "no connection"
397
+
398
+ @property
399
+ def uri(self) -> str:
400
+ """Database server URI (from config or built-in default)."""
401
+ return self._db_config.get("uri") or DEFAULT_CONFIG["uri"]
402
+
403
+ @property
404
+ def auth(self) -> tuple[str, str]:
405
+ """Database server user and password (from config or built-in default)."""
406
+ auth_tuple = self._db_config.get("auth")
407
+ if auth_tuple:
408
+ return tuple(auth_tuple)
409
+ return (
410
+ self._db_config.get("user") or DEFAULT_CONFIG["user"],
411
+ self._db_config.get("passwd") or DEFAULT_CONFIG["passwd"],
412
+ )
413
+
414
def read_config(self, section: str | None = None):
    """
    Read the configuration from a YAML file.

    When no existing config file is set, the current directory and the
    user config directory of biocypher are searched for ``neo4j.yaml`` /
    ``neo4j.yml``. Keys are lower-cased and mapped through a table of
    synonyms (e.g. ``password`` -> ``passwd``); a value from the file is
    used only for settings that are currently empty. Missing settings are
    finally filled from the built-in defaults.

    An empty file, or content that is not a mapping, is treated as "no
    settings" instead of raising.

    Args:
        section: Optional top-level key of the YAML document that holds
            the settings; the whole document is used when the key is
            absent or ``section`` is None.
    """
    config_key_synonyms = {
        "password": "passwd",
        "pw": "passwd",
        "username": "user",
        "login": "user",
        "host": "uri",
        "address": "uri",
        "server": "uri",
        "graph": "db",
        "database": "db",
        "name": "db",
    }

    if not self._config_file or not os.path.exists(self._config_file):
        confdirs = (".", appdirs.user_config_dir("biocypher", "biocypher"))
        conffiles = ("neo4j.yaml", "neo4j.yml")

        # NOTE: no early exit here, so the last existing candidate wins.
        for config_path_t in itertools.product(confdirs, conffiles):
            config_path_s = os.path.join(*config_path_t)
            if os.path.exists(config_path_s):
                self._config_file = config_path_s

    if self._config_file and os.path.exists(self._config_file):
        logger.info(f"Reading config from `{self._config_file}`.")

        with open(self._config_file, encoding="utf-8") as fp:
            # safe_load returns None for an empty document.
            conf = yaml.safe_load(fp) or {}

        entries = conf.get(section, conf) if isinstance(conf, dict) else None
        if not isinstance(entries, dict):
            logger.warning(f"Config file `{self._config_file}` does not contain a mapping, ignoring it.")
            entries = {}

        for k, v in entries.items():
            # YAML keys are not necessarily strings (e.g. bare numbers).
            k = str(k).lower()
            k = config_key_synonyms.get(k, k)

            if not self._db_config.get(k, None):
                self._db_config[k] = v

    elif not self._connect_param_available:
        logger.warning("No config available, falling back to defaults.")

    self._config_from_defaults()
455
+
456
+ def _config_from_driver(self):
457
+ """Extract configuration from an existing driver."""
458
+ from_driver = {
459
+ "uri": self._uri(
460
+ host=getattr(self.driver, "default_host", None),
461
+ port=getattr(self.driver, "default_port", None),
462
+ ),
463
+ "db": self.current_db,
464
+ "fetch_size": getattr(
465
+ getattr(self.driver, "_default_workspace_config", None),
466
+ "fetch_size",
467
+ None,
468
+ ),
469
+ "user": self.user,
470
+ "passwd": self.passwd,
471
+ }
472
+
473
+ for k, v in from_driver.items():
474
+ self._db_config[k] = self._db_config.get(k, v) or v
475
+
476
+ self._config_from_defaults()
477
+
478
def _config_from_defaults(self):
    """Set every config item that is missing or None to its built-in default."""
    missing = {
        key: default
        for key, default in DEFAULT_CONFIG.items()
        if self._db_config.get(key, None) is None
    }
    self._db_config.update(missing)
483
+
484
+ def _register_current_driver(self):
485
+ """Register the current driver for the current database."""
486
+ self._drivers[self.current_db] = self.driver
487
+
488
+ @staticmethod
489
+ def _uri(
490
+ host: str = "localhost",
491
+ port: str | int = 7687,
492
+ protocol: str = "neo4j",
493
+ ) -> str:
494
+ """Construct a Neo4j URI."""
495
+ return f"{protocol}://{host}:{port}/"
496
+
497
+ def close(self):
498
+ """Close the Neo4j driver if it exists and is open."""
499
+ if hasattr(self, "driver") and hasattr(self.driver, "close"):
500
+ self.driver.close()
501
+
502
+ def __del__(self):
503
+ """Cleanup on deletion."""
504
+ self.close()
505
+
506
+ @property
507
+ def current_db(self) -> str:
508
+ """Name of the current database."""
509
+ return self._db_config["db"] or self._driver_con_db or self.home_db or neo4j.DEFAULT_DATABASE
510
+
511
+ @current_db.setter
512
+ def current_db(self, name: str):
513
+ """Set the database currently in use."""
514
+ self._db_config["db"] = name
515
+ self.db_connect()
516
+
517
+ @property
518
+ def _driver_con_db(self) -> str | None:
519
+ """Get the database from the driver connection."""
520
+ if not self.driver:
521
+ return None
522
+
523
+ with warnings.catch_warnings():
524
+ warnings.simplefilter("ignore")
525
+ try:
526
+ driver_con = self.driver.verify_connectivity()
527
+ except neo4j_exc.ServiceUnavailable:
528
+ logger.error("Cannot access Neo4j server.")
529
+ return None
530
+
531
+ if driver_con:
532
+ first_con = next(iter(driver_con.values()))[0]
533
+ return first_con.get("db", None)
534
+
535
+ return None
536
+
537
+ @property
538
+ def home_db(self) -> str | None:
539
+ """Home database of the current user."""
540
+ return self._db_name("HOME")
541
+
542
+ @property
543
+ def default_db(self) -> str | None:
544
+ """Default database of the server."""
545
+ return self._db_name("DEFAULT")
546
+
547
def _db_name(self, which: Literal["HOME", "DEFAULT"] = "HOME") -> str | None:
    """
    Look up the HOME or DEFAULT database name on the server.

    Args:
        which: Which database to ask for.

    Returns:
        The ``name`` field of the first row returned, or None when the
        query yields nothing or the server cannot be reached or refuses
        the credentials.
    """
    try:
        rows, _summary = self.query(
            f"SHOW {which} DATABASE;",
            fallback_db=self._get_fallback_db,
        )
    except (neo4j_exc.AuthError, neo4j_exc.ServiceUnavailable) as e:
        logger.error(f"No connection to Neo4j server: {e}")
        return None

    return rows[0]["name"] if rows else None
561
+
562
@property
def _get_fallback_db(self) -> tuple[str]:
    """Fallback databases as a tuple: the ``_fallback_db`` override if set, else the config."""
    override = getattr(self, "_fallback_db", None)
    return _to_tuple(override or self._db_config["fallback_db"])
566
+
567
@property
def _get_fallback_on(self) -> set[str]:
    """Fallback error names as a set: the ``_fallback_on`` override if set, else the config."""
    override = getattr(self, "_fallback_on", None)
    return _to_set(override or self._db_config["fallback_on"])
571
+
572
+ def query(
573
+ self,
574
+ query: str,
575
+ db: str | None = None,
576
+ fetch_size: int | None = None,
577
+ write: bool = True,
578
+ explain: bool = False,
579
+ profile: bool = False,
580
+ fallback_db: str | tuple[str] | None = None,
581
+ fallback_on: str | set[str] | None = None,
582
+ raise_errors: bool | None = None,
583
+ parameters: dict | None = None,
584
+ **kwargs,
585
+ ) -> tuple[list[dict] | None, neo4j.work.summary.ResultSummary | None]:
586
+ """
587
+ Run a Cypher query.
588
+
589
+ Args:
590
+ query:
591
+ A valid Cypher query.
592
+ db:
593
+ The DB inside the Neo4j server that should be queried.
594
+ fetch_size:
595
+ The Neo4j fetch size parameter.
596
+ write:
597
+ Indicates whether to address write- or read-servers.
598
+ explain:
599
+ Indicates whether to EXPLAIN the Cypher query.
600
+ profile:
601
+ Indicates whether to PROFILE the Cypher query.
602
+ fallback_db:
603
+ If the query fails, try to execute it against a fallback database.
604
+ fallback_on:
605
+ Run queries against the fallback databases in case of these errors.
606
+ raise_errors:
607
+ Raise Neo4j errors instead of only printing them.
608
+ parameters:
609
+ Parameters dictionary for the query.
610
+ **kwargs:
611
+ Additional parameters (deprecated, use parameters dict instead).
612
+
613
+ Returns:
614
+ 2-tuple:
615
+ - neo4j.Record.data: the Neo4j response to the query
616
+ - neo4j.ResultSummary: information about the result
617
+ """
618
+ if explain:
619
+ query = "EXPLAIN " + query
620
+ elif profile:
621
+ query = "PROFILE " + query
622
+
623
+ if self.offline:
624
+ logger.info(f"Offline mode, not running query: `{query}`.")
625
+ return None, None
626
+
627
+ if not self.driver:
628
+ if raise_errors:
629
+ raise RuntimeError("Driver is not available. The driver may be closed or in offline mode.")
630
+ logger.error("Driver is not available. Cannot execute query.")
631
+ return None, None
632
+
633
+ # Check if driver is closed (Neo4j 5.x driver has _closed attribute)
634
+ if hasattr(self.driver, "_closed") and self.driver._closed:
635
+ if raise_errors:
636
+ raise RuntimeError("Driver is closed. Please reconnect or create a new driver instance.")
637
+ logger.error("Driver is closed. Cannot execute query.")
638
+ return None, None
639
+
640
+ db = db or self._db_config["db"] or neo4j.DEFAULT_DATABASE
641
+ fetch_size = fetch_size or self._db_config["fetch_size"]
642
+ raise_errors = self._db_config["raise_errors"] if raise_errors is None else raise_errors
643
+
644
+ # Combine parameters dict with kwargs (kwargs for backward compatibility)
645
+ query_params = dict(parameters or {}, **kwargs)
646
+
647
+ # Neo4j 5+ uses database parameter, older versions use it conditionally
648
+ session_kwargs = {
649
+ "fetch_size": fetch_size,
650
+ "default_access_mode": (neo4j.WRITE_ACCESS if write else neo4j.READ_ACCESS),
651
+ }
652
+
653
+ # For Neo4j 4.0+, use database parameter if multi_db is True
654
+ # For Neo4j 5.0+, always use database parameter
655
+ if self.multi_db or self._is_neo4j_5_plus():
656
+ session_kwargs["database"] = db
657
+
658
+ try:
659
+ with self.session(**session_kwargs) as session:
660
+ # Neo4j driver expects parameters via the 'parameters' argument,
661
+ # not unpacked as kwargs. This ensures query parameters are correctly
662
+ # passed to the Cypher query and prevents conflicts with method parameters.
663
+ if query_params:
664
+ res = session.run(query, parameters=query_params)
665
+ else:
666
+ res = session.run(query)
667
+ return res.data(), res.consume()
668
+
669
+ except (neo4j_exc.Neo4jError, neo4j_exc.DriverError) as e:
670
+ fallback_db = fallback_db or getattr(self, "_fallback_db", ())
671
+ fallback_on = _to_set(_if_none(fallback_on, self._get_fallback_on))
672
+
673
+ if self._match_error(e, fallback_on):
674
+ for fdb in _to_tuple(fallback_db):
675
+ if fdb != db:
676
+ logger.warning(f"Running query against fallback database `{fdb}`.")
677
+ return self.query(
678
+ query=query,
679
+ db=fdb,
680
+ fetch_size=fetch_size,
681
+ write=write,
682
+ fallback_on=set(),
683
+ raise_errors=raise_errors,
684
+ parameters=query_params,
685
+ )
686
+
687
+ logger.error(f"Failed to run query: {e.__class__.__name__}: {e}")
688
+ logger.error(f"The error happened with this query: {query}")
689
+
690
+ if e.__class__.__name__ == "AuthError":
691
+ logger.error("Authentication error, switching to offline mode.")
692
+ self.go_offline()
693
+
694
+ if raise_errors:
695
+ raise
696
+
697
+ return None, None
698
+
699
+ def _is_neo4j_5_plus(self) -> bool:
700
+ """Check if Neo4j version is 5.0 or higher."""
701
+ if self._neo4j_version_cache is None:
702
+ version_str = _get_neo4j_version(self.driver)
703
+ if version_str:
704
+ try:
705
+ major_version = int(version_str.split(".")[0])
706
+ self._neo4j_version_cache = major_version >= 5
707
+ except (ValueError, IndexError):
708
+ self._neo4j_version_cache = False
709
+ else:
710
+ self._neo4j_version_cache = False
711
+ return self._neo4j_version_cache
712
+
713
def explain(self, query, db=None, fetch_size=None, write=True, **kwargs):
    """
    Run ``query`` with EXPLAIN and format the resulting plan.

    Args:
        query: Cypher query to explain
        db: Database name
        fetch_size: Fetch size
        write: Write access mode
        **kwargs: Query parameters

    Returns:
        2-tuple:
            - dict: the raw plan returned by the Neo4j bolt driver
            - list of str: a list of strings ready for printing
        ``(None, [])`` when the query produced no summary.
    """
    logger.info("Explaining a query.")
    _data, summary = self.query(query, db, fetch_size, write, explain=True, **kwargs)

    if summary:
        plan = summary.plan
        return plan, _pretty_profile(plan)

    return None, []
739
+
740
def profile(self, query, db=None, fetch_size=None, write=True, **kwargs):
    """
    Run ``query`` with PROFILE and format the resulting profile.

    The printout starts with an "Execution time" header, computed as the
    sum of the summary's ``result_available_after`` and
    ``result_consumed_after``.

    Args:
        query: Cypher query to profile
        db: Database name
        fetch_size: Fetch size
        write: Write access mode
        **kwargs: Query parameters

    Returns:
        2-tuple:
            - dict: the raw profile returned by the Neo4j bolt driver
            - list of str: a list of strings ready for printing
        ``(None, [])`` when the query produced no summary.
    """
    logger.info("Profiling a query.")
    _data, summary = self.query(query, db, fetch_size, write, profile=True, **kwargs)

    if not summary:
        return None, []

    prof = summary.profile
    exec_time = summary.result_available_after + summary.result_consumed_after
    printout = _pretty_profile(prof, [f"Execution time: {exec_time:n}\n"], indent=0)

    return prof, printout
770
+
771
+ def db_exists(self, name: str | None = None) -> bool:
772
+ """Check if a database exists."""
773
+ return bool(self.db_status(name=name))
774
+
775
+ def db_status(
776
+ self,
777
+ name: str | None = None,
778
+ field: str = "currentStatus",
779
+ ) -> Literal["online", "offline"] | str | dict | None:
780
+ """
781
+ Get the current status or other state info of a database.
782
+
783
+ Args:
784
+ name: Name of a database
785
+ field: The field to return
786
+
787
+ Returns:
788
+ The status as a string, None if the database does not exist.
789
+ If field is None, a dictionary with all fields will be returned.
790
+ """
791
+ name = name or self.current_db
792
+ query = f'SHOW DATABASES WHERE name = "{name}";'
793
+
794
+ # Use fallback context manager like original neo4j_utils
795
+ # This allows query to default to current_db and fallback to system/neo4j on error
796
+ with self.fallback():
797
+ resp, summary = self.query(query)
798
+
799
+ if resp:
800
+ return resp[0].get(field, resp[0])
801
+ return None
802
+
803
+ def db_online(self, name: str | None = None) -> bool:
804
+ """Check if a database is currently online."""
805
+ return self.db_status(name=name) == "online"
806
+
807
+ def create_db(self, name: str | None = None):
808
+ """Create a database if it does not already exist."""
809
+ self._manage_db("CREATE", name=name, options="IF NOT EXISTS")
810
+
811
+ def start_db(self, name: str | None = None):
812
+ """Start a database (bring it online) if it is offline."""
813
+ self._manage_db("START", name=name)
814
+
815
+ def stop_db(self, name: str | None = None):
816
+ """Stop a database, making sure it's offline."""
817
+ self._manage_db("STOP", name=name)
818
+
819
+ def drop_db(self, name: str | None = None):
820
+ """Delete a database if it exists."""
821
+ self._manage_db("DROP", name=name, options="IF EXISTS")
822
+
823
def _manage_db(
    self,
    cmd: Literal["CREATE", "START", "STOP", "DROP"],
    name: str | None = None,
    options: str | None = None,
):
    """
    Run a ``<cmd> DATABASE <name> <options>;`` statement.

    Args:
        cmd: The management command.
        name: Database to act on; defaults to the current database.
        options: Optional clause appended after the name.
    """
    target = name or self.current_db
    suffix = options or ""
    # The statement is sent to the default database first; the configured
    # fallback databases are passed along for the retry on error.
    self.query(
        f"{cmd} DATABASE {target} {suffix};",
        fallback_db=self._get_fallback_db,
    )
836
+
837
+ def wipe_db(self):
838
+ """Delete all contents of the current database."""
839
+ if not self.driver:
840
+ raise RuntimeError(
841
+ "Driver is not available. Cannot wipe database. " "The driver may be closed or in offline mode."
842
+ )
843
+
844
+ # Check if driver is closed (Neo4j 5.x driver has _closed attribute)
845
+ if hasattr(self.driver, "_closed") and self.driver._closed:
846
+ raise RuntimeError(
847
+ "Driver is closed. Cannot wipe database. " "Please reconnect or create a new driver instance."
848
+ )
849
+
850
+ # Ensure database exists before trying to wipe it
851
+ self.ensure_db()
852
+
853
+ # For Community Edition, use default database if current_db is not supported
854
+ # Skip this check if Enterprise Edition is forced
855
+ db_to_wipe = self.current_db
856
+ if not self._force_enterprise:
857
+ current_uri = self.uri
858
+ is_neo4j_protocol = current_uri.startswith("neo4j://") or current_uri.startswith("neo4j+s://")
859
+ is_non_default_db = db_to_wipe and db_to_wipe.lower() != "neo4j"
860
+ is_community_edition = not self.multi_db or (is_neo4j_protocol and is_non_default_db)
861
+
862
+ if is_community_edition and is_non_default_db:
863
+ logger.warning(
864
+ f"Cannot wipe database '{db_to_wipe}' in Community Edition. "
865
+ f"Using default database 'neo4j' instead. "
866
+ f"Database will remain 'neo4j' for this session."
867
+ )
868
+ # Permanently change to default database for Community Edition
869
+ db_to_wipe = "neo4j"
870
+ self._db_config["db"] = "neo4j"
871
+ self._register_current_driver()
872
+
873
+ logger.info(f"Wiping database `{db_to_wipe}`.")
874
+ self.query("MATCH (n) DETACH DELETE n;")
875
+ self.drop_indices_constraints()
876
+
877
def ensure_db(self):
    """
    Make sure the current database exists and is online.

    Nothing is checked in offline mode. Unless Enterprise Edition is
    forced, the check is also skipped when multi-database mode is off,
    when the URI uses ``bolt`` / ``bolt+s``, or when a ``neo4j`` /
    ``neo4j+s`` URI is combined with a non-default database. Otherwise
    the database is created and/or started as needed.

    Raises:
        RuntimeError: If the database cannot be checked, created or started.
    """
    db_name = self.current_db

    if self.offline:
        logger.debug(f"Offline mode, skipping database creation for '{db_name}'.")
        return

    if self._force_enterprise:
        logger.debug(f"Enterprise Edition forced, proceeding with database check for '{db_name}'.")
    else:
        # Community Edition has no multi-database support; its default
        # database always exists and is always online, so the checks below
        # are skipped whenever Community Edition is assumed.
        current_uri = self.uri
        is_bolt = current_uri.startswith(("bolt://", "bolt+s://"))
        is_neo4j_protocol = current_uri.startswith(("neo4j://", "neo4j+s://"))
        is_non_default_db = db_name and db_name.lower() != "neo4j"

        if not self.multi_db:
            logger.debug(
                f"Multi-database mode disabled (Community Edition). "
                f"Using default database '{db_name}' which always exists."
            )
            return
        if is_bolt:
            logger.debug(
                f"Using bolt:// connection (direct mode). "
                f"Using default database '{db_name}' which always exists."
            )
            return
        if is_neo4j_protocol and is_non_default_db:
            logger.debug(
                f"Using neo4j:// protocol with non-default database '{db_name}' - "
                f"assuming Community Edition and skipping database check."
            )
            return

    # Step 1: existence, creating the database if needed.
    try:
        if self.db_exists():
            logger.debug(f"Database '{db_name}' already exists.")
        else:
            logger.info(f"Database '{db_name}' does not exist, creating it...")
            self.create_db()
            # Verify creation succeeded
            if not self.db_exists():
                raise RuntimeError(
                    f"Failed to create database '{db_name}'. The database was not created successfully."
                )
            logger.info(f"Database '{db_name}' created successfully.")
    except Exception as e:
        logger.error(f"Failed to check/create database '{db_name}': {e}")
        # Initialisation must not continue with a missing database.
        raise RuntimeError(
            f"Failed to ensure database '{db_name}' exists: {e}. "
            "Please check Neo4j permissions and that the database can be created."
        ) from e

    # Step 2: availability, starting the database if needed.
    try:
        if self.db_online():
            logger.debug(f"Database '{db_name}' is already online.")
        else:
            logger.info(f"Database '{db_name}' is offline, starting it...")
            self.start_db()
            # Verify start succeeded
            if not self.db_online():
                raise RuntimeError(
                    f"Failed to start database '{db_name}'. The database was not started successfully."
                )
            logger.info(f"Database '{db_name}' started successfully.")
    except Exception as e:
        logger.error(f"Failed to check/start database '{db_name}': {e}")
        # Initialisation must not continue with an offline database.
        raise RuntimeError(
            f"Failed to ensure database '{db_name}' is online: {e}. "
            "Please check Neo4j permissions and that the database can be started."
        ) from e
960
+
961
def select_db(self, name: str):
    """Make ``name`` the current database.

    Nothing happens when ``name`` is already the current database.
    Otherwise the driver in use is registered, the ``db`` entry of the
    connection config is updated, and either a previously stored driver
    for ``name`` is reused or a new connection is opened.

    Args:
        name: Name of the database to switch to.
    """
    if self.current_db == name:
        return

    self._register_current_driver()
    self._db_config["db"] = name

    if name not in self._drivers:
        # No driver stored for this database yet: open a fresh connection.
        self.db_connect()
        return

    self.driver = self._drivers[name]
973
+
974
@property
def indices(self) -> list | None:
    """Indices of the current database, as returned by ``_list_indices``."""
    listing = self._list_indices("indices")
    return listing
978
+
979
@property
def constraints(self) -> list | None:
    """Constraints of the current database, as returned by ``_list_indices``."""
    listing = self._list_indices("constraints")
    return listing
983
+
984
def drop_indices_constraints(self):
    """Remove every constraint and index from the current database.

    Constraints are always dropped. Indexes are dropped in a separate
    step only when ``_is_neo4j_5_plus()`` reports an older server.
    """
    self.drop_constraints()

    if self._is_neo4j_5_plus():
        # Neo4j 5+ handles constraints and indexes together.
        return

    self.drop_indices()
991
+
992
def drop_constraints(self):
    """Remove every constraint from the current database."""
    self._drop_indices("constraints")
995
+
996
def drop_indices(self):
    """Remove every index from the current database."""
    self._drop_indices("indexes")
999
+
1000
def _drop_indices(
    self,
    what: Literal["indexes", "indices", "constraints"] = "constraints",
):
    """Drop all indices or all constraints of the current database.

    The existing items are listed with a ``SHOW`` query and then dropped
    one by one by name. Driver and server errors are logged, not raised.

    Args:
        what: Which kind of object to drop; validated by
            ``_idx_cstr_synonyms``, which raises ``ValueError`` for any
            other keyword.
    """
    keyword = self._idx_cstr_synonyms(what)

    # After validation `what` is one of the three accepted keywords, so
    # anything that is not "constraints" refers to indexes.
    listing_query = "SHOW CONSTRAINTS" if what == "constraints" else "SHOW INDEXES"

    with self.session() as db_session:
        try:
            records = list(db_session.run(listing_query))
            joined_names = ", ".join(record["name"] for record in records)

            for record in records:
                db_session.run(f"DROP {keyword} `{record['name']}` IF EXISTS")

            logger.info(f"Dropped {len(records)} {what}: {joined_names}.")

        except (neo4j_exc.Neo4jError, neo4j_exc.DriverError) as e:
            logger.error(f"Failed to run query: {e}")
1034
+
1035
def _list_indices(
    self,
    what: Literal["indexes", "indices", "constraints"] = "constraints",
) -> list | None:
    """Return the indices or constraints of the current database.

    Args:
        what: Which kind of object to list; validated by
            ``_idx_cstr_synonyms``.

    Returns:
        The records produced by the ``SHOW`` query as a list, or ``None``
        when the query fails with a driver or server error (which is
        logged).
    """
    query = f"SHOW {self._idx_cstr_synonyms(what).upper()};"

    with self.session() as db_session:
        try:
            records = list(db_session.run(query))
        except (neo4j_exc.Neo4jError, neo4j_exc.DriverError) as e:
            logger.error(f"Failed to run query: {e}")
            return None

        return records
1048
+
1049
@staticmethod
def _idx_cstr_synonyms(what: str) -> str:
    """Translate an index/constraint keyword into its Cypher keyword.

    Args:
        what: One of ``"indexes"``, ``"indices"`` or ``"constraints"``.

    Returns:
        ``"INDEX"`` or ``"CONSTRAINT"``.

    Raises:
        ValueError: If ``what`` is not one of the accepted keywords; the
            message is logged before raising.
    """
    cypher_keywords = {
        "indexes": "INDEX",
        "indices": "INDEX",
        "constraints": "CONSTRAINT",
    }

    if what in cypher_keywords:
        return cypher_keywords[what]

    msg = f'Allowed keywords are: "indexes", "indices" or "constraints", ' f"not `{what}`."
    logger.error(msg)
    raise ValueError(msg)
1066
+
1067
@property
def node_count(self) -> int | None:
    """Number of nodes in the database, or ``None`` if the query gave no result."""
    records, _summary = self.query("MATCH (n) RETURN COUNT(n) AS count;")

    if not records:
        return None

    return records[0]["count"]
1072
+
1073
@property
def edge_count(self) -> int | None:
    """Number of edges in the database, or ``None`` if the query gave no result."""
    records, _summary = self.query("MATCH ()-[r]->() RETURN COUNT(r) AS count;")

    if not records:
        return None

    return records[0]["count"]
1078
+
1079
@property
def user(self) -> str | None:
    """User name of the active connection (first item of ``_extract_auth``)."""
    credentials = self._extract_auth
    return credentials[0]
1083
+
1084
@property
def passwd(self) -> str | None:
    """Password of the active connection (second item of ``_extract_auth``)."""
    credentials = self._extract_auth
    return credentials[1]
1088
+
1089
@property
def _extract_auth(self) -> tuple[str | None, str | None]:
    """Read the authentication data captured by the driver's opener.

    Returns:
        The contents of the ``auth`` closure cell of the opener, or
        ``(None, None)`` when there is no driver or no such cell.
    """
    if not self.driver:
        return None, None

    captured = self._opener_vars

    return captured["auth"].cell_contents if "auth" in captured else (None, None)
1100
+
1101
@property
def _opener_vars(self) -> dict:
    """Map the free variables of the driver's connection opener to their cells.

    The opener is the callable found at ``self.driver._pool.opener``; its
    closure cells are paired with the names in ``__code__.co_freevars``.

    Returns:
        A dict of free-variable name to closure cell. It is empty when
        the driver has no pool or opener, when the opener is not a plain
        Python function, or when the opener captures no variables.
    """
    pool = getattr(self.driver, "_pool", None)
    opener = getattr(pool, "opener", None)
    code = getattr(opener, "__code__", None)

    if code is None:
        return {}

    # `__closure__` is None (not an empty tuple) for functions without
    # free variables; zip() would raise TypeError on it.
    cells = getattr(opener, "__closure__", None) or ()

    return dict(zip(code.co_freevars, cells))
1110
+
1111
def __len__(self):
    """Number of nodes in the database; ``0`` when the count is unavailable."""
    count = self.node_count
    return count if count else 0
1114
+
1115
@contextlib.contextmanager
def use_db(self, name: str):
    """Temporarily switch the current database to ``name``.

    The database that was current on entry is selected again on exit,
    also when the body of the ``with`` block raises.

    Args:
        name: Database to use inside the ``with`` block.
    """
    previous = self.current_db
    self.select_db(name=name)

    try:
        yield
    finally:
        self.select_db(name=previous)
1125
+
1126
@contextlib.contextmanager
def fallback(
    self,
    db: str | tuple[str] | None = None,
    on: str | set[str] | None = None,
):
    """Temporarily set the fallback database(s) and fallback error(s).

    Inside the ``with`` block ``_fallback_db`` and ``_fallback_on`` hold
    the given values, or the ``fallback_db`` / ``fallback_on`` entries of
    the connection config when a value is not given (or is falsy). The
    previous values are restored on exit.

    Args:
        db: Fallback database name(s).
        on: Error name(s) that trigger the fallback.
    """
    requested = {"db": db, "on": on}
    saved = {}

    for key, value in requested.items():
        attr = f"_fallback_{key}"
        saved[attr] = getattr(self, attr, None)
        setattr(self, attr, value or self._db_config.get(f"fallback_{key}"))

    try:
        yield
    finally:
        for attr, previous in saved.items():
            setattr(self, attr, previous)
1151
+
1152
@contextlib.contextmanager
def session(self, **kwargs):
    """Open a driver session and close it when the ``with`` block ends.

    Args:
        **kwargs: Passed on unchanged to ``self.driver.session``.

    Yields:
        The session object created by the driver.

    Raises:
        RuntimeError: If there is no driver, or the driver reports
            itself as closed through its ``_closed`` attribute.
    """
    driver = self.driver

    if not driver:
        raise RuntimeError("Driver is not available. The driver may be closed or in offline mode.")

    # Drivers without a `_closed` attribute are treated as open.
    if getattr(driver, "_closed", False):
        raise RuntimeError("Driver is closed. Please reconnect or create a new driver instance.")

    with contextlib.closing(driver.session(**kwargs)) as db_session:
        yield db_session
1168
+
1169
def __enter__(self):
    """Open a session and return it for use in a ``with`` block.

    ``self.session()`` returns a generator-based context manager, not a
    session, so it has to be entered to obtain the actual session. The
    manager is kept on the instance as well: dropping the last reference
    to it would finalize the generator and close the session at once.

    Not re-entrant: entering again before exiting replaces the stored
    session.

    Returns:
        The session yielded by ``self.session()``.
    """
    manager = self.session()
    db_session = manager.__enter__()

    self._context_manager = manager
    self._context_session = db_session

    return db_session

def __exit__(self, *exc):
    """Close the session opened by ``__enter__``, if there is one.

    The exception details are handed to the session context manager so
    its cleanup runs; the exception itself is never suppressed.
    """
    manager = getattr(self, "_context_manager", None)

    for attr in ("_context_manager", "_context_session"):
        if hasattr(self, attr):
            delattr(self, attr)

    if manager is not None:
        # Tolerate a manual call without the usual three arguments.
        exc_info = exc if len(exc) == 3 else (None, None, None)
        manager.__exit__(*exc_info)
1179
+
1180
def __repr__(self):
    """Class name plus the connection string, or ``[offline]`` without a driver."""
    target = self._connection_str if self.driver else "[offline]"
    return f"<{self.__class__.__name__} {target}>"
1183
+
1184
@property
def _connection_str(self) -> str:
    """Describe the connection as ``protocol://host:port/user``.

    The protocol is the lower-cased leading word of the driver's class
    name. The address comes from the driver's ``_pool`` when it has one.
    Unknown parts are rendered as ``unknown`` (port ``0``).
    """
    driver = self.driver

    if not driver:
        return "unknown://unknown:0/unknown"

    # Split the CamelCase class name at the first lower/upper boundary.
    class_name = driver.__class__.__name__
    leading_word = re.split(r"(?<=[a-z])(?=[A-Z])", class_name)[0]
    protocol = leading_word.lower()

    if hasattr(driver, "_pool"):
        address = driver._pool.address
    else:
        address = ("unknown", 0)

    return f"{protocol}://{address[0]}:{address[1]}/{self.user or 'unknown'}"
1198
+
1199
@property
def offline(self) -> bool:
    """Whether the driver is currently in offline mode."""
    return self._offline

@offline.setter
def offline(self, offline: bool):
    """Switch mode: a truthy value goes offline, a falsy one goes online."""
    if offline:
        self.go_offline()
    else:
        self.go_online()
1208
+
1209
@property
def apoc_version(self) -> str | None:
    """Version of the APOC plugin available in the current database.

    Returns:
        APOC version string, or None if APOC is not available, the
        driver is missing, offline or closed, or the query fails for any
        other reason.
    """
    driver = self.driver

    # Nothing to query without a usable driver.
    if not driver or self.offline:
        return None

    if getattr(driver, "_closed", False):
        return None

    # `neo4j` is None when the driver package is not installed; fall back
    # to None (the driver's default database) instead of raising.
    db = self._db_config["db"] or getattr(neo4j, "DEFAULT_DATABASE", None)

    try:
        with self.session(database=db) as db_session:
            rows = db_session.run("RETURN apoc.version() AS output;").data()
            if rows:
                return rows[0]["output"]
    except Exception:
        # Deliberate best effort: a missing APOC procedure (client error),
        # a closed or offline driver (RuntimeError) and connection
        # problems all mean "no APOC available". A single handler also
        # avoids dereferencing `neo4j_exc`, which may be None.
        return None

    return None
1241
+
1242
@property
def has_apoc(self) -> bool:
    """Check if APOC is available in the current database.

    Returns:
        True if ``apoc_version`` yields a non-empty value, False
        otherwise, including when reading it raises.
    """
    try:
        version = self.apoc_version
    except Exception:
        # Always answer with a boolean, even if the lookup itself fails.
        return False

    return bool(version)
1255
+
1256
def go_offline(self):
    """Switch to offline mode.

    Sets the offline flag, calls ``close()``, clears ``self.driver`` and
    then calls ``_register_current_driver()``, and finally logs a
    warning that server interaction is disabled.
    """
    self._offline = True
    self.close()
    self.driver = None
    # NOTE(review): presumably this stores the now-empty driver for the
    # current database so that a later select_db() does not pick up the
    # closed one; confirm against _register_current_driver().
    self._register_current_driver()
    logger.warning("Offline mode: any interaction to the server is disabled.")
1263
+
1264
def go_online(
    self,
    db_name: str | None = None,
    db_uri: str | None = None,
    db_user: str | None = None,
    db_passwd: str | None = None,
    config: CONFIG_FILES | None = None,
    fetch_size: int | None = None,
    raise_errors: bool | None = None,
    wipe: bool = False,
):
    """Switch to online mode.

    Each entry of the connection config is set to the first value that
    is not None among: the matching argument of this call, the current
    entry, and the entry of ``DEFAULT_CONFIG``. Then the connection is
    opened and the database is ensured to exist. Any failure is logged
    and leaves the driver flagged as offline; it is not raised.

    Args:
        db_name: Database name (config key ``db``).
        db_uri: Server URI (config key ``uri``).
        db_user: User name (config key ``user``).
        db_passwd: Password (config key ``passwd``).
        config: Config file, used only if none has been set before.
        fetch_size: Fetch size (config key ``fetch_size``).
        raise_errors: Error handling flag (config key ``raise_errors``).
        wipe: Wipe the database after a successful connection.
    """
    self._offline = False

    # Explicit mapping from config keys to call arguments. Deriving the
    # argument name from the key cannot work: the keys are "db", "uri",
    # "user" and "passwd", while the arguments carry a `db_` prefix (and
    # "db" corresponds to `db_name`).
    overrides = {
        "db": db_name,
        "uri": db_uri,
        "user": db_user,
        "passwd": db_passwd,
        "fetch_size": fetch_size,
        "raise_errors": raise_errors,
    }

    try:
        for key, current in self._db_config.items():
            self._db_config[key] = _if_none(
                overrides.get(key),
                current,
                DEFAULT_CONFIG.get(key),
            )

        self._config_file = self._config_file or config

        self.db_connect()
        self.ensure_db()
        logger.info("Online mode: ready to run queries.")

    except Exception as e:
        logger.error(f"Failed to connect: {e}")
        self._offline = True

    if wipe:
        if self._offline:
            # Connecting failed above; there is nothing that can be wiped.
            logger.warning("Skipping database wipe: the connection could not be established.")
        else:
            self.wipe_db()
1298
+
1299
@staticmethod
def _match_error(error: Exception | str, errors: set[Exception | str]) -> bool:
    """Tell whether ``error`` is covered by ``errors``.

    Exception instances are reduced to their class. Strings are looked
    up by name among the builtins first, then in the Neo4j exceptions
    module, and are kept as plain strings if neither has that name.

    Args:
        error: The error to look for.
        errors: The errors to compare against.

    Returns:
        True if the normalized ``error`` equals one of the normalized
        ``errors``, or is a subclass of one of those that are classes.
    """
    import builtins

    def normalize(item):
        if isinstance(item, Exception):
            return item.__class__
        if isinstance(item, str):
            return getattr(builtins, item, getattr(neo4j_exc, item, item))
        return item

    target = normalize(error)
    candidates = {normalize(item) for item in _to_set(errors)}

    if target in candidates:
        return True

    if not isinstance(target, type):
        return False

    return any(issubclass(target, candidate) for candidate in candidates if isinstance(candidate, type))