linkml-store 0.1.12__tar.gz → 0.1.13__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Files changed (66)
  1. {linkml_store-0.1.12 → linkml_store-0.1.13}/PKG-INFO +6 -2
  2. {linkml_store-0.1.12 → linkml_store-0.1.13}/pyproject.toml +6 -2
  3. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/client.py +2 -0
  4. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/collection.py +41 -4
  5. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/config.py +10 -0
  6. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/database.py +2 -0
  7. linkml_store-0.1.13/src/linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  8. linkml_store-0.1.13/src/linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  9. linkml_store-0.1.13/src/linkml_store/graphs/graph_map.py +24 -0
  10. linkml_store-0.1.13/src/linkml_store/utils/__init__.py +0 -0
  11. linkml_store-0.1.13/src/linkml_store/utils/neo4j_utils.py +42 -0
  12. linkml_store-0.1.13/src/linkml_store/webapi/__init__.py +0 -0
  13. {linkml_store-0.1.12 → linkml_store-0.1.13}/LICENSE +0 -0
  14. {linkml_store-0.1.12 → linkml_store-0.1.13}/README.md +0 -0
  15. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/__init__.py +0 -0
  16. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/__init__.py +0 -0
  17. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/queries.py +0 -0
  18. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/__init__.py +0 -0
  19. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  20. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  21. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  22. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  23. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +0 -0
  24. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/duckdb/duckdb_database.py +0 -0
  25. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  26. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
  27. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
  28. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
  29. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  30. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  31. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  32. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  33. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +0 -0
  34. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
  35. {linkml_store-0.1.12/src/linkml_store/index/implementations → linkml_store-0.1.13/src/linkml_store/api/stores/neo4j}/__init__.py +0 -0
  36. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  37. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
  38. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  39. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  40. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/api/types.py +0 -0
  41. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/cli.py +0 -0
  42. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/constants.py +0 -0
  43. {linkml_store-0.1.12/src/linkml_store/utils → linkml_store-0.1.13/src/linkml_store/graphs}/__init__.py +0 -0
  44. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/index/__init__.py +0 -0
  45. {linkml_store-0.1.12/src/linkml_store/webapi → linkml_store-0.1.13/src/linkml_store/index/implementations}/__init__.py +0 -0
  46. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
  47. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  48. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/index/indexer.py +0 -0
  49. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/change_utils.py +0 -0
  50. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/file_utils.py +0 -0
  51. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/format_utils.py +0 -0
  52. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/io.py +0 -0
  53. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/mongodb_utils.py +0 -0
  54. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/object_utils.py +0 -0
  55. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/pandas_utils.py +0 -0
  56. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/patch_utils.py +0 -0
  57. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/query_utils.py +0 -0
  58. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/schema_utils.py +0 -0
  59. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/utils/sql_utils.py +0 -0
  60. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/webapi/html/__init__.py +0 -0
  61. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/webapi/html/base.html.j2 +0 -0
  62. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
  63. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
  64. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
  65. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
  66. {linkml_store-0.1.12 → linkml_store-0.1.13}/src/linkml_store/webapi/main.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: linkml-store
-Version: 0.1.12
+Version: 0.1.13
 Summary: linkml-store
 License: MIT
 Author: Author 1
@@ -21,6 +21,7 @@ Provides-Extra: h5py
 Provides-Extra: llm
 Provides-Extra: map
 Provides-Extra: mongodb
+Provides-Extra: neo4j
 Provides-Extra: pyarrow
 Provides-Extra: renderer
 Provides-Extra: tests
@@ -28,7 +29,7 @@ Provides-Extra: validation
 Requires-Dist: black (>=24.0.0) ; extra == "tests"
 Requires-Dist: chromadb ; extra == "chromadb"
 Requires-Dist: click
-Requires-Dist: duckdb (>=0.10.1,<0.11.0)
+Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: duckdb-engine (>=0.11.2)
 Requires-Dist: fastapi ; extra == "fastapi"
 Requires-Dist: frictionless ; extra == "frictionless"
@@ -41,8 +42,11 @@ Requires-Dist: linkml_map ; extra == "map"
 Requires-Dist: linkml_renderer ; extra == "renderer"
 Requires-Dist: llm ; extra == "llm"
 Requires-Dist: matplotlib ; extra == "analytics"
+Requires-Dist: neo4j ; extra == "neo4j"
+Requires-Dist: networkx ; extra == "neo4j"
 Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
 Requires-Dist: plotly ; extra == "analytics"
+Requires-Dist: py2neo ; extra == "neo4j"
 Requires-Dist: pyarrow ; extra == "pyarrow"
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
 Requires-Dist: pymongo ; extra == "mongodb"

pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "linkml-store"
-version = "0.1.12"
+version = "0.1.13"
 description = "linkml-store"
 authors = ["Author 1 <author@org.org>"]
 license = "MIT"
@@ -13,7 +13,7 @@ pydantic = "^2.0.0"
 linkml-runtime = ">=1.8.0"
 streamlit = { version = "^1.32.2", optional = true }
 sqlalchemy = "*"
-duckdb = "^0.10.1"
+duckdb = ">=0.10.1"
 duckdb-engine = ">=0.11.2"
 matplotlib = { version = "*", optional = true }
 seaborn = { version = "*", optional = true }
@@ -22,6 +22,9 @@ pystow = "^0.5.4"
 black = { version=">=24.0.0", optional = true }
 llm = { version="*", optional = true }
 pymongo = { version="*", optional = true }
+neo4j = { version="*", optional = true }
+py2neo = { version="*", optional = true }
+networkx = { version="*", optional = true }
 chromadb = { version="*", optional = true }
 pyarrow = { version="*", optional = true }
 h5py = { version="*", optional = true }
@@ -66,6 +69,7 @@ app = ["streamlit"]
 tests = ["black"]
 llm = ["llm"]
 mongodb = ["pymongo"]
+neo4j = ["neo4j", "py2neo", "networkx"]
 chromadb = ["chromadb"]
 h5py = ["h5py"]
 pyarrow = ["pyarrow"]
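
The net effect of the packaging changes: the duckdb pin is relaxed to a lower bound only, and a new optional neo4j extra bundles the neo4j driver, py2neo, and networkx. Users opt in with pip install "linkml-store[neo4j]".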

src/linkml_store/api/client.py
@@ -11,6 +11,7 @@ from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
 from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
 from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
 from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
+from linkml_store.api.stores.neo4j.neo4j_database import Neo4jDatabase
 from linkml_store.api.stores.solr.solr_database import SolrDatabase
 
 logger = logging.getLogger(__name__)
@@ -21,6 +22,7 @@ HANDLE_MAP = {
     "solr": SolrDatabase,
     "mongodb": MongoDBDatabase,
     "chromadb": ChromaDBDatabase,
+    "neo4j": Neo4jDatabase,
     "file": FileSystemDatabase,
 }
 
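
With the new HANDLE_MAP entry, handles using the neo4j scheme resolve to Neo4jDatabase. A minimal sketch of attaching one (assuming a Neo4j server on the default bolt port; attach_database and the alias keyword are the existing Client API):

from linkml_store.api.client import Client

client = Client()
# Neo4jDatabase.__init__ rewrites the "neo4j:" scheme to "bolt:"
db = client.attach_database("neo4j://localhost:7687/neo4j", alias="graph")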

src/linkml_store/api/collection.py
@@ -4,7 +4,21 @@ import hashlib
 import logging
 from collections import defaultdict
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ClassVar, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    ClassVar,
+    Dict,
+    Generic,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    TextIO,
+    Tuple,
+    Type,
+    Union,
+)
 
 import numpy as np
 from linkml_runtime import SchemaView
@@ -202,6 +216,12 @@ class Collection(Generic[DatabaseType]):
         self._materialize_derivations()
         self._initialized = True
 
+    def _pre_insert_hook(self, objs: List[OBJECT], **kwargs):
+        if self.metadata.validate_modifications:
+            errors = list(self.iter_validate_collection(objs))
+            if errors:
+                raise ValueError(f"Validation errors: {errors}")
+
     def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
         self._initialized = True
         patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
@@ -978,11 +998,14 @@ class Collection(Generic[DatabaseType]):
         patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
         return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
 
-    def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
+    def iter_validate_collection(
+        self, objects: Optional[Iterable[OBJECT]] = None, **kwargs
+    ) -> Iterator["ValidationResult"]:
         """
         Validate the contents of the collection
 
         :param kwargs:
+        :param objects: objects to validate
         :return: iterator over validation results
         """
         from linkml.validator import JsonschemaValidationPlugin, Validator
@@ -992,10 +1015,24 @@ class Collection(Generic[DatabaseType]):
         cd = self.class_definition()
         if not cd:
             raise ValueError(f"Cannot find class definition for {self.target_class_name}")
+        type_designator = None
+        for att in self.parent.schema_view.class_induced_slots(cd.name):
+            if att.designates_type:
+                type_designator = att.name
         class_name = cd.name
-        for obj in self.find_iter(**kwargs):
+        if objects is None:
+            objects = self.find_iter(**kwargs)
+        for obj in objects:
             obj = clean_empties(obj)
-            yield from validator.iter_results(obj, class_name)
+            v_class_name = class_name
+            if type_designator is not None:
+                # TODO: move type designator logic to core linkml
+                this_class_name = obj.get(type_designator)
+                if this_class_name:
+                    if ":" in this_class_name:
+                        this_class_name = this_class_name.split(":")[-1]
+                    v_class_name = this_class_name
+            yield from validator.iter_results(obj, v_class_name)
 
     def commit(self):
         """

src/linkml_store/api/config.py
@@ -2,6 +2,8 @@ from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, Field
 
+from linkml_store.graphs.graph_map import GraphProjection
+
 
 class ConfiguredBaseModel(BaseModel, extra="forbid"):
     """
@@ -79,6 +81,14 @@ class CollectionConfig(ConfiguredBaseModel):
         description="LinkML-Map derivations",
     )
     page_size: Optional[int] = Field(default=None, description="Suggested page size (items per page) in apps and APIs")
+    graph_projection: Optional[GraphProjection] = Field(
+        default=None,
+        description="Optional graph projection configuration",
+    )
+    validate_modifications: Optional[bool] = Field(
+        default=False,
+        description="Whether to validate inserts, updates, and deletes",
+    )
 
 
 class DatabaseConfig(ConfiguredBaseModel):
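
A sketch of constructing the extended config directly (only the two new fields are set; other CollectionConfig fields keep their defaults, and EdgeProjection comes from the new graphs/graph_map.py shown later in this diff):

from linkml_store.api.config import CollectionConfig
from linkml_store.graphs.graph_map import EdgeProjection

config = CollectionConfig(
    graph_projection=EdgeProjection(),  # defaults: subject / predicate / object
    validate_modifications=True,
)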

src/linkml_store/api/database.py
@@ -505,8 +505,10 @@ class Database(ABC, Generic[CollectionType]):
         if isinstance(schema_view, str):
             schema_view = SchemaView(schema_view)
         self._schema_view = schema_view
+        # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
         if not self._collections:
             return
+
         # align with induced schema
         roots = [c for c in schema_view.all_classes().values() if c.tree_root]
         if len(roots) == 0:
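
This block runs when a schema is attached to a database; the isinstance check lets callers pass either a SchemaView or a path. A sketch, assuming the enclosing method is the database's set_schema_view and personinfo.yaml is an illustrative schema file:

db.set_schema_view("personinfo.yaml")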

src/linkml_store/api/stores/neo4j/neo4j_collection.py (new file)
@@ -0,0 +1,429 @@
+import logging
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from neo4j import Driver, Session
+
+from linkml_store.api import Collection
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+from linkml_store.api.queries import Query, QueryResult
+from linkml_store.graphs.graph_map import EdgeProjection, GraphProjection, NodeProjection
+
+logger = logging.getLogger(__name__)
+
+
+class DeletePolicy(Enum):
+    CASCADE = "cascade"
+    ERROR = "error"
+    STUB = "stub"
+
+
+class Neo4jCollection(Collection):
+    """
+    Adapter for collections in a Neo4j database.
+    """
+
+    # _graph_projection: Optional[GraphProjection] = None
+    delete_policy: DeletePolicy = DeletePolicy.CASCADE
+
+    @property
+    def driver(self) -> Driver:
+        return self.parent.driver
+
+    def session(self) -> Session:
+        return self.parent.session()
+
+    def _check_if_initialized(self) -> bool:
+        with self.session() as session:
+            result = session.run("MATCH (n) RETURN count(n) > 0 as exists")
+            return result.single()["exists"]
+
+    @property
+    def graph_projection(self) -> GraphProjection:
+        return self.metadata.graph_projection
+
+    @property
+    def node_projection(self) -> Optional[NodeProjection]:
+        return self.metadata.graph_projection if isinstance(self.graph_projection, NodeProjection) else None
+
+    @property
+    def edge_projection(self) -> Optional[EdgeProjection]:
+        return self.metadata.graph_projection if isinstance(self.graph_projection, EdgeProjection) else None
+
+    @property
+    def is_edge_collection(self) -> bool:
+        return isinstance(self.graph_projection, EdgeProjection)
+
+    @property
+    def category_labels_attribute(self) -> str:
+        np = self.node_projection
+        category_labels_attribute = None
+        if np:
+            category_labels_attribute = np.category_labels_attribute
+        if not category_labels_attribute:
+            category_labels_attribute = "category"
+        return category_labels_attribute
+
+    @property
+    def identifier_attribute(self) -> str:
+        gp = self.graph_projection
+        id_attribute = None
+        if gp:
+            id_attribute = gp.identifier_attribute
+        if not id_attribute:
+            id_attribute = "id"
+        return id_attribute
+
+    def _node_pattern(self, obj: Optional[OBJECT] = None, node_var="n") -> str:
+        obj = {} if obj is None else obj
+        category_labels_attribute = self.category_labels_attribute
+        categories = obj.get(category_labels_attribute or "category", [])
+        if not isinstance(categories, list):
+            categories = [categories]
+        cstr = (":" + ":".join(categories)) if categories else ""
+        return f"{node_var}{cstr}"
+
+    @property
+    def is_node_collection(self) -> bool:
+        return not self.is_edge_collection
+
+    def set_is_edge_collection(self, force=False):
+        if self.is_edge_collection:
+            return
+        if self.graph_projection and not force:
+            raise ValueError("Cannot reassign without force=True")
+        self.metadata.graph_projection = EdgeProjection()
+
+    def set_is_node_collection(self, force=False):
+        if self.is_node_collection:
+            return
+        if self.graph_projection and not force:
+            raise ValueError("Cannot reassign without force=True")
+        self.metadata.graph_projection = NodeProjection()
+
+    def _prop_clause(
+        self, obj: OBJECT, node_var: Optional[str] = None, exclude_attributes: Optional[List[str]] = None
+    ) -> str:
+        if exclude_attributes is None:
+            exclude_attributes = [self.category_labels_attribute]
+        node_prefix = node_var + "." if node_var else ""
+        terms = [f"{node_prefix}{k}: ${k}" for k in obj.keys() if k not in exclude_attributes]
+        return ", ".join(terms)
+
+    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+        if not isinstance(objs, list):
+            objs = [objs]
+        self._pre_insert_hook(objs)
+
+        with self.session() as session:
+            for obj in objs:
+                query = self._create_insert_cypher_query(obj)
+                session.run(query, **obj)
+
+        self._post_insert_hook(objs)
+
+    def _create_insert_cypher_query(self, obj: OBJECT) -> str:
+        id_attribute = self.identifier_attribute
+        if not self.is_edge_collection:
+            logger.debug(f"Inserting node: {obj}")
+            category_labels_attribute = self.category_labels_attribute
+            node_pattern = self._node_pattern(obj)
+            props = self._prop_clause(obj, exclude_attributes=[id_attribute, category_labels_attribute])
+            return f"CREATE ({node_pattern} {{{id_attribute}: ${id_attribute}, {props}}})"
+        else:
+            logger.debug(f"Inserting edge: {obj}")
+            ep = self.edge_projection
+            if ep.predicate_attribute not in obj:
+                raise ValueError(f"Predicate attribute {ep.predicate_attribute} not found in edge {obj}.")
+            if ep.subject_attribute not in obj:
+                raise ValueError(f"Subject attribute {ep.subject_attribute} not found in edge {obj}.")
+            if ep.object_attribute not in obj:
+                raise ValueError(f"Object attribute {ep.object_attribute} not found in edge {obj}.")
+            pred = obj[ep.predicate_attribute]
+            # check if nodes present; if not, make dangling stubs
+            # TODO: decide on how this should be handled in validation if some fields are required
+            for node_id in [obj[ep.subject_attribute], obj[ep.object_attribute]]:
+                check_query = (
+                    f"MATCH (n {{{ep.identifier_attribute}: ${ep.identifier_attribute}}}) RETURN count(n) as count"
+                )
+                with self.session() as session:
+                    result = session.run(check_query, **{ep.identifier_attribute: node_id})
+                    if result.single()["count"] == 0:
+                        if self.delete_policy == DeletePolicy.STUB:
+                            stub_query = f"CREATE (n {{{ep.identifier_attribute}: ${ep.identifier_attribute}}})"
+                            session.run(stub_query, **{ep.identifier_attribute: node_id})
+                        else:
+                            raise ValueError(f"Node with identifier {node_id} not found in the database.")
+            edge_props = self._prop_clause(
+                obj, exclude_attributes=[ep.subject_attribute, ep.predicate_attribute, ep.object_attribute]
+            )
+            return f"""
+            MATCH (s {{{id_attribute}: ${ep.subject_attribute}}}), (o {{{id_attribute}: ${ep.object_attribute}}})
+            CREATE (s)-[r:{pred} {{{edge_props}}}]->(o)
+            """
+
+    def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
+        cypher_query = self._build_cypher_query(query, limit, offset)
+        ca = self.category_labels_attribute
+        with self.session() as session:
+            result = session.run(cypher_query, query.where_clause)
+            if self.is_edge_collection:
+                rows = [self._edge_to_dict(record) for record in result]
+            else:
+
+                def node_to_dict(n) -> dict:
+                    d = dict(n.items())
+                    if ca:
+                        labels = list(n.labels)
+                        if labels:
+                            d[ca] = labels[0]
+                    return d
+
+                rows = [node_to_dict(record["n"]) for record in result]
+
+        # count_query = self._build_count_query(query, is_count=True)
+        count_query = self._build_cypher_query(query, is_count=True)
+        with self.session() as session:
+            count = session.run(count_query, query.where_clause).single()["count"]
+
+        return QueryResult(query=query, num_rows=count, rows=rows)
+
+    def _build_cypher_query(
+        self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, is_count=False
+    ) -> str:
+        if self.is_edge_collection:
+            ep = self.edge_projection
+            ia = ep.identifier_attribute
+            sa = ep.subject_attribute
+            pa = ep.predicate_attribute
+            oa = ep.object_attribute
+            wc = query.where_clause or {}
+            rq = "r"
+            pred = wc.get(pa, None)
+            if pred:
+                rq = f"r:{pred}"
+            sq = "s"
+            subj = wc.get(sa, None)
+            if subj:
+                sq = f"s {{{ia}: '{subj}'}}"
+            oq = "o"
+            obj = wc.get(oa, None)
+            if obj:
+                oq = f"o {{{ia}: '{obj}'}}"
+            where = {k: v for k, v in wc.items() if k not in [sa, pa, oa]}
+            cypher_query = f"""
+            MATCH ({sq})-[{rq}]->({oq})
+            {self._build_where_clause(where, 'r')}
+            """
+            if is_count:
+                cypher_query += """
+                RETURN count(r) as count
+                """
+            else:
+                cypher_query += f"""
+                RETURN r, type(r) as predicate, s.{ia} as subject, o.{ia} as object
+                """
+        else:
+            node_pattern = self._node_pattern(query.where_clause)
+            cypher_query = f"""
+            MATCH ({node_pattern})
+            {self._build_where_clause(query.where_clause)}
+            """
+            if is_count:
+                cypher_query += """
+                RETURN count(n) as count
+                """
+            else:
+                cypher_query += """
+                RETURN n
+                """
+
+        if not is_count:
+            # note: Cypher requires SKIP to precede LIMIT
+            if offset and offset >= 0:
+                cypher_query += f" SKIP {offset}"
+            if limit and limit >= 0:
+                cypher_query += f" LIMIT {limit}"
+
+        return cypher_query
+
+    def _build_where_clause(self, where_clause: Dict[str, Any], prefix: str = "n") -> str:
+        conditions = []
+        if where_clause is None:
+            return ""
+        for key, value in where_clause.items():
+            if key == self.category_labels_attribute:
+                continue
+            if isinstance(value, str):
+                conditions.append(f"{prefix}.{key} = '{value}'")
+            else:
+                conditions.append(f"{prefix}.{key} = {value}")
+
+        return "WHERE " + " AND ".join(conditions) if conditions else ""
+
+    def _edge_to_dict(self, record: Dict) -> Dict[str, Any]:
+        r = record["r"]
+        ep = self.edge_projection
+        return {
+            ep.subject_attribute: record["subject"],
+            ep.predicate_attribute: record["predicate"],
+            ep.object_attribute: record["object"],
+            **dict(r.items()),
+        }
+
+    def query_facets(
+        self,
+        where: Dict = None,
+        facet_columns: List[Union[str, Tuple[str, ...]]] = None,
+        facet_limit=DEFAULT_FACET_LIMIT,
+        **kwargs,
+    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
+        results = {}
+        if not facet_columns:
+            facet_columns = list(self.class_definition().attributes.keys())
+
+        category_labels_attribute = self.category_labels_attribute
+        with self.session() as session:
+            for col in facet_columns:
+                where_clause = self._build_where_clause(where) if where else ""
+                if col == category_labels_attribute:
+                    # Handle faceting on labels
+                    query = f"""
+                    MATCH (n)
+                    {where_clause}
+                    WITH labels(n) AS nodeLabels, count(*) as count
+                    UNWIND nodeLabels AS label
+                    WITH label, count
+                    ORDER BY count DESC, label
+                    LIMIT {facet_limit}
+                    RETURN label as value, count
+                    """
+                else:
+                    query = f"""
+                    MATCH (n)
+                    {where_clause}
+                    WITH n.{col} as value, count(*) as count
+                    WITH value, count
+                    ORDER BY count DESC
+                    LIMIT {facet_limit}
+                    RETURN value, count
+                    """
+                result = session.run(query)
+                results[col] = [(record["value"], record["count"]) for record in result]
+
+        return results
+
+    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
+        delete_policy = self.delete_policy
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        deleted_nodes = 0
+        deleted_relationships = 0
+        identifier_attribute = self.identifier_attribute
+
+        with self.session() as session:
+            for obj in objs:
+                node_pattern = self._node_pattern(obj)
+                id_value = obj[identifier_attribute]
+                where_clause = f"{{{identifier_attribute}: $id}}"
+                dn, dr = self._execute_delete(session, node_pattern, where_clause, delete_policy, id=id_value)
+                deleted_nodes += dn
+                deleted_relationships += dr
+
+        return deleted_nodes
+
+    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
+        delete_policy = self.delete_policy
+        where_clause = self._build_where_clause(where) if where else ""
+        node_pattern = self._node_pattern(where)
+
+        with self.session() as session:
+            deleted_nodes, deleted_relationships = self._execute_delete(
+                session, node_pattern, where_clause, delete_policy
+            )
+
+        if deleted_nodes == 0 and not missing_ok:
+            raise ValueError(f"No nodes found for {where}")
+
+        return deleted_nodes
+
+    def _execute_delete(
+        self, session, node_pattern: str, where_clause: str, delete_policy: DeletePolicy, **params
+    ) -> Tuple[int, int]:
+        deleted_relationships = 0
+        deleted_nodes = 0
+
+        if delete_policy == DeletePolicy.ERROR:
+            check_query = f"MATCH ({node_pattern} {where_clause})-[r]-() RETURN count(r) as rel_count"
+            result = session.run(check_query, **params)
+            if result.single()["rel_count"] > 0:
+                raise ValueError("Nodes with existing relationships found and cannot be deleted.")
+
+        if delete_policy == DeletePolicy.CASCADE:
+            rel_query = f"MATCH ({node_pattern} {where_clause})-[r]-() DELETE r"
+            result = session.run(rel_query, **params)
+            deleted_relationships = result.consume().counters.relationships_deleted
+
+        if delete_policy in [DeletePolicy.CASCADE, DeletePolicy.ERROR]:
+            node_query = f"MATCH ({node_pattern} {where_clause}) DELETE n"
+            result = session.run(node_query, **params)
+            deleted_nodes = result.consume().counters.nodes_deleted
+        elif delete_policy == DeletePolicy.STUB:
+            stub_query = f"MATCH ({node_pattern} {where_clause}) SET n.deleted = true RETURN count(n) as stub_count"
+            result = session.run(stub_query, **params)
+            deleted_nodes = result.single()["stub_count"]
+
+        return deleted_nodes, deleted_relationships
+
+    def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        updated_count = 0
+        with self.session() as session:
+            for obj in objs:
+                query = self._create_update_cypher_query(obj)
+                result = session.run(query, **obj)
+                updated_count += result.consume().counters.properties_set
+
+        return updated_count
+
+    def _create_update_cypher_query(self, obj: OBJECT) -> str:
+        id_attribute = self.identifier_attribute
+        category_labels_attribute = self.category_labels_attribute
+
+        # Prepare SET clause
+        set_items = [f"n.{k} = ${k}" for k in obj.keys() if k not in [id_attribute, category_labels_attribute]]
+        set_clause = ", ".join(set_items)
+
+        # Prepare labels update (removing stale labels is not implemented yet)
+        labels_to_add = []
+        if category_labels_attribute in obj:
+            new_labels = (
+                obj[category_labels_attribute]
+                if isinstance(obj[category_labels_attribute], list)
+                else [obj[category_labels_attribute]]
+            )
+            labels_to_add = [f":{label}" for label in new_labels]
+
+        # Construct the query
+        query = f"MATCH (n {{{id_attribute}: ${id_attribute}}})\n"
+        if labels_to_add:
+            query += f"SET n{' '.join(labels_to_add)}\n"
+        if set_clause:
+            query += f"SET {set_clause}\n"
+        query += "RETURN n"
+        logger.debug(query)
+        return query
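
A sketch of the round trip this adapter supports, nodes first and then edges (assuming the database attached in the client.py example above; get_collection and find are the generic collection APIs, and the default NodeProjection maps the category field to Neo4j labels):

nodes = db.get_collection("Person")
nodes.insert([
    {"id": "P:1", "name": "Alice", "category": "Person"},
    {"id": "P:2", "name": "Bob", "category": "Person"},
])

edges = db.get_collection("knows")
edges.set_is_edge_collection()
edges.insert({"subject": "P:1", "predicate": "KNOWS", "object": "P:2"})

result = edges.find({"subject": "P:1"})
print(result.rows)  # [{'subject': 'P:1', 'predicate': 'KNOWS', 'object': 'P:2'}]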

src/linkml_store/api/stores/neo4j/neo4j_database.py (new file)
@@ -0,0 +1,154 @@
+# neo4j_database.py
+
+import logging
+from pathlib import Path
+from typing import Optional, Union
+
+from neo4j import Driver, GraphDatabase, Session
+
+from linkml_store.api import Database
+from linkml_store.api.queries import Query, QueryResult
+from linkml_store.api.stores.neo4j.neo4j_collection import Neo4jCollection
+from linkml_store.utils.format_utils import Format
+
+logger = logging.getLogger(__name__)
+
+
+class Neo4jDatabase(Database):
+    """
+    An adapter for Neo4j databases.
+    """
+
+    _driver: Driver = None
+    collection_class = Neo4jCollection
+
+    def __init__(self, handle: Optional[str] = None, **kwargs):
+        # Note: in the Community Edition the database must be named "neo4j"
+        if handle is None:
+            handle = "bolt://localhost:7687/neo4j"
+        if handle.startswith("neo4j:"):
+            handle = handle.replace("neo4j:", "bolt:")
+        super().__init__(handle=handle, **kwargs)
+
+    @property
+    def _db_name(self) -> str:
+        if self.handle:
+            db = self.handle.split("/")[-1]
+        else:
+            db = "default"
+        return db
+
+    @property
+    def driver(self) -> Driver:
+        if self._driver is None:
+            uri, user, password = self._parse_handle()
+            self._driver = GraphDatabase.driver(uri, auth=(user, password))
+        return self._driver
+
+    def session(self) -> Session:
+        return self.driver.session(database=self._db_name)
+
+    def _parse_handle(self):
+        parts = self.handle.split("://")
+        protocol = parts[0]
+        rest = parts[1]
+
+        if "@" in rest:
+            auth, host = rest.split("@")
+            user, password = auth.split(":")
+        else:
+            host = rest
+            user, password = "neo4j", "password"  # Default credentials
+
+        uri = f"{protocol}://{host}"
+        return uri, user, password
+
+    def commit(self, **kwargs):
+        # Neo4j uses auto-commit by default for each transaction
+        pass
+
+    def close(self, **kwargs):
+        if self._driver:
+            self._driver.close()
+
+    def drop(self, **kwargs):
+        with self.driver.session() as session:
+            session.run("MATCH (n) DETACH DELETE n")
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        if query.from_table:
+            collection = self.get_collection(query.from_table)
+            return collection.query(query, **kwargs)
+        else:
+            raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")
+
+    def init_collections(self):
+        if self._collections is None:
+            self._collections = {}
+
+        # In Neo4j, we don't have a direct equivalent to collections
+        # We'll use node labels as a proxy for collections
+        with self.driver.session() as session:
+            result = session.run("CALL db.labels()")
+            labels = [record["label"] for record in result]
+
+        for label in labels:
+            if label not in self._collections:
+                collection = Neo4jCollection(name=label, parent=self)
+                self._collections[label] = collection
+
+    def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
+        # Neo4j doesn't have a built-in export function, so we'll implement a basic JSON export
+        if target_format == Format.JSON or target_format == "json":
+            path = Path(location)
+            with self.driver.session() as session:
+                result = session.run("MATCH (n) RETURN n")
+                # include labels so that import_database can restore them
+                nodes = [{**dict(record["n"].items()), "labels": list(record["n"].labels)} for record in result]
+
+                result = session.run("MATCH ()-[r]->() RETURN r")
+                relationships = [
+                    {
+                        "type": record["r"].type,
+                        "start": record["r"].start_node.id,
+                        "end": record["r"].end_node.id,
+                        **dict(record["r"].items()),
+                    }
+                    for record in result
+                ]
+
+            data = {"nodes": nodes, "relationships": relationships}
+
+            import json
+
+            with open(path, "w") as f:
+                json.dump(data, f)
+        else:
+            super().export_database(location, target_format=target_format, **kwargs)
+
+    def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
+        if source_format == Format.JSON or source_format == "json":
+            path = Path(location)
+            with open(path, "r") as f:
+                import json
+
+                data = json.load(f)
+
+            with self.driver.session() as session:
+                for node in data["nodes"]:
+                    labels = node.pop("labels", ["Node"])
+                    props = ", ".join([f"{k}: ${k}" for k in node.keys()])
+                    query = f"CREATE (n:{':'.join(labels)} {{{props}}})"
+                    session.run(query, **node)
+
+                for rel in data["relationships"]:
+                    rel_type = rel.pop("type")
+                    start = rel.pop("start")
+                    end = rel.pop("end")
+                    props = ", ".join([f"{k}: ${k}" for k in rel.keys()])
+                    query = (
+                        f"MATCH (a), (b) WHERE id(a) = {start} AND id(b) = {end} "
+                        f"CREATE (a)-[r:{rel_type} {{{props}}}]->(b)"
+                    )
+                    session.run(query, **rel)
+        else:
+            super().import_database(location, source_format=source_format, **kwargs)
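
A sketch of how handles are parsed, following _parse_handle and _db_name exactly as written (credentials come from an optional user:password@ segment; the database name is the trailing path segment of the handle):

db = Neo4jDatabase(handle="bolt://neo4j:s3cret@localhost:7687/neo4j")
uri, user, password = db._parse_handle()
# uri == "bolt://localhost:7687/neo4j", user == "neo4j", password == "s3cret"
# db._db_name == "neo4j"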

src/linkml_store/graphs/graph_map.py (new file)
@@ -0,0 +1,24 @@
+from abc import ABC
+from typing import Optional
+
+from pydantic import BaseModel
+
+DEFAULT_IDENTIFIER_ATTRIBUTE = "id"
+DEFAULT_CATEGORY_LABELS_ATTRIBUTE = "category"
+DEFAULT_SUBJECT_ATTRIBUTE = "subject"
+DEFAULT_PREDICATE_ATTRIBUTE = "predicate"
+DEFAULT_OBJECT_ATTRIBUTE = "object"
+
+
+class GraphProjection(BaseModel, ABC):
+    identifier_attribute: str = DEFAULT_IDENTIFIER_ATTRIBUTE
+
+
+class NodeProjection(GraphProjection):
+    category_labels_attribute: Optional[str] = DEFAULT_CATEGORY_LABELS_ATTRIBUTE
+
+
+class EdgeProjection(GraphProjection):
+    subject_attribute: str = DEFAULT_SUBJECT_ATTRIBUTE
+    predicate_attribute: str = DEFAULT_PREDICATE_ATTRIBUTE
+    object_attribute: str = DEFAULT_OBJECT_ATTRIBUTE
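
These models only name which object fields play graph roles; the defaults line up with KGX-style node and edge records. A sketch of overriding them (the attribute names here are illustrative):

from linkml_store.graphs.graph_map import EdgeProjection, NodeProjection

node_proj = NodeProjection(identifier_attribute="curie", category_labels_attribute="categories")
edge_proj = EdgeProjection(subject_attribute="source_id", object_attribute="target_id")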

src/linkml_store/utils/__init__.py (new, empty)
File without changes

src/linkml_store/utils/neo4j_utils.py (new file)
@@ -0,0 +1,42 @@
+import networkx as nx
+from py2neo import Graph
+
+
+def draw_neo4j_graph(handle="bolt://localhost:7687", auth=("neo4j", None)):
+    # Connect to Neo4j
+    graph = Graph(handle, auth=auth)
+
+    # Run a Cypher query
+    query = """
+    MATCH (n)-[r]->(m)
+    RETURN n, r, m
+    LIMIT 100
+    """
+    result = graph.run(query)
+
+    # Create a NetworkX graph
+    G = nx.DiGraph()  # Use DiGraph for directed edges
+    for record in result:
+        n = record["n"]
+        m = record["m"]
+        r = record["r"]
+        G.add_node(n["name"], label=list(n.labels or ["-"])[0])
+        G.add_node(m["name"], label=list(m.labels or ["-"])[0])
+        G.add_edge(n["name"], m["name"], type=type(r).__name__)
+
+    # Draw the graph
+    pos = nx.spring_layout(G)
+
+    # Draw nodes
+    nx.draw_networkx_nodes(G, pos, node_color="lightblue", node_size=10000)
+
+    # Draw edges
+    nx.draw_networkx_edges(G, pos, edge_color="gray", arrows=True)
+
+    # Add node labels
+    node_labels = nx.get_node_attributes(G, "label")
+    nx.draw_networkx_labels(G, pos, {node: f"{node}\n({label})" for node, label in node_labels.items()}, font_size=16)
+
+    # Add edge labels
+    edge_labels = nx.get_edge_attributes(G, "type")
+    nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=16)
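
draw_neo4j_graph draws onto the current matplotlib axes but does not render or save the figure; a sketch of calling it (assuming matplotlib is installed and a populated local graph):

import matplotlib.pyplot as plt
from linkml_store.utils.neo4j_utils import draw_neo4j_graph

draw_neo4j_graph(handle="bolt://localhost:7687", auth=("neo4j", "password"))
plt.show()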

src/linkml_store/webapi/__init__.py (new, empty)
File without changes
File without changes