linkml-store 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/client.py +2 -0
- linkml_store/api/collection.py +58 -9
- linkml_store/api/config.py +12 -1
- linkml_store/api/database.py +34 -3
- linkml_store/api/stores/duckdb/duckdb_database.py +31 -3
- linkml_store/api/stores/mongodb/mongodb_database.py +31 -1
- linkml_store/api/stores/neo4j/__init__.py +0 -0
- linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
- linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
- linkml_store/cli.py +29 -2
- linkml_store/graphs/__init__.py +0 -0
- linkml_store/graphs/graph_map.py +24 -0
- linkml_store/utils/format_utils.py +132 -14
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/neo4j_utils.py +42 -0
- linkml_store/utils/sql_utils.py +7 -2
- linkml_store/webapi/html/generic.html.j2 +25 -28
- linkml_store/webapi/main.py +346 -63
- {linkml_store-0.1.11.dist-info → linkml_store-0.1.13.dist-info}/METADATA +6 -2
- {linkml_store-0.1.11.dist-info → linkml_store-0.1.13.dist-info}/RECORD +23 -16
- {linkml_store-0.1.11.dist-info → linkml_store-0.1.13.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.11.dist-info → linkml_store-0.1.13.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.11.dist-info → linkml_store-0.1.13.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
from neo4j import Driver, Session
|
|
6
|
+
|
|
7
|
+
from linkml_store.api import Collection
|
|
8
|
+
from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
|
|
9
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
10
|
+
from linkml_store.graphs.graph_map import EdgeProjection, GraphProjection, NodeProjection
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DeletePolicy(Enum):
    """Policy governing how node deletion interacts with attached relationships.

    - CASCADE: delete the node's relationships first, then the node.
    - ERROR: raise if the node still has any relationships.
    - STUB: do not delete; instead mark the node with ``deleted = true``.
      (Also consulted on edge insert: missing endpoint nodes are created
      as stub nodes under this policy.)
    """

    CASCADE = "cascade"
    ERROR = "error"
    STUB = "stub"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Neo4jCollection(Collection):
    """
    Adapter for collections in a Neo4j database.

    A collection is mapped onto the graph either as a set of nodes
    (:class:`NodeProjection`) or as a set of relationships
    (:class:`EdgeProjection`); the active projection is stored on
    ``self.metadata.graph_projection``.
    """

    # _graph_projection: Optional[GraphProjection] = None
    delete_policy: DeletePolicy = DeletePolicy.CASCADE

    @property
    def driver(self) -> Driver:
        """Neo4j driver, delegated to the parent database."""
        return self.parent.driver

    def session(self) -> Session:
        """Open a new session via the parent database."""
        return self.parent.session()

    def _check_if_initialized(self) -> bool:
        """Return True if the graph contains at least one node."""
        with self.session() as session:
            result = session.run("MATCH (n) RETURN count(n) > 0 as exists")
            return result.single()["exists"]

    @property
    def graph_projection(self) -> GraphProjection:
        """The configured projection (node or edge) for this collection."""
        return self.metadata.graph_projection

    @property
    def node_projection(self) -> Optional[NodeProjection]:
        """The projection, if this collection is projected as nodes, else None."""
        return self.metadata.graph_projection if isinstance(self.graph_projection, NodeProjection) else None

    @property
    def edge_projection(self) -> Optional[EdgeProjection]:
        """The projection, if this collection is projected as edges, else None."""
        return self.metadata.graph_projection if isinstance(self.graph_projection, EdgeProjection) else None

    @property
    def is_edge_collection(self) -> bool:
        """True if objects in this collection are stored as relationships."""
        return isinstance(self.graph_projection, EdgeProjection)

    @property
    def is_node_collection(self) -> bool:
        """True if objects in this collection are stored as nodes."""
        return not self.is_edge_collection

    @property
    def category_labels_attribute(self) -> str:
        """Attribute whose values are mapped to node labels (default: "category")."""
        np = self.node_projection
        category_labels_attribute = None
        if np:
            category_labels_attribute = np.category_labels_attribute
        if not category_labels_attribute:
            category_labels_attribute = "category"
        return category_labels_attribute

    @property
    def identifier_attribute(self) -> str:
        """Attribute used as the node/edge identifier (default: "id")."""
        gp = self.graph_projection
        id_attribute = None
        if gp:
            id_attribute = gp.identifier_attribute
        if not id_attribute:
            id_attribute = "id"
        return id_attribute

    def _node_pattern(self, obj: Optional[OBJECT] = None, node_var="n") -> str:
        """
        Build a Cypher node pattern (e.g. ``n:Person:Agent``) from an object's
        category attribute; categories become node labels.
        """
        obj = {} if obj is None else obj
        category_labels_attribute = self.category_labels_attribute
        categories = obj.get(category_labels_attribute or "category", [])
        if not isinstance(categories, list):
            categories = [categories]
        cstr = (":" + ":".join(categories)) if categories else ""
        return f"{node_var}{cstr}"

    def set_is_edge_collection(self, force=False):
        """Declare this an edge collection; a projection can only be reassigned with force=True."""
        if self.is_edge_collection:
            return
        if self.graph_projection and not force:
            raise ValueError("Cannot reassign without force=True")
        self.metadata.graph_projection = EdgeProjection()

    def set_is_node_collection(self, force=False):
        """Declare this a node collection; a projection can only be reassigned with force=True."""
        if self.is_node_collection:
            return
        if self.graph_projection and not force:
            raise ValueError("Cannot reassign without force=True")
        self.metadata.graph_projection = NodeProjection()

    def _prop_clause(self, obj: OBJECT, exclude_attributes: List[str] = None, node_var: Optional[str] = None) -> str:
        """
        Build a Cypher property-map fragment (``k: $k, ...``) from *obj*'s keys,
        skipping *exclude_attributes*. Values are passed as query parameters.

        NOTE: the original module defined this method twice with different
        defaults; the later definition (exclude nothing by default) shadowed
        the earlier one, so that behavior is kept and the duplicate removed.
        """
        if exclude_attributes is None:
            exclude_attributes = []
        node_prefix = f"{node_var}." if node_var else ""
        terms = [f"{node_prefix}{k}: ${k}" for k in obj.keys() if k not in exclude_attributes]
        return ", ".join(terms)

    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
        """Insert one or more objects as nodes or relationships, one CREATE per object."""
        if not isinstance(objs, list):
            objs = [objs]
        self._pre_insert_hook(objs)

        with self.session() as session:
            for obj in objs:
                query = self._create_insert_cypher_query(obj)
                session.run(query, **obj)

        self._post_insert_hook(objs)

    def _create_insert_cypher_query(self, obj: OBJECT) -> str:
        """
        Build a CREATE statement for *obj*.

        For edge collections this also verifies the endpoint nodes exist as a
        side effect and, under the STUB policy, creates missing endpoints.
        """
        id_attribute = self.identifier_attribute
        if not self.is_edge_collection:
            logger.debug(f"Inserting node: {obj}")
            category_labels_attribute = self.category_labels_attribute
            node_pattern = self._node_pattern(obj)
            props = self._prop_clause(obj, exclude_attributes=[id_attribute, category_labels_attribute])
            return f"CREATE ({node_pattern} {{{id_attribute}: ${id_attribute}, {props}}})"
        else:
            logger.debug(f"Inserting edge: {obj}")
            ep = self.edge_projection
            if ep.predicate_attribute not in obj:
                raise ValueError(f"Predicate attribute {ep.predicate_attribute} not found in edge {obj}.")
            if ep.subject_attribute not in obj:
                raise ValueError(f"Subject attribute {ep.subject_attribute} not found in edge {obj}.")
            if ep.object_attribute not in obj:
                raise ValueError(f"Object attribute {ep.object_attribute} not found in edge {obj}.")
            pred = obj[ep.predicate_attribute]
            # check if nodes present; if not, make dangling stubs
            # TODO: decide on how this should be handled in validation if some fields are required
            for node_id in [obj[ep.subject_attribute], obj[ep.object_attribute]]:
                check_query = (
                    f"MATCH (n {{{ep.identifier_attribute}: ${ep.identifier_attribute}}}) RETURN count(n) as count"
                )
                with self.session() as session:
                    result = session.run(check_query, **{ep.identifier_attribute: node_id})
                    if result.single()["count"] == 0:
                        if self.delete_policy == DeletePolicy.STUB:
                            stub_query = f"CREATE (n {{{ep.identifier_attribute}: ${ep.identifier_attribute}}})"
                            session.run(stub_query, **{ep.identifier_attribute: node_id})
                        else:
                            raise ValueError(f"Node with identifier {node_id} not found in the database.")
            edge_props = self._prop_clause(
                obj, exclude_attributes=[ep.subject_attribute, ep.predicate_attribute, ep.object_attribute]
            )
            return f"""
            MATCH (s {{{id_attribute}: ${ep.subject_attribute}}}), (o {{{id_attribute}: ${ep.object_attribute}}})
            CREATE (s)-[r:{pred} {{{edge_props}}}]->(o)
            """

    def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
        """Run *query* against the graph; returns matching rows plus a total count."""
        cypher_query = self._build_cypher_query(query, limit, offset)
        ca = self.category_labels_attribute
        with self.session() as session:
            result = session.run(cypher_query, query.where_clause)
            if self.is_edge_collection:
                rows = [self._edge_to_dict(record) for record in result]
            else:

                def node_to_dict(n) -> dict:
                    # fold the first node label back into the category attribute
                    d = dict(n.items())
                    if ca:
                        labels = list(n.labels)
                        if labels:
                            d[ca] = labels[0]
                    return d

                rows = [node_to_dict(record["n"]) for record in result]

        # count_query = self._build_count_query(query, is_count=True)
        count_query = self._build_cypher_query(query, is_count=True)
        with self.session() as session:
            count = session.run(count_query, query.where_clause).single()["count"]

        return QueryResult(query=query, num_rows=count, rows=rows)

    def _build_cypher_query(
        self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, is_count=False
    ) -> str:
        """
        Translate a :class:`Query` into Cypher.

        For edge collections, subject/predicate/object constraints are folded
        into the MATCH pattern; remaining constraints become a WHERE clause.
        """
        if self.is_edge_collection:
            ep = self.edge_projection
            ia = ep.identifier_attribute
            sa = ep.subject_attribute
            pa = ep.predicate_attribute
            oa = ep.object_attribute
            wc = query.where_clause or {}
            rq = "r"
            pred = wc.get(pa, None)
            if pred:
                rq = f"r:{pred}"
            sq = "s"
            subj = wc.get(sa, None)
            if subj:
                sq = f"s {{{ia}: '{subj}'}}"
            oq = "o"
            obj = wc.get(oa, None)
            if obj:
                oq = f"o {{{ia}: '{obj}'}}"
            where = {k: v for k, v in wc.items() if k not in [sa, pa, oa]}
            cypher_query = f"""
            MATCH ({sq})-[{rq}]->({oq})
            {self._build_where_clause(where, 'r')}
            """
            if is_count:
                cypher_query += """
                RETURN count(r) as count
                """
            else:
                cypher_query += f"""
                RETURN r, type(r) as predicate, s.{ia} as subject, o.{ia} as object
                """
        else:
            node_pattern = self._node_pattern(query.where_clause)
            cypher_query = f"""
            MATCH ({node_pattern})
            {self._build_where_clause(query.where_clause)}
            """
            if is_count:
                cypher_query += """
                RETURN count(n) as count
                """
            else:
                cypher_query += """
                RETURN n
                """

        if not is_count:
            # Cypher requires SKIP to come before LIMIT, so emit offset first.
            if offset and offset >= 0:
                cypher_query += f" SKIP {offset}"
            if limit and limit >= 0:
                cypher_query += f" LIMIT {limit}"

        return cypher_query

    def _build_where_clause(self, where_clause: Dict[str, Any], prefix: str = "n") -> str:
        """
        Build a WHERE clause from an attribute->value map, skipping the
        category attribute (categories are matched via node labels instead).

        NOTE(security): values are interpolated into the query text rather
        than parameterized; single quotes are escaped, but this is still not
        safe for untrusted input — prefer query parameters long term.
        """
        conditions = []
        if where_clause is None:
            return ""
        for key, value in where_clause.items():
            if key == self.category_labels_attribute:
                continue
            if isinstance(value, str):
                escaped = value.replace("'", "\\'")
                conditions.append(f"{prefix}.{key} = '{escaped}'")
            else:
                conditions.append(f"{prefix}.{key} = {value}")

        return "WHERE " + " AND ".join(conditions) if conditions else ""

    def _edge_to_dict(self, record: Dict) -> Dict[str, Any]:
        """Convert an edge query record (r/subject/predicate/object) to a flat dict."""
        r = record["r"]
        ep = self.edge_projection
        return {
            ep.subject_attribute: record["subject"],
            ep.predicate_attribute: record["predicate"],
            ep.object_attribute: record["object"],
            **dict(r.items()),
        }

    def query_facets(
        self,
        where: Dict = None,
        facet_columns: List[Union[str, Tuple[str, ...]]] = None,
        facet_limit=DEFAULT_FACET_LIMIT,
        **kwargs,
    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
        """
        Return value/count facet pairs per column. The category column facets
        over node labels; every other column facets over node properties.
        """
        results = {}
        if not facet_columns:
            facet_columns = list(self.class_definition().attributes.keys())

        category_labels_attribute = self.category_labels_attribute
        with self.session() as session:
            for col in facet_columns:
                where_clause = self._build_where_clause(where) if where else ""
                if col == category_labels_attribute:
                    # Handle faceting on labels
                    query = f"""
                    MATCH (n)
                    {where_clause}
                    WITH labels(n) AS nodeLabels, count(*) as count
                    UNWIND nodeLabels AS label
                    WITH label, count
                    ORDER BY count DESC, label
                    LIMIT {facet_limit}
                    RETURN label as value, count
                    """
                else:
                    query = f"""
                    MATCH (n)
                    {where_clause}
                    WITH n.{col} as value, count(*) as count
                    WITH value, count
                    ORDER BY count DESC
                    LIMIT {facet_limit}
                    RETURN value, count
                    """
                result = session.run(query)
                results[col] = [(record["value"], record["count"]) for record in result]

        return results

    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
        """
        Delete the given objects by identifier, honoring :attr:`delete_policy`.

        :return: number of nodes deleted (relationship deletions are not reported).
        """
        delete_policy = self.delete_policy
        if not isinstance(objs, list):
            objs = [objs]

        deleted_nodes = 0
        deleted_relationships = 0
        identifier_attribute = self.identifier_attribute

        with self.session() as session:
            for obj in objs:
                node_pattern = self._node_pattern(obj)
                id_value = obj[identifier_attribute]
                where_clause = f"{{{identifier_attribute}: $id}}"
                dn, dr = self._execute_delete(session, node_pattern, where_clause, delete_policy, id=id_value)
                deleted_nodes += dn
                deleted_relationships += dr

        return deleted_nodes

    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
        """
        Delete all objects matching *where*; raise if nothing matched and
        ``missing_ok`` is False.
        """
        delete_policy = self.delete_policy
        where_clause = self._build_where_clause(where) if where else ""
        node_pattern = self._node_pattern(where)

        with self.session() as session:
            deleted_nodes, deleted_relationships = self._execute_delete(
                session, node_pattern, where_clause, delete_policy
            )

        if deleted_nodes == 0 and not missing_ok:
            raise ValueError(f"No nodes found for {where}")

        return deleted_nodes

    def _execute_delete(
        self, session, node_pattern: str, where_clause: str, delete_policy: DeletePolicy, **params
    ) -> Tuple[int, int]:
        """
        Apply *delete_policy* to nodes matching pattern+where.

        :return: (nodes deleted or stubbed, relationships deleted)
        """
        deleted_relationships = 0
        deleted_nodes = 0

        if delete_policy == DeletePolicy.ERROR:
            check_query = f"MATCH ({node_pattern} {where_clause})-[r]-() RETURN count(r) as rel_count"
            result = session.run(check_query, **params)
            if result.single()["rel_count"] > 0:
                raise ValueError("Nodes with existing relationships found and cannot be deleted.")

        if delete_policy == DeletePolicy.CASCADE:
            rel_query = f"MATCH ({node_pattern} {where_clause})-[r]-() DELETE r"
            result = session.run(rel_query, **params)
            deleted_relationships = result.consume().counters.relationships_deleted

        if delete_policy in [DeletePolicy.CASCADE, DeletePolicy.ERROR]:
            node_query = f"MATCH ({node_pattern} {where_clause}) DELETE n"
            result = session.run(node_query, **params)
            deleted_nodes = result.consume().counters.nodes_deleted
        elif delete_policy == DeletePolicy.STUB:
            stub_query = f"MATCH ({node_pattern} {where_clause}) SET n.deleted = true RETURN count(n) as stub_count"
            result = session.run(stub_query, **params)
            deleted_nodes = result.single()["stub_count"]

        return deleted_nodes, deleted_relationships

    def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
        """
        Update existing objects by identifier.

        :return: total number of properties set across all updates.
        """
        if not isinstance(objs, list):
            objs = [objs]

        updated_count = 0
        with self.session() as session:
            for obj in objs:
                query = self._create_update_cypher_query(obj)
                result = session.run(query, **obj)
                updated_count += result.consume().counters.properties_set

        return updated_count

    def _create_update_cypher_query(self, obj: OBJECT) -> str:
        """
        Build a MATCH ... SET statement for *obj*.

        New category labels are added; pre-existing labels are NOT removed
        (label removal is a known TODO).
        """
        id_attribute = self.identifier_attribute
        category_labels_attribute = self.category_labels_attribute

        # Prepare SET clause
        set_items = [f"n.{k} = ${k}" for k in obj.keys() if k not in [id_attribute, category_labels_attribute]]
        set_clause = ", ".join(set_items)

        # Prepare labels update
        labels_to_add = []
        if category_labels_attribute in obj:
            new_labels = (
                obj[category_labels_attribute]
                if isinstance(obj[category_labels_attribute], list)
                else [obj[category_labels_attribute]]
            )
            labels_to_add = [f":{label}" for label in new_labels]

        # Construct the query
        query = f"MATCH (n {{{id_attribute}: ${id_attribute}}})\n"
        if labels_to_add:
            query += f"SET n{' '.join(labels_to_add)}\n"
        query += f"SET {set_clause}\n"
        query += "RETURN n"
        logger.debug(query)  # was a stray print() left over from debugging
        return query
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# neo4j_database.py
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
|
|
7
|
+
from neo4j import Driver, GraphDatabase, Session
|
|
8
|
+
|
|
9
|
+
from linkml_store.api import Database
|
|
10
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
11
|
+
from linkml_store.api.stores.neo4j.neo4j_collection import Neo4jCollection
|
|
12
|
+
from linkml_store.utils.format_utils import Format
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Neo4jDatabase(Database):
    """
    An adapter for Neo4j databases.

    The handle is a bolt URI of the form
    ``bolt://[user:password@]host:port/dbname``.
    """

    _driver: Driver = None
    collection_class = Neo4jCollection

    def __init__(self, handle: Optional[str] = None, **kwargs):
        """
        :param handle: connection URI; defaults to ``bolt://localhost:7687/neo4j``.
        """
        # Note: in the community edition the database must be "neo4j"
        if handle is None:
            handle = "bolt://localhost:7687/neo4j"
        if handle.startswith("neo4j:"):
            handle = handle.replace("neo4j:", "bolt:")
        super().__init__(handle=handle, **kwargs)

    @property
    def _db_name(self) -> str:
        """Database name: the last path segment of the handle.

        NOTE(review): if the handle has no path (e.g. ``bolt://host:7687``)
        this yields ``host:7687`` — confirm handles always carry a db path.
        """
        if self.handle:
            db = self.handle.split("/")[-1]
        else:
            db = "default"
        return db

    @property
    def driver(self) -> Driver:
        """Lazily-created Neo4j driver for this database's handle."""
        if self._driver is None:
            uri, user, password = self._parse_handle()
            self._driver = GraphDatabase.driver(uri, auth=(user, password))
        return self._driver

    def session(self) -> Session:
        """Open a session bound to this database's name."""
        return self.driver.session(database=self._db_name)

    def _parse_handle(self):
        """Split the handle into (uri, user, password); falls back to default credentials."""
        parts = self.handle.split("://")
        protocol = parts[0]
        rest = parts[1]

        if "@" in rest:
            auth, host = rest.split("@")
            user, password = auth.split(":")
        else:
            host = rest
            user, password = "neo4j", "password"  # Default credentials

        uri = f"{protocol}://{host}"
        return uri, user, password

    def commit(self, **kwargs):
        # Neo4j uses auto-commit by default for each transaction
        pass

    def close(self, **kwargs):
        """Close the underlying driver, if one was created."""
        if self._driver:
            self._driver.close()

    def drop(self, **kwargs):
        """Remove all nodes and relationships.

        NOTE(review): uses the driver's default session, not :meth:`session`
        — confirm whether the named database should be targeted instead.
        """
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")

    def query(self, query: Query, **kwargs) -> QueryResult:
        """Dispatch a query to the collection named by ``query.from_table``."""
        if query.from_table:
            collection = self.get_collection(query.from_table)
            return collection.query(query, **kwargs)
        else:
            raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")

    def init_collections(self):
        """Populate the collection registry, one collection per node label."""
        if self._collections is None:
            self._collections = {}

        # In Neo4j, we don't have a direct equivalent to collections
        # We'll use node labels as a proxy for collections
        with self.driver.session() as session:
            result = session.run("CALL db.labels()")
            labels = [record["label"] for record in result]

        for label in labels:
            if label not in self._collections:
                collection = Neo4jCollection(name=label, parent=self)
                self._collections[label] = collection

    def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
        """
        Export the graph. JSON is handled natively (nodes + relationships);
        all other formats defer to the generic implementation.
        """
        # Neo4j doesn't have a built-in export function, so we'll implement a basic JSON export
        if target_format == Format.JSON or target_format == "json":
            import json

            path = Path(location)
            with self.driver.session() as session:
                result = session.run("MATCH (n) RETURN n")
                # include labels so that import_database can restore them
                nodes = [{"labels": list(record["n"].labels), **dict(record["n"].items())} for record in result]

                result = session.run("MATCH ()-[r]->() RETURN r")
                relationships = [
                    {
                        "type": record["r"].type,
                        "start": record["r"].start_node.id,
                        "end": record["r"].end_node.id,
                        **dict(record["r"].items()),
                    }
                    for record in result
                ]

            data = {"nodes": nodes, "relationships": relationships}

            with open(path, "w") as f:
                json.dump(data, f)
        else:
            super().export_database(location, target_format=target_format, **kwargs)

    def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
        """
        Import a graph previously exported by :meth:`export_database` (JSON);
        other formats defer to the generic implementation.
        """
        if source_format == Format.JSON or source_format == "json":
            import json

            path = Path(location)
            with open(path, "r") as f:
                data = json.load(f)

            with self.driver.session() as session:
                for node in data["nodes"]:
                    labels = node.pop("labels", ["Node"])
                    props = ", ".join([f"{k}: ${k}" for k in node.keys()])
                    query = f"CREATE (n:{':'.join(labels)} {{{props}}})"
                    session.run(query, **node)

                for rel in data["relationships"]:
                    # the relationship type and endpoints are structural, not properties
                    rel_type = rel.pop("type")
                    start = rel.pop("start")
                    end = rel.pop("end")
                    props = ", ".join([f"{k}: ${k}" for k in rel.keys()])
                    # BUGFIX: the CREATE fragment was a plain string (not an
                    # f-string) and rel_type/props were never computed, so the
                    # literal text "{rel_type}" was sent to Neo4j.
                    query = (
                        f"MATCH (a), (b) WHERE id(a) = {start} AND id(b) = {end} "
                        f"CREATE (a)-[r:{rel_type} {{{props}}}]->(b)"
                    )
                    session.run(query, **rel)
        else:
            super().import_database(location, source_format=source_format, **kwargs)
|
linkml_store/cli.py
CHANGED
|
@@ -228,7 +228,11 @@ def store(ctx, files, object, format):
|
|
|
228
228
|
@click.pass_context
|
|
229
229
|
@click.argument("files", type=click.Path(exists=True), nargs=-1)
|
|
230
230
|
def import_database(ctx, files, format):
|
|
231
|
-
"""Imports a database from a dump.
|
|
231
|
+
"""Imports a database from a dump.
|
|
232
|
+
|
|
233
|
+
See the `export` command for a full list of supported formats. The same
|
|
234
|
+
formats are generally supported for imports.
|
|
235
|
+
"""
|
|
232
236
|
settings = ctx.obj["settings"]
|
|
233
237
|
db = settings.database
|
|
234
238
|
if not files and not object:
|
|
@@ -242,7 +246,30 @@ def import_database(ctx, files, format):
|
|
|
242
246
|
@click.option("--output", "-o", required=True, type=click.Path(), help="Output file path")
|
|
243
247
|
@click.pass_context
|
|
244
248
|
def export(ctx, output_type, output):
|
|
245
|
-
"""Exports a database to a dump.
|
|
249
|
+
"""Exports a database to a standard dump format.
|
|
250
|
+
|
|
251
|
+
Example:
|
|
252
|
+
|
|
253
|
+
linkml-store -d duckdb:///countries.db export -O yaml -o countries.yaml
|
|
254
|
+
|
|
255
|
+
Export format will be guessed from extension if not specified
|
|
256
|
+
|
|
257
|
+
Example:
|
|
258
|
+
|
|
259
|
+
linkml-store -d duckdb:///countries.db export -o countries.json
|
|
260
|
+
|
|
261
|
+
Tree formats such as YAML and JSON can natively store an entire database; each collection
|
|
262
|
+
will be a distinct key in the database.
|
|
263
|
+
|
|
264
|
+
Additionally, native dump formats can be used:
|
|
265
|
+
|
|
266
|
+
Example:
|
|
267
|
+
|
|
268
|
+
linkml-store -d duckdb:///countries.db export -o countries -O duckdb
|
|
269
|
+
|
|
270
|
+
Here, `countries` is a directory. This is equivalent to running EXPORT DATABASE
|
|
271
|
+
(see https://duckdb.org/docs/sql/statements/export.html)
|
|
272
|
+
"""
|
|
246
273
|
settings = ctx.obj["settings"]
|
|
247
274
|
db = settings.database
|
|
248
275
|
if output_type is None:
|
|
File without changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
# Default attribute names used when a GraphProjection does not override them.
DEFAULT_IDENTIFIER_ATTRIBUTE = "id"
DEFAULT_CATEGORY_LABELS_ATTRIBUTE = "category"
DEFAULT_SUBJECT_ATTRIBUTE = "subject"
DEFAULT_PREDICATE_ATTRIBUTE = "predicate"
DEFAULT_OBJECT_ATTRIBUTE = "object"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GraphProjection(BaseModel, ABC):
    """Abstract base for mapping collection objects onto a property graph."""

    # attribute that holds the unique identifier of a node or edge
    identifier_attribute: str = DEFAULT_IDENTIFIER_ATTRIBUTE
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class NodeProjection(GraphProjection):
    """Projection of a collection's objects onto graph nodes."""

    # attribute whose value(s) are mapped to node labels
    category_labels_attribute: Optional[str] = DEFAULT_CATEGORY_LABELS_ATTRIBUTE
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class EdgeProjection(GraphProjection):
    """Projection of a collection's objects onto graph relationships."""

    # attribute holding the identifier of the source node
    subject_attribute: str = DEFAULT_SUBJECT_ATTRIBUTE
    # attribute holding the relationship type
    predicate_attribute: str = DEFAULT_PREDICATE_ATTRIBUTE
    # attribute holding the identifier of the target node
    object_attribute: str = DEFAULT_OBJECT_ATTRIBUTE
|