linkml_store-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (101)
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
linkml_store/api/stores/neo4j/neo4j_collection.py
@@ -0,0 +1,429 @@
+ import logging
+ from enum import Enum
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ from neo4j import Driver, Session
+
+ from linkml_store.api import Collection
+ from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+ from linkml_store.api.queries import Query, QueryResult
+ from linkml_store.graphs.graph_map import EdgeProjection, GraphProjection, NodeProjection
+
+ logger = logging.getLogger(__name__)
+
+
+ class DeletePolicy(Enum):
+     CASCADE = "cascade"
+     ERROR = "error"
+     STUB = "stub"
+
+
+ class Neo4jCollection(Collection):
+     """
+     Adapter for collections in a Neo4j database.
+     """
+
+     # _graph_projection: Optional[GraphProjection] = None
+     delete_policy: DeletePolicy = DeletePolicy.CASCADE
+
+     @property
+     def driver(self) -> Driver:
+         return self.parent.driver
+
+     def session(self) -> Session:
+         return self.parent.session()
+
+     def _check_if_initialized(self) -> bool:
+         with self.session() as session:
+             result = session.run("MATCH (n) RETURN count(n) > 0 as exists")
+             return result.single()["exists"]
+
+     @property
+     def graph_projection(self) -> GraphProjection:
+         return self.metadata.graph_projection
+
+     @property
+     def node_projection(self) -> Optional[NodeProjection]:
+         return self.metadata.graph_projection if isinstance(self.graph_projection, NodeProjection) else None
+
+     @property
+     def edge_projection(self) -> Optional[EdgeProjection]:
+         return self.metadata.graph_projection if isinstance(self.graph_projection, EdgeProjection) else None
+
+     @property
+     def is_edge_collection(self) -> bool:
+         return isinstance(self.graph_projection, EdgeProjection)
+
+     @property
+     def category_labels_attribute(self) -> str:
+         np = self.node_projection
+         category_labels_attribute = None
+         if np:
+             category_labels_attribute = np.category_labels_attribute
+         if not category_labels_attribute:
+             category_labels_attribute = "category"
+         return category_labels_attribute
+
+     @property
+     def identifier_attribute(self) -> str:
+         gp = self.graph_projection
+         id_attribute = None
+         if gp:
+             id_attribute = gp.identifier_attribute
+         if not id_attribute:
+             id_attribute = "id"
+         return id_attribute
+
+     def _node_pattern(self, obj: Optional[OBJECT] = None, node_var="n") -> str:
+         obj = {} if obj is None else obj
+         category_labels_attribute = self.category_labels_attribute
+         categories = obj.get(category_labels_attribute or "category", [])
+         if not isinstance(categories, list):
+             categories = [categories]
+         cstr = (":" + ":".join(categories)) if categories else ""
+         return f"{node_var}{cstr}"
+
+     @property
+     def is_node_collection(self) -> bool:
+         return not self.is_edge_collection
+
+     def set_is_edge_collection(self, force=False):
+         if self.is_edge_collection:
+             return
+         if self.graph_projection and not force:
+             raise ValueError("Cannot reassign without force=True")
+         self.metadata.graph_projection = EdgeProjection()
+
+     def set_is_node_collection(self, force=False):
+         if self.is_node_collection:
+             return
+         if self.graph_projection and not force:
+             raise ValueError("Cannot reassign without force=True")
+         self.metadata.graph_projection = NodeProjection()
+
+     def _prop_clause(
+         self, obj: OBJECT, node_var: Optional[str] = None, exclude_attributes: Optional[List[str]] = None
+     ) -> str:
+         if exclude_attributes is None:
+             exclude_attributes = [self.category_labels_attribute]
+         node_prefix = node_var + "." if node_var else ""
+         terms = [f"{node_prefix}{k}: ${k}" for k in obj.keys() if k not in exclude_attributes]
+         return ", ".join(terms)
+
+     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+         if not isinstance(objs, list):
+             objs = [objs]
+         self._pre_insert_hook(objs)
+
+         with self.session() as session:
+             for obj in objs:
+                 query = self._create_insert_cypher_query(obj)
+                 session.run(query, **obj)
+
+         self._post_insert_hook(objs)
+
+     def _create_insert_cypher_query(self, obj: OBJECT) -> str:
+         id_attribute = self.identifier_attribute
+         if not self.is_edge_collection:
+             logger.debug(f"Inserting node: {obj}")
+             category_labels_attribute = self.category_labels_attribute
+             node_pattern = self._node_pattern(obj)
+             props = self._prop_clause(obj, exclude_attributes=[id_attribute, category_labels_attribute])
+             return f"CREATE ({node_pattern} {{{id_attribute}: ${id_attribute}, {props}}})"
+         else:
+             logger.debug(f"Inserting edge: {obj}")
+             ep = self.edge_projection
+             if ep.predicate_attribute not in obj:
+                 raise ValueError(f"Predicate attribute {ep.predicate_attribute} not found in edge {obj}.")
+             if ep.subject_attribute not in obj:
+                 raise ValueError(f"Subject attribute {ep.subject_attribute} not found in edge {obj}.")
+             if ep.object_attribute not in obj:
+                 raise ValueError(f"Object attribute {ep.object_attribute} not found in edge {obj}.")
+             pred = obj[ep.predicate_attribute]
+             # check if nodes present; if not, make dangling stubs
+             # TODO: decide on how this should be handled in validation if some fields are required
+             for node_id in [obj[ep.subject_attribute], obj[ep.object_attribute]]:
+                 check_query = (
+                     f"MATCH (n {{{ep.identifier_attribute}: ${ep.identifier_attribute}}}) RETURN count(n) as count"
+                 )
+                 with self.session() as session:
+                     result = session.run(check_query, **{ep.identifier_attribute: node_id})
+                     if result.single()["count"] == 0:
+                         if self.delete_policy == DeletePolicy.STUB:
+                             stub_query = f"CREATE (n {{{ep.identifier_attribute}: ${ep.identifier_attribute}}})"
+                             session.run(stub_query, **{ep.identifier_attribute: node_id})
+                         else:
+                             raise ValueError(f"Node with identifier {node_id} not found in the database.")
+             edge_props = self._prop_clause(
+                 obj, exclude_attributes=[ep.subject_attribute, ep.predicate_attribute, ep.object_attribute]
+             )
+             return f"""
+             MATCH (s {{{id_attribute}: ${ep.subject_attribute}}}), (o {{{id_attribute}: ${ep.object_attribute}}})
+             CREATE (s)-[r:{pred} {{{edge_props}}}]->(o)
+             """
+
+     def _prop_clause(self, obj: OBJECT, exclude_attributes: List[str] = None, node_var: Optional[str] = None) -> str:
+         if exclude_attributes is None:
+             exclude_attributes = []
+         node_prefix = f"{node_var}." if node_var else ""
+         terms = [f"{node_prefix}{k}: ${k}" for k in obj.keys() if k not in exclude_attributes]
+         return ", ".join(terms)
+
+     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
+         cypher_query = self._build_cypher_query(query, limit, offset)
+         ca = self.category_labels_attribute
+         with self.session() as session:
+             result = session.run(cypher_query, query.where_clause)
+             if self.is_edge_collection:
+                 rows = [self._edge_to_dict(record) for record in result]
+             else:
+
+                 def node_to_dict(n) -> dict:
+                     d = dict(n.items())
+                     if ca:
+                         labels = list(n.labels)
+                         if labels:
+                             d[ca] = labels[0]
+                     return d
+
+                 rows = [node_to_dict(record["n"]) for record in result]
+
+         # count_query = self._build_count_query(query, is_count=True)
+         count_query = self._build_cypher_query(query, is_count=True)
+         with self.session() as session:
+             count = session.run(count_query, query.where_clause).single()["count"]
+
+         return QueryResult(query=query, num_rows=count, rows=rows)
+
+     def _build_cypher_query(
+         self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, is_count=False
+     ) -> str:
+         if self.is_edge_collection:
+             ep = self.edge_projection
+             ia = ep.identifier_attribute
+             sa = ep.subject_attribute
+             pa = ep.predicate_attribute
+             oa = ep.object_attribute
+             wc = query.where_clause or {}
+             rq = "r"
+             pred = wc.get(pa, None)
+             if pred:
+                 rq = f"r:{pred}"
+             sq = "s"
+             subj = wc.get(sa, None)
+             if subj:
+                 sq = f"s {{{ia}: '{subj}'}}"
+             oq = "o"
+             obj = wc.get(oa, None)
+             if obj:
+                 oq = f"o {{{ia}: '{obj}'}}"
+             where = {k: v for k, v in wc.items() if k not in [sa, pa, oa]}
+             cypher_query = f"""
+             MATCH ({sq})-[{rq}]->({oq})
+             {self._build_where_clause(where, 'r')}
+             """
+             if is_count:
+                 cypher_query += """
+                 RETURN count(r) as count
+                 """
+             else:
+                 cypher_query += f"""
+                 RETURN r, type(r) as predicate, s.{ia} as subject, o.{ia} as object
+                 """
+         else:
+             node_pattern = self._node_pattern(query.where_clause)
+             cypher_query = f"""
+             MATCH ({node_pattern})
+             {self._build_where_clause(query.where_clause)}
+             """
+             if is_count:
+                 cypher_query += """
+                 RETURN count(n) as count
+                 """
+             else:
+                 cypher_query += """
+                 RETURN n
+                 """
+
+         if not is_count:
+             if limit and limit >= 0:
+                 cypher_query += f" LIMIT {limit}"
+             if offset and offset >= 0:
+                 cypher_query += f" SKIP {offset}"
+
+         return cypher_query
+
+     def _build_where_clause(self, where_clause: Dict[str, Any], prefix: str = "n") -> str:
+         conditions = []
+         if where_clause is None:
+             return ""
+         for key, value in where_clause.items():
+             if key == self.category_labels_attribute:
+                 continue
+             if isinstance(value, str):
+                 conditions.append(f"{prefix}.{key} = '{value}'")
+             else:
+                 conditions.append(f"{prefix}.{key} = {value}")
+
+         return "WHERE " + " AND ".join(conditions) if conditions else ""
+
+     def _edge_to_dict(self, record: Dict) -> Dict[str, Any]:
+         r = record["r"]
+         ep = self.edge_projection
+         return {
+             ep.subject_attribute: record["subject"],
+             ep.predicate_attribute: record["predicate"],
+             ep.object_attribute: record["object"],
+             **dict(r.items()),
+         }
+
+     def query_facets(
+         self,
+         where: Dict = None,
+         facet_columns: List[Union[str, Tuple[str, ...]]] = None,
+         facet_limit=DEFAULT_FACET_LIMIT,
+         **kwargs,
+     ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
+         results = {}
+         if not facet_columns:
+             facet_columns = list(self.class_definition().attributes.keys())
+
+         category_labels_attribute = self.category_labels_attribute
+         with self.session() as session:
+             for col in facet_columns:
+                 where_clause = self._build_where_clause(where) if where else ""
+                 if col == category_labels_attribute:
+                     # Handle faceting on labels
+                     query = f"""
+                     MATCH (n)
+                     {where_clause}
+                     WITH labels(n) AS nodeLabels, count(*) as count
+                     UNWIND nodeLabels AS label
+                     WITH label, count
+                     ORDER BY count DESC, label
+                     LIMIT {facet_limit}
+                     RETURN label as value, count
+                     """
+                 else:
+                     query = f"""
+                     MATCH (n)
+                     {where_clause}
+                     WITH n.{col} as value, count(*) as count
+                     WITH value, count
+                     ORDER BY count DESC
+                     LIMIT {facet_limit}
+                     RETURN value, count
+                     """
+                 result = session.run(query)
+                 results[col] = [(record["value"], record["count"]) for record in result]
+
+         return results
+
+     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
+         delete_policy = self.delete_policy
+         if not isinstance(objs, list):
+             objs = [objs]
+
+         deleted_nodes = 0
+         deleted_relationships = 0
+         identifier_attribute = self.identifier_attribute
+
+         with self.session() as session:
+             for obj in objs:
+                 node_pattern = self._node_pattern(obj)
+                 id_value = obj[identifier_attribute]
+                 where_clause = f"{{{identifier_attribute}: $id}}"
+                 dn, dr = self._execute_delete(session, node_pattern, where_clause, delete_policy, id=id_value)
+                 deleted_nodes += dn
+                 deleted_relationships += dr
+
+         return deleted_nodes
+
+     def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
+         delete_policy = self.delete_policy
+         where_clause = self._build_where_clause(where) if where else ""
+         node_pattern = self._node_pattern(where)
+
+         with self.session() as session:
+             deleted_nodes, deleted_relationships = self._execute_delete(
+                 session, node_pattern, where_clause, delete_policy
+             )
+
+         if deleted_nodes == 0 and not missing_ok:
+             raise ValueError(f"No nodes found for {where}")
+
+         return deleted_nodes
+
+     def _execute_delete(
+         self, session, node_pattern: str, where_clause: str, delete_policy: DeletePolicy, **params
+     ) -> Tuple[int, int]:
+         deleted_relationships = 0
+         deleted_nodes = 0
+
+         if delete_policy == DeletePolicy.ERROR:
+             check_query = f"MATCH ({node_pattern} {where_clause})-[r]-() RETURN count(r) as rel_count"
+             result = session.run(check_query, **params)
+             if result.single()["rel_count"] > 0:
+                 raise ValueError("Nodes with existing relationships found and cannot be deleted.")
+
+         if delete_policy == DeletePolicy.CASCADE:
+             rel_query = f"MATCH ({node_pattern} {where_clause})-[r]-() DELETE r"
+             result = session.run(rel_query, **params)
+             deleted_relationships = result.consume().counters.relationships_deleted
+
+         if delete_policy in [DeletePolicy.CASCADE, DeletePolicy.ERROR]:
+             node_query = f"MATCH ({node_pattern} {where_clause}) DELETE n"
+             result = session.run(node_query, **params)
+             deleted_nodes = result.consume().counters.nodes_deleted
+         elif delete_policy == DeletePolicy.STUB:
+             stub_query = f"MATCH ({node_pattern} {where_clause}) SET n.deleted = true RETURN count(n) as stub_count"
+             result = session.run(stub_query, **params)
+             deleted_nodes = result.single()["stub_count"]
+
+         return deleted_nodes, deleted_relationships
+
+     def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
+         if not isinstance(objs, list):
+             objs = [objs]
+
+         updated_count = 0
+         with self.session() as session:
+             for obj in objs:
+                 query = self._create_update_cypher_query(obj)
+                 result = session.run(query, **obj)
+                 updated_count += result.consume().counters.properties_set
+
+         return updated_count
+
+     def _create_update_cypher_query(self, obj: OBJECT) -> str:
+         id_attribute = self.identifier_attribute
+         category_labels_attribute = self.category_labels_attribute
+
+         # Prepare SET clause
+         set_items = [f"n.{k} = ${k}" for k in obj.keys() if k not in [id_attribute, category_labels_attribute]]
+         set_clause = ", ".join(set_items)
+
+         # Prepare labels update
+         labels_to_add = []
+         # labels_to_remove = []
+         if category_labels_attribute in obj:
+             new_labels = (
+                 obj[category_labels_attribute]
+                 if isinstance(obj[category_labels_attribute], list)
+                 else [obj[category_labels_attribute]]
+             )
+             labels_to_add = [f":{label}" for label in new_labels]
+             # labels_to_remove = [":Label" for _ in new_labels] # Placeholder for labels to remove
+
+         # Construct the query
+         query = f"MATCH (n {{{id_attribute}: ${id_attribute}}})\n"
+         # if labels_to_remove:
+         # query += f"REMOVE n{' '.join(labels_to_remove)}\n"
+         if labels_to_add:
+             query += f"SET n{' '.join(labels_to_add)}\n"
+         # f"REMOVE n{' '.join(labels_to_remove)}' if labels_to_remove else ''}"
+         # f"{f'SET n{' '.join(labels_to_add)}' if labels_to_add else ''}"
+         query += f"SET {set_clause}\n"
+         query += "RETURN n"
+         print(query)
+         return query
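Note that `_prop_clause` is defined twice in this module; under Python class-body semantics the second definition (whose default `exclude_attributes` is an empty list) is the one that takes effect. For orientation, the following is a minimal, hypothetical usage sketch for this adapter, not part of the package diff: it assumes a Neo4j instance at `bolt://localhost:7687`, the default `neo4j`/`password` credentials expected by `Neo4jDatabase._parse_handle`, the `Client`/`Database` API defined elsewhere in this package (`client.py`, `database.py`), and default `EdgeProjection` attribute names; handles, identifiers, and attribute names are placeholders.

```python
# Hypothetical usage sketch for the Neo4jCollection adapter above (not part of the diff).
# Assumes a local Neo4j instance, default credentials, and the linkml_store Client API;
# handles, collection names, and edge attribute names are illustrative placeholders.
from linkml_store import Client

client = Client()
db = client.attach_database("neo4j://neo4j:password@localhost:7687/neo4j", alias="graph")

# Node collection: each inserted object becomes a node, with "category" mapped to a label
persons = db.create_collection("Person", alias="persons")
persons.insert([
    {"id": "P:1", "name": "Akiko", "category": "Person"},
    {"id": "P:2", "name": "Brian", "category": "Person"},
])

# Edge collection: each inserted object becomes a relationship between existing nodes
# (assumes EdgeProjection defaults of "subject"/"predicate"/"object" attribute names)
knows = db.create_collection("Knows", alias="knows")
knows.set_is_edge_collection()
knows.insert({"subject": "P:1", "predicate": "KNOWS", "object": "P:2"})

# Reads go through Neo4jCollection.query(), which compiles the request to Cypher
# via _build_cypher_query(); find() is the assumed convenience wrapper on Collection.
result = persons.find({"name": "Akiko"})
print(result.rows)
```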
linkml_store/api/stores/neo4j/neo4j_database.py
@@ -0,0 +1,154 @@
+ # neo4j_database.py
+
+ import logging
+ from pathlib import Path
+ from typing import Optional, Union
+
+ from neo4j import Driver, GraphDatabase, Session
+
+ from linkml_store.api import Database
+ from linkml_store.api.queries import Query, QueryResult
+ from linkml_store.api.stores.neo4j.neo4j_collection import Neo4jCollection
+ from linkml_store.utils.format_utils import Format
+
+ logger = logging.getLogger(__name__)
+
+
+ class Neo4jDatabase(Database):
+     """
+     An adapter for Neo4j databases.
+     """
+
+     _driver: Driver = None
+     collection_class = Neo4jCollection
+
+     def __init__(self, handle: Optional[str] = None, **kwargs):
+         # Note: in the community edition the database must be "neo4j"
+         if handle is None:
+             handle = "bolt://localhost:7687/neo4j"
+         if handle.startswith("neo4j:"):
+             handle = handle.replace("neo4j:", "bolt:", 1)
+         super().__init__(handle=handle, **kwargs)
+
+     @property
+     def _db_name(self) -> str:
+         if self.handle:
+             db = self.handle.split("/")[-1]
+         else:
+             db = "default"
+         return db
+
+     @property
+     def driver(self) -> Driver:
+         if self._driver is None:
+             uri, user, password = self._parse_handle()
+             self._driver = GraphDatabase.driver(uri, auth=(user, password))
+         return self._driver
+
+     def session(self) -> Session:
+         return self.driver.session(database=self._db_name)
+
+     def _parse_handle(self):
+         parts = self.handle.split("://")
+         protocol = parts[0]
+         rest = parts[1]
+
+         if "@" in rest:
+             auth, host = rest.split("@")
+             user, password = auth.split(":")
+         else:
+             host = rest
+             user, password = "neo4j", "password"  # Default credentials
+
+         uri = f"{protocol}://{host}"
+         return uri, user, password
+
+     def commit(self, **kwargs):
+         # Neo4j uses auto-commit by default for each transaction
+         pass
+
+     def close(self, **kwargs):
+         if self._driver:
+             self._driver.close()
+
+     def drop(self, **kwargs):
+         with self.driver.session() as session:
+             session.run("MATCH (n) DETACH DELETE n")
+
+     def query(self, query: Query, **kwargs) -> QueryResult:
+         if query.from_table:
+             collection = self.get_collection(query.from_table)
+             return collection.query(query, **kwargs)
+         else:
+             raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")
+
+     def init_collections(self):
+         if self._collections is None:
+             self._collections = {}
+
+         # In Neo4j, we don't have a direct equivalent to collections
+         # We'll use node labels as a proxy for collections
+         with self.driver.session() as session:
+             result = session.run("CALL db.labels()")
+             labels = [record["label"] for record in result]
+
+         for label in labels:
+             if label not in self._collections:
+                 collection = Neo4jCollection(name=label, parent=self)
+                 self._collections[label] = collection
+
+     def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
+         # Neo4j doesn't have a built-in export function, so we'll implement a basic JSON export
+         if target_format == Format.JSON or target_format == "json":
+             path = Path(location)
+             with self.driver.session() as session:
+                 result = session.run("MATCH (n) RETURN n")
+                 nodes = [dict(record["n"].items()) for record in result]
+
+                 result = session.run("MATCH ()-[r]->() RETURN r")
+                 relationships = [
+                     {
+                         "type": record["r"].type,
+                         "start": record["r"].start_node.id,
+                         "end": record["r"].end_node.id,
+                         **dict(record["r"].items()),
+                     }
+                     for record in result
+                 ]
+
+             data = {"nodes": nodes, "relationships": relationships}
+
+             import json
+
+             with open(path, "w") as f:
+                 json.dump(data, f)
+         else:
+             super().export_database(location, target_format=target_format, **kwargs)
+
+     def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
+         if source_format == Format.JSON or source_format == "json":
+             path = Path(location)
+             with open(path, "r") as f:
+                 import json
+
+                 data = json.load(f)
+
+             with self.driver.session() as session:
+                 for node in data["nodes"]:
+                     labels = node.pop("labels", ["Node"])
+                     props = ", ".join([f"{k}: ${k}" for k in node.keys()])
+                     query = f"CREATE (n:{':'.join(labels)} {{{props}}})"
+                     session.run(query, **node)
+
+                 for rel in data["relationships"]:
+                     # rel_type = rel.pop("type")
+                     start = rel.pop("start")
+                     end = rel.pop("end")
+                     # props = ", ".join([f"{k}: ${k}" for k in rel.keys()])
+                     query = (
+                         f"MATCH (a), (b) WHERE id(a) = {start} AND id(b) = {end} "
+                         "CREATE (a)-[r:{rel_type} {{{props}}}]->(b)"
+                     )
+                     session.run(query, **rel)
+         else:
+             super().import_database(location, source_format=source_format, **kwargs)
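The handle accepted by this adapter has the form `bolt://USER:PASSWORD@HOST:PORT/DBNAME` (a `neo4j:` prefix is rewritten to `bolt:` in `__init__`), the database name is taken from the final path segment, and default credentials are assumed when no `user:password@` segment is present. A small standalone sketch mirroring the `_parse_handle` logic above, for illustration only (not imported from the package):

```python
from typing import Tuple


def parse_neo4j_handle(handle: str) -> Tuple[str, str, str]:
    """Standalone mirror of Neo4jDatabase._parse_handle above; illustrative only."""
    protocol, rest = handle.split("://", 1)
    if "@" in rest:
        auth, host = rest.split("@")
        user, password = auth.split(":")
    else:
        host = rest
        user, password = "neo4j", "password"  # defaults assumed by the adapter
    return f"{protocol}://{host}", user, password


print(parse_neo4j_handle("bolt://alice:secret@localhost:7687/neo4j"))
# -> ('bolt://localhost:7687/neo4j', 'alice', 'secret')
```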
linkml_store/api/stores/solr/__init__.py
@@ -0,0 +1,3 @@
+ """
+ Wrapper for Solr endpoints.
+ """