linkml_store-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/__init__.py +7 -0
- linkml_store/api/__init__.py +8 -0
- linkml_store/api/client.py +414 -0
- linkml_store/api/collection.py +1280 -0
- linkml_store/api/config.py +187 -0
- linkml_store/api/database.py +862 -0
- linkml_store/api/queries.py +69 -0
- linkml_store/api/stores/__init__.py +0 -0
- linkml_store/api/stores/chromadb/__init__.py +7 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
- linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
- linkml_store/api/stores/dremio/__init__.py +10 -0
- linkml_store/api/stores/dremio/dremio_collection.py +555 -0
- linkml_store/api/stores/dremio/dremio_database.py +1052 -0
- linkml_store/api/stores/dremio/mappings.py +105 -0
- linkml_store/api/stores/dremio_rest/__init__.py +11 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
- linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
- linkml_store/api/stores/duckdb/mappings.py +8 -0
- linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
- linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
- linkml_store/api/stores/ibis/__init__.py +5 -0
- linkml_store/api/stores/ibis/ibis_collection.py +488 -0
- linkml_store/api/stores/ibis/ibis_database.py +328 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
- linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
- linkml_store/api/stores/neo4j/__init__.py +0 -0
- linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
- linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
- linkml_store/api/stores/solr/__init__.py +3 -0
- linkml_store/api/stores/solr/solr_collection.py +224 -0
- linkml_store/api/stores/solr/solr_database.py +83 -0
- linkml_store/api/stores/solr/solr_utils.py +0 -0
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +1147 -0
- linkml_store/constants.py +7 -0
- linkml_store/graphs/__init__.py +0 -0
- linkml_store/graphs/graph_map.py +24 -0
- linkml_store/index/__init__.py +53 -0
- linkml_store/index/implementations/__init__.py +0 -0
- linkml_store/index/implementations/llm_indexer.py +174 -0
- linkml_store/index/implementations/simple_indexer.py +43 -0
- linkml_store/index/indexer.py +211 -0
- linkml_store/inference/__init__.py +13 -0
- linkml_store/inference/evaluation.py +195 -0
- linkml_store/inference/implementations/__init__.py +0 -0
- linkml_store/inference/implementations/llm_inference_engine.py +154 -0
- linkml_store/inference/implementations/rag_inference_engine.py +276 -0
- linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
- linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
- linkml_store/inference/inference_config.py +66 -0
- linkml_store/inference/inference_engine.py +209 -0
- linkml_store/inference/inference_engine_registry.py +74 -0
- linkml_store/plotting/__init__.py +5 -0
- linkml_store/plotting/cli.py +826 -0
- linkml_store/plotting/dimensionality_reduction.py +453 -0
- linkml_store/plotting/embedding_plot.py +489 -0
- linkml_store/plotting/facet_chart.py +73 -0
- linkml_store/plotting/heatmap.py +383 -0
- linkml_store/utils/__init__.py +0 -0
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/dat_parser.py +95 -0
- linkml_store/utils/embedding_matcher.py +424 -0
- linkml_store/utils/embedding_utils.py +299 -0
- linkml_store/utils/enrichment_analyzer.py +217 -0
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +550 -0
- linkml_store/utils/io.py +38 -0
- linkml_store/utils/llm_utils.py +122 -0
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/neo4j_utils.py +42 -0
- linkml_store/utils/object_utils.py +190 -0
- linkml_store/utils/pandas_utils.py +93 -0
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/utils/sklearn_utils.py +193 -0
- linkml_store/utils/sql_utils.py +177 -0
- linkml_store/utils/stats_utils.py +53 -0
- linkml_store/utils/vector_utils.py +158 -0
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +43 -0
- linkml_store/webapi/main.py +855 -0
- linkml_store-0.3.0.dist-info/METADATA +226 -0
- linkml_store-0.3.0.dist-info/RECORD +101 -0
- linkml_store-0.3.0.dist-info/WHEEL +4 -0
- linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
- linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
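
The api package in this listing follows a Client -> Database -> Collection layering (client.py, database.py, collection.py), with backend-specific implementations under linkml_store/api/stores/ (duckdb, mongodb, dremio, solr, and others). As a quick orientation before the full diff, the lines below are a minimal usage sketch assembled from the doctests in the linkml_store/api/database.py diff that follows; the duckdb backend and the "Person" records are simply the illustrative values those doctests use.

>>> from linkml_store.api.client import Client
>>> client = Client()
>>> db = client.attach_database("duckdb", alias="test")
>>> collection = db.create_collection("Person")
>>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 30}])
>>> qr = collection.find({"name": "John"})
>>> qr.num_rows
1
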
linkml_store/api/database.py

@@ -0,0 +1,862 @@
import logging
from abc import ABC
from collections import defaultdict
from copy import copy
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Generic,
    Iterator,
    List,
    Optional,
    Sequence,
    Type,
    Union,
)

from linkml_store.api.types import CollectionType
from linkml_store.utils.format_utils import Format, load_objects, render_output
from linkml_store.utils.patch_utils import PatchDict

try:
    from linkml.validator.report import Severity, ValidationResult
except ImportError:
    ValidationResult = None

from linkml_runtime import SchemaView
from linkml_runtime.linkml_model import ClassDefinition, SchemaDefinition

from linkml_store.api.collection import Collection
from linkml_store.api.config import CollectionConfig, DatabaseConfig
from linkml_store.api.queries import Query, QueryResult

if TYPE_CHECKING:
    from linkml_store.api.client import Client

logger = logging.getLogger(__name__)

LISTENER = Callable[[Collection, List[PatchDict]], None]


class Database(ABC, Generic[CollectionType]):
    """
    A Database provides access to named collections of data.

    A database object is owned by a :ref:`Client`. The database
    object uses a :ref:`handle` to know what kind of external
    database system to connect to (e.g. duckdb, mongodb). The handle
    is a string ``<DatabaseType>:<LocalLocator>``

    The
    database object may also have an :ref:`alias` that is mapped
    to the handle.

    Attaching a database
    --------------------
    >>> from linkml_store.api.client import Client
    >>> client = Client()
    >>> db = client.attach_database("duckdb:///:memory:", alias="test")

    We can check the value of the handle:

    >>> db.handle
    'duckdb:///:memory:'

    The alias can be used to retrieve the database object from the client

    >>> assert db == client.get_database("test")

    Creating a collection
    ---------------------

    >>> collection = db.create_collection("Person")
    >>> len(db.list_collections())
    1
    >>> db.get_collection("Person") == collection
    True
    >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
    >>> collection.insert(objs)
    >>> qr = collection.find()
    >>> len(qr.rows)
    2
    >>> qr.rows[0]["id"]
    'P1'
    >>> qr.rows[1]["name"]
    'Alice'
    >>> qr = collection.find({"name": "John"})
    >>> len(qr.rows)
    1
    >>> qr.rows[0]["name"]
    'John'

    """

    _schema_view: Optional[SchemaView] = None
    """Schema for the database. May be transformed."""

    _original_schema_view: Optional[SchemaView] = None
    """If a schema must be transformed, then the original is stored here."""

    _collections: Optional[Dict[str, Collection]] = None
    parent: Optional["Client"] = None
    metadata: Optional[DatabaseConfig] = None
    collection_class: ClassVar[Optional[Type[Collection]]] = None

    listeners: Optional[List[LISTENER]] = None

    def __init__(self, handle: Optional[str] = None, metadata: Optional[DatabaseConfig] = None, **kwargs):
        if metadata:
            self.metadata = metadata
        else:
            self.metadata = DatabaseConfig(handle=handle, **kwargs)
        if handle is not None and self.metadata.handle is not None and handle != self.metadata.handle:
            raise ValueError(f"Handle mismatch: {handle} != {self.metadata.handle}")
        self._initialize_schema()
        self._initialize_collections()

    def _initialize_schema(self, **kwargs):
        db_config = self.metadata
        if db_config.schema_location:
            schema_location = db_config.schema_location.format(base_dir=self.parent.metadata.base_dir)
            logger.info(f"Loading schema from: {schema_location}")
            self.load_schema_view(schema_location)
        if db_config.schema_dict:
            schema_dict = copy(db_config.schema_dict)
            if "id" not in schema_dict:
                schema_dict["id"] = "tmp"
            if "name" not in schema_dict:
                schema_dict["name"] = "tmp"
            self.set_schema_view(SchemaView(SchemaDefinition(**schema_dict)))

    def from_config(self, db_config: DatabaseConfig, **kwargs):
        """
        Initialize a database from a configuration.

        TODO: DEPRECATE

        :param db_config: database configuration
        :param kwargs: additional arguments
        """
        self.metadata = db_config
        self._initialize_schema()
        self._initialize_collections()
        return self

    def _initialize_collections(self):
        if not self.metadata.collections:
            return
        for k, collection_config in self.metadata.collections.items():
            if collection_config.alias:
                if collection_config.alias != k:
                    raise ValueError(f"Alias mismatch: {collection_config.alias} != {k}")
            alias = k
            typ = collection_config.type or alias
            _collection = self.create_collection(typ, alias=alias, metadata=collection_config)
            assert _collection.alias == alias
            assert _collection.target_class_name == typ
            if collection_config.attributes:
                # initialize schema
                sv = self.schema_view
                cd = ClassDefinition(typ, attributes=collection_config.attributes)
                sv.schema.classes[cd.name] = cd
                sv.set_modified()
                # assert collection.class_definition() is not None

    @property
    def recreate_if_exists(self) -> bool:
        """
        Return whether to recreate the database if it already exists.

        :return:
        """
        return self.metadata.recreate_if_exists

    @property
    def handle(self) -> str:
        """
        Return the database handle.

        Examples:

        - ``duckdb:///:memory:``
        - ``duckdb:///tmp/test.db``
        - ``mongodb://localhost:27017/``

        :return:
        """
        return self.metadata.handle

    @property
    def alias(self):
        return self.metadata.alias

    def store(self, obj: Dict[str, Any], **kwargs):
        """
        Store an object in the database.

        The object is assumed to be a Dictionary of Collections.

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
        >>> collection = db.get_collection("persons")
        >>> qr = collection.find()
        >>> qr.num_rows
        1

        :param obj: object to store
        :param kwargs: additional arguments
        """
        sv = self.schema_view
        roots = [c for c in sv.all_classes().values() if c.tree_root]
        root = roots[0] if roots else None
        for k, v in obj.items():
            logger.info(f"Storing collection {k}")
            if root:
                slot = sv.induced_slot(k, root.name)
                if not slot:
                    raise ValueError(f"Cannot determine type for {k}")
            else:
                slot = None
            if isinstance(v, dict):
                logger.debug(f"Coercing dict to list: {v}")
                v = [v]
            if not isinstance(v, list):
                continue
            if not v:
                continue
            if slot:
                logger.debug(f"Aligning to existing slot: {slot.name} range={slot.range}")
                collection = self.get_collection(slot.name, type=slot.range, create_if_not_exists=True)
            else:
                collection = self.get_collection(k, create_if_not_exists=True)
            logger.debug(f"Replacing using {collection.alias} {collection.target_class_name}")
            collection.replace(v)

    def commit(self, **kwargs):
        """
        Commit pending changes to the database.

        :param kwargs:
        :return:
        """
        for coll in self.list_collections():
            coll.commit()

    def close(self, **kwargs):
        """
        Close the database.

        :param kwargs:
        :return:
        """
        raise NotImplementedError()

    @property
    def _collection_class(self) -> Type[Collection]:
        raise NotImplementedError()

    def create_collection(
        self,
        name: str,
        alias: Optional[str] = None,
        metadata: Optional[CollectionConfig] = None,
        recreate_if_exists=False,
        **kwargs,
    ) -> Collection:
        """
        Create a new collection in the current database.

        The collection must have a *Type*, and may have an *Alias*.

        Examples:

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> collection = db.create_collection("Person", alias="persons")
        >>> collection.alias
        'persons'

        >>> collection.target_class_name
        'Person'

        If alias is not provided, it defaults to the name of the type.

        >>> collection = db.create_collection("Organization")
        >>> collection.alias
        'Organization'

        :param name: name of the collection
        :param alias: alias for the collection
        :param metadata: metadata for the collection
        :param recreate_if_exists: recreate the collection if it already exists
        :param kwargs: additional arguments
        """
        if not name:
            raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
        collection_cls = self.collection_class
        collection = collection_cls(name=name, parent=self, metadata=metadata)
        if alias:
            collection.metadata.alias = alias
        if metadata and metadata.source:
            collection.load_from_source()
        if metadata and metadata.attributes:
            sv = self.schema_view
            schema = sv.schema
            cd = ClassDefinition(name=metadata.type, attributes=metadata.attributes)
            schema.classes[cd.name] = cd
        if not self._collections:
            self._collections = {}
        if not alias:
            alias = name
        self._collections[alias] = collection
        if recreate_if_exists:
            logger.debug(f"Recreating collection {collection.alias}")
            collection.delete_where({}, missing_ok=True)
        return collection

    def list_collections(self, include_internal=False) -> Sequence[Collection]:
        """
        List all collections.

        Examples
        --------
        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> c1 = db.create_collection("Person")
        >>> c2 = db.create_collection("Product")
        >>> collections = db.list_collections()
        >>> len(collections)
        2
        >>> [c.target_class_name for c in collections]
        ['Person', 'Product']

        :param include_internal: include internal collections
        :return: list of collections
        """
        if not self._collections:
            self.init_collections()
        return [c for c in self._collections.values() if include_internal or not c.is_internal]

    def list_collection_names(self, **kwargs) -> Sequence[str]:
        """
        List all collection names.

        Examples
        --------
        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> c1 = db.create_collection("Person")
        >>> c2 = db.create_collection("Product")
        >>> collection_names = db.list_collection_names()
        >>> len(collection_names)
        2
        >>> collection_names
        ['Person', 'Product']

        """
        return [c.alias for c in self.list_collections(**kwargs)]

    def get_collection(
        self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
    ) -> "Collection":
        """
        Get a named collection.

        Examples
        --------
        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> collection = db.create_collection("Person")
        >>> db.get_collection("Person") == collection
        True
        >>> db.get_collection("NonExistent", create_if_not_exists=False)
        Traceback (most recent call last):
        ...
        KeyError: 'Collection NonExistent does not exist'

        :param name: name of the collection
        :param type: target class name
        :param create_if_not_exists: create the collection if it does not exist

        """
        if not self._collections:
            logger.debug("Initializing collections")
            self.init_collections()
        if name not in self._collections.keys():
            if create_if_not_exists:
                if type is None:
                    type = name
                logger.debug(f"Creating new collection: {name} kwargs: {kwargs}")
                self._collections[name] = self.create_collection(type, alias=name, **kwargs)
            else:
                raise KeyError(f"Collection {name} does not exist")
        return self._collections[name]

    def init_collections(self):
        """
        Initialize collections.

        TODO: Not typically called directly: consider making this private
        :return:
        """
        raise NotImplementedError

    def query(self, query: Query, **kwargs) -> QueryResult:
        """
        Run a query against the database.

        Examples
        --------
        >>> from linkml_store.api.client import Client
        >>> from linkml_store.api.queries import Query
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
        >>> collection = db.create_collection("Person")
        >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
        >>> query = Query(from_table="Person", where_clause={"name": "John"})
        >>> result = db.query(query)
        >>> len(result.rows)
        1
        >>> result.rows[0]["id"]
        'P1'

        :param query:
        :param kwargs:
        :return:

        """
        if query.from_table:
            collection = self.get_collection(query.from_table)
            return collection.query(query, **kwargs)
        else:
            raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")

    @property
    def supports_sql(self) -> bool:
        """
        Return whether this database supports raw SQL queries.

        Backends like DuckDB, PostgreSQL, Dremio support SQL.
        Backends like MongoDB, filesystem do not.

        :return: True if raw SQL is supported
        """
        return False

    def execute_sql(self, sql: str, **kwargs) -> QueryResult:
        """
        Execute a raw SQL query against the database.

        This method allows direct SQL execution on SQL-capable backends,
        bypassing the linkml-store query abstraction layer.

        :param sql: SQL query string
        :param kwargs: Additional arguments
        :return: QueryResult containing the results
        :raises NotImplementedError: If this backend does not support SQL

        Examples
        --------
        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
        >>> collection = db.create_collection("Person")
        >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
        >>> result = db.execute_sql("SELECT * FROM Person WHERE name = 'John'")
        >>> len(result.rows)
        1
        >>> result.rows[0]["name"]
        'John'
        """
        raise NotImplementedError(
            f"Raw SQL queries are not supported by {self.__class__.__name__}. "
            f"Use collection.find() or collection.query() instead."
        )

    @property
    def schema_view(self) -> SchemaView:
        """
        Return a schema view for the named collection.

        If no explicit schema is provided, this will generalize one

        Induced schema example:

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
        >>> collection = db.create_collection("Person", alias="persons")
        >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
        >>> schema_view = db.schema_view
        >>> cd = schema_view.get_class("Person")
        >>> cd.attributes["id"].range
        'string'
        >>> cd.attributes["age_in_years"].range
        'integer'

        We can reuse the same class:

        >>> collection2 = db.create_collection("Person", alias="other_persons")
        >>> collection2.class_definition().attributes["age_in_years"].range
        'integer'
        """
        if not self._schema_view:
            self._initialize_schema()
        if not self._schema_view:
            logger.info("Inducing schema view")
            self._schema_view = self.induce_schema_view()
        return self._schema_view

    def set_schema_view(self, schema_view: Union[str, Path, SchemaView]):
        """
        Set the schema view for the database.

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> sv = SchemaView("tests/input/countries/countries.linkml.yaml")
        >>> db.set_schema_view(sv)
        >>> cd = db.schema_view.schema.classes["Country"]
        >>> sorted(cd.slots)
        ['capital', 'code', 'continent', 'languages', 'name']
        >>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
        >>> sorted(induced_slots.keys())
        ['capital', 'code', 'continent', 'languages', 'name']
        >>> induced_slots["code"].identifier
        True

        Creating a new collection will align with the schema view:

        >>> collection = db.create_collection("Country", "all_countries")
        >>> sorted(collection.class_definition().slots)
        ['capital', 'code', 'continent', 'languages', 'name']

        :param schema_view: can be either a path to the schema, or a SchemaView object
        :return:
        """
        if isinstance(schema_view, Path):
            schema_view = str(schema_view)
        if isinstance(schema_view, str):
            schema_view = SchemaView(schema_view)
        self._schema_view = schema_view
        logger.info(f"Setting schema view for {self.handle}")
        # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
        if not self._collections:
            return

        # align with induced schema
        roots = [c for c in schema_view.all_classes().values() if c.tree_root]
        if len(roots) == 0:
            all_ranges = set()
            for cn in schema_view.all_classes():
                for slot in schema_view.class_induced_slots(cn):
                    if slot.range:
                        all_ranges.add(slot.range)
            roots = [
                c
                for c in schema_view.all_classes().values()
                if not all_ranges.intersection(schema_view.class_ancestors(c.name, reflexive=True))
            ]
        if len(roots) == 1:
            root = roots[0]
            for slot in schema_view.class_induced_slots(root.name):
                inlined = slot.inlined or slot.inlined_as_list
                if inlined and slot.range:
                    if slot.name in self._collections:
                        coll = self._collections[slot.name]
                        coll.metadata.type = slot.range

    def load_schema_view(self, path: Union[str, Path]):
        """
        Load a schema view from a file.

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
        >>> sv = db.schema_view
        >>> cd = sv.schema.classes["Country"]
        >>> sorted(cd.slots)
        ['capital', 'code', 'continent', 'languages', 'name']
        >>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
        >>> sorted(induced_slots.keys())
        ['capital', 'code', 'continent', 'languages', 'name']
        >>> induced_slots["code"].identifier
        True

        Creating a new collection will align with the schema view:

        >>> collection = db.create_collection("Country", "all_countries")
        >>> sorted(collection.class_definition().slots)
        ['capital', 'code', 'continent', 'languages', 'name']

        :param path:
        :return:
        """
        if isinstance(path, Path):
            path = str(path)
        self.set_schema_view(SchemaView(path))

    def induce_schema_view(self) -> SchemaView:
        """
        Induce a schema view from a schema definition.

        >>> from linkml_store.api.client import Client
        >>> from linkml_store.api.queries import Query
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> collection = db.create_collection("Person")
        >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25},
        ...                    {"id": "P2", "name": "Alice", "age_in_years": 25}])
        >>> schema_view = db.induce_schema_view()
        >>> cd = schema_view.get_class("Person")
        >>> cd.attributes["id"].range
        'string'
        >>> cd.attributes["age_in_years"].range
        'integer'

        :return: A schema view
        """
        logger.info(f"Inducing schema view for {self.handle}")
        from linkml_runtime.utils.schema_builder import SchemaBuilder

        sb = SchemaBuilder()

        for collection_name in self.list_collection_names():
            coll = self.get_collection(collection_name)
            sb.add_class(coll.target_class_name)
        return SchemaView(sb.schema)

    def validate_database(self, **kwargs) -> List["ValidationResult"]:
        """
        Validate the contents of the database.

        As `iter_validate_database`, but returns a list of validation results.

        :param kwargs:
        :return:
        """
        return list(self.iter_validate_database(**kwargs))

    def iter_validate_database(
        self, ensure_referential_integrity: bool = None, **kwargs
    ) -> Iterator["ValidationResult"]:
        """
        Validate the contents of the database.

        As an example, let's create a database with a predefined schema
        from the countries.linkml.yaml file:

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")

        Let's introspect the schema to see what slots are applicable for the class "Country":

        >>> sv = db.schema_view
        >>> for slot in sv.class_induced_slots("Country"):
        ...     print(slot.name, slot.range, slot.required)
        name string True
        code string True
        capital string True
        continent string True
        languages Language None

        Next we'll create a collection, binding it to the target class "Country", and insert
        valid data:

        >>> collection = db.create_collection("Country", "all_countries")
        >>> obj = {"code": "US", "name": "United States", "continent": "North America", "capital": "Washington, D.C."}
        >>> collection.insert([obj])
        >>> list(db.iter_validate_database())
        []

        Now let's insert some invalid data (missing required fields)

        >>> collection.insert([{"code": "FR", "name": "France"}])
        >>> for r in db.iter_validate_database():
        ...     print(r.message[0:32])
        'capital' is a required property
        'continent' is a required proper

        :param ensure_referential_integrity: ensure referential integrity
        :param kwargs:
        :return: iterator over validation results
        """
        for collection in self.list_collections():
            yield from collection.iter_validate_collection(**kwargs)
        if self.metadata.ensure_referential_integrity or ensure_referential_integrity:
            logger.info(f"Validating referential integrity on {self.alias}")
            yield from self._validate_referential_integrity(**kwargs)

    def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResult"]:
        """
        Validate referential integrity of the database.

        :param kwargs:
        :return: iterator over validation results
        """
        sv = self.schema_view
        cmap = defaultdict(list)
        for collection in self.list_collections():
            if not collection.target_class_name:
                raise ValueError(f"Collection {collection.name} has no target class")
            cmap[collection.target_class_name].append(collection)
        for collection in self.list_collections():
            cd = collection.class_definition()
            induced_slots = sv.class_induced_slots(cd.name)
            slot_map = {s.name: s for s in induced_slots}
            # rmap = {s.name: s.range for s in induced_slots}
            # map slot ranges to a collection where that range is stored
            sr_to_coll = {s.name: cmap.get(s.range, []) for s in induced_slots if s.range}
            logger.debug(f"Validating referential integrity for {collection.target_class_name} // {sr_to_coll}")
            for obj in collection.find_iter():
                for k, v in obj.items():
                    if k not in sr_to_coll:
                        continue
                    ref_colls = sr_to_coll[k]
                    if not ref_colls:
                        continue
                    if not isinstance(v, (str, int)):
                        continue
                    slot = slot_map[k]
                    found = False
                    for ref_coll in ref_colls:
                        ref_obj = ref_coll.get_one(v)
                        if ref_obj:
                            found = True
                            break
                    if not found:
                        yield ValidationResult(
                            type="ReferentialIntegrity",
                            severity=Severity.ERROR,
                            message=f"Referential integrity error: {slot.range} not found",
                            instantiates=slot.range,
                            instance=v,
                        )

    def drop(self, **kwargs):
        """
        Drop the database and all collections.

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> path = Path("/tmp/test.db")
        >>> path.parent.mkdir(exist_ok=True, parents=True)
        >>> db = client.attach_database(f"duckdb:///{path}")
        >>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
        >>> coll = db.get_collection("persons")
        >>> coll.find({}).num_rows
        1
        >>> db.drop()
        >>> db = client.attach_database("duckdb:///tmp/test.db", alias="test")
        >>> coll = db.get_collection("persons")
        >>> coll.find({}).num_rows
        0

        :param kwargs: additional arguments
        """
        raise NotImplementedError()

    def import_database(
        self,
        location: str,
        source_format: Optional[Union[str, Format]] = None,
        collection_name: Optional[str] = None,
        **kwargs,
    ):
        """
        Import a database from a file or location.

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
        >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
        >>> db.list_collection_names()
        ['iris']
        >>> collection = db.get_collection("iris")
        >>> collection.find({}).num_rows
        150

        :param location: location of the file
        :param source_format: source format
        :param collection_name: (Optional) name of the collection, for data that is flat
        :param kwargs: additional arguments
        """
        if isinstance(source_format, str):
            source_format = Format(source_format)
        if isinstance(source_format, Format):
            if source_format.is_dump_format() and source_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]:
                # import into a test instance
                tmp_handle = source_format.value
                client = self.parent
                tmp_alias = "tmp"
                client.drop_database(tmp_alias, missing_ok=True)
                tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
                # TODO: check for infinite recursion
                tmp_db.import_database(location, source_format=source_format)
                obj = {}
                for coll in tmp_db.list_collections():
                    qr = coll.find({}, limit=-1)
                    obj[coll.alias] = qr.rows
                self.store(obj)
                return
        objects = load_objects(location, format=source_format)
        if collection_name:
            collection = self.get_collection(collection_name, create_if_not_exists=True)
            collection.insert(objects)
        else:
            for obj in objects:
                self.store(obj)

    def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
        """
        Export a database to a file or location.

        >>> from linkml_store.api.client import Client
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="test")
        >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
        >>> db.export_database("/tmp/iris.yaml", Format.YAML)

        :param location: location of the file
        :param target_format: target format
        :param kwargs: additional arguments
        """
        obj = {}
        if isinstance(target_format, str):
            target_format = Format(target_format)
        for coll in self.list_collections():
            qr = coll.find({}, limit=-1)
            obj[coll.alias] = qr.rows
        logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
        if isinstance(target_format, Format):
            if target_format.is_dump_format() and target_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]:
                tmp_handle = target_format.value
                client = self.parent
                tmp_db = client.attach_database(tmp_handle, alias="tmp")
                tmp_db.store(obj)
                # TODO: check for infinite recursion
                tmp_db.export_database(location, target_format=target_format)
                return
        if Path(location).is_dir():
            raise ValueError(f"{location} is a directory; cannot write {target_format} to a dir")
        with open(location, "w", encoding="utf-8") as stream:
            stream.write(render_output(obj, format=target_format))

    def broadcast(self, source: Collection, patches: List[PatchDict]):
        if not self.listeners:
            return
        for listener in self.listeners:
            listener(source, patches)