linkml-store 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/client.py +32 -5
- linkml_store/api/collection.py +276 -27
- linkml_store/api/config.py +6 -2
- linkml_store/api/database.py +264 -21
- linkml_store/api/stores/chromadb/__init__.py +5 -1
- linkml_store/api/stores/duckdb/__init__.py +9 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +7 -4
- linkml_store/api/stores/duckdb/duckdb_database.py +19 -5
- linkml_store/api/stores/duckdb/mappings.py +1 -0
- linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +177 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +72 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +31 -10
- linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +150 -15
- linkml_store/index/__init__.py +6 -2
- linkml_store/index/implementations/llm_indexer.py +83 -5
- linkml_store/index/implementations/simple_indexer.py +2 -2
- linkml_store/index/indexer.py +32 -8
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/format_utils.py +139 -8
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/METADATA +7 -1
- linkml_store-0.1.9.dist-info/RECORD +49 -0
- linkml_store-0.1.7.dist-info/RECORD +0 -42
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/entry_points.txt +0 -0
linkml_store/api/database.py
CHANGED
|
@@ -3,7 +3,24 @@ from abc import ABC
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from copy import copy
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import (
|
|
7
|
+
TYPE_CHECKING,
|
|
8
|
+
Any,
|
|
9
|
+
Callable,
|
|
10
|
+
ClassVar,
|
|
11
|
+
Dict,
|
|
12
|
+
Generic,
|
|
13
|
+
Iterator,
|
|
14
|
+
List,
|
|
15
|
+
Optional,
|
|
16
|
+
Sequence,
|
|
17
|
+
Type,
|
|
18
|
+
Union,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
from linkml_store.api.types import CollectionType
|
|
22
|
+
from linkml_store.utils.format_utils import load_objects, render_output
|
|
23
|
+
from linkml_store.utils.patch_utils import PatchDict
|
|
7
24
|
|
|
8
25
|
try:
|
|
9
26
|
from linkml.validator.report import Severity, ValidationResult
|
|
@@ -22,18 +39,40 @@ if TYPE_CHECKING:
|
|
|
22
39
|
|
|
23
40
|
logger = logging.getLogger(__name__)
|
|
24
41
|
|
|
42
|
+
LISTENER = Callable[[Collection, List[PatchDict]], None]
|
|
25
43
|
|
|
26
|
-
|
|
44
|
+
|
|
45
|
+
class Database(ABC, Generic[CollectionType]):
|
|
27
46
|
"""
|
|
28
47
|
A Database provides access to named collections of data.
|
|
29
48
|
|
|
30
|
-
|
|
31
|
-
|
|
49
|
+
A database object is owned by a :ref:`Client`. The database
|
|
50
|
+
object uses a :ref:`handle` to know what kind of external
|
|
51
|
+
database system to connect to (e.g. duckdb, mongodb). The handle
|
|
52
|
+
is a string ``<DatabaseType>:<LocalLocator>``
|
|
53
|
+
|
|
54
|
+
The
|
|
55
|
+
database object may also have an :ref:`alias` that is mapped
|
|
56
|
+
to the handle.
|
|
57
|
+
|
|
58
|
+
Attaching a database
|
|
59
|
+
--------------------
|
|
32
60
|
>>> from linkml_store.api.client import Client
|
|
33
61
|
>>> client = Client()
|
|
34
|
-
>>> db = client.attach_database("duckdb", alias="test")
|
|
62
|
+
>>> db = client.attach_database("duckdb:///:memory:", alias="test")
|
|
63
|
+
|
|
64
|
+
We can check the value of the handle:
|
|
65
|
+
|
|
35
66
|
>>> db.handle
|
|
36
67
|
'duckdb:///:memory:'
|
|
68
|
+
|
|
69
|
+
The alias can be used to retrieve the database object from the client
|
|
70
|
+
|
|
71
|
+
>>> assert db == client.get_database("test")
|
|
72
|
+
|
|
73
|
+
Creating a collection
|
|
74
|
+
---------------------
|
|
75
|
+
|
|
37
76
|
>>> collection = db.create_collection("Person")
|
|
38
77
|
>>> len(db.list_collections())
|
|
39
78
|
1
|
|
@@ -57,11 +96,18 @@ class Database(ABC):
|
|
|
57
96
|
"""
|
|
58
97
|
|
|
59
98
|
_schema_view: Optional[SchemaView] = None
|
|
99
|
+
"""Schema for the database. May be transformed."""
|
|
100
|
+
|
|
101
|
+
_original_schema_view: Optional[SchemaView] = None
|
|
102
|
+
"""If a schema must be transformed, then the original is stored here."""
|
|
103
|
+
|
|
60
104
|
_collections: Optional[Dict[str, Collection]] = None
|
|
61
105
|
parent: Optional["Client"] = None
|
|
62
106
|
metadata: Optional[DatabaseConfig] = None
|
|
63
107
|
collection_class: ClassVar[Optional[Type[Collection]]] = None
|
|
64
108
|
|
|
109
|
+
listeners: Optional[List[LISTENER]] = None
|
|
110
|
+
|
|
65
111
|
def __init__(self, handle: Optional[str] = None, metadata: Optional[DatabaseConfig] = None, **kwargs):
|
|
66
112
|
if metadata:
|
|
67
113
|
self.metadata = metadata
|
|
@@ -101,6 +147,8 @@ class Database(ABC):
|
|
|
101
147
|
return self
|
|
102
148
|
|
|
103
149
|
def _initialize_collections(self):
|
|
150
|
+
if not self.metadata.collections:
|
|
151
|
+
return
|
|
104
152
|
for name, collection_config in self.metadata.collections.items():
|
|
105
153
|
alias = collection_config.alias
|
|
106
154
|
typ = collection_config.type
|
|
@@ -127,15 +175,46 @@ class Database(ABC):
|
|
|
127
175
|
|
|
128
176
|
@property
|
|
129
177
|
def recreate_if_exists(self) -> bool:
|
|
178
|
+
"""
|
|
179
|
+
Return whether to recreate the database if it already exists.
|
|
180
|
+
|
|
181
|
+
:return:
|
|
182
|
+
"""
|
|
130
183
|
return self.metadata.recreate_if_exists
|
|
131
184
|
|
|
132
185
|
@property
|
|
133
186
|
def handle(self) -> str:
|
|
187
|
+
"""
|
|
188
|
+
Return the database handle.
|
|
189
|
+
|
|
190
|
+
Examples:
|
|
191
|
+
|
|
192
|
+
- ``duckdb:///:memory:``
|
|
193
|
+
- ``duckdb:///tmp/test.db``
|
|
194
|
+
- ``mongodb://localhost:27017/``
|
|
195
|
+
|
|
196
|
+
:return:
|
|
197
|
+
"""
|
|
134
198
|
return self.metadata.handle
|
|
135
199
|
|
|
136
|
-
|
|
200
|
+
@property
|
|
201
|
+
def alias(self):
|
|
202
|
+
return self.metadata.alias
|
|
203
|
+
|
|
204
|
+
def store(self, obj: Dict[str, Any], **kwargs):
|
|
137
205
|
"""
|
|
138
|
-
Store an object in the database
|
|
206
|
+
Store an object in the database.
|
|
207
|
+
|
|
208
|
+
The object is assumed to be a Dictionary of Collections.
|
|
209
|
+
|
|
210
|
+
>>> from linkml_store.api.client import Client
|
|
211
|
+
>>> client = Client()
|
|
212
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
213
|
+
>>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
|
|
214
|
+
>>> collection = db.get_collection("persons")
|
|
215
|
+
>>> qr = collection.find()
|
|
216
|
+
>>> qr.num_rows
|
|
217
|
+
1
|
|
139
218
|
|
|
140
219
|
:param obj: object to store
|
|
141
220
|
:param kwargs: additional arguments
|
|
@@ -144,6 +223,7 @@ class Database(ABC):
|
|
|
144
223
|
roots = [c for c in sv.all_classes().values() if c.tree_root]
|
|
145
224
|
root = roots[0] if roots else None
|
|
146
225
|
for k, v in obj.items():
|
|
226
|
+
logger.info(f"Storing collection {k}")
|
|
147
227
|
if root:
|
|
148
228
|
slot = sv.induced_slot(k, root.name)
|
|
149
229
|
if not slot:
|
|
@@ -158,20 +238,29 @@ class Database(ABC):
|
|
|
158
238
|
if not v:
|
|
159
239
|
continue
|
|
160
240
|
if slot:
|
|
161
|
-
|
|
241
|
+
logger.debug(f"Aligning to existing slot: {slot.name} range={slot.range}")
|
|
242
|
+
collection = self.get_collection(slot.name, type=slot.range, create_if_not_exists=True)
|
|
162
243
|
else:
|
|
163
244
|
collection = self.get_collection(k, create_if_not_exists=True)
|
|
245
|
+
logger.debug(f"Replacing using {collection.alias} {collection.target_class_name}")
|
|
164
246
|
collection.replace(v)
|
|
165
247
|
|
|
166
248
|
def commit(self, **kwargs):
|
|
167
249
|
"""
|
|
168
|
-
Commit
|
|
250
|
+
Commit pending changes to the database.
|
|
251
|
+
|
|
252
|
+
:param kwargs:
|
|
253
|
+
:return:
|
|
169
254
|
"""
|
|
170
|
-
|
|
255
|
+
for coll in self.list_collections():
|
|
256
|
+
coll.commit()
|
|
171
257
|
|
|
172
258
|
def close(self, **kwargs):
|
|
173
259
|
"""
|
|
174
|
-
Close the database
|
|
260
|
+
Close the database.
|
|
261
|
+
|
|
262
|
+
:param kwargs:
|
|
263
|
+
:return:
|
|
175
264
|
"""
|
|
176
265
|
raise NotImplementedError()
|
|
177
266
|
|
|
@@ -188,15 +277,27 @@ class Database(ABC):
|
|
|
188
277
|
**kwargs,
|
|
189
278
|
) -> Collection:
|
|
190
279
|
"""
|
|
191
|
-
Create a new collection
|
|
280
|
+
Create a new collection in the current database.
|
|
281
|
+
|
|
282
|
+
The collection must have a *Type*, and may have an *Alias*.
|
|
283
|
+
|
|
284
|
+
Examples:
|
|
192
285
|
|
|
193
286
|
>>> from linkml_store.api.client import Client
|
|
194
287
|
>>> client = Client()
|
|
195
288
|
>>> db = client.attach_database("duckdb", alias="test")
|
|
196
|
-
>>> collection = db.create_collection("Person")
|
|
197
|
-
>>> collection.
|
|
289
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
290
|
+
>>> collection.alias
|
|
291
|
+
'persons'
|
|
292
|
+
>>> collection.target_class_name
|
|
198
293
|
'Person'
|
|
199
294
|
|
|
295
|
+
If alias is not provided, it defaults to the name of the type.
|
|
296
|
+
|
|
297
|
+
>>> collection = db.create_collection("Organization")
|
|
298
|
+
>>> collection.alias
|
|
299
|
+
'Organization'
|
|
300
|
+
|
|
200
301
|
:param name: name of the collection
|
|
201
302
|
:param alias: alias for the collection
|
|
202
303
|
:param metadata: metadata for the collection
|
|
@@ -207,6 +308,8 @@ class Database(ABC):
|
|
|
207
308
|
raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
|
|
208
309
|
collection_cls = self.collection_class
|
|
209
310
|
collection = collection_cls(name=name, alias=alias, parent=self, metadata=metadata)
|
|
311
|
+
if metadata and metadata.source_location:
|
|
312
|
+
collection.load_from_source()
|
|
210
313
|
if metadata and metadata.attributes:
|
|
211
314
|
sv = self.schema_view
|
|
212
315
|
schema = sv.schema
|
|
@@ -218,6 +321,7 @@ class Database(ABC):
|
|
|
218
321
|
alias = name
|
|
219
322
|
self._collections[alias] = collection
|
|
220
323
|
if recreate_if_exists:
|
|
324
|
+
logger.debug(f"Recreating collection {collection.name}")
|
|
221
325
|
collection.delete_where({}, missing_ok=True)
|
|
222
326
|
return collection
|
|
223
327
|
|
|
@@ -265,7 +369,9 @@ class Database(ABC):
|
|
|
265
369
|
"""
|
|
266
370
|
return [c.name for c in self.list_collections(**kwargs)]
|
|
267
371
|
|
|
268
|
-
def get_collection(
|
|
372
|
+
def get_collection(
|
|
373
|
+
self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
|
|
374
|
+
) -> "Collection":
|
|
269
375
|
"""
|
|
270
376
|
Get a named collection.
|
|
271
377
|
|
|
@@ -283,14 +389,19 @@ class Database(ABC):
|
|
|
283
389
|
KeyError: 'Collection NonExistent does not exist'
|
|
284
390
|
|
|
285
391
|
:param name: name of the collection
|
|
392
|
+
:param type: target class name
|
|
286
393
|
:param create_if_not_exists: create the collection if it does not exist
|
|
287
394
|
|
|
288
395
|
"""
|
|
289
396
|
if not self._collections:
|
|
397
|
+
logger.debug("Initializing collections")
|
|
290
398
|
self.init_collections()
|
|
291
399
|
if name not in self._collections.keys():
|
|
292
400
|
if create_if_not_exists:
|
|
293
|
-
|
|
401
|
+
if type is None:
|
|
402
|
+
type = name
|
|
403
|
+
logger.debug(f"Creating new collection: {name} kwargs: {kwargs}")
|
|
404
|
+
self._collections[name] = self.create_collection(type, alias=name, **kwargs)
|
|
294
405
|
else:
|
|
295
406
|
raise KeyError(f"Collection {name} does not exist")
|
|
296
407
|
return self._collections[name]
|
|
@@ -328,12 +439,38 @@ class Database(ABC):
|
|
|
328
439
|
:return:
|
|
329
440
|
|
|
330
441
|
"""
|
|
331
|
-
|
|
442
|
+
if query.from_table:
|
|
443
|
+
collection = self.get_collection(query.from_table)
|
|
444
|
+
return collection.query(query, **kwargs)
|
|
445
|
+
else:
|
|
446
|
+
raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")
|
|
332
447
|
|
|
333
448
|
@property
|
|
334
449
|
def schema_view(self) -> SchemaView:
|
|
335
450
|
"""
|
|
336
|
-
Return a schema view for the named collection
|
|
451
|
+
Return a schema view for the named collection.
|
|
452
|
+
|
|
453
|
+
If no explicit schema is provided, one will be induced from the data.
|
|
454
|
+
|
|
455
|
+
Induced schema example:
|
|
456
|
+
|
|
457
|
+
>>> from linkml_store.api.client import Client
|
|
458
|
+
>>> client = Client()
|
|
459
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
460
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
461
|
+
>>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
|
|
462
|
+
>>> schema_view = db.schema_view
|
|
463
|
+
>>> cd = schema_view.get_class("Person")
|
|
464
|
+
>>> cd.attributes["id"].range
|
|
465
|
+
'string'
|
|
466
|
+
>>> cd.attributes["age_in_years"].range
|
|
467
|
+
'integer'
|
|
468
|
+
|
|
469
|
+
We can reuse the same class:
|
|
470
|
+
|
|
471
|
+
>>> collection2 = db.create_collection("Person", alias="other_persons")
|
|
472
|
+
>>> collection2.class_definition().attributes["age_in_years"].range
|
|
473
|
+
'integer'
|
|
337
474
|
"""
|
|
338
475
|
if not self._schema_view:
|
|
339
476
|
self._initialize_schema()
|
|
@@ -345,6 +482,26 @@ class Database(ABC):
|
|
|
345
482
|
"""
|
|
346
483
|
Set the schema view for the database.
|
|
347
484
|
|
|
485
|
+
>>> from linkml_store.api.client import Client
|
|
486
|
+
>>> client = Client()
|
|
487
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
488
|
+
>>> sv = SchemaView("tests/input/countries/countries.linkml.yaml")
|
|
489
|
+
>>> db.set_schema_view(sv)
|
|
490
|
+
>>> cd = db.schema_view.schema.classes["Country"]
|
|
491
|
+
>>> sorted(cd.slots)
|
|
492
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
493
|
+
>>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
|
|
494
|
+
>>> sorted(induced_slots.keys())
|
|
495
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
496
|
+
>>> induced_slots["code"].identifier
|
|
497
|
+
True
|
|
498
|
+
|
|
499
|
+
Creating a new collection will align with the schema view:
|
|
500
|
+
|
|
501
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
502
|
+
>>> sorted(collection.class_definition().slots)
|
|
503
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
504
|
+
|
|
348
505
|
:param schema_view:
|
|
349
506
|
:return:
|
|
350
507
|
"""
|
|
@@ -375,8 +532,7 @@ class Database(ABC):
|
|
|
375
532
|
if inlined and slot.range:
|
|
376
533
|
if slot.name in self._collections:
|
|
377
534
|
coll = self._collections[slot.name]
|
|
378
|
-
|
|
379
|
-
coll.metadata.type = slot.range
|
|
535
|
+
coll.metadata.type = slot.range
|
|
380
536
|
|
|
381
537
|
def load_schema_view(self, path: Union[str, Path]):
|
|
382
538
|
"""
|
|
@@ -386,6 +542,21 @@ class Database(ABC):
|
|
|
386
542
|
>>> client = Client()
|
|
387
543
|
>>> db = client.attach_database("duckdb", alias="test")
|
|
388
544
|
>>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
|
|
545
|
+
>>> sv = db.schema_view
|
|
546
|
+
>>> cd = sv.schema.classes["Country"]
|
|
547
|
+
>>> sorted(cd.slots)
|
|
548
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
549
|
+
>>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
|
|
550
|
+
>>> sorted(induced_slots.keys())
|
|
551
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
552
|
+
>>> induced_slots["code"].identifier
|
|
553
|
+
True
|
|
554
|
+
|
|
555
|
+
Creating a new collection will align with the schema view:
|
|
556
|
+
|
|
557
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
558
|
+
>>> sorted(collection.class_definition().slots)
|
|
559
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
389
560
|
|
|
390
561
|
:param path:
|
|
391
562
|
:return:
|
|
@@ -420,6 +591,42 @@ class Database(ABC):
|
|
|
420
591
|
"""
|
|
421
592
|
Validate the contents of the database.
|
|
422
593
|
|
|
594
|
+
As an example, let's create a database with a predefined schema
|
|
595
|
+
from the countries.linkml.yaml file:
|
|
596
|
+
|
|
597
|
+
>>> from linkml_store.api.client import Client
|
|
598
|
+
>>> client = Client()
|
|
599
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
600
|
+
>>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
|
|
601
|
+
|
|
602
|
+
Let's introspect the schema to see what slots are applicable for the class "Country":
|
|
603
|
+
|
|
604
|
+
>>> sv = db.schema_view
|
|
605
|
+
>>> for slot in sv.class_induced_slots("Country"):
|
|
606
|
+
... print(slot.name, slot.range, slot.required)
|
|
607
|
+
name string True
|
|
608
|
+
code string True
|
|
609
|
+
capital string True
|
|
610
|
+
continent string True
|
|
611
|
+
languages Language None
|
|
612
|
+
|
|
613
|
+
Next we'll create a collection, binding it to the target class "Country", and insert
|
|
614
|
+
valid data:
|
|
615
|
+
|
|
616
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
617
|
+
>>> obj = {"code": "US", "name": "United States", "continent": "North America", "capital": "Washington, D.C."}
|
|
618
|
+
>>> collection.insert([obj])
|
|
619
|
+
>>> list(db.iter_validate_database())
|
|
620
|
+
[]
|
|
621
|
+
|
|
622
|
+
Now let's insert some invalid data (missing required fields)
|
|
623
|
+
|
|
624
|
+
>>> collection.insert([{"code": "FR", "name": "France"}])
|
|
625
|
+
>>> for r in db.iter_validate_database():
|
|
626
|
+
... print(r.message[0:32])
|
|
627
|
+
'capital' is a required property
|
|
628
|
+
'continent' is a required proper
|
|
629
|
+
|
|
423
630
|
:param kwargs:
|
|
424
631
|
:return: iterator over validation results
|
|
425
632
|
"""
|
|
@@ -474,6 +681,42 @@ class Database(ABC):
|
|
|
474
681
|
|
|
475
682
|
def drop(self, **kwargs):
|
|
476
683
|
"""
|
|
477
|
-
Drop the database and all collections
|
|
684
|
+
Drop the database and all collections.
|
|
685
|
+
|
|
686
|
+
:param kwargs: additional arguments
|
|
478
687
|
"""
|
|
479
688
|
raise NotImplementedError()
|
|
689
|
+
|
|
690
|
+
def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
|
|
691
|
+
"""
|
|
692
|
+
Import a database from a file or location.
|
|
693
|
+
|
|
694
|
+
:param location: location of the file
|
|
695
|
+
:param source_format: source format
|
|
696
|
+
:param kwargs: additional arguments
|
|
697
|
+
"""
|
|
698
|
+
objects = load_objects(location, format=source_format)
|
|
699
|
+
for obj in objects:
|
|
700
|
+
self.store(obj)
|
|
701
|
+
|
|
702
|
+
def export_database(self, location: str, target_format: Optional[str] = None, **kwargs):
|
|
703
|
+
"""
|
|
704
|
+
Export a database to a file or location.
|
|
705
|
+
|
|
706
|
+
:param location: location of the file
|
|
707
|
+
:param target_format: target format
|
|
708
|
+
:param kwargs: additional arguments
|
|
709
|
+
"""
|
|
710
|
+
obj = {}
|
|
711
|
+
for coll in self.list_collections():
|
|
712
|
+
qr = coll.find({}, limit=-1)
|
|
713
|
+
obj[coll.alias] = qr.rows
|
|
714
|
+
logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
|
|
715
|
+
with open(location, "w", encoding="utf-8") as stream:
|
|
716
|
+
stream.write(render_output(obj, format=target_format))
|
|
717
|
+
|
|
718
|
+
def broadcast(self, source: Collection, patches: List[PatchDict]):
|
|
719
|
+
if not self.listeners:
|
|
720
|
+
return
|
|
721
|
+
for listener in self.listeners:
|
|
722
|
+
listener(source, patches)
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Adapter for DuckDB embedded database.
|
|
3
|
+
|
|
4
|
+
Handles have the form:
|
|
5
|
+
|
|
6
|
+
- ``duckdb:///<path>`` for a file-based database
|
|
7
|
+
- ``duckdb:///:memory:`` for an in-memory database
|
|
8
|
+
"""
|
|
9
|
+
|
|
1
10
|
from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
|
|
2
11
|
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
3
12
|
|
|
@@ -19,12 +19,14 @@ class DuckDBCollection(Collection):
|
|
|
19
19
|
_table_created: bool = None
|
|
20
20
|
|
|
21
21
|
def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
22
|
+
logger.debug(f"Inserting {len(objs)}")
|
|
22
23
|
if not isinstance(objs, list):
|
|
23
24
|
objs = [objs]
|
|
24
25
|
if not objs:
|
|
25
26
|
return
|
|
26
27
|
cd = self.class_definition()
|
|
27
28
|
if not cd:
|
|
29
|
+
logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}; will induce")
|
|
28
30
|
cd = self.induce_class_definition_from_objects(objs)
|
|
29
31
|
self._create_table(cd)
|
|
30
32
|
table = self._sqla_table(cd)
|
|
@@ -36,8 +38,9 @@ class DuckDBCollection(Collection):
|
|
|
36
38
|
with conn.begin():
|
|
37
39
|
conn.execute(insert(table), objs)
|
|
38
40
|
conn.commit()
|
|
41
|
+
self._post_insert_hook(objs)
|
|
39
42
|
|
|
40
|
-
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
|
|
43
|
+
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
41
44
|
if not isinstance(objs, list):
|
|
42
45
|
objs = [objs]
|
|
43
46
|
cd = self.class_definition()
|
|
@@ -52,9 +55,9 @@ class DuckDBCollection(Collection):
|
|
|
52
55
|
stmt = stmt.compile(engine)
|
|
53
56
|
conn.execute(stmt)
|
|
54
57
|
conn.commit()
|
|
55
|
-
return
|
|
58
|
+
return
|
|
56
59
|
|
|
57
|
-
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
|
|
60
|
+
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
|
|
58
61
|
logger.info(f"Deleting from {self.target_class_name} where: {where}")
|
|
59
62
|
if where is None:
|
|
60
63
|
where = {}
|
|
@@ -78,7 +81,7 @@ class DuckDBCollection(Collection):
|
|
|
78
81
|
if deleted_rows_count == 0 and not missing_ok:
|
|
79
82
|
raise ValueError(f"No rows found for {where}")
|
|
80
83
|
conn.commit()
|
|
81
|
-
return deleted_rows_count
|
|
84
|
+
return deleted_rows_count if deleted_rows_count > -1 else None
|
|
82
85
|
|
|
83
86
|
def query_facets(
|
|
84
87
|
self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
@@ -22,6 +23,7 @@ TYPE_MAP = {
|
|
|
22
23
|
"DATE": "date",
|
|
23
24
|
"DOUBLE": "float",
|
|
24
25
|
"INTEGER": "integer",
|
|
26
|
+
"JSON": "Any",
|
|
25
27
|
}
|
|
26
28
|
|
|
27
29
|
|
|
@@ -33,9 +35,13 @@ class DuckDBDatabase(Database):
|
|
|
33
35
|
_engine: sqlalchemy.Engine = None
|
|
34
36
|
collection_class = DuckDBCollection
|
|
35
37
|
|
|
36
|
-
def __init__(self, handle: Optional[str] = None, **kwargs):
|
|
38
|
+
def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
|
|
37
39
|
if handle is None:
|
|
38
40
|
handle = "duckdb:///:memory:"
|
|
41
|
+
if recreate_if_exists:
|
|
42
|
+
path = Path(handle.replace("duckdb:///", ""))
|
|
43
|
+
if path.exists():
|
|
44
|
+
path.unlink()
|
|
39
45
|
super().__init__(handle=handle, **kwargs)
|
|
40
46
|
|
|
41
47
|
@property
|
|
@@ -69,7 +75,10 @@ class DuckDBDatabase(Database):
|
|
|
69
75
|
if qr.num_rows == 0:
|
|
70
76
|
logger.debug(f"Table {query.from_table} not created yet")
|
|
71
77
|
return QueryResult(query=query, num_rows=0, rows=[])
|
|
72
|
-
|
|
78
|
+
if not query.from_table.startswith("information_schema"):
|
|
79
|
+
sv = self.schema_view
|
|
80
|
+
else:
|
|
81
|
+
sv = None
|
|
73
82
|
if sv:
|
|
74
83
|
cd = None
|
|
75
84
|
for c in self._collections.values():
|
|
@@ -107,7 +116,10 @@ class DuckDBDatabase(Database):
|
|
|
107
116
|
|
|
108
117
|
def init_collections(self):
|
|
109
118
|
# TODO: unify schema introspection
|
|
110
|
-
|
|
119
|
+
if not self.schema_view:
|
|
120
|
+
schema = introspect_schema(self.engine)
|
|
121
|
+
else:
|
|
122
|
+
schema = self.schema_view.schema
|
|
111
123
|
table_names = schema.classes.keys()
|
|
112
124
|
if self._collections is None:
|
|
113
125
|
self._collections = {}
|
|
@@ -119,7 +131,7 @@ class DuckDBDatabase(Database):
|
|
|
119
131
|
def induce_schema_view(self) -> SchemaView:
|
|
120
132
|
# TODO: unify schema introspection
|
|
121
133
|
# TODO: handle case where schema is provided in advance
|
|
122
|
-
logger.info(f"Inducing schema view for {self.metadata.handle}")
|
|
134
|
+
logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
|
|
123
135
|
sb = SchemaBuilder()
|
|
124
136
|
schema = sb.schema
|
|
125
137
|
query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
|
|
@@ -144,8 +156,10 @@ class DuckDBDatabase(Database):
|
|
|
144
156
|
sd = SlotDefinition(
|
|
145
157
|
row["column_name"], required=row["is_nullable"] == "NO", multivalued=multivalued, range=rng
|
|
146
158
|
)
|
|
159
|
+
if dt == "JSON":
|
|
160
|
+
sd.inlined_as_list = True
|
|
147
161
|
sb.schema.classes[tbl_name].attributes[sd.name] = sd
|
|
148
|
-
logger.info(f"Introspected slot: {tbl_name}.{sd.name}: {sd.range}")
|
|
162
|
+
logger.info(f"Introspected slot: {tbl_name}.{sd.name}: {sd.range} FROM {dt}")
|
|
149
163
|
sb.add_defaults()
|
|
150
164
|
for cls_name in schema.classes:
|
|
151
165
|
if cls_name in self.metadata.collections:
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Adapter for FileSystem wrapper
|
|
3
|
+
|
|
4
|
+
Handles have the form:
|
|
5
|
+
|
|
6
|
+
- ``file:<path>`` for a local file
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
|
|
10
|
+
from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"FileSystemCollection",
|
|
14
|
+
"FileSystemDatabase",
|
|
15
|
+
]
|