linkml-store 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/client.py +30 -5
- linkml_store/api/collection.py +175 -21
- linkml_store/api/config.py +6 -2
- linkml_store/api/database.py +230 -18
- linkml_store/api/stores/chromadb/__init__.py +5 -1
- linkml_store/api/stores/duckdb/__init__.py +9 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +6 -4
- linkml_store/api/stores/duckdb/duckdb_database.py +19 -5
- linkml_store/api/stores/duckdb/mappings.py +1 -0
- linkml_store/api/stores/filesystem/__init__.py +16 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +142 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +36 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +21 -6
- linkml_store/cli.py +64 -10
- linkml_store/index/__init__.py +6 -2
- linkml_store/index/implementations/llm_indexer.py +83 -5
- linkml_store/index/implementations/simple_indexer.py +2 -2
- linkml_store/index/indexer.py +32 -8
- linkml_store/utils/format_utils.py +52 -2
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.8.dist-info}/METADATA +4 -1
- linkml_store-0.1.8.dist-info/RECORD +45 -0
- linkml_store-0.1.7.dist-info/RECORD +0 -42
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.8.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.8.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.8.dist-info}/entry_points.txt +0 -0
linkml_store/api/database.py
CHANGED
|
@@ -3,7 +3,9 @@ from abc import ABC
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from copy import copy
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING, ClassVar, Dict, Iterator, Optional, Sequence, Type, Union
|
|
6
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterator, Optional, Sequence, Type, Union
|
|
7
|
+
|
|
8
|
+
from linkml_store.utils.format_utils import load_objects, render_output
|
|
7
9
|
|
|
8
10
|
try:
|
|
9
11
|
from linkml.validator.report import Severity, ValidationResult
|
|
@@ -27,13 +29,33 @@ class Database(ABC):
|
|
|
27
29
|
"""
|
|
28
30
|
A Database provides access to named collections of data.
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
+
A database object is owned by a :ref:`Client`. The database
|
|
33
|
+
object uses a :ref:`handle` to know what kind of external
|
|
34
|
+
database system to connect to (e.g. duckdb, mongodb). The handle
|
|
35
|
+
is a string ``<DatabaseType>:<LocalLocator>``
|
|
36
|
+
|
|
37
|
+
The
|
|
38
|
+
database object may also have an :ref:`alias` that is mapped
|
|
39
|
+
to the handle.
|
|
40
|
+
|
|
41
|
+
Attaching a database
|
|
42
|
+
--------------------
|
|
32
43
|
>>> from linkml_store.api.client import Client
|
|
33
44
|
>>> client = Client()
|
|
34
|
-
>>> db = client.attach_database("duckdb", alias="test")
|
|
45
|
+
>>> db = client.attach_database("duckdb:///:memory:", alias="test")
|
|
46
|
+
|
|
47
|
+
We can check the value of the handle:
|
|
48
|
+
|
|
35
49
|
>>> db.handle
|
|
36
50
|
'duckdb:///:memory:'
|
|
51
|
+
|
|
52
|
+
The alias can be used to retrieve the database object from the client
|
|
53
|
+
|
|
54
|
+
>>> assert db == client.get_database("test")
|
|
55
|
+
|
|
56
|
+
Creating a collection
|
|
57
|
+
---------------------
|
|
58
|
+
|
|
37
59
|
>>> collection = db.create_collection("Person")
|
|
38
60
|
>>> len(db.list_collections())
|
|
39
61
|
1
|
|
@@ -57,6 +79,11 @@ class Database(ABC):
|
|
|
57
79
|
"""
|
|
58
80
|
|
|
59
81
|
_schema_view: Optional[SchemaView] = None
|
|
82
|
+
"""Schema for the database. May be transformed."""
|
|
83
|
+
|
|
84
|
+
_original_schema_view: Optional[SchemaView] = None
|
|
85
|
+
"""If a schema must be transformed, then the original is stored here."""
|
|
86
|
+
|
|
60
87
|
_collections: Optional[Dict[str, Collection]] = None
|
|
61
88
|
parent: Optional["Client"] = None
|
|
62
89
|
metadata: Optional[DatabaseConfig] = None
|
|
@@ -101,6 +128,8 @@ class Database(ABC):
|
|
|
101
128
|
return self
|
|
102
129
|
|
|
103
130
|
def _initialize_collections(self):
|
|
131
|
+
if not self.metadata.collections:
|
|
132
|
+
return
|
|
104
133
|
for name, collection_config in self.metadata.collections.items():
|
|
105
134
|
alias = collection_config.alias
|
|
106
135
|
typ = collection_config.type
|
|
@@ -127,15 +156,46 @@ class Database(ABC):
|
|
|
127
156
|
|
|
128
157
|
@property
|
|
129
158
|
def recreate_if_exists(self) -> bool:
|
|
159
|
+
"""
|
|
160
|
+
Return whether to recreate the database if it already exists.
|
|
161
|
+
|
|
162
|
+
:return:
|
|
163
|
+
"""
|
|
130
164
|
return self.metadata.recreate_if_exists
|
|
131
165
|
|
|
132
166
|
@property
|
|
133
167
|
def handle(self) -> str:
|
|
168
|
+
"""
|
|
169
|
+
Return the database handle.
|
|
170
|
+
|
|
171
|
+
Examples:
|
|
172
|
+
|
|
173
|
+
- ``duckdb:///:memory:``
|
|
174
|
+
- ``duckdb:///tmp/test.db``
|
|
175
|
+
- ``mongodb://localhost:27017/``
|
|
176
|
+
|
|
177
|
+
:return:
|
|
178
|
+
"""
|
|
134
179
|
return self.metadata.handle
|
|
135
180
|
|
|
136
|
-
|
|
181
|
+
@property
|
|
182
|
+
def alias(self):
|
|
183
|
+
return self.metadata.alias
|
|
184
|
+
|
|
185
|
+
def store(self, obj: Dict[str, Any], **kwargs):
|
|
137
186
|
"""
|
|
138
|
-
Store an object in the database
|
|
187
|
+
Store an object in the database.
|
|
188
|
+
|
|
189
|
+
The object is assumed to be a Dictionary of Collections.
|
|
190
|
+
|
|
191
|
+
>>> from linkml_store.api.client import Client
|
|
192
|
+
>>> client = Client()
|
|
193
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
194
|
+
>>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
|
|
195
|
+
>>> collection = db.get_collection("persons")
|
|
196
|
+
>>> qr = collection.find()
|
|
197
|
+
>>> qr.num_rows
|
|
198
|
+
1
|
|
139
199
|
|
|
140
200
|
:param obj: object to store
|
|
141
201
|
:param kwargs: additional arguments
|
|
@@ -144,6 +204,7 @@ class Database(ABC):
|
|
|
144
204
|
roots = [c for c in sv.all_classes().values() if c.tree_root]
|
|
145
205
|
root = roots[0] if roots else None
|
|
146
206
|
for k, v in obj.items():
|
|
207
|
+
logger.info(f"Storing collection {k}")
|
|
147
208
|
if root:
|
|
148
209
|
slot = sv.induced_slot(k, root.name)
|
|
149
210
|
if not slot:
|
|
@@ -158,20 +219,28 @@ class Database(ABC):
|
|
|
158
219
|
if not v:
|
|
159
220
|
continue
|
|
160
221
|
if slot:
|
|
161
|
-
|
|
222
|
+
logger.debug(f"Aligning to existing slot: {slot.name} range={slot.range}")
|
|
223
|
+
collection = self.get_collection(slot.name, type=slot.range, create_if_not_exists=True)
|
|
162
224
|
else:
|
|
163
225
|
collection = self.get_collection(k, create_if_not_exists=True)
|
|
226
|
+
logger.debug(f"Replacing using {collection.alias} {collection.target_class_name}")
|
|
164
227
|
collection.replace(v)
|
|
165
228
|
|
|
166
229
|
def commit(self, **kwargs):
|
|
167
230
|
"""
|
|
168
|
-
Commit
|
|
231
|
+
Commit pending changes to the database.
|
|
232
|
+
|
|
233
|
+
:param kwargs:
|
|
234
|
+
:return:
|
|
169
235
|
"""
|
|
170
236
|
raise NotImplementedError()
|
|
171
237
|
|
|
172
238
|
def close(self, **kwargs):
|
|
173
239
|
"""
|
|
174
|
-
Close the database
|
|
240
|
+
Close the database.
|
|
241
|
+
|
|
242
|
+
:param kwargs:
|
|
243
|
+
:return:
|
|
175
244
|
"""
|
|
176
245
|
raise NotImplementedError()
|
|
177
246
|
|
|
@@ -188,15 +257,27 @@ class Database(ABC):
|
|
|
188
257
|
**kwargs,
|
|
189
258
|
) -> Collection:
|
|
190
259
|
"""
|
|
191
|
-
Create a new collection
|
|
260
|
+
Create a new collection in the current database.
|
|
261
|
+
|
|
262
|
+
The collection must have a *Type*, and may have an *Alias*.
|
|
263
|
+
|
|
264
|
+
Examples:
|
|
192
265
|
|
|
193
266
|
>>> from linkml_store.api.client import Client
|
|
194
267
|
>>> client = Client()
|
|
195
268
|
>>> db = client.attach_database("duckdb", alias="test")
|
|
196
|
-
>>> collection = db.create_collection("Person")
|
|
197
|
-
>>> collection.
|
|
269
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
270
|
+
>>> collection.alias
|
|
271
|
+
'persons'
|
|
272
|
+
>>> collection.target_class_name
|
|
198
273
|
'Person'
|
|
199
274
|
|
|
275
|
+
If alias is not provided, it defaults to the name of the type.
|
|
276
|
+
|
|
277
|
+
>>> collection = db.create_collection("Organization")
|
|
278
|
+
>>> collection.alias
|
|
279
|
+
'Organization'
|
|
280
|
+
|
|
200
281
|
:param name: name of the collection
|
|
201
282
|
:param alias: alias for the collection
|
|
202
283
|
:param metadata: metadata for the collection
|
|
@@ -207,6 +288,8 @@ class Database(ABC):
|
|
|
207
288
|
raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
|
|
208
289
|
collection_cls = self.collection_class
|
|
209
290
|
collection = collection_cls(name=name, alias=alias, parent=self, metadata=metadata)
|
|
291
|
+
if metadata and metadata.source_location:
|
|
292
|
+
collection.load_from_source()
|
|
210
293
|
if metadata and metadata.attributes:
|
|
211
294
|
sv = self.schema_view
|
|
212
295
|
schema = sv.schema
|
|
@@ -265,7 +348,9 @@ class Database(ABC):
|
|
|
265
348
|
"""
|
|
266
349
|
return [c.name for c in self.list_collections(**kwargs)]
|
|
267
350
|
|
|
268
|
-
def get_collection(
|
|
351
|
+
def get_collection(
|
|
352
|
+
self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
|
|
353
|
+
) -> "Collection":
|
|
269
354
|
"""
|
|
270
355
|
Get a named collection.
|
|
271
356
|
|
|
@@ -283,14 +368,19 @@ class Database(ABC):
|
|
|
283
368
|
KeyError: 'Collection NonExistent does not exist'
|
|
284
369
|
|
|
285
370
|
:param name: name of the collection
|
|
371
|
+
:param type: target class name
|
|
286
372
|
:param create_if_not_exists: create the collection if it does not exist
|
|
287
373
|
|
|
288
374
|
"""
|
|
289
375
|
if not self._collections:
|
|
376
|
+
logger.debug("Initializing collections")
|
|
290
377
|
self.init_collections()
|
|
291
378
|
if name not in self._collections.keys():
|
|
292
379
|
if create_if_not_exists:
|
|
293
|
-
|
|
380
|
+
if type is None:
|
|
381
|
+
type = name
|
|
382
|
+
logger.debug(f"Creating new collection: {name} kwargs: {kwargs}")
|
|
383
|
+
self._collections[name] = self.create_collection(type, alias=name, **kwargs)
|
|
294
384
|
else:
|
|
295
385
|
raise KeyError(f"Collection {name} does not exist")
|
|
296
386
|
return self._collections[name]
|
|
@@ -333,7 +423,29 @@ class Database(ABC):
|
|
|
333
423
|
@property
|
|
334
424
|
def schema_view(self) -> SchemaView:
|
|
335
425
|
"""
|
|
336
|
-
Return a schema view for the named collection
|
|
426
|
+
Return a schema view for the named collection.
|
|
427
|
+
|
|
428
|
+
If no explicit schema is provided, one will be induced from the data.
|
|
429
|
+
|
|
430
|
+
Induced schema example:
|
|
431
|
+
|
|
432
|
+
>>> from linkml_store.api.client import Client
|
|
433
|
+
>>> client = Client()
|
|
434
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
435
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
436
|
+
>>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
|
|
437
|
+
>>> schema_view = db.schema_view
|
|
438
|
+
>>> cd = schema_view.get_class("Person")
|
|
439
|
+
>>> cd.attributes["id"].range
|
|
440
|
+
'string'
|
|
441
|
+
>>> cd.attributes["age_in_years"].range
|
|
442
|
+
'integer'
|
|
443
|
+
|
|
444
|
+
We can reuse the same class:
|
|
445
|
+
|
|
446
|
+
>>> collection2 = db.create_collection("Person", alias="other_persons")
|
|
447
|
+
>>> collection2.class_definition().attributes["age_in_years"].range
|
|
448
|
+
'integer'
|
|
337
449
|
"""
|
|
338
450
|
if not self._schema_view:
|
|
339
451
|
self._initialize_schema()
|
|
@@ -345,6 +457,26 @@ class Database(ABC):
|
|
|
345
457
|
"""
|
|
346
458
|
Set the schema view for the database.
|
|
347
459
|
|
|
460
|
+
>>> from linkml_store.api.client import Client
|
|
461
|
+
>>> client = Client()
|
|
462
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
463
|
+
>>> sv = SchemaView("tests/input/countries/countries.linkml.yaml")
|
|
464
|
+
>>> db.set_schema_view(sv)
|
|
465
|
+
>>> cd = db.schema_view.schema.classes["Country"]
|
|
466
|
+
>>> sorted(cd.slots)
|
|
467
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
468
|
+
>>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
|
|
469
|
+
>>> sorted(induced_slots.keys())
|
|
470
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
471
|
+
>>> induced_slots["code"].identifier
|
|
472
|
+
True
|
|
473
|
+
|
|
474
|
+
Creating a new collection will align with the schema view:
|
|
475
|
+
|
|
476
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
477
|
+
>>> sorted(collection.class_definition().slots)
|
|
478
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
479
|
+
|
|
348
480
|
:param schema_view:
|
|
349
481
|
:return:
|
|
350
482
|
"""
|
|
@@ -375,8 +507,7 @@ class Database(ABC):
|
|
|
375
507
|
if inlined and slot.range:
|
|
376
508
|
if slot.name in self._collections:
|
|
377
509
|
coll = self._collections[slot.name]
|
|
378
|
-
|
|
379
|
-
coll.metadata.type = slot.range
|
|
510
|
+
coll.metadata.type = slot.range
|
|
380
511
|
|
|
381
512
|
def load_schema_view(self, path: Union[str, Path]):
|
|
382
513
|
"""
|
|
@@ -386,6 +517,21 @@ class Database(ABC):
|
|
|
386
517
|
>>> client = Client()
|
|
387
518
|
>>> db = client.attach_database("duckdb", alias="test")
|
|
388
519
|
>>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
|
|
520
|
+
>>> sv = db.schema_view
|
|
521
|
+
>>> cd = sv.schema.classes["Country"]
|
|
522
|
+
>>> sorted(cd.slots)
|
|
523
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
524
|
+
>>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
|
|
525
|
+
>>> sorted(induced_slots.keys())
|
|
526
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
527
|
+
>>> induced_slots["code"].identifier
|
|
528
|
+
True
|
|
529
|
+
|
|
530
|
+
Creating a new collection will align with the schema view:
|
|
531
|
+
|
|
532
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
533
|
+
>>> sorted(collection.class_definition().slots)
|
|
534
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
389
535
|
|
|
390
536
|
:param path:
|
|
391
537
|
:return:
|
|
@@ -420,6 +566,42 @@ class Database(ABC):
|
|
|
420
566
|
"""
|
|
421
567
|
Validate the contents of the database.
|
|
422
568
|
|
|
569
|
+
As an example, let's create a database with a predefined schema
|
|
570
|
+
from the countries.linkml.yaml file:
|
|
571
|
+
|
|
572
|
+
>>> from linkml_store.api.client import Client
|
|
573
|
+
>>> client = Client()
|
|
574
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
575
|
+
>>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
|
|
576
|
+
|
|
577
|
+
Let's introspect the schema to see what slots are applicable for the class "Country":
|
|
578
|
+
|
|
579
|
+
>>> sv = db.schema_view
|
|
580
|
+
>>> for slot in sv.class_induced_slots("Country"):
|
|
581
|
+
... print(slot.name, slot.range, slot.required)
|
|
582
|
+
name string True
|
|
583
|
+
code string True
|
|
584
|
+
capital string True
|
|
585
|
+
continent string True
|
|
586
|
+
languages Language None
|
|
587
|
+
|
|
588
|
+
Next we'll create a collection, binding it to the target class "Country", and insert
|
|
589
|
+
valid data:
|
|
590
|
+
|
|
591
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
592
|
+
>>> obj = {"code": "US", "name": "United States", "continent": "North America", "capital": "Washington, D.C."}
|
|
593
|
+
>>> collection.insert([obj])
|
|
594
|
+
>>> list(db.iter_validate_database())
|
|
595
|
+
[]
|
|
596
|
+
|
|
597
|
+
Now let's insert some invalid data (missing required fields):
|
|
598
|
+
|
|
599
|
+
>>> collection.insert([{"code": "FR", "name": "France"}])
|
|
600
|
+
>>> for r in db.iter_validate_database():
|
|
601
|
+
... print(r.message[0:32])
|
|
602
|
+
'capital' is a required property
|
|
603
|
+
'continent' is a required proper
|
|
604
|
+
|
|
423
605
|
:param kwargs:
|
|
424
606
|
:return: iterator over validation results
|
|
425
607
|
"""
|
|
@@ -474,6 +656,36 @@ class Database(ABC):
|
|
|
474
656
|
|
|
475
657
|
def drop(self, **kwargs):
|
|
476
658
|
"""
|
|
477
|
-
Drop the database and all collections
|
|
659
|
+
Drop the database and all collections.
|
|
660
|
+
|
|
661
|
+
:param kwargs: additional arguments
|
|
478
662
|
"""
|
|
479
663
|
raise NotImplementedError()
|
|
664
|
+
|
|
665
|
+
def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
|
|
666
|
+
"""
|
|
667
|
+
Import a database from a file or location.
|
|
668
|
+
|
|
669
|
+
:param location: location of the file
|
|
670
|
+
:param source_format: source format
|
|
671
|
+
:param kwargs: additional arguments
|
|
672
|
+
"""
|
|
673
|
+
objects = load_objects(location, format=source_format)
|
|
674
|
+
for obj in objects:
|
|
675
|
+
self.store(obj)
|
|
676
|
+
|
|
677
|
+
def export_database(self, location: str, target_format: Optional[str] = None, **kwargs):
|
|
678
|
+
"""
|
|
679
|
+
Export a database to a file or location.
|
|
680
|
+
|
|
681
|
+
:param location: location of the file
|
|
682
|
+
:param target_format: target format
|
|
683
|
+
:param kwargs: additional arguments
|
|
684
|
+
"""
|
|
685
|
+
obj = {}
|
|
686
|
+
for coll in self.list_collections():
|
|
687
|
+
qr = coll.find({}, limit=-1)
|
|
688
|
+
obj[coll.alias] = qr.rows
|
|
689
|
+
logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
|
|
690
|
+
with open(location, "w", encoding="utf-8") as stream:
|
|
691
|
+
stream.write(render_output(obj, format=target_format))
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Adapter for DuckDB embedded database.
|
|
3
|
+
|
|
4
|
+
Handles have the form:
|
|
5
|
+
|
|
6
|
+
- ``duckdb:///<path>`` for a file-based database
|
|
7
|
+
- ``duckdb:///:memory:`` for an in-memory database
|
|
8
|
+
"""
|
|
9
|
+
|
|
1
10
|
from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
|
|
2
11
|
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
3
12
|
|
|
@@ -19,12 +19,14 @@ class DuckDBCollection(Collection):
|
|
|
19
19
|
_table_created: bool = None
|
|
20
20
|
|
|
21
21
|
def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
22
|
+
logger.debug(f"Inserting {len(objs)}")
|
|
22
23
|
if not isinstance(objs, list):
|
|
23
24
|
objs = [objs]
|
|
24
25
|
if not objs:
|
|
25
26
|
return
|
|
26
27
|
cd = self.class_definition()
|
|
27
28
|
if not cd:
|
|
29
|
+
logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}; will induce")
|
|
28
30
|
cd = self.induce_class_definition_from_objects(objs)
|
|
29
31
|
self._create_table(cd)
|
|
30
32
|
table = self._sqla_table(cd)
|
|
@@ -37,7 +39,7 @@ class DuckDBCollection(Collection):
|
|
|
37
39
|
conn.execute(insert(table), objs)
|
|
38
40
|
conn.commit()
|
|
39
41
|
|
|
40
|
-
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
|
|
42
|
+
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
41
43
|
if not isinstance(objs, list):
|
|
42
44
|
objs = [objs]
|
|
43
45
|
cd = self.class_definition()
|
|
@@ -52,9 +54,9 @@ class DuckDBCollection(Collection):
|
|
|
52
54
|
stmt = stmt.compile(engine)
|
|
53
55
|
conn.execute(stmt)
|
|
54
56
|
conn.commit()
|
|
55
|
-
return
|
|
57
|
+
return
|
|
56
58
|
|
|
57
|
-
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
|
|
59
|
+
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
|
|
58
60
|
logger.info(f"Deleting from {self.target_class_name} where: {where}")
|
|
59
61
|
if where is None:
|
|
60
62
|
where = {}
|
|
@@ -78,7 +80,7 @@ class DuckDBCollection(Collection):
|
|
|
78
80
|
if deleted_rows_count == 0 and not missing_ok:
|
|
79
81
|
raise ValueError(f"No rows found for {where}")
|
|
80
82
|
conn.commit()
|
|
81
|
-
return deleted_rows_count
|
|
83
|
+
return deleted_rows_count if deleted_rows_count > -1 else None
|
|
82
84
|
|
|
83
85
|
def query_facets(
|
|
84
86
|
self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
@@ -22,6 +23,7 @@ TYPE_MAP = {
|
|
|
22
23
|
"DATE": "date",
|
|
23
24
|
"DOUBLE": "float",
|
|
24
25
|
"INTEGER": "integer",
|
|
26
|
+
"JSON": "Any",
|
|
25
27
|
}
|
|
26
28
|
|
|
27
29
|
|
|
@@ -33,9 +35,13 @@ class DuckDBDatabase(Database):
|
|
|
33
35
|
_engine: sqlalchemy.Engine = None
|
|
34
36
|
collection_class = DuckDBCollection
|
|
35
37
|
|
|
36
|
-
def __init__(self, handle: Optional[str] = None, **kwargs):
|
|
38
|
+
def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
|
|
37
39
|
if handle is None:
|
|
38
40
|
handle = "duckdb:///:memory:"
|
|
41
|
+
if recreate_if_exists:
|
|
42
|
+
path = Path(handle.replace("duckdb:///", ""))
|
|
43
|
+
if path.exists():
|
|
44
|
+
path.unlink()
|
|
39
45
|
super().__init__(handle=handle, **kwargs)
|
|
40
46
|
|
|
41
47
|
@property
|
|
@@ -69,7 +75,10 @@ class DuckDBDatabase(Database):
|
|
|
69
75
|
if qr.num_rows == 0:
|
|
70
76
|
logger.debug(f"Table {query.from_table} not created yet")
|
|
71
77
|
return QueryResult(query=query, num_rows=0, rows=[])
|
|
72
|
-
|
|
78
|
+
if not query.from_table.startswith("information_schema"):
|
|
79
|
+
sv = self.schema_view
|
|
80
|
+
else:
|
|
81
|
+
sv = None
|
|
73
82
|
if sv:
|
|
74
83
|
cd = None
|
|
75
84
|
for c in self._collections.values():
|
|
@@ -107,7 +116,10 @@ class DuckDBDatabase(Database):
|
|
|
107
116
|
|
|
108
117
|
def init_collections(self):
|
|
109
118
|
# TODO: unify schema introspection
|
|
110
|
-
|
|
119
|
+
if not self.schema_view:
|
|
120
|
+
schema = introspect_schema(self.engine)
|
|
121
|
+
else:
|
|
122
|
+
schema = self.schema_view.schema
|
|
111
123
|
table_names = schema.classes.keys()
|
|
112
124
|
if self._collections is None:
|
|
113
125
|
self._collections = {}
|
|
@@ -119,7 +131,7 @@ class DuckDBDatabase(Database):
|
|
|
119
131
|
def induce_schema_view(self) -> SchemaView:
|
|
120
132
|
# TODO: unify schema introspection
|
|
121
133
|
# TODO: handle case where schema is provided in advance
|
|
122
|
-
logger.info(f"Inducing schema view for {self.metadata.handle}")
|
|
134
|
+
logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
|
|
123
135
|
sb = SchemaBuilder()
|
|
124
136
|
schema = sb.schema
|
|
125
137
|
query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
|
|
@@ -144,8 +156,10 @@ class DuckDBDatabase(Database):
|
|
|
144
156
|
sd = SlotDefinition(
|
|
145
157
|
row["column_name"], required=row["is_nullable"] == "NO", multivalued=multivalued, range=rng
|
|
146
158
|
)
|
|
159
|
+
if dt == "JSON":
|
|
160
|
+
sd.inlined_as_list = True
|
|
147
161
|
sb.schema.classes[tbl_name].attributes[sd.name] = sd
|
|
148
|
-
logger.info(f"Introspected slot: {tbl_name}.{sd.name}: {sd.range}")
|
|
162
|
+
logger.info(f"Introspected slot: {tbl_name}.{sd.name}: {sd.range} FROM {dt}")
|
|
149
163
|
sb.add_defaults()
|
|
150
164
|
for cls_name in schema.classes:
|
|
151
165
|
if cls_name in self.metadata.collections:
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Adapter for DuckDB embedded database.
|
|
3
|
+
|
|
4
|
+
Handles have the form:
|
|
5
|
+
|
|
6
|
+
- ``duckdb:///<path>`` for a file-based database
|
|
7
|
+
- ``duckdb:///:memory:`` for an in-memory database
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
|
|
11
|
+
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"DuckDBCollection",
|
|
15
|
+
"DuckDBDatabase",
|
|
16
|
+
]
|