linkml-store 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/client.py +32 -3
- linkml_store/api/collection.py +231 -30
- linkml_store/api/config.py +10 -2
- linkml_store/api/database.py +305 -19
- linkml_store/api/stores/chromadb/__init__.py +7 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +8 -1
- linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +11 -9
- linkml_store/api/stores/duckdb/duckdb_database.py +22 -8
- linkml_store/api/stores/duckdb/mappings.py +1 -0
- linkml_store/api/stores/filesystem/__init__.py +16 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +142 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +36 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +1 -1
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +29 -8
- linkml_store/api/stores/solr/__init__.py +3 -0
- linkml_store/api/stores/solr/solr_collection.py +2 -1
- linkml_store/api/stores/solr/solr_database.py +1 -0
- linkml_store/cli.py +64 -10
- linkml_store/index/__init__.py +6 -2
- linkml_store/index/implementations/llm_indexer.py +83 -5
- linkml_store/index/implementations/simple_indexer.py +2 -2
- linkml_store/index/indexer.py +32 -8
- linkml_store/utils/format_utils.py +52 -2
- linkml_store/utils/object_utils.py +9 -1
- {linkml_store-0.1.6.dist-info → linkml_store-0.1.8.dist-info}/METADATA +4 -1
- linkml_store-0.1.8.dist-info/RECORD +45 -0
- linkml_store-0.1.6.dist-info/RECORD +0 -41
- {linkml_store-0.1.6.dist-info → linkml_store-0.1.8.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.6.dist-info → linkml_store-0.1.8.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.6.dist-info → linkml_store-0.1.8.dist-info}/entry_points.txt +0 -0
linkml_store/api/database.py
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from abc import ABC
|
|
3
|
+
from collections import defaultdict
|
|
3
4
|
from copy import copy
|
|
4
5
|
from pathlib import Path
|
|
5
|
-
from typing import TYPE_CHECKING, ClassVar, Dict, Iterator, Optional, Sequence, Type, Union
|
|
6
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterator, Optional, Sequence, Type, Union
|
|
7
|
+
|
|
8
|
+
from linkml_store.utils.format_utils import load_objects, render_output
|
|
6
9
|
|
|
7
10
|
try:
|
|
8
|
-
from linkml.validator.report import ValidationResult
|
|
11
|
+
from linkml.validator.report import Severity, ValidationResult
|
|
9
12
|
except ImportError:
|
|
10
13
|
ValidationResult = None
|
|
11
14
|
|
|
@@ -26,13 +29,33 @@ class Database(ABC):
|
|
|
26
29
|
"""
|
|
27
30
|
A Database provides access to named collections of data.
|
|
28
31
|
|
|
29
|
-
|
|
30
|
-
|
|
32
|
+
A database object is owned by a :ref:`Client`. The database
|
|
33
|
+
object uses a :ref:`handle` to know what kind of external
|
|
34
|
+
database system to connect to (e.g. duckdb, mongodb). The handle
|
|
35
|
+
is a string ``<DatabaseType>:<LocalLocator>``
|
|
36
|
+
|
|
37
|
+
The
|
|
38
|
+
database object may also have an :ref:`alias` that is mapped
|
|
39
|
+
to the handle.
|
|
40
|
+
|
|
41
|
+
Attaching a database
|
|
42
|
+
--------------------
|
|
31
43
|
>>> from linkml_store.api.client import Client
|
|
32
44
|
>>> client = Client()
|
|
33
|
-
>>> db = client.attach_database("duckdb", alias="test")
|
|
45
|
+
>>> db = client.attach_database("duckdb:///:memory:", alias="test")
|
|
46
|
+
|
|
47
|
+
We can check the value of the handle:
|
|
48
|
+
|
|
34
49
|
>>> db.handle
|
|
35
50
|
'duckdb:///:memory:'
|
|
51
|
+
|
|
52
|
+
The alias can be used to retrieve the database object from the client
|
|
53
|
+
|
|
54
|
+
>>> assert db == client.get_database("test")
|
|
55
|
+
|
|
56
|
+
Creating a collection
|
|
57
|
+
---------------------
|
|
58
|
+
|
|
36
59
|
>>> collection = db.create_collection("Person")
|
|
37
60
|
>>> len(db.list_collections())
|
|
38
61
|
1
|
|
@@ -56,6 +79,11 @@ class Database(ABC):
|
|
|
56
79
|
"""
|
|
57
80
|
|
|
58
81
|
_schema_view: Optional[SchemaView] = None
|
|
82
|
+
"""Schema for the database. May be transformed."""
|
|
83
|
+
|
|
84
|
+
_original_schema_view: Optional[SchemaView] = None
|
|
85
|
+
"""If a schema must be transformed, then the original is stored here."""
|
|
86
|
+
|
|
59
87
|
_collections: Optional[Dict[str, Collection]] = None
|
|
60
88
|
parent: Optional["Client"] = None
|
|
61
89
|
metadata: Optional[DatabaseConfig] = None
|
|
@@ -100,6 +128,8 @@ class Database(ABC):
|
|
|
100
128
|
return self
|
|
101
129
|
|
|
102
130
|
def _initialize_collections(self):
|
|
131
|
+
if not self.metadata.collections:
|
|
132
|
+
return
|
|
103
133
|
for name, collection_config in self.metadata.collections.items():
|
|
104
134
|
alias = collection_config.alias
|
|
105
135
|
typ = collection_config.type
|
|
@@ -126,15 +156,46 @@ class Database(ABC):
|
|
|
126
156
|
|
|
127
157
|
@property
|
|
128
158
|
def recreate_if_exists(self) -> bool:
|
|
159
|
+
"""
|
|
160
|
+
Return whether to recreate the database if it already exists.
|
|
161
|
+
|
|
162
|
+
:return:
|
|
163
|
+
"""
|
|
129
164
|
return self.metadata.recreate_if_exists
|
|
130
165
|
|
|
131
166
|
@property
|
|
132
167
|
def handle(self) -> str:
|
|
168
|
+
"""
|
|
169
|
+
Return the database handle.
|
|
170
|
+
|
|
171
|
+
Examples:
|
|
172
|
+
|
|
173
|
+
- ``duckdb:///:memory:``
|
|
174
|
+
- ``duckdb:///tmp/test.db``
|
|
175
|
+
- ``mongodb://localhost:27017/``
|
|
176
|
+
|
|
177
|
+
:return:
|
|
178
|
+
"""
|
|
133
179
|
return self.metadata.handle
|
|
134
180
|
|
|
135
|
-
|
|
181
|
+
@property
|
|
182
|
+
def alias(self):
|
|
183
|
+
return self.metadata.alias
|
|
184
|
+
|
|
185
|
+
def store(self, obj: Dict[str, Any], **kwargs):
|
|
136
186
|
"""
|
|
137
|
-
Store an object in the database
|
|
187
|
+
Store an object in the database.
|
|
188
|
+
|
|
189
|
+
The object is assumed to be a Dictionary of Collections.
|
|
190
|
+
|
|
191
|
+
>>> from linkml_store.api.client import Client
|
|
192
|
+
>>> client = Client()
|
|
193
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
194
|
+
>>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
|
|
195
|
+
>>> collection = db.get_collection("persons")
|
|
196
|
+
>>> qr = collection.find()
|
|
197
|
+
>>> qr.num_rows
|
|
198
|
+
1
|
|
138
199
|
|
|
139
200
|
:param obj: object to store
|
|
140
201
|
:param kwargs: additional arguments
|
|
@@ -143,6 +204,7 @@ class Database(ABC):
|
|
|
143
204
|
roots = [c for c in sv.all_classes().values() if c.tree_root]
|
|
144
205
|
root = roots[0] if roots else None
|
|
145
206
|
for k, v in obj.items():
|
|
207
|
+
logger.info(f"Storing collection {k}")
|
|
146
208
|
if root:
|
|
147
209
|
slot = sv.induced_slot(k, root.name)
|
|
148
210
|
if not slot:
|
|
@@ -157,20 +219,28 @@ class Database(ABC):
|
|
|
157
219
|
if not v:
|
|
158
220
|
continue
|
|
159
221
|
if slot:
|
|
160
|
-
|
|
222
|
+
logger.debug(f"Aligning to existing slot: {slot.name} range={slot.range}")
|
|
223
|
+
collection = self.get_collection(slot.name, type=slot.range, create_if_not_exists=True)
|
|
161
224
|
else:
|
|
162
225
|
collection = self.get_collection(k, create_if_not_exists=True)
|
|
226
|
+
logger.debug(f"Replacing using {collection.alias} {collection.target_class_name}")
|
|
163
227
|
collection.replace(v)
|
|
164
228
|
|
|
165
229
|
def commit(self, **kwargs):
|
|
166
230
|
"""
|
|
167
|
-
Commit
|
|
231
|
+
Commit pending changes to the database.
|
|
232
|
+
|
|
233
|
+
:param kwargs:
|
|
234
|
+
:return:
|
|
168
235
|
"""
|
|
169
236
|
raise NotImplementedError()
|
|
170
237
|
|
|
171
238
|
def close(self, **kwargs):
|
|
172
239
|
"""
|
|
173
|
-
Close the database
|
|
240
|
+
Close the database.
|
|
241
|
+
|
|
242
|
+
:param kwargs:
|
|
243
|
+
:return:
|
|
174
244
|
"""
|
|
175
245
|
raise NotImplementedError()
|
|
176
246
|
|
|
@@ -187,15 +257,27 @@ class Database(ABC):
|
|
|
187
257
|
**kwargs,
|
|
188
258
|
) -> Collection:
|
|
189
259
|
"""
|
|
190
|
-
Create a new collection
|
|
260
|
+
Create a new collection in the current database.
|
|
261
|
+
|
|
262
|
+
The collection must have a *Type*, and may have an *Alias*.
|
|
263
|
+
|
|
264
|
+
Examples:
|
|
191
265
|
|
|
192
266
|
>>> from linkml_store.api.client import Client
|
|
193
267
|
>>> client = Client()
|
|
194
268
|
>>> db = client.attach_database("duckdb", alias="test")
|
|
195
|
-
>>> collection = db.create_collection("Person")
|
|
196
|
-
>>> collection.
|
|
269
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
270
|
+
>>> collection.alias
|
|
271
|
+
'persons'
|
|
272
|
+
>>> collection.target_class_name
|
|
197
273
|
'Person'
|
|
198
274
|
|
|
275
|
+
If alias is not provided, it defaults to the name of the type.
|
|
276
|
+
|
|
277
|
+
>>> collection = db.create_collection("Organization")
|
|
278
|
+
>>> collection.alias
|
|
279
|
+
'Organization'
|
|
280
|
+
|
|
199
281
|
:param name: name of the collection
|
|
200
282
|
:param alias: alias for the collection
|
|
201
283
|
:param metadata: metadata for the collection
|
|
@@ -204,9 +286,10 @@ class Database(ABC):
|
|
|
204
286
|
"""
|
|
205
287
|
if not name:
|
|
206
288
|
raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
|
|
207
|
-
# collection_cls = self._collection_class
|
|
208
289
|
collection_cls = self.collection_class
|
|
209
290
|
collection = collection_cls(name=name, alias=alias, parent=self, metadata=metadata)
|
|
291
|
+
if metadata and metadata.source_location:
|
|
292
|
+
collection.load_from_source()
|
|
210
293
|
if metadata and metadata.attributes:
|
|
211
294
|
sv = self.schema_view
|
|
212
295
|
schema = sv.schema
|
|
@@ -265,7 +348,9 @@ class Database(ABC):
|
|
|
265
348
|
"""
|
|
266
349
|
return [c.name for c in self.list_collections(**kwargs)]
|
|
267
350
|
|
|
268
|
-
def get_collection(
|
|
351
|
+
def get_collection(
|
|
352
|
+
self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
|
|
353
|
+
) -> "Collection":
|
|
269
354
|
"""
|
|
270
355
|
Get a named collection.
|
|
271
356
|
|
|
@@ -283,14 +368,19 @@ class Database(ABC):
|
|
|
283
368
|
KeyError: 'Collection NonExistent does not exist'
|
|
284
369
|
|
|
285
370
|
:param name: name of the collection
|
|
371
|
+
:param type: target class name
|
|
286
372
|
:param create_if_not_exists: create the collection if it does not exist
|
|
287
373
|
|
|
288
374
|
"""
|
|
289
375
|
if not self._collections:
|
|
376
|
+
logger.debug("Initializing collections")
|
|
290
377
|
self.init_collections()
|
|
291
378
|
if name not in self._collections.keys():
|
|
292
379
|
if create_if_not_exists:
|
|
293
|
-
|
|
380
|
+
if type is None:
|
|
381
|
+
type = name
|
|
382
|
+
logger.debug(f"Creating new collection: {name} kwargs: {kwargs}")
|
|
383
|
+
self._collections[name] = self.create_collection(type, alias=name, **kwargs)
|
|
294
384
|
else:
|
|
295
385
|
raise KeyError(f"Collection {name} does not exist")
|
|
296
386
|
return self._collections[name]
|
|
@@ -333,7 +423,29 @@ class Database(ABC):
|
|
|
333
423
|
@property
|
|
334
424
|
def schema_view(self) -> SchemaView:
|
|
335
425
|
"""
|
|
336
|
-
Return a schema view for the named collection
|
|
426
|
+
Return a schema view for the named collection.
|
|
427
|
+
|
|
428
|
+
If no explicit schema is provided, this will induce one.
|
|
429
|
+
|
|
430
|
+
Induced schema example:
|
|
431
|
+
|
|
432
|
+
>>> from linkml_store.api.client import Client
|
|
433
|
+
>>> client = Client()
|
|
434
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
435
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
436
|
+
>>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
|
|
437
|
+
>>> schema_view = db.schema_view
|
|
438
|
+
>>> cd = schema_view.get_class("Person")
|
|
439
|
+
>>> cd.attributes["id"].range
|
|
440
|
+
'string'
|
|
441
|
+
>>> cd.attributes["age_in_years"].range
|
|
442
|
+
'integer'
|
|
443
|
+
|
|
444
|
+
We can reuse the same class:
|
|
445
|
+
|
|
446
|
+
>>> collection2 = db.create_collection("Person", alias="other_persons")
|
|
447
|
+
>>> collection2.class_definition().attributes["age_in_years"].range
|
|
448
|
+
'integer'
|
|
337
449
|
"""
|
|
338
450
|
if not self._schema_view:
|
|
339
451
|
self._initialize_schema()
|
|
@@ -341,14 +453,61 @@ class Database(ABC):
|
|
|
341
453
|
self._schema_view = self.induce_schema_view()
|
|
342
454
|
return self._schema_view
|
|
343
455
|
|
|
344
|
-
def set_schema_view(self, schema_view: SchemaView):
|
|
456
|
+
def set_schema_view(self, schema_view: Union[str, Path, SchemaView]):
|
|
345
457
|
"""
|
|
346
458
|
Set the schema view for the database.
|
|
347
459
|
|
|
460
|
+
>>> from linkml_store.api.client import Client
|
|
461
|
+
>>> client = Client()
|
|
462
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
463
|
+
>>> sv = SchemaView("tests/input/countries/countries.linkml.yaml")
|
|
464
|
+
>>> db.set_schema_view(sv)
|
|
465
|
+
>>> cd = db.schema_view.schema.classes["Country"]
|
|
466
|
+
>>> sorted(cd.slots)
|
|
467
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
468
|
+
>>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
|
|
469
|
+
>>> sorted(induced_slots.keys())
|
|
470
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
471
|
+
>>> induced_slots["code"].identifier
|
|
472
|
+
True
|
|
473
|
+
|
|
474
|
+
Creating a new collection will align with the schema view:
|
|
475
|
+
|
|
476
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
477
|
+
>>> sorted(collection.class_definition().slots)
|
|
478
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
479
|
+
|
|
348
480
|
:param schema_view:
|
|
349
481
|
:return:
|
|
350
482
|
"""
|
|
483
|
+
if isinstance(schema_view, Path):
|
|
484
|
+
schema_view = str(schema_view)
|
|
485
|
+
if isinstance(schema_view, str):
|
|
486
|
+
schema_view = SchemaView(schema_view)
|
|
351
487
|
self._schema_view = schema_view
|
|
488
|
+
if not self._collections:
|
|
489
|
+
return
|
|
490
|
+
# align with induced schema
|
|
491
|
+
roots = [c for c in schema_view.all_classes().values() if c.tree_root]
|
|
492
|
+
if len(roots) == 0:
|
|
493
|
+
all_ranges = set()
|
|
494
|
+
for cn in schema_view.all_classes():
|
|
495
|
+
for slot in schema_view.class_induced_slots(cn):
|
|
496
|
+
if slot.range:
|
|
497
|
+
all_ranges.add(slot.range)
|
|
498
|
+
roots = [
|
|
499
|
+
c
|
|
500
|
+
for c in schema_view.all_classes().values()
|
|
501
|
+
if not all_ranges.intersection(schema_view.class_ancestors(c.name, reflexive=True))
|
|
502
|
+
]
|
|
503
|
+
if len(roots) == 1:
|
|
504
|
+
root = roots[0]
|
|
505
|
+
for slot in schema_view.class_induced_slots(root.name):
|
|
506
|
+
inlined = slot.inlined or slot.inlined_as_list
|
|
507
|
+
if inlined and slot.range:
|
|
508
|
+
if slot.name in self._collections:
|
|
509
|
+
coll = self._collections[slot.name]
|
|
510
|
+
coll.metadata.type = slot.range
|
|
352
511
|
|
|
353
512
|
def load_schema_view(self, path: Union[str, Path]):
|
|
354
513
|
"""
|
|
@@ -358,6 +517,21 @@ class Database(ABC):
|
|
|
358
517
|
>>> client = Client()
|
|
359
518
|
>>> db = client.attach_database("duckdb", alias="test")
|
|
360
519
|
>>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
|
|
520
|
+
>>> sv = db.schema_view
|
|
521
|
+
>>> cd = sv.schema.classes["Country"]
|
|
522
|
+
>>> sorted(cd.slots)
|
|
523
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
524
|
+
>>> induced_slots = {s.name: s for s in sv.class_induced_slots("Country")}
|
|
525
|
+
>>> sorted(induced_slots.keys())
|
|
526
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
527
|
+
>>> induced_slots["code"].identifier
|
|
528
|
+
True
|
|
529
|
+
|
|
530
|
+
Creating a new collection will align with the schema view:
|
|
531
|
+
|
|
532
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
533
|
+
>>> sorted(collection.class_definition().slots)
|
|
534
|
+
['capital', 'code', 'continent', 'languages', 'name']
|
|
361
535
|
|
|
362
536
|
:param path:
|
|
363
537
|
:return:
|
|
@@ -392,14 +566,126 @@ class Database(ABC):
|
|
|
392
566
|
"""
|
|
393
567
|
Validate the contents of the database.
|
|
394
568
|
|
|
569
|
+
As an example, let's create a database with a predefined schema
|
|
570
|
+
from the countries.linkml.yaml file:
|
|
571
|
+
|
|
572
|
+
>>> from linkml_store.api.client import Client
|
|
573
|
+
>>> client = Client()
|
|
574
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
575
|
+
>>> db.load_schema_view("tests/input/countries/countries.linkml.yaml")
|
|
576
|
+
|
|
577
|
+
Let's introspect the schema to see what slots are applicable for the class "Country":
|
|
578
|
+
|
|
579
|
+
>>> sv = db.schema_view
|
|
580
|
+
>>> for slot in sv.class_induced_slots("Country"):
|
|
581
|
+
... print(slot.name, slot.range, slot.required)
|
|
582
|
+
name string True
|
|
583
|
+
code string True
|
|
584
|
+
capital string True
|
|
585
|
+
continent string True
|
|
586
|
+
languages Language None
|
|
587
|
+
|
|
588
|
+
Next we'll create a collection, binding it to the target class "Country", and insert
|
|
589
|
+
valid data:
|
|
590
|
+
|
|
591
|
+
>>> collection = db.create_collection("Country", "all_countries")
|
|
592
|
+
>>> obj = {"code": "US", "name": "United States", "continent": "North America", "capital": "Washington, D.C."}
|
|
593
|
+
>>> collection.insert([obj])
|
|
594
|
+
>>> list(db.iter_validate_database())
|
|
595
|
+
[]
|
|
596
|
+
|
|
597
|
+
Now let's insert some invalid data (missing required fields)
|
|
598
|
+
|
|
599
|
+
>>> collection.insert([{"code": "FR", "name": "France"}])
|
|
600
|
+
>>> for r in db.iter_validate_database():
|
|
601
|
+
... print(r.message[0:32])
|
|
602
|
+
'capital' is a required property
|
|
603
|
+
'continent' is a required proper
|
|
604
|
+
|
|
395
605
|
:param kwargs:
|
|
396
606
|
:return: iterator over validation results
|
|
397
607
|
"""
|
|
398
608
|
for collection in self.list_collections():
|
|
399
609
|
yield from collection.iter_validate_collection(**kwargs)
|
|
610
|
+
if self.metadata.ensure_referential_integrity:
|
|
611
|
+
yield from self._validate_referential_integrity(**kwargs)
|
|
612
|
+
|
|
613
|
+
def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResult"]:
|
|
614
|
+
"""
|
|
615
|
+
Validate referential integrity of the database.
|
|
616
|
+
|
|
617
|
+
:param kwargs:
|
|
618
|
+
:return: iterator over validation results
|
|
619
|
+
"""
|
|
620
|
+
sv = self.schema_view
|
|
621
|
+
cmap = defaultdict(list)
|
|
622
|
+
for collection in self.list_collections():
|
|
623
|
+
if not collection.target_class_name:
|
|
624
|
+
raise ValueError(f"Collection {collection.name} has no target class")
|
|
625
|
+
cmap[collection.target_class_name].append(collection)
|
|
626
|
+
for collection in self.list_collections():
|
|
627
|
+
cd = collection.class_definition()
|
|
628
|
+
induced_slots = sv.class_induced_slots(cd.name)
|
|
629
|
+
slot_map = {s.name: s for s in induced_slots}
|
|
630
|
+
# rmap = {s.name: s.range for s in induced_slots}
|
|
631
|
+
sr_to_coll = {s.name: cmap.get(s.range, []) for s in induced_slots if s.range}
|
|
632
|
+
for obj in collection.find_iter():
|
|
633
|
+
for k, v in obj.items():
|
|
634
|
+
if k not in sr_to_coll:
|
|
635
|
+
continue
|
|
636
|
+
ref_colls = sr_to_coll[k]
|
|
637
|
+
if not ref_colls:
|
|
638
|
+
continue
|
|
639
|
+
if not isinstance(v, (str, int)):
|
|
640
|
+
continue
|
|
641
|
+
slot = slot_map[k]
|
|
642
|
+
found = False
|
|
643
|
+
for ref_coll in ref_colls:
|
|
644
|
+
ref_obj = ref_coll.get_one(v)
|
|
645
|
+
if ref_obj:
|
|
646
|
+
found = True
|
|
647
|
+
break
|
|
648
|
+
if not found:
|
|
649
|
+
yield ValidationResult(
|
|
650
|
+
type="ReferentialIntegrity",
|
|
651
|
+
severity=Severity.ERROR,
|
|
652
|
+
message=f"Referential integrity error: {slot.range} not found",
|
|
653
|
+
instantiates=slot.range,
|
|
654
|
+
instance=v,
|
|
655
|
+
)
|
|
400
656
|
|
|
401
657
|
def drop(self, **kwargs):
|
|
402
658
|
"""
|
|
403
|
-
Drop the database and all collections
|
|
659
|
+
Drop the database and all collections.
|
|
660
|
+
|
|
661
|
+
:param kwargs: additional arguments
|
|
404
662
|
"""
|
|
405
663
|
raise NotImplementedError()
|
|
664
|
+
|
|
665
|
+
def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
|
|
666
|
+
"""
|
|
667
|
+
Import a database from a file or location.
|
|
668
|
+
|
|
669
|
+
:param location: location of the file
|
|
670
|
+
:param source_format: source format
|
|
671
|
+
:param kwargs: additional arguments
|
|
672
|
+
"""
|
|
673
|
+
objects = load_objects(location, format=source_format)
|
|
674
|
+
for obj in objects:
|
|
675
|
+
self.store(obj)
|
|
676
|
+
|
|
677
|
+
def export_database(self, location: str, target_format: Optional[str] = None, **kwargs):
|
|
678
|
+
"""
|
|
679
|
+
Export a database to a file or location.
|
|
680
|
+
|
|
681
|
+
:param location: location of the file
|
|
682
|
+
:param target_format: target format
|
|
683
|
+
:param kwargs: additional arguments
|
|
684
|
+
"""
|
|
685
|
+
obj = {}
|
|
686
|
+
for coll in self.list_collections():
|
|
687
|
+
qr = coll.find({}, limit=-1)
|
|
688
|
+
obj[coll.alias] = qr.rows
|
|
689
|
+
logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
|
|
690
|
+
with open(location, "w", encoding="utf-8") as stream:
|
|
691
|
+
stream.write(render_output(obj, format=target_format))
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ChromaDB Collection
|
|
3
|
+
"""
|
|
4
|
+
|
|
1
5
|
import logging
|
|
2
6
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
3
7
|
|
|
@@ -13,6 +17,9 @@ logger = logging.getLogger(__name__)
|
|
|
13
17
|
|
|
14
18
|
|
|
15
19
|
class ChromaDBCollection(Collection):
|
|
20
|
+
"""
|
|
21
|
+
A wrapper for ChromaDB collections.
|
|
22
|
+
"""
|
|
16
23
|
|
|
17
24
|
@property
|
|
18
25
|
def native_collection(self) -> ChromaCollection:
|
|
@@ -50,7 +57,7 @@ class ChromaDBCollection(Collection):
|
|
|
50
57
|
return len(ids)
|
|
51
58
|
|
|
52
59
|
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
|
|
53
|
-
logger.info(f"Deleting from {self.
|
|
60
|
+
logger.info(f"Deleting from {self.target_class_name} where: {where}")
|
|
54
61
|
if where is None:
|
|
55
62
|
where = {}
|
|
56
63
|
results = self.native_collection.get(where=where)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Adapter for DuckDB embedded database.
|
|
3
|
+
|
|
4
|
+
Handles have the form:
|
|
5
|
+
|
|
6
|
+
- ``duckdb:///<path>`` for a file-based database
|
|
7
|
+
- ``duckdb:///:memory:`` for an in-memory database
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
|
|
11
|
+
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"DuckDBCollection",
|
|
15
|
+
"DuckDBDatabase",
|
|
16
|
+
]
|
|
@@ -19,16 +19,18 @@ class DuckDBCollection(Collection):
|
|
|
19
19
|
_table_created: bool = None
|
|
20
20
|
|
|
21
21
|
def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
22
|
+
logger.debug(f"Inserting {len(objs)}")
|
|
22
23
|
if not isinstance(objs, list):
|
|
23
24
|
objs = [objs]
|
|
24
25
|
if not objs:
|
|
25
26
|
return
|
|
26
27
|
cd = self.class_definition()
|
|
27
28
|
if not cd:
|
|
29
|
+
logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}; will induce")
|
|
28
30
|
cd = self.induce_class_definition_from_objects(objs)
|
|
29
31
|
self._create_table(cd)
|
|
30
32
|
table = self._sqla_table(cd)
|
|
31
|
-
logger.info(f"Inserting into: {self.
|
|
33
|
+
logger.info(f"Inserting into: {self.alias} // T={table.name}")
|
|
32
34
|
engine = self.parent.engine
|
|
33
35
|
col_names = [c.name for c in table.columns]
|
|
34
36
|
objs = [{k: obj.get(k, None) for k in col_names} for obj in objs]
|
|
@@ -37,7 +39,7 @@ class DuckDBCollection(Collection):
|
|
|
37
39
|
conn.execute(insert(table), objs)
|
|
38
40
|
conn.commit()
|
|
39
41
|
|
|
40
|
-
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
|
|
42
|
+
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
41
43
|
if not isinstance(objs, list):
|
|
42
44
|
objs = [objs]
|
|
43
45
|
cd = self.class_definition()
|
|
@@ -52,15 +54,15 @@ class DuckDBCollection(Collection):
|
|
|
52
54
|
stmt = stmt.compile(engine)
|
|
53
55
|
conn.execute(stmt)
|
|
54
56
|
conn.commit()
|
|
55
|
-
return
|
|
57
|
+
return
|
|
56
58
|
|
|
57
|
-
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
|
|
58
|
-
logger.info(f"Deleting from {self.
|
|
59
|
+
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
|
|
60
|
+
logger.info(f"Deleting from {self.target_class_name} where: {where}")
|
|
59
61
|
if where is None:
|
|
60
62
|
where = {}
|
|
61
63
|
cd = self.class_definition()
|
|
62
64
|
if not cd:
|
|
63
|
-
logger.info(f"No class definition found for {self.
|
|
65
|
+
logger.info(f"No class definition found for {self.target_class_name}, assuming not prepopulated")
|
|
64
66
|
return 0
|
|
65
67
|
table = self._sqla_table(cd)
|
|
66
68
|
engine = self.parent.engine
|
|
@@ -78,7 +80,7 @@ class DuckDBCollection(Collection):
|
|
|
78
80
|
if deleted_rows_count == 0 and not missing_ok:
|
|
79
81
|
raise ValueError(f"No rows found for {where}")
|
|
80
82
|
conn.commit()
|
|
81
|
-
return deleted_rows_count
|
|
83
|
+
return deleted_rows_count if deleted_rows_count > -1 else None
|
|
82
84
|
|
|
83
85
|
def query_facets(
|
|
84
86
|
self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
|
|
@@ -115,7 +117,7 @@ class DuckDBCollection(Collection):
|
|
|
115
117
|
typ = sqla.ARRAY(typ, dimensions=1)
|
|
116
118
|
col = Column(att.name, typ)
|
|
117
119
|
cols.append(col)
|
|
118
|
-
t = Table(self.
|
|
120
|
+
t = Table(self.alias, metadata_obj, *cols)
|
|
119
121
|
return t
|
|
120
122
|
|
|
121
123
|
def _create_table(self, cd: ClassDefinition):
|
|
@@ -123,7 +125,7 @@ class DuckDBCollection(Collection):
|
|
|
123
125
|
logger.info(f"Already have table for: {cd.name}")
|
|
124
126
|
return
|
|
125
127
|
query = Query(
|
|
126
|
-
from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.
|
|
128
|
+
from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
|
|
127
129
|
)
|
|
128
130
|
qr = self.parent.query(query)
|
|
129
131
|
if qr.num_rows > 0:
|