linkml-store 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/client.py +63 -7
- linkml_store/api/collection.py +138 -30
- linkml_store/api/config.py +48 -6
- linkml_store/api/database.py +45 -27
- linkml_store/api/stores/duckdb/duckdb_collection.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +16 -2
- linkml_store/api/stores/filesystem/filesystem_collection.py +11 -4
- linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
- linkml_store/api/stores/mongodb/mongodb_collection.py +6 -2
- linkml_store/api/stores/mongodb/mongodb_database.py +1 -36
- linkml_store/api/stores/solr/solr_collection.py +4 -4
- linkml_store/cli.py +35 -17
- linkml_store/index/__init__.py +16 -2
- linkml_store/index/implementations/llm_indexer.py +2 -1
- linkml_store/index/indexer.py +13 -2
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +68 -7
- linkml_store/utils/pandas_utils.py +40 -0
- linkml_store/utils/sql_utils.py +2 -1
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/METADATA +36 -3
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/RECORD +24 -22
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/entry_points.txt +0 -0
linkml_store/api/client.py
CHANGED
|
@@ -242,7 +242,7 @@ class Client:
|
|
|
242
242
|
Return all attached databases
|
|
243
243
|
|
|
244
244
|
Examples
|
|
245
|
-
|
|
245
|
+
|
|
246
246
|
>>> client = Client()
|
|
247
247
|
>>> _ = client.attach_database("duckdb", alias="test1")
|
|
248
248
|
>>> _ = client.attach_database("duckdb", alias="test2")
|
|
@@ -268,25 +268,81 @@ class Client:
|
|
|
268
268
|
"""
|
|
269
269
|
Drop a database.
|
|
270
270
|
|
|
271
|
+
Example (in-memory):
|
|
272
|
+
|
|
273
|
+
>>> client = Client()
|
|
274
|
+
>>> db1 = client.attach_database("duckdb", alias="test1")
|
|
275
|
+
>>> db2 = client.attach_database("duckdb", alias="test2")
|
|
276
|
+
>>> len(client.databases)
|
|
277
|
+
2
|
|
278
|
+
>>> client.drop_database("test1")
|
|
279
|
+
>>> len(client.databases)
|
|
280
|
+
1
|
|
281
|
+
|
|
282
|
+
Databases that persist on disk:
|
|
283
|
+
|
|
284
|
+
>>> client = Client()
|
|
285
|
+
>>> path = Path("tmp/test.db")
|
|
286
|
+
>>> path.parent.mkdir(parents=True, exist_ok=True)
|
|
287
|
+
>>> db = client.attach_database(f"duckdb:///{path}", alias="test")
|
|
288
|
+
>>> len(client.databases)
|
|
289
|
+
1
|
|
290
|
+
>>> db.store({"persons": [{"id": "P1", "name": "John"}]})
|
|
291
|
+
>>> db.commit()
|
|
292
|
+
>>> Path("tmp/test.db").exists()
|
|
293
|
+
True
|
|
294
|
+
>>> client.drop_database("test")
|
|
295
|
+
>>> len(client.databases)
|
|
296
|
+
0
|
|
297
|
+
>>> Path("tmp/test.db").exists()
|
|
298
|
+
False
|
|
299
|
+
|
|
300
|
+
Dropping a non-existent database:
|
|
301
|
+
|
|
302
|
+
>>> client = Client()
|
|
303
|
+
>>> client.drop_database("duckdb:///tmp/made-up1", missing_ok=True)
|
|
304
|
+
>>> client.drop_database("duckdb:///tmp/made-up2", missing_ok=False)
|
|
305
|
+
Traceback (most recent call last):
|
|
306
|
+
...
|
|
307
|
+
ValueError: Database duckdb:///tmp/made-up2 not found
|
|
308
|
+
|
|
271
309
|
:param name:
|
|
272
310
|
:param missing_ok:
|
|
273
311
|
:return:
|
|
274
312
|
"""
|
|
275
|
-
if
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
313
|
+
if self._databases:
|
|
314
|
+
if name in self._databases:
|
|
315
|
+
db = self._databases[name]
|
|
316
|
+
db.drop(**kwargs)
|
|
317
|
+
del self._databases[name]
|
|
318
|
+
else:
|
|
319
|
+
if not missing_ok:
|
|
320
|
+
raise ValueError(f"Database {name} not found")
|
|
279
321
|
else:
|
|
280
|
-
|
|
281
|
-
|
|
322
|
+
db = self.get_database(name, create_if_not_exists=True)
|
|
323
|
+
db.drop(**kwargs)
|
|
282
324
|
|
|
283
325
|
def drop_all_databases(self, **kwargs):
|
|
284
326
|
"""
|
|
285
327
|
Drop all databases.
|
|
286
328
|
|
|
329
|
+
Example (in-memory):
|
|
330
|
+
|
|
331
|
+
>>> client = Client()
|
|
332
|
+
>>> db1 = client.attach_database("duckdb", alias="test1")
|
|
333
|
+
>>> assert "test1" in client.databases
|
|
334
|
+
>>> db2 = client.attach_database("duckdb", alias="test2")
|
|
335
|
+
>>> assert "test2" in client.databases
|
|
336
|
+
>>> client.drop_all_databases()
|
|
337
|
+
>>> len(client.databases)
|
|
338
|
+
0
|
|
339
|
+
|
|
340
|
+
|
|
287
341
|
:param missing_ok:
|
|
288
342
|
:return:
|
|
289
343
|
"""
|
|
344
|
+
if not self._databases:
|
|
345
|
+
return
|
|
290
346
|
for name in list(self._databases.keys()):
|
|
291
347
|
self.drop_database(name, missing_ok=False, **kwargs)
|
|
292
348
|
self._databases = {}
|
linkml_store/api/collection.py
CHANGED
|
@@ -14,7 +14,7 @@ from pydantic import BaseModel
|
|
|
14
14
|
|
|
15
15
|
from linkml_store.api.types import DatabaseType
|
|
16
16
|
from linkml_store.index import get_indexer
|
|
17
|
-
from linkml_store.utils.format_utils import load_objects
|
|
17
|
+
from linkml_store.utils.format_utils import load_objects, load_objects_from_url
|
|
18
18
|
from linkml_store.utils.object_utils import clean_empties
|
|
19
19
|
from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
|
|
20
20
|
|
|
@@ -61,6 +61,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
61
61
|
# name: str
|
|
62
62
|
parent: Optional[DatabaseType] = None
|
|
63
63
|
_indexers: Optional[Dict[str, Indexer]] = None
|
|
64
|
+
_initialized: Optional[bool] = None
|
|
64
65
|
# hidden: Optional[bool] = False
|
|
65
66
|
|
|
66
67
|
metadata: Optional[CollectionConfig] = None
|
|
@@ -73,7 +74,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
73
74
|
if metadata:
|
|
74
75
|
self.metadata = metadata
|
|
75
76
|
else:
|
|
76
|
-
self.metadata = CollectionConfig(
|
|
77
|
+
self.metadata = CollectionConfig(type=name, **kwargs)
|
|
77
78
|
if not self.metadata.alias:
|
|
78
79
|
self.metadata.alias = name
|
|
79
80
|
if not self.metadata.type:
|
|
@@ -81,17 +82,6 @@ class Collection(Generic[DatabaseType]):
|
|
|
81
82
|
# if name is not None and self.metadata.name is not None and name != self.metadata.name:
|
|
82
83
|
# raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
|
|
83
84
|
|
|
84
|
-
@property
|
|
85
|
-
def name(self) -> str:
|
|
86
|
-
"""
|
|
87
|
-
Return the name of the collection.
|
|
88
|
-
|
|
89
|
-
TODO: deprecate in favor of Type
|
|
90
|
-
|
|
91
|
-
:return: name of the collection
|
|
92
|
-
"""
|
|
93
|
-
return self.metadata.name
|
|
94
|
-
|
|
95
85
|
@property
|
|
96
86
|
def hidden(self) -> bool:
|
|
97
87
|
"""
|
|
@@ -118,12 +108,18 @@ class Collection(Generic[DatabaseType]):
|
|
|
118
108
|
>>> collection.target_class_name
|
|
119
109
|
'Person'
|
|
120
110
|
|
|
111
|
+
>>> collection = db.create_collection("Organization")
|
|
112
|
+
>>> collection.target_class_name
|
|
113
|
+
'Organization'
|
|
114
|
+
>>> collection.alias
|
|
115
|
+
'Organization'
|
|
116
|
+
|
|
121
117
|
:return: name of the class which members of this collection instantiate
|
|
122
118
|
"""
|
|
123
119
|
# TODO: this is a shim layer until we can normalize on this
|
|
124
120
|
if self.metadata.type:
|
|
125
121
|
return self.metadata.type
|
|
126
|
-
return self.
|
|
122
|
+
return self.alias
|
|
127
123
|
|
|
128
124
|
@property
|
|
129
125
|
def alias(self):
|
|
@@ -161,10 +157,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
161
157
|
:return:
|
|
162
158
|
"""
|
|
163
159
|
# TODO: this is a shim layer until we can normalize on this
|
|
164
|
-
# TODO: this is a shim layer until we can normalize on this
|
|
165
160
|
if self.metadata.alias:
|
|
166
161
|
return self.metadata.alias
|
|
167
|
-
return self.
|
|
162
|
+
return self.target_class_name
|
|
168
163
|
|
|
169
164
|
def replace(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
170
165
|
"""
|
|
@@ -201,7 +196,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
201
196
|
"""
|
|
202
197
|
raise NotImplementedError
|
|
203
198
|
|
|
199
|
+
def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
|
|
200
|
+
logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
|
|
201
|
+
if not self._initialized:
|
|
202
|
+
self._materialize_derivations()
|
|
203
|
+
self._initialized = True
|
|
204
|
+
|
|
204
205
|
def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
|
|
206
|
+
self._initialized = True
|
|
205
207
|
patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
|
|
206
208
|
self._broadcast(patches, **kwargs)
|
|
207
209
|
|
|
@@ -305,6 +307,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
305
307
|
:param kwargs:
|
|
306
308
|
:return:
|
|
307
309
|
"""
|
|
310
|
+
self._pre_query_hook()
|
|
308
311
|
return self.parent.query(query, **kwargs)
|
|
309
312
|
|
|
310
313
|
def query_facets(
|
|
@@ -340,7 +343,6 @@ class Collection(Generic[DatabaseType]):
|
|
|
340
343
|
:param kwargs:
|
|
341
344
|
:return:
|
|
342
345
|
"""
|
|
343
|
-
# TODO
|
|
344
346
|
id_field = self.identifier_attribute_name
|
|
345
347
|
if not id_field:
|
|
346
348
|
raise ValueError(f"No identifier for {self.name}")
|
|
@@ -399,9 +401,10 @@ class Collection(Generic[DatabaseType]):
|
|
|
399
401
|
:return:
|
|
400
402
|
"""
|
|
401
403
|
query = self._create_query(where_clause=where)
|
|
404
|
+
self._pre_query_hook(query)
|
|
402
405
|
return self.query(query, **kwargs)
|
|
403
406
|
|
|
404
|
-
def find_iter(self, where: Optional[Any] = None, **kwargs) -> Iterator[OBJECT]:
|
|
407
|
+
def find_iter(self, where: Optional[Any] = None, page_size=100, **kwargs) -> Iterator[OBJECT]:
|
|
405
408
|
"""
|
|
406
409
|
Find objects in the collection using a where query.
|
|
407
410
|
|
|
@@ -409,9 +412,22 @@ class Collection(Generic[DatabaseType]):
|
|
|
409
412
|
:param kwargs:
|
|
410
413
|
:return:
|
|
411
414
|
"""
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
+
total_rows = None
|
|
416
|
+
offset = 0
|
|
417
|
+
if page_size < 1:
|
|
418
|
+
raise ValueError(f"Invalid page size: {page_size}")
|
|
419
|
+
while True:
|
|
420
|
+
qr = self.find(where=where, offset=offset, limit=page_size, **kwargs)
|
|
421
|
+
if total_rows is None:
|
|
422
|
+
total_rows = qr.num_rows
|
|
423
|
+
if not qr.rows:
|
|
424
|
+
return
|
|
425
|
+
for row in qr.rows:
|
|
426
|
+
yield row
|
|
427
|
+
offset += page_size
|
|
428
|
+
if offset >= total_rows:
|
|
429
|
+
break
|
|
430
|
+
return
|
|
415
431
|
|
|
416
432
|
def search(
|
|
417
433
|
self,
|
|
@@ -454,6 +470,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
454
470
|
:param kwargs:
|
|
455
471
|
:return:
|
|
456
472
|
"""
|
|
473
|
+
self._pre_query_hook()
|
|
457
474
|
if index_name is None:
|
|
458
475
|
if len(self.indexers) == 1:
|
|
459
476
|
index_name = list(self.indexers.keys())[0]
|
|
@@ -494,10 +511,93 @@ class Collection(Generic[DatabaseType]):
|
|
|
494
511
|
raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
|
|
495
512
|
return self.alias.startswith("internal__")
|
|
496
513
|
|
|
497
|
-
def
|
|
498
|
-
|
|
514
|
+
def exists(self) -> Optional[bool]:
|
|
515
|
+
"""
|
|
516
|
+
Check if the collection exists.
|
|
517
|
+
|
|
518
|
+
:return:
|
|
519
|
+
"""
|
|
520
|
+
cd = self.class_definition()
|
|
521
|
+
return cd is not None
|
|
522
|
+
|
|
523
|
+
def load_from_source(self, load_if_exists=False):
|
|
524
|
+
"""
|
|
525
|
+
Load objects from the source location.
|
|
526
|
+
|
|
527
|
+
:param load_if_exists:
|
|
528
|
+
:return:
|
|
529
|
+
"""
|
|
530
|
+
if not load_if_exists and self.exists():
|
|
531
|
+
return
|
|
532
|
+
metadata = self.metadata
|
|
533
|
+
if metadata.source:
|
|
534
|
+
source = metadata.source
|
|
535
|
+
kwargs = source.arguments or {}
|
|
536
|
+
if source.local_path:
|
|
537
|
+
objects = load_objects(
|
|
538
|
+
metadata.source.local_path, format=source.format, expected_type=source.expected_type, **kwargs
|
|
539
|
+
)
|
|
540
|
+
elif metadata.source.url:
|
|
541
|
+
objects = load_objects_from_url(
|
|
542
|
+
metadata.source.url, format=source.format, expected_type=source.expected_type, **kwargs
|
|
543
|
+
)
|
|
499
544
|
self.insert(objects)
|
|
500
545
|
|
|
546
|
+
def _check_if_initialized(self) -> bool:
|
|
547
|
+
return self._initialized
|
|
548
|
+
|
|
549
|
+
def _materialize_derivations(self, **kwargs):
|
|
550
|
+
metadata = self.metadata
|
|
551
|
+
if not metadata.derived_from:
|
|
552
|
+
logger.info(f"No metadata for {self.alias}; no derivations")
|
|
553
|
+
return
|
|
554
|
+
if self._check_if_initialized():
|
|
555
|
+
logger.info(f"Already initialized {self.alias}; no derivations")
|
|
556
|
+
return
|
|
557
|
+
parent_db = self.parent
|
|
558
|
+
client = parent_db.parent
|
|
559
|
+
# cd = self.class_definition()
|
|
560
|
+
for derivation in metadata.derived_from:
|
|
561
|
+
# TODO: optimize this; utilize underlying engine
|
|
562
|
+
logger.info(f"Deriving from {derivation}")
|
|
563
|
+
if derivation.database:
|
|
564
|
+
db = client.get_database(derivation.database)
|
|
565
|
+
else:
|
|
566
|
+
db = parent_db
|
|
567
|
+
if derivation.collection:
|
|
568
|
+
coll = db.get_collection(derivation.collection)
|
|
569
|
+
else:
|
|
570
|
+
coll = self
|
|
571
|
+
coll.class_definition()
|
|
572
|
+
source_obj_iter = coll.find_iter(derivation.where or {})
|
|
573
|
+
mappings = derivation.mappings
|
|
574
|
+
if not mappings:
|
|
575
|
+
raise ValueError(f"No mappings for {self.name}")
|
|
576
|
+
target_class_name = self.target_class_name
|
|
577
|
+
from linkml_map.session import Session
|
|
578
|
+
|
|
579
|
+
session = Session()
|
|
580
|
+
session.set_source_schema(db.schema_view.schema)
|
|
581
|
+
session.set_object_transformer(
|
|
582
|
+
{
|
|
583
|
+
"class_derivations": {
|
|
584
|
+
target_class_name: {
|
|
585
|
+
"populated_from": coll.target_class_name,
|
|
586
|
+
"slot_derivations": mappings,
|
|
587
|
+
},
|
|
588
|
+
}
|
|
589
|
+
},
|
|
590
|
+
)
|
|
591
|
+
logger.debug(f"Session Spec: {session.object_transformer}")
|
|
592
|
+
tr_objs = []
|
|
593
|
+
for source_obj in source_obj_iter:
|
|
594
|
+
tr_obj = session.transform(source_obj, source_type=coll.target_class_name)
|
|
595
|
+
tr_objs.append(tr_obj)
|
|
596
|
+
if not tr_objs:
|
|
597
|
+
raise ValueError(f"No objects derived from {coll.name}")
|
|
598
|
+
self.insert(tr_objs)
|
|
599
|
+
self.commit()
|
|
600
|
+
|
|
501
601
|
def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
|
|
502
602
|
"""
|
|
503
603
|
Attach an index to the collection.
|
|
@@ -572,7 +672,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
572
672
|
:param indexer:
|
|
573
673
|
:return:
|
|
574
674
|
"""
|
|
575
|
-
return f"internal__index__{self.
|
|
675
|
+
return f"internal__index__{self.alias}__{index_name}"
|
|
576
676
|
|
|
577
677
|
def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
|
|
578
678
|
"""
|
|
@@ -638,6 +738,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
638
738
|
"""
|
|
639
739
|
Return the class definition for the collection.
|
|
640
740
|
|
|
741
|
+
If no schema has been explicitly set, and the native database does not
|
|
742
|
+
have a schema, then a schema will be induced from the objects in the collection.
|
|
743
|
+
|
|
641
744
|
:return:
|
|
642
745
|
"""
|
|
643
746
|
sv: SchemaView = self.parent.schema_view
|
|
@@ -722,7 +825,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
722
825
|
else:
|
|
723
826
|
return None
|
|
724
827
|
|
|
725
|
-
def induce_class_definition_from_objects(
|
|
828
|
+
def induce_class_definition_from_objects(
|
|
829
|
+
self, objs: List[OBJECT], max_sample_size: Optional[int] = None
|
|
830
|
+
) -> ClassDefinition:
|
|
726
831
|
"""
|
|
727
832
|
Induce a class definition from a list of objects.
|
|
728
833
|
|
|
@@ -733,6 +838,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
733
838
|
:param max_sample_size:
|
|
734
839
|
:return:
|
|
735
840
|
"""
|
|
841
|
+
# TODO: use schemaview
|
|
842
|
+
if max_sample_size is None:
|
|
843
|
+
max_sample_size = 10
|
|
736
844
|
if not self.target_class_name:
|
|
737
845
|
raise ValueError(f"No target_class_name for {self.alias}")
|
|
738
846
|
cd = ClassDefinition(self.target_class_name)
|
|
@@ -795,6 +903,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
795
903
|
for other_rng in rngs:
|
|
796
904
|
if rng != other_rng:
|
|
797
905
|
raise ValueError(f"Conflict: {rng} != {other_rng} for {vs}")
|
|
906
|
+
logger.debug(f"Inducing {k} as {rng} {multivalued} {inlined}")
|
|
798
907
|
cd.attributes[k] = SlotDefinition(k, range=rng, multivalued=multivalued, inlined=inlined)
|
|
799
908
|
if exact_dimensions_list:
|
|
800
909
|
array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
|
|
@@ -828,7 +937,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
828
937
|
"""
|
|
829
938
|
Apply a patch to the collection.
|
|
830
939
|
|
|
831
|
-
Patches conform to the JSON Patch format
|
|
940
|
+
Patches conform to the JSON Patch format.
|
|
832
941
|
|
|
833
942
|
:param patches:
|
|
834
943
|
:param kwargs:
|
|
@@ -841,11 +950,11 @@ class Collection(Generic[DatabaseType]):
|
|
|
841
950
|
new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
|
|
842
951
|
self.replace(new_objs)
|
|
843
952
|
|
|
844
|
-
def diff(self, other: "Collection", **kwargs):
|
|
953
|
+
def diff(self, other: "Collection", **kwargs) -> List[PatchDict]:
|
|
845
954
|
"""
|
|
846
955
|
Diff two collections.
|
|
847
956
|
|
|
848
|
-
:param other:
|
|
957
|
+
:param other: The collection to diff against
|
|
849
958
|
:param kwargs:
|
|
850
959
|
:return:
|
|
851
960
|
"""
|
|
@@ -872,8 +981,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
872
981
|
if not cd:
|
|
873
982
|
raise ValueError(f"Cannot find class definition for {self.target_class_name}")
|
|
874
983
|
class_name = cd.name
|
|
875
|
-
|
|
876
|
-
for obj in result.rows:
|
|
984
|
+
for obj in self.find_iter(**kwargs):
|
|
877
985
|
obj = clean_empties(obj)
|
|
878
986
|
yield from validator.iter_results(obj, class_name)
|
|
879
987
|
|
linkml_store/api/config.py
CHANGED
|
@@ -4,14 +4,43 @@ from pydantic import BaseModel, Field
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class ConfiguredBaseModel(BaseModel, extra="forbid"):
|
|
7
|
+
"""
|
|
8
|
+
Base class for all configuration models.
|
|
9
|
+
"""
|
|
10
|
+
|
|
7
11
|
pass
|
|
8
12
|
|
|
9
13
|
|
|
14
|
+
class DerivationConfiguration(ConfiguredBaseModel):
|
|
15
|
+
"""
|
|
16
|
+
Configuration for a derivation
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
database: Optional[str] = None
|
|
20
|
+
collection: Optional[str] = None
|
|
21
|
+
mappings: Optional[Dict[str, Any]] = None
|
|
22
|
+
where: Optional[Dict[str, Any]] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CollectionSource(ConfiguredBaseModel):
|
|
26
|
+
"""
|
|
27
|
+
Metadata about a source
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
url: Optional[str] = None
|
|
31
|
+
local_path: Optional[str] = None
|
|
32
|
+
source_location: Optional[str] = None
|
|
33
|
+
refresh_interval_days: Optional[float] = None
|
|
34
|
+
expected_type: Optional[str] = None
|
|
35
|
+
format: Optional[str] = None
|
|
36
|
+
arguments: Optional[Dict[str, Any]] = None
|
|
37
|
+
|
|
38
|
+
|
|
10
39
|
class CollectionConfig(ConfiguredBaseModel):
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
40
|
+
"""
|
|
41
|
+
Configuration for a collection
|
|
42
|
+
"""
|
|
43
|
+
|
|
15
44
|
alias: Optional[str] = Field(
|
|
16
45
|
default=None,
|
|
17
46
|
description="An optional alias for the collection",
|
|
@@ -40,13 +69,22 @@ class CollectionConfig(ConfiguredBaseModel):
|
|
|
40
69
|
default=False,
|
|
41
70
|
description="Whether the collection is prepopulated",
|
|
42
71
|
)
|
|
43
|
-
|
|
72
|
+
source: Optional[CollectionSource] = Field(
|
|
44
73
|
default=None,
|
|
45
|
-
description="
|
|
74
|
+
description="Metadata about the source",
|
|
75
|
+
)
|
|
76
|
+
# TODO: derived_from
|
|
77
|
+
derived_from: Optional[List[DerivationConfiguration]] = Field(
|
|
78
|
+
default=None,
|
|
79
|
+
description="LinkML-Map derivations",
|
|
46
80
|
)
|
|
47
81
|
|
|
48
82
|
|
|
49
83
|
class DatabaseConfig(ConfiguredBaseModel):
|
|
84
|
+
"""
|
|
85
|
+
Configuration for a database
|
|
86
|
+
"""
|
|
87
|
+
|
|
50
88
|
handle: str = Field(
|
|
51
89
|
default="duckdb:///:memory:",
|
|
52
90
|
description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
|
|
@@ -91,6 +129,10 @@ class DatabaseConfig(ConfiguredBaseModel):
|
|
|
91
129
|
|
|
92
130
|
|
|
93
131
|
class ClientConfig(ConfiguredBaseModel):
|
|
132
|
+
"""
|
|
133
|
+
Configuration for a client
|
|
134
|
+
"""
|
|
135
|
+
|
|
94
136
|
handle: Optional[str] = Field(
|
|
95
137
|
default=None,
|
|
96
138
|
description="The client handle",
|
linkml_store/api/database.py
CHANGED
|
@@ -149,26 +149,19 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
149
149
|
def _initialize_collections(self):
|
|
150
150
|
if not self.metadata.collections:
|
|
151
151
|
return
|
|
152
|
-
for
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
# )
|
|
162
|
-
if False and typ is not None:
|
|
163
|
-
if not alias:
|
|
164
|
-
alias = name
|
|
165
|
-
name = typ
|
|
166
|
-
if not collection_config.name:
|
|
167
|
-
collection_config.name = name
|
|
168
|
-
_collection = self.create_collection(name, alias=alias, metadata=collection_config)
|
|
152
|
+
for k, collection_config in self.metadata.collections.items():
|
|
153
|
+
if collection_config.alias:
|
|
154
|
+
if collection_config.alias != k:
|
|
155
|
+
raise ValueError(f"Alias mismatch: {collection_config.alias} != {k}")
|
|
156
|
+
alias = k
|
|
157
|
+
typ = collection_config.type or alias
|
|
158
|
+
_collection = self.create_collection(typ, alias=alias, metadata=collection_config)
|
|
159
|
+
assert _collection.alias == alias
|
|
160
|
+
assert _collection.target_class_name == typ
|
|
169
161
|
if collection_config.attributes:
|
|
162
|
+
# initialize schema
|
|
170
163
|
sv = self.schema_view
|
|
171
|
-
cd = ClassDefinition(
|
|
164
|
+
cd = ClassDefinition(typ, attributes=collection_config.attributes)
|
|
172
165
|
sv.schema.classes[cd.name] = cd
|
|
173
166
|
sv.set_modified()
|
|
174
167
|
# assert collection.class_definition() is not None
|
|
@@ -275,7 +268,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
275
268
|
metadata: Optional[CollectionConfig] = None,
|
|
276
269
|
recreate_if_exists=False,
|
|
277
270
|
**kwargs,
|
|
278
|
-
) ->
|
|
271
|
+
) -> CollectionType:
|
|
279
272
|
"""
|
|
280
273
|
Create a new collection in the current database.
|
|
281
274
|
|
|
@@ -307,8 +300,10 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
307
300
|
if not name:
|
|
308
301
|
raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
|
|
309
302
|
collection_cls = self.collection_class
|
|
310
|
-
collection = collection_cls(name=name,
|
|
311
|
-
if
|
|
303
|
+
collection = collection_cls(name=name, parent=self, metadata=metadata)
|
|
304
|
+
if alias:
|
|
305
|
+
collection.metadata.alias = alias
|
|
306
|
+
if metadata and metadata.source:
|
|
312
307
|
collection.load_from_source()
|
|
313
308
|
if metadata and metadata.attributes:
|
|
314
309
|
sv = self.schema_view
|
|
@@ -321,7 +316,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
321
316
|
alias = name
|
|
322
317
|
self._collections[alias] = collection
|
|
323
318
|
if recreate_if_exists:
|
|
324
|
-
logger.debug(f"Recreating collection {collection.
|
|
319
|
+
logger.debug(f"Recreating collection {collection.alias}")
|
|
325
320
|
collection.delete_where({}, missing_ok=True)
|
|
326
321
|
return collection
|
|
327
322
|
|
|
@@ -339,7 +334,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
339
334
|
>>> collections = db.list_collections()
|
|
340
335
|
>>> len(collections)
|
|
341
336
|
2
|
|
342
|
-
>>> [c.
|
|
337
|
+
>>> [c.target_class_name for c in collections]
|
|
343
338
|
['Person', 'Product']
|
|
344
339
|
|
|
345
340
|
:param include_internal: include internal collections
|
|
@@ -367,7 +362,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
367
362
|
['Person', 'Product']
|
|
368
363
|
|
|
369
364
|
"""
|
|
370
|
-
return [c.
|
|
365
|
+
return [c.alias for c in self.list_collections(**kwargs)]
|
|
371
366
|
|
|
372
367
|
def get_collection(
|
|
373
368
|
self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
|
|
@@ -410,7 +405,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
410
405
|
"""
|
|
411
406
|
Initialize collections.
|
|
412
407
|
|
|
413
|
-
Not typically called directly: consider making
|
|
408
|
+
TODO: Not typically called directly: consider making this private
|
|
414
409
|
:return:
|
|
415
410
|
"""
|
|
416
411
|
raise NotImplementedError
|
|
@@ -502,7 +497,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
502
497
|
>>> sorted(collection.class_definition().slots)
|
|
503
498
|
['capital', 'code', 'continent', 'languages', 'name']
|
|
504
499
|
|
|
505
|
-
:param schema_view:
|
|
500
|
+
:param schema_view: can be either a path to the schema, or a SchemaView object
|
|
506
501
|
:return:
|
|
507
502
|
"""
|
|
508
503
|
if isinstance(schema_view, Path):
|
|
@@ -585,7 +580,15 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
585
580
|
|
|
586
581
|
:return: A schema view
|
|
587
582
|
"""
|
|
588
|
-
|
|
583
|
+
logger.info(f"Inducing schema view for {self.handle}")
|
|
584
|
+
from linkml_runtime.utils.schema_builder import SchemaBuilder
|
|
585
|
+
|
|
586
|
+
sb = SchemaBuilder()
|
|
587
|
+
|
|
588
|
+
for collection_name in self.list_collection_names():
|
|
589
|
+
coll = self.get_collection(collection_name)
|
|
590
|
+
sb.add_class(coll.target_class_name)
|
|
591
|
+
return SchemaView(sb.schema)
|
|
589
592
|
|
|
590
593
|
def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
|
|
591
594
|
"""
|
|
@@ -683,6 +686,21 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
683
686
|
"""
|
|
684
687
|
Drop the database and all collections.
|
|
685
688
|
|
|
689
|
+
>>> from linkml_store.api.client import Client
|
|
690
|
+
>>> client = Client()
|
|
691
|
+
>>> path = Path("/tmp/test.db")
|
|
692
|
+
>>> path.parent.mkdir(exist_ok=True, parents=True)
|
|
693
|
+
>>> db = client.attach_database(f"duckdb:///{path}")
|
|
694
|
+
>>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
|
|
695
|
+
>>> coll = db.get_collection("persons")
|
|
696
|
+
>>> coll.find({}).num_rows
|
|
697
|
+
1
|
|
698
|
+
>>> db.drop()
|
|
699
|
+
>>> db = client.attach_database("duckdb:///tmp/test.db", alias="test")
|
|
700
|
+
>>> coll = db.get_collection("persons")
|
|
701
|
+
>>> coll.find({}).num_rows
|
|
702
|
+
0
|
|
703
|
+
|
|
686
704
|
:param kwargs: additional arguments
|
|
687
705
|
"""
|
|
688
706
|
raise NotImplementedError()
|
|
@@ -18,6 +18,9 @@ logger = logging.getLogger(__name__)
|
|
|
18
18
|
class DuckDBCollection(Collection):
|
|
19
19
|
_table_created: bool = None
|
|
20
20
|
|
|
21
|
+
def __init__(self, *args, **kwargs):
|
|
22
|
+
super().__init__(*args, **kwargs)
|
|
23
|
+
|
|
21
24
|
def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
22
25
|
logger.debug(f"Inserting {len(objs)}")
|
|
23
26
|
if not isinstance(objs, list):
|
|
@@ -123,6 +126,17 @@ class DuckDBCollection(Collection):
|
|
|
123
126
|
t = Table(self.alias, metadata_obj, *cols)
|
|
124
127
|
return t
|
|
125
128
|
|
|
129
|
+
def _check_if_initialized(self) -> bool:
|
|
130
|
+
# if self._initialized:
|
|
131
|
+
# return True
|
|
132
|
+
query = Query(
|
|
133
|
+
from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
|
|
134
|
+
)
|
|
135
|
+
qr = self.parent.query(query)
|
|
136
|
+
if qr.num_rows > 0:
|
|
137
|
+
return True
|
|
138
|
+
return False
|
|
139
|
+
|
|
126
140
|
def _create_table(self, cd: ClassDefinition):
|
|
127
141
|
if self._table_created or self.metadata.is_prepopulated:
|
|
128
142
|
logger.info(f"Already have table for: {cd.name}")
|
|
@@ -134,6 +148,7 @@ class DuckDBCollection(Collection):
|
|
|
134
148
|
if qr.num_rows > 0:
|
|
135
149
|
logger.info(f"Table already exists for {cd.name}")
|
|
136
150
|
self._table_created = True
|
|
151
|
+
self._initialized = True
|
|
137
152
|
self.metadata.is_prepopulated = True
|
|
138
153
|
return
|
|
139
154
|
logger.info(f"Creating table for {cd.name}")
|
|
@@ -144,4 +159,5 @@ class DuckDBCollection(Collection):
|
|
|
144
159
|
conn.execute(text(ddl))
|
|
145
160
|
conn.commit()
|
|
146
161
|
self._table_created = True
|
|
162
|
+
self._initialized = True
|
|
147
163
|
self.metadata.is_prepopulated = True
|