linkml-store 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/client.py +15 -4
- linkml_store/api/collection.py +185 -15
- linkml_store/api/config.py +11 -3
- linkml_store/api/database.py +36 -5
- linkml_store/api/stores/duckdb/duckdb_collection.py +6 -3
- linkml_store/api/stores/duckdb/duckdb_database.py +20 -1
- linkml_store/api/stores/filesystem/__init__.py +7 -8
- linkml_store/api/stores/filesystem/filesystem_collection.py +150 -113
- linkml_store/api/stores/filesystem/filesystem_database.py +57 -21
- linkml_store/api/stores/mongodb/mongodb_collection.py +82 -34
- linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +97 -8
- linkml_store/index/__init__.py +5 -3
- linkml_store/index/indexer.py +7 -2
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/format_utils.py +89 -8
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +46 -0
- linkml_store/webapi/main.py +572 -0
- linkml_store-0.1.10.dist-info/METADATA +138 -0
- linkml_store-0.1.10.dist-info/RECORD +58 -0
- {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/entry_points.txt +1 -0
- linkml_store-0.1.8.dist-info/METADATA +0 -58
- linkml_store-0.1.8.dist-info/RECORD +0 -45
- {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.8.dist-info → linkml_store-0.1.10.dist-info}/WHEEL +0 -0
linkml_store/api/client.py
CHANGED
|
@@ -9,6 +9,7 @@ from linkml_store.api import Database
|
|
|
9
9
|
from linkml_store.api.config import ClientConfig
|
|
10
10
|
from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
|
|
11
11
|
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
12
|
+
from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
|
|
12
13
|
from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
|
|
13
14
|
from linkml_store.api.stores.solr.solr_database import SolrDatabase
|
|
14
15
|
|
|
@@ -20,6 +21,7 @@ HANDLE_MAP = {
|
|
|
20
21
|
"solr": SolrDatabase,
|
|
21
22
|
"mongodb": MongoDBDatabase,
|
|
22
23
|
"chromadb": ChromaDBDatabase,
|
|
24
|
+
"file": FileSystemDatabase,
|
|
23
25
|
}
|
|
24
26
|
|
|
25
27
|
|
|
@@ -96,7 +98,7 @@ class Client:
|
|
|
96
98
|
"""
|
|
97
99
|
return self.metadata.base_dir
|
|
98
100
|
|
|
99
|
-
def from_config(self, config: Union[ClientConfig, str, Path], base_dir=None, **kwargs):
|
|
101
|
+
def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
|
|
100
102
|
"""
|
|
101
103
|
Create a client from a configuration.
|
|
102
104
|
|
|
@@ -116,11 +118,13 @@ class Client:
|
|
|
116
118
|
:return:
|
|
117
119
|
|
|
118
120
|
"""
|
|
121
|
+
if isinstance(config, dict):
|
|
122
|
+
config = ClientConfig(**config)
|
|
119
123
|
if isinstance(config, Path):
|
|
120
124
|
config = str(config)
|
|
121
125
|
if isinstance(config, str):
|
|
122
|
-
if not base_dir:
|
|
123
|
-
|
|
126
|
+
# if not base_dir:
|
|
127
|
+
# base_dir = Path(config).parent
|
|
124
128
|
parsed_obj = yaml.safe_load(open(config))
|
|
125
129
|
config = ClientConfig(**parsed_obj)
|
|
126
130
|
self.metadata = config
|
|
@@ -131,8 +135,15 @@ class Client:
|
|
|
131
135
|
|
|
132
136
|
def _initialize_databases(self, **kwargs):
|
|
133
137
|
for name, db_config in self.metadata.databases.items():
|
|
134
|
-
|
|
138
|
+
base_dir = self.base_dir
|
|
139
|
+
logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
|
|
140
|
+
if not base_dir:
|
|
141
|
+
base_dir = Path.cwd()
|
|
142
|
+
logger.info(f"Using current working directory: {base_dir}")
|
|
143
|
+
handle = db_config.handle.format(base_dir=base_dir)
|
|
135
144
|
db_config.handle = handle
|
|
145
|
+
if db_config.schema_location:
|
|
146
|
+
db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
|
|
136
147
|
db = self.attach_database(handle, alias=name, **kwargs)
|
|
137
148
|
db.from_config(db_config)
|
|
138
149
|
|
linkml_store/api/collection.py
CHANGED
|
@@ -4,16 +4,19 @@ import hashlib
|
|
|
4
4
|
import logging
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, TextIO, Type, Union
|
|
7
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
|
+
from linkml_runtime import SchemaView
|
|
10
11
|
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
|
11
12
|
from linkml_runtime.linkml_model.meta import ArrayExpression
|
|
12
13
|
from pydantic import BaseModel
|
|
13
14
|
|
|
15
|
+
from linkml_store.api.types import DatabaseType
|
|
14
16
|
from linkml_store.index import get_indexer
|
|
15
17
|
from linkml_store.utils.format_utils import load_objects
|
|
16
18
|
from linkml_store.utils.object_utils import clean_empties
|
|
19
|
+
from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
|
|
17
20
|
|
|
18
21
|
try:
|
|
19
22
|
from linkml.validator.report import ValidationResult
|
|
@@ -36,7 +39,7 @@ IDENTIFIER = str
|
|
|
36
39
|
FIELD_NAME = str
|
|
37
40
|
|
|
38
41
|
|
|
39
|
-
class Collection:
|
|
42
|
+
class Collection(Generic[DatabaseType]):
|
|
40
43
|
"""
|
|
41
44
|
A collection is an organized set of objects of the same or similar type.
|
|
42
45
|
|
|
@@ -56,11 +59,12 @@ class Collection:
|
|
|
56
59
|
"""
|
|
57
60
|
|
|
58
61
|
# name: str
|
|
59
|
-
parent: Optional[
|
|
62
|
+
parent: Optional[DatabaseType] = None
|
|
60
63
|
_indexers: Optional[Dict[str, Indexer]] = None
|
|
61
64
|
# hidden: Optional[bool] = False
|
|
62
65
|
|
|
63
66
|
metadata: Optional[CollectionConfig] = None
|
|
67
|
+
default_index_name: ClassVar[str] = "simple"
|
|
64
68
|
|
|
65
69
|
def __init__(
|
|
66
70
|
self, name: str, parent: Optional["Database"] = None, metadata: Optional[CollectionConfig] = None, **kwargs
|
|
@@ -197,6 +201,10 @@ class Collection:
|
|
|
197
201
|
"""
|
|
198
202
|
raise NotImplementedError
|
|
199
203
|
|
|
204
|
+
def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
|
|
205
|
+
patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
|
|
206
|
+
self._broadcast(patches, **kwargs)
|
|
207
|
+
|
|
200
208
|
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
201
209
|
"""
|
|
202
210
|
Delete one or more objects from the collection.
|
|
@@ -301,7 +309,7 @@ class Collection:
|
|
|
301
309
|
|
|
302
310
|
def query_facets(
|
|
303
311
|
self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
|
|
304
|
-
) -> Dict[str,
|
|
312
|
+
) -> Dict[str, List[Tuple[Any, int]]]:
|
|
305
313
|
"""
|
|
306
314
|
Run a query to get facet counts for one or more columns.
|
|
307
315
|
|
|
@@ -319,7 +327,7 @@ class Collection:
|
|
|
319
327
|
:param query: A Query object representing the base query.
|
|
320
328
|
:param facet_columns: A list of column names to get facet counts for.
|
|
321
329
|
:param facet_limit:
|
|
322
|
-
:return: A dictionary where keys are column names and values are
|
|
330
|
+
:return: A dictionary where keys are column names and values are lists of tuples
|
|
323
331
|
containing the facet counts for each unique value in the respective column.
|
|
324
332
|
"""
|
|
325
333
|
raise NotImplementedError
|
|
@@ -414,7 +422,30 @@ class Collection:
|
|
|
414
422
|
**kwargs,
|
|
415
423
|
) -> QueryResult:
|
|
416
424
|
"""
|
|
417
|
-
Search the collection using a
|
|
425
|
+
Search the collection using a text-based index.
|
|
426
|
+
|
|
427
|
+
Example:
|
|
428
|
+
|
|
429
|
+
>>> from linkml_store import Client
|
|
430
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
431
|
+
>>> client = Client()
|
|
432
|
+
>>> db = client.attach_database("duckdb")
|
|
433
|
+
>>> collection = db.create_collection("Country")
|
|
434
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
435
|
+
>>> collection.insert(objs)
|
|
436
|
+
|
|
437
|
+
Now let's index, using the simple trigram-based index
|
|
438
|
+
|
|
439
|
+
>>> index = get_indexer("simple")
|
|
440
|
+
>>> collection.attach_indexer(index)
|
|
441
|
+
|
|
442
|
+
Now let's find all objects:
|
|
443
|
+
|
|
444
|
+
>>> qr = collection.search("France")
|
|
445
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
446
|
+
>>> assert score > 0.1
|
|
447
|
+
>>> top_obj["code"]
|
|
448
|
+
'FR'
|
|
418
449
|
|
|
419
450
|
:param query:
|
|
420
451
|
:param where:
|
|
@@ -424,12 +455,18 @@ class Collection:
|
|
|
424
455
|
:return:
|
|
425
456
|
"""
|
|
426
457
|
if index_name is None:
|
|
427
|
-
if len(self.
|
|
428
|
-
index_name = list(self.
|
|
458
|
+
if len(self.indexers) == 1:
|
|
459
|
+
index_name = list(self.indexers.keys())[0]
|
|
429
460
|
else:
|
|
430
|
-
|
|
461
|
+
logger.warning("Multiple indexes found. Using default index.")
|
|
462
|
+
index_name = self.default_index_name
|
|
431
463
|
ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
|
|
432
|
-
|
|
464
|
+
if index_name not in self.indexers:
|
|
465
|
+
ix = get_indexer(index_name)
|
|
466
|
+
if not self._indexers:
|
|
467
|
+
self._indexers = {}
|
|
468
|
+
self._indexers[index_name] = ix
|
|
469
|
+
ix = self.indexers.get(index_name)
|
|
433
470
|
if not ix:
|
|
434
471
|
raise ValueError(f"No index named {index_name}")
|
|
435
472
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
@@ -446,7 +483,10 @@ class Collection:
|
|
|
446
483
|
@property
|
|
447
484
|
def is_internal(self) -> bool:
|
|
448
485
|
"""
|
|
449
|
-
Check if the collection is internal
|
|
486
|
+
Check if the collection is internal.
|
|
487
|
+
|
|
488
|
+
Internal collections are hidden by default. Examples of internal collections
|
|
489
|
+
include shadow "index" collections
|
|
450
490
|
|
|
451
491
|
:return:
|
|
452
492
|
"""
|
|
@@ -462,6 +502,45 @@ class Collection:
|
|
|
462
502
|
"""
|
|
463
503
|
Attach an index to the collection.
|
|
464
504
|
|
|
505
|
+
As an example, first let's create a collection in a database:
|
|
506
|
+
|
|
507
|
+
>>> from linkml_store import Client
|
|
508
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
509
|
+
>>> client = Client()
|
|
510
|
+
>>> db = client.attach_database("duckdb")
|
|
511
|
+
>>> collection = db.create_collection("Country")
|
|
512
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
513
|
+
>>> collection.insert(objs)
|
|
514
|
+
|
|
515
|
+
We will create two indexes - one that indexes the whole object
|
|
516
|
+
(default behavior), the other one indexes the name only
|
|
517
|
+
|
|
518
|
+
>>> full_index = get_indexer("simple")
|
|
519
|
+
>>> full_index.name = "full"
|
|
520
|
+
>>> name_index = get_indexer("simple", text_template="{name}")
|
|
521
|
+
>>> name_index.name = "name"
|
|
522
|
+
>>> collection.attach_indexer(full_index)
|
|
523
|
+
>>> collection.attach_indexer(name_index)
|
|
524
|
+
|
|
525
|
+
Now let's find objects using the full index, using the string "France".
|
|
526
|
+
We expect the country France to be the top hit, but the score will
|
|
527
|
+
be less than one because we did not match all fields in the object.
|
|
528
|
+
|
|
529
|
+
>>> qr = collection.search("France", index_name="full")
|
|
530
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
531
|
+
>>> assert score > 0.1
|
|
532
|
+
>>> assert score < 0.5
|
|
533
|
+
>>> top_obj["code"]
|
|
534
|
+
'FR'
|
|
535
|
+
|
|
536
|
+
Now using the name index
|
|
537
|
+
|
|
538
|
+
>>> qr = collection.search("France", index_name="name")
|
|
539
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
540
|
+
>>> assert score > 0.99
|
|
541
|
+
>>> top_obj["code"]
|
|
542
|
+
'FR'
|
|
543
|
+
|
|
465
544
|
:param index:
|
|
466
545
|
:param name:
|
|
467
546
|
:param auto_index: Automatically index all objects in the collection
|
|
@@ -497,15 +576,18 @@ class Collection:
|
|
|
497
576
|
|
|
498
577
|
def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
|
|
499
578
|
"""
|
|
500
|
-
Index a list of objects
|
|
579
|
+
Index a list of objects using a specified index.
|
|
580
|
+
|
|
581
|
+
By default, the indexed objects will be stored in a shadow
|
|
582
|
+
collection in the same database, with additional fields for the index vector
|
|
501
583
|
|
|
502
584
|
:param objs:
|
|
503
|
-
:param index_name:
|
|
585
|
+
:param index_name: e.g. simple, llm
|
|
504
586
|
:param replace:
|
|
505
587
|
:param kwargs:
|
|
506
588
|
:return:
|
|
507
589
|
"""
|
|
508
|
-
ix = self._indexers.get(index_name)
|
|
590
|
+
ix = self._indexers.get(index_name, None)
|
|
509
591
|
if not ix:
|
|
510
592
|
raise ValueError(f"No index named {index_name}")
|
|
511
593
|
ix_coll_name = self._index_collection_name(index_name)
|
|
@@ -523,6 +605,7 @@ class Collection:
|
|
|
523
605
|
ix_coll.delete_where()
|
|
524
606
|
|
|
525
607
|
ix_coll.insert(objects_with_ix, **kwargs)
|
|
608
|
+
ix_coll.commit()
|
|
526
609
|
|
|
527
610
|
def list_index_names(self) -> List[str]:
|
|
528
611
|
"""
|
|
@@ -557,12 +640,22 @@ class Collection:
|
|
|
557
640
|
|
|
558
641
|
:return:
|
|
559
642
|
"""
|
|
560
|
-
sv = self.parent.schema_view
|
|
643
|
+
sv: SchemaView = self.parent.schema_view
|
|
561
644
|
if sv:
|
|
562
645
|
cls = sv.get_class(self.target_class_name)
|
|
646
|
+
if cls and not cls.attributes:
|
|
647
|
+
if not sv.class_induced_slots(cls.name):
|
|
648
|
+
for att in self._induce_attributes():
|
|
649
|
+
cls.attributes[att.name] = att
|
|
650
|
+
sv.set_modified()
|
|
563
651
|
return cls
|
|
564
652
|
return None
|
|
565
653
|
|
|
654
|
+
def _induce_attributes(self) -> List[SlotDefinition]:
|
|
655
|
+
result = self.find({}, limit=-1)
|
|
656
|
+
cd = self.induce_class_definition_from_objects(result.rows, max_sample_size=None)
|
|
657
|
+
return list(cd.attributes.values())
|
|
658
|
+
|
|
566
659
|
@property
|
|
567
660
|
def identifier_attribute_name(self) -> Optional[str]:
|
|
568
661
|
"""
|
|
@@ -579,6 +672,37 @@ class Collection:
|
|
|
579
672
|
return att.name
|
|
580
673
|
return None
|
|
581
674
|
|
|
675
|
+
def set_identifier_attribute_name(self, name: str):
|
|
676
|
+
"""
|
|
677
|
+
Set the name of the identifier attribute for the collection.
|
|
678
|
+
|
|
679
|
+
AKA the primary key.
|
|
680
|
+
|
|
681
|
+
:param name: The name of the identifier attribute.
|
|
682
|
+
"""
|
|
683
|
+
cd = self.class_definition()
|
|
684
|
+
if not cd:
|
|
685
|
+
raise ValueError(f"Cannot find class definition for {self.target_class_name}")
|
|
686
|
+
id_att = None
|
|
687
|
+
candidates = []
|
|
688
|
+
sv: SchemaView = self.parent.schema_view
|
|
689
|
+
cls = sv.get_class(cd.name)
|
|
690
|
+
existing_id_slot = sv.get_identifier_slot(cls.name)
|
|
691
|
+
if existing_id_slot:
|
|
692
|
+
if existing_id_slot.name == name:
|
|
693
|
+
return
|
|
694
|
+
existing_id_slot.identifier = False
|
|
695
|
+
for att in cls.attributes.values():
|
|
696
|
+
candidates.append(att.name)
|
|
697
|
+
if att.name == name:
|
|
698
|
+
att.identifier = True
|
|
699
|
+
id_att = att
|
|
700
|
+
else:
|
|
701
|
+
att.identifier = False
|
|
702
|
+
if not id_att:
|
|
703
|
+
raise ValueError(f"No attribute found with name {name} in {candidates}")
|
|
704
|
+
sv.set_modified()
|
|
705
|
+
|
|
582
706
|
def object_identifier(self, obj: OBJECT, auto=True) -> Optional[IDENTIFIER]:
|
|
583
707
|
"""
|
|
584
708
|
Return the identifier for an object.
|
|
@@ -622,6 +746,8 @@ class Collection:
|
|
|
622
746
|
for k, v in obj.items():
|
|
623
747
|
keys[k].append(v)
|
|
624
748
|
for k, vs in keys.items():
|
|
749
|
+
if k == "_id":
|
|
750
|
+
continue
|
|
625
751
|
multivalueds = []
|
|
626
752
|
inlineds = []
|
|
627
753
|
rngs = []
|
|
@@ -698,6 +824,39 @@ class Collection:
|
|
|
698
824
|
"""
|
|
699
825
|
raise NotImplementedError
|
|
700
826
|
|
|
827
|
+
def apply_patches(self, patches: List[PatchDict], **kwargs):
|
|
828
|
+
"""
|
|
829
|
+
Apply a patch to the collection.
|
|
830
|
+
|
|
831
|
+
Patches conform to the JSON Patch format.
|
|
832
|
+
|
|
833
|
+
:param patches:
|
|
834
|
+
:param kwargs:
|
|
835
|
+
:return:
|
|
836
|
+
"""
|
|
837
|
+
all_objs = self.find(limit=-1).rows
|
|
838
|
+
primary_key = self.identifier_attribute_name
|
|
839
|
+
if not primary_key:
|
|
840
|
+
raise ValueError(f"No primary key for {self.target_class_name}")
|
|
841
|
+
new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
|
|
842
|
+
self.replace(new_objs)
|
|
843
|
+
|
|
844
|
+
def diff(self, other: "Collection", **kwargs):
|
|
845
|
+
"""
|
|
846
|
+
Diff two collections.
|
|
847
|
+
|
|
848
|
+
:param other:
|
|
849
|
+
:param kwargs:
|
|
850
|
+
:return:
|
|
851
|
+
"""
|
|
852
|
+
src_objs = self.find(limit=-1).rows
|
|
853
|
+
tgt_objs = other.find(limit=-1).rows
|
|
854
|
+
primary_key = self.identifier_attribute_name
|
|
855
|
+
if not primary_key:
|
|
856
|
+
raise ValueError(f"No primary key for {self.target_class_name}")
|
|
857
|
+
patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
|
|
858
|
+
return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
|
|
859
|
+
|
|
701
860
|
def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
|
|
702
861
|
"""
|
|
703
862
|
Validate the contents of the collection
|
|
@@ -717,3 +876,14 @@ class Collection:
|
|
|
717
876
|
for obj in result.rows:
|
|
718
877
|
obj = clean_empties(obj)
|
|
719
878
|
yield from validator.iter_results(obj, class_name)
|
|
879
|
+
|
|
880
|
+
def commit(self):
|
|
881
|
+
"""
|
|
882
|
+
Commit changes to the collection.
|
|
883
|
+
|
|
884
|
+
:return:
|
|
885
|
+
"""
|
|
886
|
+
pass
|
|
887
|
+
|
|
888
|
+
def _broadcast(self, *args, **kwargs):
|
|
889
|
+
self.parent.broadcast(self, *args, **kwargs)
|
linkml_store/api/config.py
CHANGED
|
@@ -3,7 +3,11 @@ from typing import Any, Dict, List, Optional
|
|
|
3
3
|
from pydantic import BaseModel, Field
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class
|
|
6
|
+
class ConfiguredBaseModel(BaseModel, extra="forbid"):
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CollectionConfig(ConfiguredBaseModel):
|
|
7
11
|
name: Optional[str] = Field(
|
|
8
12
|
default=None,
|
|
9
13
|
description="An optional name for the collection",
|
|
@@ -42,7 +46,7 @@ class CollectionConfig(BaseModel):
|
|
|
42
46
|
)
|
|
43
47
|
|
|
44
48
|
|
|
45
|
-
class DatabaseConfig(
|
|
49
|
+
class DatabaseConfig(ConfiguredBaseModel):
|
|
46
50
|
handle: str = Field(
|
|
47
51
|
default="duckdb:///:memory:",
|
|
48
52
|
description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
|
|
@@ -86,7 +90,7 @@ class DatabaseConfig(BaseModel):
|
|
|
86
90
|
)
|
|
87
91
|
|
|
88
92
|
|
|
89
|
-
class ClientConfig(
|
|
93
|
+
class ClientConfig(ConfiguredBaseModel):
|
|
90
94
|
handle: Optional[str] = Field(
|
|
91
95
|
default=None,
|
|
92
96
|
description="The client handle",
|
|
@@ -95,6 +99,10 @@ class ClientConfig(BaseModel):
|
|
|
95
99
|
default={},
|
|
96
100
|
description="A dictionary of database configurations",
|
|
97
101
|
)
|
|
102
|
+
default_database: Optional[str] = Field(
|
|
103
|
+
default=None,
|
|
104
|
+
description="The default database",
|
|
105
|
+
)
|
|
98
106
|
schema_path: Optional[str] = Field(
|
|
99
107
|
default=None,
|
|
100
108
|
description="The path to the LinkML schema file",
|
linkml_store/api/database.py
CHANGED
|
@@ -3,9 +3,24 @@ from abc import ABC
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from copy import copy
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import
|
|
7
|
-
|
|
6
|
+
from typing import (
|
|
7
|
+
TYPE_CHECKING,
|
|
8
|
+
Any,
|
|
9
|
+
Callable,
|
|
10
|
+
ClassVar,
|
|
11
|
+
Dict,
|
|
12
|
+
Generic,
|
|
13
|
+
Iterator,
|
|
14
|
+
List,
|
|
15
|
+
Optional,
|
|
16
|
+
Sequence,
|
|
17
|
+
Type,
|
|
18
|
+
Union,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
from linkml_store.api.types import CollectionType
|
|
8
22
|
from linkml_store.utils.format_utils import load_objects, render_output
|
|
23
|
+
from linkml_store.utils.patch_utils import PatchDict
|
|
9
24
|
|
|
10
25
|
try:
|
|
11
26
|
from linkml.validator.report import Severity, ValidationResult
|
|
@@ -24,8 +39,10 @@ if TYPE_CHECKING:
|
|
|
24
39
|
|
|
25
40
|
logger = logging.getLogger(__name__)
|
|
26
41
|
|
|
42
|
+
LISTENER = Callable[[Collection, List[PatchDict]], None]
|
|
43
|
+
|
|
27
44
|
|
|
28
|
-
class Database(ABC):
|
|
45
|
+
class Database(ABC, Generic[CollectionType]):
|
|
29
46
|
"""
|
|
30
47
|
A Database provides access to named collections of data.
|
|
31
48
|
|
|
@@ -89,6 +106,8 @@ class Database(ABC):
|
|
|
89
106
|
metadata: Optional[DatabaseConfig] = None
|
|
90
107
|
collection_class: ClassVar[Optional[Type[Collection]]] = None
|
|
91
108
|
|
|
109
|
+
listeners: Optional[List[LISTENER]] = None
|
|
110
|
+
|
|
92
111
|
def __init__(self, handle: Optional[str] = None, metadata: Optional[DatabaseConfig] = None, **kwargs):
|
|
93
112
|
if metadata:
|
|
94
113
|
self.metadata = metadata
|
|
@@ -233,7 +252,8 @@ class Database(ABC):
|
|
|
233
252
|
:param kwargs:
|
|
234
253
|
:return:
|
|
235
254
|
"""
|
|
236
|
-
|
|
255
|
+
for coll in self.list_collections():
|
|
256
|
+
coll.commit()
|
|
237
257
|
|
|
238
258
|
def close(self, **kwargs):
|
|
239
259
|
"""
|
|
@@ -301,6 +321,7 @@ class Database(ABC):
|
|
|
301
321
|
alias = name
|
|
302
322
|
self._collections[alias] = collection
|
|
303
323
|
if recreate_if_exists:
|
|
324
|
+
logger.debug(f"Recreating collection {collection.name}")
|
|
304
325
|
collection.delete_where({}, missing_ok=True)
|
|
305
326
|
return collection
|
|
306
327
|
|
|
@@ -418,7 +439,11 @@ class Database(ABC):
|
|
|
418
439
|
:return:
|
|
419
440
|
|
|
420
441
|
"""
|
|
421
|
-
|
|
442
|
+
if query.from_table:
|
|
443
|
+
collection = self.get_collection(query.from_table)
|
|
444
|
+
return collection.query(query, **kwargs)
|
|
445
|
+
else:
|
|
446
|
+
raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")
|
|
422
447
|
|
|
423
448
|
@property
|
|
424
449
|
def schema_view(self) -> SchemaView:
|
|
@@ -689,3 +714,9 @@ class Database(ABC):
|
|
|
689
714
|
logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
|
|
690
715
|
with open(location, "w", encoding="utf-8") as stream:
|
|
691
716
|
stream.write(render_output(obj, format=target_format))
|
|
717
|
+
|
|
718
|
+
def broadcast(self, source: Collection, patches: List[PatchDict]):
|
|
719
|
+
if not self.listeners:
|
|
720
|
+
return
|
|
721
|
+
for listener in self.listeners:
|
|
722
|
+
listener(source, patches)
|
|
@@ -38,6 +38,7 @@ class DuckDBCollection(Collection):
|
|
|
38
38
|
with conn.begin():
|
|
39
39
|
conn.execute(insert(table), objs)
|
|
40
40
|
conn.commit()
|
|
41
|
+
self._post_insert_hook(objs)
|
|
41
42
|
|
|
42
43
|
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
43
44
|
if not isinstance(objs, list):
|
|
@@ -89,7 +90,9 @@ class DuckDBCollection(Collection):
|
|
|
89
90
|
cd = self.class_definition()
|
|
90
91
|
with self.parent.engine.connect() as conn:
|
|
91
92
|
if not facet_columns:
|
|
92
|
-
|
|
93
|
+
if not cd:
|
|
94
|
+
raise ValueError(f"No class definition found for {self.target_class_name}")
|
|
95
|
+
facet_columns = list(cd.attributes.keys())
|
|
93
96
|
for col in facet_columns:
|
|
94
97
|
logger.debug(f"Faceting on {col}")
|
|
95
98
|
if isinstance(col, tuple):
|
|
@@ -100,7 +103,7 @@ class DuckDBCollection(Collection):
|
|
|
100
103
|
facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
|
|
101
104
|
logger.debug(f"Facet query: {facet_query_str}")
|
|
102
105
|
rows = list(conn.execute(text(facet_query_str)))
|
|
103
|
-
results[col] = rows
|
|
106
|
+
results[col] = [tuple(row) for row in rows]
|
|
104
107
|
return results
|
|
105
108
|
|
|
106
109
|
def _sqla_table(self, cd: ClassDefinition) -> Table:
|
|
@@ -109,7 +112,7 @@ class DuckDBCollection(Collection):
|
|
|
109
112
|
cols = []
|
|
110
113
|
for att in schema_view.class_induced_slots(cd.name):
|
|
111
114
|
typ = TMAP.get(att.range, sqla.String)
|
|
112
|
-
if att.inlined:
|
|
115
|
+
if att.inlined or att.inlined_as_list:
|
|
113
116
|
typ = sqla.JSON
|
|
114
117
|
if att.multivalued:
|
|
115
118
|
typ = sqla.ARRAY(typ, dimensions=1)
|
|
@@ -31,6 +31,18 @@ logger = logging.getLogger(__name__)
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class DuckDBDatabase(Database):
|
|
34
|
+
"""
|
|
35
|
+
An adapter for DuckDB databases.
|
|
36
|
+
|
|
37
|
+
Note that this adapter does not make use of a LinkML relational model transformation and
|
|
38
|
+
SQL Alchemy ORM layer. Instead, it attempts to map each collection (which is of type
|
|
39
|
+
some LinkML class) to a *single* DuckDB table. New tables are not created for nested references,
|
|
40
|
+
and linking tables are not created for many-to-many relationships.
|
|
41
|
+
|
|
42
|
+
Instead the native DuckDB ARRAY type is used to store multivalued attributes, and DuckDB JSON
|
|
43
|
+
types are used for nested inlined objects.
|
|
44
|
+
"""
|
|
45
|
+
|
|
34
46
|
_connection: DuckDBPyConnection = None
|
|
35
47
|
_engine: sqlalchemy.Engine = None
|
|
36
48
|
collection_class = DuckDBCollection
|
|
@@ -103,7 +115,14 @@ class DuckDBDatabase(Database):
|
|
|
103
115
|
if row[col]:
|
|
104
116
|
if isinstance(row[col], list):
|
|
105
117
|
for i in range(len(row[col])):
|
|
106
|
-
|
|
118
|
+
try:
|
|
119
|
+
parsed_val = json.loads(row[col][i])
|
|
120
|
+
except json.JSONDecodeError as e:
|
|
121
|
+
logger.error(f"Failed to parse col {col}[{i}] == {row[col][i]}")
|
|
122
|
+
raise e
|
|
123
|
+
row[col][i] = parsed_val
|
|
124
|
+
elif isinstance(row[col], dict):
|
|
125
|
+
pass
|
|
107
126
|
else:
|
|
108
127
|
row[col] = json.loads(row[col])
|
|
109
128
|
qr.set_rows(pd.DataFrame(rows))
|
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Adapter for
|
|
2
|
+
Adapter for FileSystem wrapper
|
|
3
3
|
|
|
4
4
|
Handles have the form:
|
|
5
5
|
|
|
6
|
-
- ``
|
|
7
|
-
|
|
8
|
-
"""
|
|
6
|
+
- ``file:<path>`` for a local file
|
|
7
|
+
"""
|
|
9
8
|
|
|
10
|
-
from linkml_store.api.stores.
|
|
11
|
-
from linkml_store.api.stores.
|
|
9
|
+
from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
|
|
10
|
+
from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
|
|
12
11
|
|
|
13
12
|
__all__ = [
|
|
14
|
-
"
|
|
15
|
-
"
|
|
13
|
+
"FileSystemCollection",
|
|
14
|
+
"FileSystemDatabase",
|
|
16
15
|
]
|