linkml-store 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/api/client.py +63 -7
- linkml_store/api/collection.py +152 -32
- linkml_store/api/config.py +49 -6
- linkml_store/api/database.py +77 -30
- linkml_store/api/stores/duckdb/duckdb_collection.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +47 -5
- linkml_store/api/stores/filesystem/filesystem_collection.py +11 -4
- linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
- linkml_store/api/stores/mongodb/mongodb_collection.py +6 -2
- linkml_store/api/stores/mongodb/mongodb_database.py +30 -35
- linkml_store/api/stores/solr/solr_collection.py +4 -4
- linkml_store/cli.py +64 -19
- linkml_store/index/__init__.py +16 -2
- linkml_store/index/implementations/llm_indexer.py +2 -1
- linkml_store/index/indexer.py +13 -2
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +200 -21
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/pandas_utils.py +40 -0
- linkml_store/utils/sql_utils.py +9 -3
- linkml_store/webapi/html/generic.html.j2 +25 -28
- linkml_store/webapi/main.py +346 -63
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/METADATA +36 -3
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/RECORD +27 -24
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.12.dist-info}/entry_points.txt +0 -0
linkml_store/api/database.py
CHANGED
@@ -19,7 +19,7 @@ from typing import (
 )

 from linkml_store.api.types import CollectionType
-from linkml_store.utils.format_utils import load_objects, render_output
+from linkml_store.utils.format_utils import Format, load_objects, render_output
 from linkml_store.utils.patch_utils import PatchDict

 try:
@@ -149,26 +149,19 @@ class Database(ABC, Generic[CollectionType]):
     def _initialize_collections(self):
         if not self.metadata.collections:
             return
-        for …
-        # )
-        if False and typ is not None:
-            if not alias:
-                alias = name
-            name = typ
-            if not collection_config.name:
-                collection_config.name = name
-        _collection = self.create_collection(name, alias=alias, metadata=collection_config)
+        for k, collection_config in self.metadata.collections.items():
+            if collection_config.alias:
+                if collection_config.alias != k:
+                    raise ValueError(f"Alias mismatch: {collection_config.alias} != {k}")
+            alias = k
+            typ = collection_config.type or alias
+            _collection = self.create_collection(typ, alias=alias, metadata=collection_config)
+            assert _collection.alias == alias
+            assert _collection.target_class_name == typ
             if collection_config.attributes:
+                # initialize schema
                 sv = self.schema_view
-                cd = ClassDefinition( …
+                cd = ClassDefinition(typ, attributes=collection_config.attributes)
                 sv.schema.classes[cd.name] = cd
                 sv.set_modified()
                 # assert collection.class_definition() is not None
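The rewritten loop keys each collection by its dictionary key, requires any explicit alias to match that key, and falls back to the alias when no type is given. A minimal sketch of a config that satisfies the new check (constructor keywords are assumptions inferred from the field accesses above):

    from linkml_store.api.config import CollectionConfig

    # Dict keys act as aliases; `type` names the target class (defaults to the alias).
    collections = {
        "persons": CollectionConfig(type="Person"),
        "products": CollectionConfig(type="Product", alias="products"),  # alias, if set, must equal the key
    }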
@@ -275,7 +268,7 @@ class Database(ABC, Generic[CollectionType]):
         metadata: Optional[CollectionConfig] = None,
         recreate_if_exists=False,
         **kwargs,
-    ) -> …
+    ) -> CollectionType:
        """
        Create a new collection in the current database.

@@ -307,8 +300,10 @@ class Database(ABC, Generic[CollectionType]):
         if not name:
             raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")
         collection_cls = self.collection_class
-        collection = collection_cls(name=name, …
-        if …
+        collection = collection_cls(name=name, parent=self, metadata=metadata)
+        if alias:
+            collection.metadata.alias = alias
+        if metadata and metadata.source:
             collection.load_from_source()
         if metadata and metadata.attributes:
             sv = self.schema_view
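Since the alias is now stored on the collection's metadata, callers can distinguish the storage alias from the target class. A minimal usage sketch, assuming the in-memory DuckDB handle used elsewhere in this diff:

    from linkml_store.api.client import Client

    client = Client()
    db = client.attach_database("duckdb:///:memory:")
    coll = db.create_collection("Person", alias="persons")
    assert coll.alias == "persons"             # storage alias
    assert coll.target_class_name == "Person"  # schema class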
@@ -321,7 +316,7 @@ class Database(ABC, Generic[CollectionType]):
            alias = name
         self._collections[alias] = collection
         if recreate_if_exists:
-            logger.debug(f"Recreating collection {collection. …
+            logger.debug(f"Recreating collection {collection.alias}")
             collection.delete_where({}, missing_ok=True)
         return collection

@@ -339,7 +334,7 @@ class Database(ABC, Generic[CollectionType]):
        >>> collections = db.list_collections()
        >>> len(collections)
        2
-        >>> [c. …
+        >>> [c.target_class_name for c in collections]
        ['Person', 'Product']

        :param include_internal: include internal collections
@@ -367,7 +362,7 @@ class Database(ABC, Generic[CollectionType]):
        ['Person', 'Product']

        """
-        return [c. …
+        return [c.alias for c in self.list_collections(**kwargs)]

    def get_collection(
        self, name: str, type: Optional[str] = None, create_if_not_exists=True, **kwargs
@@ -410,7 +405,7 @@ class Database(ABC, Generic[CollectionType]):
        """
        Initialize collections.

-        Not typically called directly: consider making …
+        TODO: Not typically called directly: consider making this private
        :return:
        """
        raise NotImplementedError
@@ -502,7 +497,7 @@ class Database(ABC, Generic[CollectionType]):
        >>> sorted(collection.class_definition().slots)
        ['capital', 'code', 'continent', 'languages', 'name']

-        :param schema_view:
+        :param schema_view: can be either a path to the schema, or a SchemaView object
        :return:
        """
        if isinstance(schema_view, Path):
@@ -585,7 +580,15 @@ class Database(ABC, Generic[CollectionType]):

        :return: A schema view
        """
-        …
+        logger.info(f"Inducing schema view for {self.handle}")
+        from linkml_runtime.utils.schema_builder import SchemaBuilder
+
+        sb = SchemaBuilder()
+
+        for collection_name in self.list_collection_names():
+            coll = self.get_collection(collection_name)
+            sb.add_class(coll.target_class_name)
+        return SchemaView(sb.schema)

    def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
        """
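The base class now induces a skeleton schema with one class per collection instead of leaving this abstract. A sketch of the observable effect, assuming the method is induce_schema_view (consistent with the xxxinduce_schema_view rename in the filesystem store below) and that db holds Person and Product collections:

    sv = db.induce_schema_view()
    print(sorted(sv.schema.classes.keys()))  # ['Person', 'Product']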
@@ -683,11 +686,26 @@ class Database(ABC, Generic[CollectionType]):
        """
        Drop the database and all collections.

+        >>> from linkml_store.api.client import Client
+        >>> client = Client()
+        >>> path = Path("/tmp/test.db")
+        >>> path.parent.mkdir(exist_ok=True, parents=True)
+        >>> db = client.attach_database(f"duckdb:///{path}")
+        >>> db.store({"persons": [{"id": "P1", "name": "John", "age_in_years": 30}]})
+        >>> coll = db.get_collection("persons")
+        >>> coll.find({}).num_rows
+        1
+        >>> db.drop()
+        >>> db = client.attach_database("duckdb:///tmp/test.db", alias="test")
+        >>> coll = db.get_collection("persons")
+        >>> coll.find({}).num_rows
+        0
+
        :param kwargs: additional arguments
        """
        raise NotImplementedError()

-    def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
+    def import_database(self, location: str, source_format: Optional[Union[str, Format]] = None, **kwargs):
        """
        Import a database from a file or location.

@@ -695,11 +713,27 @@ class Database(ABC, Generic[CollectionType]):
        :param source_format: source format
        :param kwargs: additional arguments
        """
+        if isinstance(source_format, str):
+            source_format = Format(source_format)
+        if isinstance(source_format, Format):
+            if source_format.is_dump_format() and source_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]:
+                # import into a test instance
+                tmp_handle = source_format.value
+                client = self.parent
+                tmp_db = client.attach_database(tmp_handle, alias="tmp")
+                # TODO: check for infinite recursion
+                tmp_db.import_database(location, source_format=source_format)
+                obj = {}
+                for coll in tmp_db.list_collections():
+                    qr = coll.find({}, limit=-1)
+                    obj[coll.alias] = qr.rows
+                self.store(obj)
+                return
        objects = load_objects(location, format=source_format)
        for obj in objects:
            self.store(obj)

-    def export_database(self, location: str, target_format: Optional[str] = None, **kwargs):
+    def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
        """
        Export a database to a file or location.

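The generic importer now detours recognized dump formats through a scratch database whose handle is the format's enum value, then copies rows into the current database. A hedged sketch of the call; that the string "duckdb" is the value of Format.SQLDUMP_DUCKDB is inferred from the DuckDB override later in this diff:

    db.import_database("/tmp/duck_dump", source_format="duckdb")  # str is coerced to Format first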
@@ -708,10 +742,23 @@ class Database(ABC, Generic[CollectionType]):
        :param kwargs: additional arguments
        """
        obj = {}
+        if isinstance(target_format, str):
+            target_format = Format(target_format)
        for coll in self.list_collections():
            qr = coll.find({}, limit=-1)
            obj[coll.alias] = qr.rows
        logger.info(f"Exporting object with {len(obj)} collections to {location} in {target_format} format")
+        if isinstance(target_format, Format):
+            if target_format.is_dump_format() and target_format in [Format.SQLDUMP_DUCKDB, Format.DUMP_MONGODB]:
+                tmp_handle = target_format.value
+                client = self.parent
+                tmp_db = client.attach_database(tmp_handle, alias="tmp")
+                tmp_db.store(obj)
+                # TODO: check for infinite recursion
+                tmp_db.export_database(location, target_format=target_format)
+                return
+        if Path(location).is_dir():
+            raise ValueError(f"{location} is a directory; cannot write {target_format} to a dir")
        with open(location, "w", encoding="utf-8") as stream:
            stream.write(render_output(obj, format=target_format))
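Non-dump exports now coerce string formats and fail fast when the target is a directory. For example (assuming "yaml" is a valid Format value):

    db.export_database("/tmp/all.yaml", target_format="yaml")  # serializes every collection to one file
    db.export_database("/tmp", target_format="yaml")           # raises ValueError: /tmp is a directory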
linkml_store/api/stores/duckdb/duckdb_collection.py
CHANGED
@@ -18,6 +18,9 @@ logger = logging.getLogger(__name__)
 class DuckDBCollection(Collection):
     _table_created: bool = None

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         logger.debug(f"Inserting {len(objs)}")
         if not isinstance(objs, list):
@@ -123,6 +126,17 @@ class DuckDBCollection(Collection):
         t = Table(self.alias, metadata_obj, *cols)
         return t

+    def _check_if_initialized(self) -> bool:
+        # if self._initialized:
+        #     return True
+        query = Query(
+            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        )
+        qr = self.parent.query(query)
+        if qr.num_rows > 0:
+            return True
+        return False
+
     def _create_table(self, cd: ClassDefinition):
         if self._table_created or self.metadata.is_prepopulated:
             logger.info(f"Already have table for: {cd.name}")
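The probe above goes through the store's own Query abstraction; against DuckDB directly it is roughly the following (a sketch using the duckdb client library, not linkml-store code):

    import duckdb

    con = duckdb.connect()  # in-memory database
    row = con.execute(
        "SELECT 1 FROM information_schema.tables "
        "WHERE table_type = 'BASE TABLE' AND table_name = ?",
        ["persons"],
    ).fetchone()
    initialized = row is not None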
@@ -134,6 +148,7 @@ class DuckDBCollection(Collection):
         if qr.num_rows > 0:
             logger.info(f"Table already exists for {cd.name}")
             self._table_created = True
+            self._initialized = True
             self.metadata.is_prepopulated = True
             return
         logger.info(f"Creating table for {cd.name}")
@@ -144,4 +159,5 @@ class DuckDBCollection(Collection):
             conn.execute(text(ddl))
             conn.commit()
         self._table_created = True
+        self._initialized = True
         self.metadata.is_prepopulated = True
linkml_store/api/stores/duckdb/duckdb_database.py
CHANGED
@@ -1,11 +1,10 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union

 import pandas as pd
 import sqlalchemy
-from duckdb import DuckDBPyConnection
 from linkml_runtime import SchemaView
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
 from linkml_runtime.utils.schema_builder import SchemaBuilder
@@ -14,6 +13,7 @@ from sqlalchemy import NullPool, text
 from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
+from linkml_store.utils.format_utils import Format
 from linkml_store.utils.sql_utils import introspect_schema, query_to_sql

 TYPE_MAP = {
@@ -26,6 +26,8 @@ TYPE_MAP = {
     "JSON": "Any",
 }

+MEMORY_HANDLE = "duckdb:///:memory:"
+

 logger = logging.getLogger(__name__)

@@ -43,13 +45,13 @@ class DuckDBDatabase(Database):
     types are used for nested inlined objects.
     """

-    _connection: DuckDBPyConnection = None
+    # _connection: DuckDBPyConnection = None
     _engine: sqlalchemy.Engine = None
     collection_class = DuckDBCollection

     def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
         if handle is None:
-            handle = …
+            handle = MEMORY_HANDLE
         if recreate_if_exists:
             path = Path(handle.replace("duckdb:///", ""))
             if path.exists():
@@ -76,6 +78,17 @@ class DuckDBDatabase(Database):
     def close(self, **kwargs):
         self.engine.dispose()

+    def drop(self, missing_ok=True, **kwargs):
+        self.close()
+        if self.handle == MEMORY_HANDLE:
+            return
+        path = Path(self.handle.replace("duckdb:///", ""))
+        if path.exists():
+            path.unlink()
+        else:
+            if not missing_ok:
+                raise FileNotFoundError(f"Database file not found: {path}")
+
     def query(self, query: Query, **kwargs) -> QueryResult:
         json_encoded_cols = []
         if query.from_table:
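Usage sketch: dropping a file-backed database disposes the engine and unlinks the file, while the shared in-memory handle returns untouched:

    db = client.attach_database("duckdb:///tmp/test.db")
    db.drop()                                            # removes /tmp/test.db
    client.attach_database("duckdb:///:memory:").drop()  # no-op on disk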
@@ -94,7 +107,8 @@ class DuckDBDatabase(Database):
         if sv:
             cd = None
             for c in self._collections.values():
-                if c.name == query.from_table or c.metadata.alias == query.from_table:
+                # if c.name == query.from_table or c.metadata.alias == query.from_table:
+                if c.alias == query.from_table or c.target_class_name == query.from_table:
                     cd = c.class_definition()
                     break
             if cd:
@@ -188,3 +202,31 @@ class DuckDBDatabase(Database):
         cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
         schema.classes[cls.name] = cls
         return SchemaView(schema)
+
+    def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
+        if target_format == "duckdb" or target_format == Format.SQLDUMP_DUCKDB:
+            path = Path(location)
+            if path.exists():
+                if path.is_file():
+                    path.unlink()
+            with self.engine.connect() as conn:
+                sql = text(f"EXPORT DATABASE '{location}'")
+                conn.execute(sql)
+        else:
+            super().export_database(location, target_format=target_format, **kwargs)
+
+    def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
+        """
+        Import a database from a file or location.
+
+        :param location: location of the file
+        :param source_format: source format
+        :param kwargs: additional arguments
+        """
+        if source_format == Format.SQLDUMP_DUCKDB.value or source_format == Format.SQLDUMP_DUCKDB:
+            with self.engine.connect() as conn:
+                sql = text(f"IMPORT DATABASE '{location}'")
+                conn.execute(sql)
+                conn.commit()
+        else:
+            super().import_database(location, source_format=source_format, **kwargs)
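DuckDB's EXPORT DATABASE statement writes a directory of SQL and data files, so this override bypasses the base class's single-file writer. A round-trip sketch:

    from linkml_store.utils.format_utils import Format

    db.export_database("/tmp/duck_dump", target_format=Format.SQLDUMP_DUCKDB)
    db2 = client.attach_database("duckdb:///:memory:")
    db2.import_database("/tmp/duck_dump", source_format=Format.SQLDUMP_DUCKDB)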
linkml_store/api/stores/filesystem/filesystem_collection.py
CHANGED
@@ -31,7 +31,7 @@ class FileSystemCollection(Collection[DatabaseType]):

     @property
     def path_to_file(self):
-        return Path(self.parent.directory_path) / f"{self. …
+        return Path(self.parent.directory_path) / f"{self.alias}.{self.file_format}"

     @property
     def objects_as_list(self) -> List[OBJECT]:
@@ -150,13 +150,20 @@ class FileSystemCollection(Collection[DatabaseType]):
         curr_objects = [o for o in self.objects_as_list if not matches(o)]
         self._set_objects(curr_objects)

-    def query(self, query: Query, **kwargs) -> QueryResult:
-        …
+    def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
+        limit = limit or query.limit
+        offset = offset or query.offset
+        if offset is None:
+            offset = 0
         where = query.where_clause or {}
         match = mongo_query_to_match_function(where)
         rows = [o for o in self.objects_as_list if match(o)]
         count = len(rows)
-        …
+        if limit is None or limit < 0:
+            limit = count
+        # TODO: avoid recalculating
+        returned_row = rows[offset : offset + limit]
+        return QueryResult(query=query, num_rows=count, rows=returned_row)

     def query_facets(
         self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
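With limit and offset handled here, num_rows still reports the full match count while rows carries only the requested window. A paging sketch:

    from linkml_store.api.queries import Query

    qr = coll.query(Query(from_table="persons"), limit=10, offset=20)
    page = qr.rows      # at most 10 objects, starting at the 21st match
    total = qr.num_rows # count of all matches, ignoring the window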
linkml_store/api/stores/filesystem/filesystem_database.py
CHANGED
@@ -9,6 +9,7 @@ from linkml_runtime import SchemaView
 from linkml_store.api import Database
 from linkml_store.api.config import DatabaseConfig
 from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
+from linkml_store.utils.file_utils import safe_remove_directory
 from linkml_store.utils.format_utils import Format, load_objects

 logger = logging.getLogger(__name__)
@@ -20,6 +21,8 @@ class FileSystemDatabase(Database):
     directory_path: Optional[Path] = None
     default_file_format: Optional[str] = None

+    no_backup_on_drop: bool = False
+
     def __init__(self, handle: Optional[str] = None, **kwargs):
         handle = handle.replace("file:", "")
         if handle.startswith("//"):
@@ -43,6 +46,12 @@ class FileSystemDatabase(Database):
     def close(self, **kwargs):
         pass

+    def drop(self, no_backup=False, **kwargs):
+        self.close()
+        path = self.directory_path
+        if path.exists():
+            safe_remove_directory(path, no_backup=self.no_backup_on_drop or no_backup)
+
     def init_collections(self):
         metadata = self.metadata
         if self._collections is None:
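A usage sketch; that safe_remove_directory moves the directory aside by default rather than deleting it is an assumption based on the helper's name and the no_backup flag:

    db = client.attach_database("file:///tmp/fs_store")  # assumed handle form
    db.drop(no_backup=True)  # remove outright; omit for the (assumed) backup-first default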
@@ -63,7 +72,7 @@ class FileSystemDatabase(Database):
         self._collections[n] = collection
         collection._set_objects(objs)

-    def …
+    def xxxinduce_schema_view(self) -> SchemaView:
         logger.info(f"Inducing schema view for {self.handle}")
         sb = SchemaBuilder()

linkml_store/api/stores/mongodb/mongodb_collection.py
CHANGED
@@ -23,11 +23,15 @@ class MongoDBCollection(Collection):

     @property
     def mongo_collection(self) -> MongoCollection:
-        …
+        # collection_name = self.alias or self.name
+        collection_name = self.alias
+        if not collection_name:
             raise ValueError("Collection name not set")
-        collection_name = self.alias or self.name
         return self.parent.native_db[collection_name]

+    def _check_if_initialized(self) -> bool:
+        return self.alias in self.parent.native_db.list_collection_names()
+
     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         if not isinstance(objs, list):
             objs = [objs]
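Standalone, the initialization check is a direct pymongo lookup, roughly (sketch with hypothetical connection details):

    from pymongo import MongoClient

    native_db = MongoClient("mongodb://localhost:27017")["test"]
    initialized = "persons" in native_db.list_collection_names()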
linkml_store/api/stores/mongodb/mongodb_database.py
CHANGED
@@ -1,17 +1,18 @@
 # mongodb_database.py

 import logging
-from …
+from pathlib import Path
+from typing import Optional, Union

-from linkml_runtime import SchemaView
-from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
-from linkml_runtime.utils.schema_builder import SchemaBuilder
 from pymongo import MongoClient
 from pymongo.database import Database as NativeDatabase

 from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
+from linkml_store.utils.file_utils import safe_remove_directory
+from linkml_store.utils.format_utils import Format
+from linkml_store.utils.mongodb_utils import import_mongodb

 logger = logging.getLogger(__name__)

@@ -30,6 +31,8 @@ class MongoDBDatabase(Database):
     def __init__(self, handle: Optional[str] = None, **kwargs):
         if handle is None:
             handle = "mongodb://localhost:27017/test"
+        if handle == "mongodb":
+            handle = "mongodb://localhost:27017/temporary"
         super().__init__(handle=handle, **kwargs)

     @property
@@ -63,10 +66,9 @@ class MongoDBDatabase(Database):
         self._native_client.close()

     def drop(self, **kwargs):
-        self.native_client.drop_database(self. …
+        self.native_client.drop_database(self.native_db.name)

     def query(self, query: Query, **kwargs) -> QueryResult:
-        # TODO: DRY
         if query.from_table:
             collection = self.get_collection(query.from_table)
             return collection.query(query, **kwargs)
@@ -82,33 +84,26 @@ class MongoDBDatabase(Database):
         collection = MongoDBCollection(name=collection_name, parent=self)
         self._collections[collection_name] = collection

-    def …
-        for cls_name in schema.classes:
-            if cls_name in self.metadata.collections:
-                collection_metadata = self.metadata.collections[cls_name]
-                if collection_metadata.attributes:
-                    del schema.classes[cls_name]
-                    cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
-                    schema.classes[cls.name] = cls
-
-        return SchemaView(schema)
+    def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
+        if target_format == Format.DUMP_MONGODB.value or target_format == Format.DUMP_MONGODB:
+            path = Path(location)
+            if path.exists():
+                safe_remove_directory(path, no_backup=True)
+            from linkml_store.utils.mongodb_utils import export_mongodb
+
+            export_mongodb(self.handle, location)
+        else:
+            super().export_database(location, target_format=target_format, **kwargs)
+
+    def import_database(self, location: str, source_format: Optional[str] = None, **kwargs):
+        """
+        Import a database from a file or location.
+
+        :param location: location of the file
+        :param source_format: source format
+        :param kwargs: additional arguments
+        """
+        if source_format == Format.DUMP_MONGODB.value or source_format == Format.DUMP_MONGODB:
+            import_mongodb(self.handle, location, drop=True)
+        else:
+            super().import_database(location, source_format=source_format, **kwargs)
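These overrides delegate to the new mongodb_utils helpers, which presumably wrap a mongodump-style directory dump. A round-trip sketch:

    from linkml_store.utils.format_utils import Format

    db.export_database("/tmp/mongo_dump", target_format=Format.DUMP_MONGODB)
    db.import_database("/tmp/mongo_dump", source_format=Format.DUMP_MONGODB)  # drop=True wipes existing data first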
linkml_store/api/stores/solr/solr_collection.py
CHANGED
@@ -18,7 +18,7 @@ class SolrCollection(Collection):
     @property
     def _collection_base(self) -> str:
         if self.parent.use_cores:
-            base_url = f"{self.parent.base_url}/{self. …
+            base_url = f"{self.parent.base_url}/{self.alias}"
         else:
             base_url = self.parent.base_url
         return base_url
@@ -37,7 +37,7 @@ class SolrCollection(Collection):
         if not qfs:
             raise ValueError("No searchable slots configured for Solr collection")
         solr_query = self._build_solr_query(where, search_term=query, extra={"defType": index_name, "qf": qfs})
-        logger.info(f"Querying Solr collection {self. …
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")

         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()
@@ -50,7 +50,7 @@ class SolrCollection(Collection):

     def query(self, query: Query, **kwargs) -> QueryResult:
         solr_query = self._build_solr_query(query)
-        logger.info(f"Querying Solr collection {self. …
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")

         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()
@@ -69,7 +69,7 @@ class SolrCollection(Collection):
         solr_query["facet.field"] = facet_columns
         solr_query["facet.limit"] = facet_limit

-        logger.info(f"Querying Solr collection {self. …
+        logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")

         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()