linkml-store 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic.
- linkml_store/api/client.py +32 -5
- linkml_store/api/collection.py +276 -27
- linkml_store/api/config.py +6 -2
- linkml_store/api/database.py +264 -21
- linkml_store/api/stores/chromadb/__init__.py +5 -1
- linkml_store/api/stores/duckdb/__init__.py +9 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +7 -4
- linkml_store/api/stores/duckdb/duckdb_database.py +19 -5
- linkml_store/api/stores/duckdb/mappings.py +1 -0
- linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +177 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +72 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +31 -10
- linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +150 -15
- linkml_store/index/__init__.py +6 -2
- linkml_store/index/implementations/llm_indexer.py +83 -5
- linkml_store/index/implementations/simple_indexer.py +2 -2
- linkml_store/index/indexer.py +32 -8
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/format_utils.py +139 -8
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/METADATA +7 -1
- linkml_store-0.1.9.dist-info/RECORD +49 -0
- linkml_store-0.1.7.dist-info/RECORD +0 -42
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/entry_points.txt +0 -0
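Taken together, the most visible additions in 0.1.9 are a new filesystem backend (registered under the "file" handle), JSON-Patch-based diff/apply_patches support on collections, a load_from_source hook driven by a new source_location config field, and doctest-style examples throughout the API. For orientation, here is the basic Client/Collection flow as documented in the new docstrings, a minimal sketch assembled only from those doctest examples, using the in-memory DuckDB backend:

from linkml_store import Client

# Create a client and attach an in-memory DuckDB database, as in the new docstrings.
client = Client()
db = client.attach_database("duckdb", alias="test")

# Create a collection and insert a couple of objects.
collection = db.create_collection("Person")
objs = [
    {"id": "P1", "name": "John", "age_in_years": 30},
    {"id": "P2", "name": "Alice", "age_in_years": 25},
]
collection.insert(objs)

# Query it back; find({}) matches everything, and the result exposes num_rows and rows.
qr = collection.find({"name": "Alice"})
print(qr.num_rows, qr.rows)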
linkml_store/api/client.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 from pathlib import Path
 from typing import Dict, Optional, Union
 
@@ -8,14 +9,19 @@ from linkml_store.api import Database
 from linkml_store.api.config import ClientConfig
 from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
 from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
+from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
 from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
 from linkml_store.api.stores.solr.solr_database import SolrDatabase
 
+logger = logging.getLogger(__name__)
+
+
 HANDLE_MAP = {
     "duckdb": DuckDBDatabase,
     "solr": SolrDatabase,
     "mongodb": MongoDBDatabase,
     "chromadb": ChromaDBDatabase,
+    "file": FileSystemDatabase,
 }
 
 
@@ -23,14 +29,27 @@ class Client:
     """
     A client is the top-level object for interacting with databases.
 
-    A client has access to one or more :class
-
-    Each database consists of a number of :class:`.Collection` objects.
+    * A client has access to one or more :class:`.Database` objects.
+    * Each database consists of a number of :class:`.Collection` objects.
 
-
-
+    Creating a client
+    -----------------
     >>> client = Client()
+
+    Attaching a database
+    --------------------
     >>> db = client.attach_database("duckdb", alias="test")
+
+    Note that normally a handle would be specified by a locator such as ``duckdb:///<PATH>``, but
+    for convenience, an in-memory duckdb object can be specified without a full locator
+
+    We can check the actual handle:
+
+    >>> db.handle
+    'duckdb:///:memory:'
+
+    Creating a new collection
+    -------------------------
     >>> collection = db.create_collection("Person")
     >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
     >>> collection.insert(objs)
@@ -151,6 +170,8 @@ class Client:
         if ":" not in handle:
             scheme = handle
             handle = None
+            if alias is None:
+                alias = scheme
         else:
             scheme, _ = handle.split(":", 1)
         if scheme not in HANDLE_MAP:
@@ -165,6 +186,11 @@ class Client:
             self._databases = {}
         self._databases[alias] = db
         db.parent = self
+        if db.alias:
+            if db.alias != alias:
+                raise AssertionError(f"Inconsistent alias: {db.alias} != {alias}")
+        else:
+            db.metadata.alias = alias
         return db
 
     def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
@@ -195,6 +221,7 @@ class Client:
             self._databases = {}
         if name not in self._databases:
            if create_if_not_exists:
+                logger.info(f"Creating database: {name}")
                 self.attach_database(name, **kwargs)
             else:
                 raise ValueError(f"Database {name} does not exist")
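The attach_database changes above also make a bare scheme double as the default alias: when the handle contains no ":", the scheme is used as the alias unless one is given explicitly. A small sketch of that behavior (that get_database then resolves the database by this alias is an assumption read off the hunks above):

from linkml_store import Client

client = Client()

# Passing just a scheme expands to an in-memory locator, and as of 0.1.9
# the alias defaults to the scheme ("duckdb") when none is supplied.
db = client.attach_database("duckdb")
print(db.handle)  # 'duckdb:///:memory:' per the docstring example

# The same database should then be retrievable under that default alias.
same_db = client.get_database("duckdb", create_if_not_exists=False)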
linkml_store/api/collection.py
CHANGED
@@ -1,16 +1,22 @@
+"""A structure for representing collections of similar objects."""
+
 import hashlib
 import logging
 from collections import defaultdict
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, TextIO, Type, Union
+from typing import TYPE_CHECKING, Any, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
 
 import numpy as np
+from linkml_runtime import SchemaView
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
 from linkml_runtime.linkml_model.meta import ArrayExpression
 from pydantic import BaseModel
 
+from linkml_store.api.types import DatabaseType
 from linkml_store.index import get_indexer
+from linkml_store.utils.format_utils import load_objects
 from linkml_store.utils.object_utils import clean_empties
+from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
 
 try:
     from linkml.validator.report import ValidationResult
@@ -33,17 +39,27 @@ IDENTIFIER = str
 FIELD_NAME = str
 
 
-class Collection:
+class Collection(Generic[DatabaseType]):
     """
     A collection is an organized set of objects of the same or similar type.
 
     - For relational databases, a collection is typically a table
     - For document databases such as MongoDB, a collection is the native type
-    - For a file system, a collection could be a single tabular file such as Parquet or CSV
+    - For a file system, a collection could be a single tabular file such as Parquet or CSV.
+
+    Collection objects are typically not created directly - instead they are generated
+    from a parent :class:`.Database` object:
+
+    >>> from linkml_store import Client
+    >>> client = Client()
+    >>> db = client.attach_database("duckdb", alias="test")
+    >>> collection = db.create_collection("Person")
+    >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+    >>> collection.insert(objs)
     """
 
     # name: str
-    parent: Optional[
+    parent: Optional[DatabaseType] = None
     _indexers: Optional[Dict[str, Indexer]] = None
     # hidden: Optional[bool] = False
 
@@ -57,15 +73,21 @@ class Collection:
             self.metadata = metadata
         else:
             self.metadata = CollectionConfig(name=name, **kwargs)
-
-
+        if not self.metadata.alias:
+            self.metadata.alias = name
+        if not self.metadata.type:
+            self.metadata.type = name
+        # if name is not None and self.metadata.name is not None and name != self.metadata.name:
+        #     raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
 
     @property
     def name(self) -> str:
         """
-        Return the name of the collection
+        Return the name of the collection.
 
-        :
+        TODO: deprecate in favor of Type
+
+        :return: name of the collection
         """
         return self.metadata.name
 
@@ -79,7 +101,7 @@ class Collection:
 
         :return: True if the collection is hidden
         """
-        return self.metadata.hidden
+        # return self.metadata.hidden
 
     @property
     def target_class_name(self):
@@ -88,7 +110,14 @@ class Collection:
 
         This MUST be a LinkML class name
 
-
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person", alias="persons")
+        >>> collection.target_class_name
+        'Person'
+
+        :return: name of the class which members of this collection instantiate
         """
         # TODO: this is a shim layer until we can normalize on this
         if self.metadata.type:
@@ -104,15 +133,34 @@ class Collection:
         to have an alias, for example "persons" which collects all instances
         of class Person.
 
-
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person", alias="persons")
+        >>> collection.alias
+        'persons'
+
+        If no explicit alias is provided, then the target class name is used:
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> collection.alias
+        'Person'
+
+        The alias SHOULD be used for Table names in SQL.
 
         For nested data, the alias SHOULD be used as the key; e.g
 
-
+        .. code-block:: json
+
+          { "persons": [ { "name": "Alice" }, { "name": "Bob" } ] }
 
         :return:
         """
         # TODO: this is a shim layer until we can normalize on this
+        # TODO: this is a shim layer until we can normalize on this
         if self.metadata.alias:
             return self.metadata.alias
         return self.name
@@ -121,6 +169,13 @@ class Collection:
         """
         Replace entire collection with objects.
 
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.insert(objs)
+
         :param objs:
         :param kwargs:
         :return:
@@ -130,7 +185,14 @@ class Collection:
 
     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         """
-        Add one or more objects to the collection
+        Add one or more objects to the collection.
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.insert(objs)
 
         :param objs:
         :param kwargs:
@@ -138,9 +200,36 @@ class Collection:
         """
         raise NotImplementedError
 
-    def
+    def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
+        patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
+        self._broadcast(patches, **kwargs)
+
+    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
         """
-        Delete one or more objects from the collection
+        Delete one or more objects from the collection.
+
+        First let's set up a collection:
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.insert(objs)
+        >>> collection.find({}).num_rows
+        2
+
+        Now let's delete an object:
+
+        >>> collection.delete(objs[0])
+        >>> collection.find({}).num_rows
+        1
+
+        Deleting the same object again should have no effect:
+
+        >>> collection.delete(objs[0])
+        >>> collection.find({}).num_rows
+        1
 
         :param objs:
         :param kwargs:
@@ -148,9 +237,30 @@ class Collection:
         """
         raise NotImplementedError
 
-    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
+    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
         """
-        Delete objects that match a query
+        Delete objects that match a query.
+
+        First let's set up a collection:
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.insert(objs)
+
+        Now let's delete an object:
+
+        >>> collection.delete_where({"id": "P1"})
+        >>> collection.find({}).num_rows
+        1
+
+        Match everything:
+
+        >>> collection.delete_where({})
+        >>> collection.find({}).num_rows
+        0
 
         :param where: where conditions
         :param missing_ok: if True, do not raise an error if the collection does not exist
@@ -161,7 +271,7 @@ class Collection:
 
     def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         """
-        Update one or more objects in the collection
+        Update one or more objects in the collection.
 
         :param objs:
         :param kwargs:
@@ -174,7 +284,21 @@ class Collection:
 
     def query(self, query: Query, **kwargs) -> QueryResult:
         """
-        Run a query against the collection
+        Run a query against the collection.
+
+        First let's load a collection:
+
+        >>> from linkml_store import Client
+        >>> from linkml_store.utils.format_utils import load_objects
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb")
+        >>> collection = db.create_collection("Country")
+        >>> objs = load_objects("tests/input/countries/countries.jsonl")
+        >>> collection.insert(objs)
+
+        Now let's run a query:
+
+        TODO
 
         :param query:
        :param kwargs:
@@ -184,7 +308,7 @@ class Collection:
 
    def query_facets(
         self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
-    ) -> Dict[str,
+    ) -> Dict[str, List[Tuple[Any, int]]]:
         """
         Run a query to get facet counts for one or more columns.
 
@@ -202,12 +326,12 @@ class Collection:
         :param query: A Query object representing the base query.
         :param facet_columns: A list of column names to get facet counts for.
         :param facet_limit:
-        :return: A dictionary where keys are column names and values are
+        :return: A dictionary where keys are column names and values are tuples
             containing the facet counts for each unique value in the respective column.
         """
         raise NotImplementedError
 
-    def get(self, ids: Optional[IDENTIFIER], **kwargs) -> QueryResult:
+    def get(self, ids: Optional[List[IDENTIFIER]], **kwargs) -> QueryResult:
         """
         Get one or more objects by ID.
 
@@ -217,6 +341,8 @@ class Collection:
         """
         # TODO
         id_field = self.identifier_attribute_name
+        if not id_field:
+            raise ValueError(f"No identifier for {self.name}")
         return self.find({id_field: ids})
 
     def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
@@ -242,6 +368,31 @@ class Collection:
         """
         Find objects in the collection using a where query.
 
+        As an example, first load a collection:
+
+        >>> from linkml_store import Client
+        >>> from linkml_store.utils.format_utils import load_objects
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb")
+        >>> collection = db.create_collection("Country")
+        >>> objs = load_objects("tests/input/countries/countries.jsonl")
+        >>> collection.insert(objs)
+
+        Now let's find all objects:
+
+        >>> qr = collection.find({})
+        >>> qr.num_rows
+        20
+
+        We can do a more restrictive query:
+
+        >>> qr = collection.find({"code": "FR"})
+        >>> qr.num_rows
+        1
+        >>> qr.rows[0]["name"]
+        'France'
+
+
         :param where:
         :param kwargs:
         :return:
@@ -290,6 +441,7 @@ class Collection:
             raise ValueError(f"No index named {index_name}")
         qr = ix_coll.find(where=where, limit=-1, **kwargs)
         index_col = ix.index_field
+        # TODO: optimize this for large indexes
         vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
         results = ix.search(query, vector_pairs, limit=limit)
         for r in results:
@@ -305,11 +457,15 @@ class Collection:
 
         :return:
         """
-        if not self.
-            raise ValueError(f"Collection has no
-        return self.
+        if not self.alias:
+            raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
+        return self.alias.startswith("internal__")
+
+    def load_from_source(self):
+        objects = load_objects(self.metadata.source_location)
+        self.insert(objects)
 
-    def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] =
+    def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
         """
         Attach an index to the collection.
 
@@ -333,6 +489,7 @@ class Collection:
         self._indexers[index_name] = index
         if auto_index:
             all_objs = self.find(limit=-1).rows
+            logger.info(f"Auto-indexing {len(all_objs)} objects")
             self.index_objects(all_objs, index_name, replace=True, **kwargs)
 
     def _index_collection_name(self, index_name: str) -> str:
@@ -340,6 +497,7 @@ class Collection:
         Create a name for a special collection that holds index data
 
         :param index_name:
+        :param indexer:
         :return:
         """
         return f"internal__index__{self.name}__{index_name}"
@@ -370,7 +528,9 @@ class Collection:
         logger.info(f"Checking if {ix_coll_name} is in {schema.classes.keys()}")
         if ix_coll_name in schema.classes:
             ix_coll.delete_where()
+
         ix_coll.insert(objects_with_ix, **kwargs)
+        ix_coll.commit()
 
     def list_index_names(self) -> List[str]:
         """
@@ -405,12 +565,22 @@ class Collection:
 
         :return:
         """
-        sv = self.parent.schema_view
+        sv: SchemaView = self.parent.schema_view
         if sv:
             cls = sv.get_class(self.target_class_name)
+            if cls and not cls.attributes:
+                if not sv.class_induced_slots(cls.name):
+                    for att in self._induce_attributes():
+                        cls.attributes[att.name] = att
+                    sv.set_modified()
             return cls
         return None
 
+    def _induce_attributes(self) -> List[SlotDefinition]:
+        result = self.find({}, limit=-1)
+        cd = self.induce_class_definition_from_objects(result.rows, max_sample_size=None)
+        return list(cd.attributes.values())
+
     @property
     def identifier_attribute_name(self) -> Optional[str]:
         """
@@ -427,6 +597,37 @@ class Collection:
                 return att.name
         return None
 
+    def set_identifier_attribute_name(self, name: str):
+        """
+        Set the name of the identifier attribute for the collection.
+
+        AKA the primary key.
+
+        :param name: The name of the identifier attribute.
+        """
+        cd = self.class_definition()
+        if not cd:
+            raise ValueError(f"Cannot find class definition for {self.target_class_name}")
+        id_att = None
+        candidates = []
+        sv: SchemaView = self.parent.schema_view
+        cls = sv.get_class(cd.name)
+        existing_id_slot = sv.get_identifier_slot(cls.name)
+        if existing_id_slot:
+            if existing_id_slot.name == name:
+                return
+            existing_id_slot.identifier = False
+        for att in cls.attributes.values():
+            candidates.append(att.name)
+            if att.name == name:
+                att.identifier = True
+                id_att = att
+            else:
+                att.identifier = False
+        if not id_att:
+            raise ValueError(f"No attribute found with name {name} in {candidates}")
+        sv.set_modified()
+
     def object_identifier(self, obj: OBJECT, auto=True) -> Optional[IDENTIFIER]:
         """
         Return the identifier for an object.
@@ -457,6 +658,8 @@ class Collection:
         :param max_sample_size:
         :return:
         """
+        if not self.target_class_name:
+            raise ValueError(f"No target_class_name for {self.alias}")
         cd = ClassDefinition(self.target_class_name)
         keys = defaultdict(list)
         for obj in objs[0:max_sample_size]:
@@ -468,6 +671,8 @@ class Collection:
             for k, v in obj.items():
                 keys[k].append(v)
         for k, vs in keys.items():
+            if k == "_id":
+                continue
             multivalueds = []
             inlineds = []
             rngs = []
@@ -544,6 +749,39 @@ class Collection:
         """
         raise NotImplementedError
 
+    def apply_patches(self, patches: List[PatchDict], **kwargs):
+        """
+        Apply a patch to the collection.
+
+        Patches conform to the JSON Patch format,
+
+        :param patches:
+        :param kwargs:
+        :return:
+        """
+        all_objs = self.find(limit=-1).rows
+        primary_key = self.identifier_attribute_name
+        if not primary_key:
+            raise ValueError(f"No primary key for {self.target_class_name}")
+        new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
+        self.replace(new_objs)
+
+    def diff(self, other: "Collection", **kwargs):
+        """
+        Diff two collections.
+
+        :param other:
+        :param kwargs:
+        :return:
+        """
+        src_objs = self.find(limit=-1).rows
+        tgt_objs = other.find(limit=-1).rows
+        primary_key = self.identifier_attribute_name
+        if not primary_key:
+            raise ValueError(f"No primary key for {self.target_class_name}")
+        patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
+        return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
+
     def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
         """
         Validate the contents of the collection
@@ -563,3 +801,14 @@ class Collection:
         for obj in result.rows:
             obj = clean_empties(obj)
             yield from validator.iter_results(obj, class_name)
+
+    def commit(self):
+        """
+        Commit changes to the collection.
+
+        :return:
+        """
+        pass
+
+    def _broadcast(self, *args, **kwargs):
+        self.parent.broadcast(self, *args, **kwargs)
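The new diff() and apply_patches() methods are the largest API additions in collection.py: diff() computes JSON-Patch-style operations between two collections keyed on the identifier attribute, and apply_patches() replays such patches over the full contents of a collection and replaces it with the result. Both raise ValueError if no primary key can be resolved. A rough sketch of the intended round trip, assuming the identifier attribute can be declared with the new set_identifier_attribute_name() (the "persons_v1"/"persons_v2" aliases are illustrative):

from linkml_store import Client

client = Client()
db = client.attach_database("duckdb", alias="test")

# Two snapshots of the same collection type; v2 has one changed record.
v1 = db.create_collection("Person", alias="persons_v1")
v2 = db.create_collection("Person", alias="persons_v2")
v1.insert([{"id": "P1", "name": "John", "age_in_years": 30}])
v2.insert([{"id": "P1", "name": "John", "age_in_years": 31}])

# Both methods resolve the primary key via identifier_attribute_name;
# declaring it explicitly is assumed to be needed here.
v1.set_identifier_attribute_name("id")
v2.set_identifier_attribute_name("id")

# diff() returns JSON-Patch-style operations; apply_patches() applies them
# to every object in the collection and calls replace() with the result.
patches = v1.diff(v2)
v1.apply_patches(patches)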
linkml_store/api/config.py
CHANGED
@@ -16,7 +16,7 @@ class CollectionConfig(BaseModel):
         default=None,
         description="The type of object in the collection. TODO; use this instead of name",
     )
-
+    additional_properties: Optional[Dict] = Field(
         default=None,
         description="Optional metadata for the collection",
     )
@@ -36,6 +36,10 @@ class CollectionConfig(BaseModel):
         default=False,
         description="Whether the collection is prepopulated",
     )
+    source_location: Optional[str] = Field(
+        default=None,
+        description="Filesystem or remote URL that stores the data",
+    )
 
 
 class DatabaseConfig(BaseModel):
@@ -55,7 +59,7 @@ class DatabaseConfig(BaseModel):
         default=None,
         description="The LinkML schema as a dictionary",
     )
-    collections: Dict[str, CollectionConfig] = Field(
+    collections: Optional[Dict[str, CollectionConfig]] = Field(
         default={},
         description="A dictionary of collection configurations",
     )
|