linkml-store 0.1.6__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- {linkml_store-0.1.6 → linkml_store-0.1.8}/PKG-INFO +4 -1
- {linkml_store-0.1.6 → linkml_store-0.1.8}/README.md +2 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/pyproject.toml +4 -1
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/client.py +32 -3
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/collection.py +231 -30
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/config.py +10 -2
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/database.py +305 -19
- linkml_store-0.1.8/src/linkml_store/api/stores/chromadb/__init__.py +7 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +8 -1
- linkml_store-0.1.8/src/linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store-0.1.8/src/linkml_store/api/stores/duckdb/duckdb_collection.py +144 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/duckdb/duckdb_database.py +22 -8
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/duckdb/mappings.py +1 -0
- linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/__init__.py +16 -0
- linkml_store-0.1.6/src/linkml_store/api/stores/duckdb/duckdb_collection.py → linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_collection.py +10 -10
- linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_database.py +36 -0
- linkml_store-0.1.8/src/linkml_store/api/stores/hdf5/__init__.py +7 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +1 -1
- linkml_store-0.1.8/src/linkml_store/api/stores/mongodb/__init__.py +25 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +29 -8
- linkml_store-0.1.8/src/linkml_store/api/stores/solr/__init__.py +3 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_collection.py +2 -1
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_database.py +1 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/cli.py +64 -10
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/index/__init__.py +6 -2
- linkml_store-0.1.8/src/linkml_store/index/implementations/llm_indexer.py +122 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/index/implementations/simple_indexer.py +2 -2
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/index/indexer.py +32 -8
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/utils/format_utils.py +52 -2
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/utils/object_utils.py +9 -1
- linkml_store-0.1.6/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- linkml_store-0.1.6/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- linkml_store-0.1.6/src/linkml_store/index/implementations/__init__.py +0 -0
- linkml_store-0.1.6/src/linkml_store/index/implementations/llm_indexer.py +0 -44
- linkml_store-0.1.6/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/LICENSE +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.6/src/linkml_store/api/stores/chromadb → linkml_store-0.1.8/src/linkml_store/index/implementations}/__init__.py +0 -0
- {linkml_store-0.1.6/src/linkml_store/api/stores/duckdb → linkml_store-0.1.8/src/linkml_store/utils}/__init__.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.6 → linkml_store-0.1.8}/src/linkml_store/utils/sql_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: linkml-store
|
|
3
|
-
Version: 0.1.6
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: linkml-store
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Author 1
|
|
@@ -27,6 +27,7 @@ Requires-Dist: click
|
|
|
27
27
|
Requires-Dist: duckdb (>=0.10.1,<0.11.0)
|
|
28
28
|
Requires-Dist: duckdb-engine (>=0.11.2)
|
|
29
29
|
Requires-Dist: h5py ; extra == "h5py"
|
|
30
|
+
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
30
31
|
Requires-Dist: linkml ; extra == "validation"
|
|
31
32
|
Requires-Dist: linkml-runtime (>=1.7.5,<2.0.0)
|
|
32
33
|
Requires-Dist: linkml_map ; extra == "map"
|
|
@@ -53,3 +54,5 @@ There is also experimental support for vector-based indexing using OpenAI test e
|
|
|
53
54
|
The goals of this project are to provide high level access to data stored in heterogeneous databases,
|
|
54
55
|
with optional schema management using LinkML.
|
|
55
56
|
|
|
57
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
58
|
+
|
|
@@ -8,3 +8,5 @@ There is also experimental support for vector-based indexing using OpenAI test e
|
|
|
8
8
|
|
|
9
9
|
The goals of this project are to provide high level access to data stored in heterogeneous databases,
|
|
10
10
|
with optional schema management using LinkML.
|
|
11
|
+
|
|
12
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.1.6"
|
|
3
|
+
version = "0.1.8"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -27,6 +27,7 @@ h5py = { version="*", optional = true }
|
|
|
27
27
|
linkml = { version="*", optional = true }
|
|
28
28
|
linkml_map = { version="*", optional = true }
|
|
29
29
|
pandas = ">=2.2.1"
|
|
30
|
+
jinja2 = "^3.1.4"
|
|
30
31
|
|
|
31
32
|
[tool.poetry.group.dev.dependencies]
|
|
32
33
|
pytest = {version = ">=7.1.2"}
|
|
@@ -36,10 +37,12 @@ sphinx = {version = ">=6.1.3"}
|
|
|
36
37
|
sphinx-rtd-theme = {version = ">=1.0.0"}
|
|
37
38
|
sphinx-autodoc-typehints = {version = "<2.0.0"}
|
|
38
39
|
sphinx-click = {version = ">=4.3.0"}
|
|
40
|
+
sphinx-automodapi = "*"
|
|
39
41
|
myst-parser = {version = ">=0.18.1"}
|
|
40
42
|
furo = {version = "*"}
|
|
41
43
|
nbsphinx = "*"
|
|
42
44
|
jupyter = "*"
|
|
45
|
+
jupysql = "*"
|
|
43
46
|
|
|
44
47
|
[tool.poetry.group.tests.dependencies]
|
|
45
48
|
pytest = "^7.4.0"
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import Dict, Optional, Union
|
|
3
4
|
|
|
@@ -11,6 +12,9 @@ from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
|
11
12
|
from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
|
|
12
13
|
from linkml_store.api.stores.solr.solr_database import SolrDatabase
|
|
13
14
|
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
14
18
|
HANDLE_MAP = {
|
|
15
19
|
"duckdb": DuckDBDatabase,
|
|
16
20
|
"solr": SolrDatabase,
|
|
@@ -21,12 +25,29 @@ HANDLE_MAP = {
|
|
|
21
25
|
|
|
22
26
|
class Client:
|
|
23
27
|
"""
|
|
24
|
-
A client
|
|
28
|
+
A client is the top-level object for interacting with databases.
|
|
29
|
+
|
|
30
|
+
* A client has access to one or more :class:`.Database` objects.
|
|
31
|
+
* Each database consists of a number of :class:`.Collection` objects.
|
|
25
32
|
|
|
26
|
-
|
|
27
|
-
|
|
33
|
+
Creating a client
|
|
34
|
+
-----------------
|
|
28
35
|
>>> client = Client()
|
|
36
|
+
|
|
37
|
+
Attaching a database
|
|
38
|
+
--------------------
|
|
29
39
|
>>> db = client.attach_database("duckdb", alias="test")
|
|
40
|
+
|
|
41
|
+
Note that normally a handle would be specified by a locator such as ``duckdb:///<PATH>``, but
|
|
42
|
+
for convenience, an in-memory duckdb object can be specified without a full locator
|
|
43
|
+
|
|
44
|
+
We can check the actual handle:
|
|
45
|
+
|
|
46
|
+
>>> db.handle
|
|
47
|
+
'duckdb:///:memory:'
|
|
48
|
+
|
|
49
|
+
Creating a new collection
|
|
50
|
+
-------------------------
|
|
30
51
|
>>> collection = db.create_collection("Person")
|
|
31
52
|
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
32
53
|
>>> collection.insert(objs)
|
|
@@ -147,6 +168,8 @@ class Client:
|
|
|
147
168
|
if ":" not in handle:
|
|
148
169
|
scheme = handle
|
|
149
170
|
handle = None
|
|
171
|
+
if alias is None:
|
|
172
|
+
alias = scheme
|
|
150
173
|
else:
|
|
151
174
|
scheme, _ = handle.split(":", 1)
|
|
152
175
|
if scheme not in HANDLE_MAP:
|
|
@@ -161,6 +184,11 @@ class Client:
|
|
|
161
184
|
self._databases = {}
|
|
162
185
|
self._databases[alias] = db
|
|
163
186
|
db.parent = self
|
|
187
|
+
if db.alias:
|
|
188
|
+
if db.alias != alias:
|
|
189
|
+
raise AssertionError(f"Inconsistent alias: {db.alias} != {alias}")
|
|
190
|
+
else:
|
|
191
|
+
db.metadata.alias = alias
|
|
164
192
|
return db
|
|
165
193
|
|
|
166
194
|
def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
|
|
@@ -191,6 +219,7 @@ class Client:
|
|
|
191
219
|
self._databases = {}
|
|
192
220
|
if name not in self._databases:
|
|
193
221
|
if create_if_not_exists:
|
|
222
|
+
logger.info(f"Creating database: {name}")
|
|
194
223
|
self.attach_database(name, **kwargs)
|
|
195
224
|
else:
|
|
196
225
|
raise ValueError(f"Database {name} does not exist")
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""A structure for representing collections of similar objects."""
|
|
2
|
+
|
|
1
3
|
import hashlib
|
|
2
4
|
import logging
|
|
3
5
|
from collections import defaultdict
|
|
@@ -10,6 +12,8 @@ from linkml_runtime.linkml_model.meta import ArrayExpression
|
|
|
10
12
|
from pydantic import BaseModel
|
|
11
13
|
|
|
12
14
|
from linkml_store.index import get_indexer
|
|
15
|
+
from linkml_store.utils.format_utils import load_objects
|
|
16
|
+
from linkml_store.utils.object_utils import clean_empties
|
|
13
17
|
|
|
14
18
|
try:
|
|
15
19
|
from linkml.validator.report import ValidationResult
|
|
@@ -38,7 +42,17 @@ class Collection:
|
|
|
38
42
|
|
|
39
43
|
- For relational databases, a collection is typically a table
|
|
40
44
|
- For document databases such as MongoDB, a collection is the native type
|
|
41
|
-
- For a file system, a collection could be a single tabular file such as Parquet or CSV
|
|
45
|
+
- For a file system, a collection could be a single tabular file such as Parquet or CSV.
|
|
46
|
+
|
|
47
|
+
Collection objects are typically not created directly - instead they are generated
|
|
48
|
+
from a parent :class:`.Database` object:
|
|
49
|
+
|
|
50
|
+
>>> from linkml_store import Client
|
|
51
|
+
>>> client = Client()
|
|
52
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
53
|
+
>>> collection = db.create_collection("Person")
|
|
54
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
55
|
+
>>> collection.insert(objs)
|
|
42
56
|
"""
|
|
43
57
|
|
|
44
58
|
# name: str
|
|
@@ -56,25 +70,51 @@ class Collection:
|
|
|
56
70
|
self.metadata = metadata
|
|
57
71
|
else:
|
|
58
72
|
self.metadata = CollectionConfig(name=name, **kwargs)
|
|
59
|
-
|
|
60
|
-
|
|
73
|
+
if not self.metadata.alias:
|
|
74
|
+
self.metadata.alias = name
|
|
75
|
+
if not self.metadata.type:
|
|
76
|
+
self.metadata.type = name
|
|
77
|
+
# if name is not None and self.metadata.name is not None and name != self.metadata.name:
|
|
78
|
+
# raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
|
|
61
79
|
|
|
62
80
|
@property
|
|
63
81
|
def name(self) -> str:
|
|
82
|
+
"""
|
|
83
|
+
Return the name of the collection.
|
|
84
|
+
|
|
85
|
+
TODO: deprecate in favor of Type
|
|
86
|
+
|
|
87
|
+
:return: name of the collection
|
|
88
|
+
"""
|
|
64
89
|
return self.metadata.name
|
|
65
90
|
|
|
66
91
|
@property
|
|
67
92
|
def hidden(self) -> bool:
|
|
68
|
-
|
|
93
|
+
"""
|
|
94
|
+
True if the collection is hidden.
|
|
95
|
+
|
|
96
|
+
An example of a hidden collection is a collection that indexes another
|
|
97
|
+
collection
|
|
98
|
+
|
|
99
|
+
:return: True if the collection is hidden
|
|
100
|
+
"""
|
|
101
|
+
# return self.metadata.hidden
|
|
69
102
|
|
|
70
103
|
@property
|
|
71
|
-
def
|
|
104
|
+
def target_class_name(self):
|
|
72
105
|
"""
|
|
73
106
|
Return the name of the class that this collection represents
|
|
74
107
|
|
|
75
108
|
This MUST be a LinkML class name
|
|
76
109
|
|
|
77
|
-
|
|
110
|
+
>>> from linkml_store import Client
|
|
111
|
+
>>> client = Client()
|
|
112
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
113
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
114
|
+
>>> collection.target_class_name
|
|
115
|
+
'Person'
|
|
116
|
+
|
|
117
|
+
:return: name of the class which members of this collection instantiate
|
|
78
118
|
"""
|
|
79
119
|
# TODO: this is a shim layer until we can normalize on this
|
|
80
120
|
if self.metadata.type:
|
|
@@ -82,7 +122,7 @@ class Collection:
|
|
|
82
122
|
return self.name
|
|
83
123
|
|
|
84
124
|
@property
|
|
85
|
-
def
|
|
125
|
+
def alias(self):
|
|
86
126
|
"""
|
|
87
127
|
Return the primary name/alias used for the collection.
|
|
88
128
|
|
|
@@ -90,15 +130,34 @@ class Collection:
|
|
|
90
130
|
to have an alias, for example "persons" which collects all instances
|
|
91
131
|
of class Person.
|
|
92
132
|
|
|
93
|
-
|
|
133
|
+
>>> from linkml_store import Client
|
|
134
|
+
>>> client = Client()
|
|
135
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
136
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
137
|
+
>>> collection.alias
|
|
138
|
+
'persons'
|
|
139
|
+
|
|
140
|
+
If no explicit alias is provided, then the target class name is used:
|
|
141
|
+
|
|
142
|
+
>>> from linkml_store import Client
|
|
143
|
+
>>> client = Client()
|
|
144
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
145
|
+
>>> collection = db.create_collection("Person")
|
|
146
|
+
>>> collection.alias
|
|
147
|
+
'Person'
|
|
148
|
+
|
|
149
|
+
The alias SHOULD be used for Table names in SQL.
|
|
94
150
|
|
|
95
151
|
For nested data, the alias SHOULD be used as the key; e.g
|
|
96
152
|
|
|
97
|
-
|
|
153
|
+
.. code-block:: json
|
|
154
|
+
|
|
155
|
+
{ "persons": [ { "name": "Alice" }, { "name": "Bob" } ] }
|
|
98
156
|
|
|
99
157
|
:return:
|
|
100
158
|
"""
|
|
101
159
|
# TODO: this is a shim layer until we can normalize on this
|
|
160
|
+
# TODO: this is a shim layer until we can normalize on this
|
|
102
161
|
if self.metadata.alias:
|
|
103
162
|
return self.metadata.alias
|
|
104
163
|
return self.name
|
|
@@ -107,6 +166,13 @@ class Collection:
|
|
|
107
166
|
"""
|
|
108
167
|
Replace entire collection with objects.
|
|
109
168
|
|
|
169
|
+
>>> from linkml_store import Client
|
|
170
|
+
>>> client = Client()
|
|
171
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
172
|
+
>>> collection = db.create_collection("Person")
|
|
173
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
174
|
+
>>> collection.insert(objs)
|
|
175
|
+
|
|
110
176
|
:param objs:
|
|
111
177
|
:param kwargs:
|
|
112
178
|
:return:
|
|
@@ -116,7 +182,14 @@ class Collection:
|
|
|
116
182
|
|
|
117
183
|
def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
118
184
|
"""
|
|
119
|
-
Add one or more objects to the collection
|
|
185
|
+
Add one or more objects to the collection.
|
|
186
|
+
|
|
187
|
+
>>> from linkml_store import Client
|
|
188
|
+
>>> client = Client()
|
|
189
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
190
|
+
>>> collection = db.create_collection("Person")
|
|
191
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
192
|
+
>>> collection.insert(objs)
|
|
120
193
|
|
|
121
194
|
:param objs:
|
|
122
195
|
:param kwargs:
|
|
@@ -124,9 +197,32 @@ class Collection:
|
|
|
124
197
|
"""
|
|
125
198
|
raise NotImplementedError
|
|
126
199
|
|
|
127
|
-
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
|
|
200
|
+
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
128
201
|
"""
|
|
129
|
-
Delete one or more objects from the collection
|
|
202
|
+
Delete one or more objects from the collection.
|
|
203
|
+
|
|
204
|
+
First let's set up a collection:
|
|
205
|
+
|
|
206
|
+
>>> from linkml_store import Client
|
|
207
|
+
>>> client = Client()
|
|
208
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
209
|
+
>>> collection = db.create_collection("Person")
|
|
210
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
211
|
+
>>> collection.insert(objs)
|
|
212
|
+
>>> collection.find({}).num_rows
|
|
213
|
+
2
|
|
214
|
+
|
|
215
|
+
Now let's delete an object:
|
|
216
|
+
|
|
217
|
+
>>> collection.delete(objs[0])
|
|
218
|
+
>>> collection.find({}).num_rows
|
|
219
|
+
1
|
|
220
|
+
|
|
221
|
+
Deleting the same object again should have no effect:
|
|
222
|
+
|
|
223
|
+
>>> collection.delete(objs[0])
|
|
224
|
+
>>> collection.find({}).num_rows
|
|
225
|
+
1
|
|
130
226
|
|
|
131
227
|
:param objs:
|
|
132
228
|
:param kwargs:
|
|
@@ -134,9 +230,30 @@ class Collection:
|
|
|
134
230
|
"""
|
|
135
231
|
raise NotImplementedError
|
|
136
232
|
|
|
137
|
-
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
|
|
233
|
+
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
|
|
138
234
|
"""
|
|
139
|
-
Delete objects that match a query
|
|
235
|
+
Delete objects that match a query.
|
|
236
|
+
|
|
237
|
+
First let's set up a collection:
|
|
238
|
+
|
|
239
|
+
>>> from linkml_store import Client
|
|
240
|
+
>>> client = Client()
|
|
241
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
242
|
+
>>> collection = db.create_collection("Person")
|
|
243
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
244
|
+
>>> collection.insert(objs)
|
|
245
|
+
|
|
246
|
+
Now let's delete an object:
|
|
247
|
+
|
|
248
|
+
>>> collection.delete_where({"id": "P1"})
|
|
249
|
+
>>> collection.find({}).num_rows
|
|
250
|
+
1
|
|
251
|
+
|
|
252
|
+
Match everything:
|
|
253
|
+
|
|
254
|
+
>>> collection.delete_where({})
|
|
255
|
+
>>> collection.find({}).num_rows
|
|
256
|
+
0
|
|
140
257
|
|
|
141
258
|
:param where: where conditions
|
|
142
259
|
:param missing_ok: if True, do not raise an error if the collection does not exist
|
|
@@ -147,7 +264,7 @@ class Collection:
|
|
|
147
264
|
|
|
148
265
|
def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
149
266
|
"""
|
|
150
|
-
Update one or more objects in the collection
|
|
267
|
+
Update one or more objects in the collection.
|
|
151
268
|
|
|
152
269
|
:param objs:
|
|
153
270
|
:param kwargs:
|
|
@@ -156,11 +273,25 @@ class Collection:
|
|
|
156
273
|
raise NotImplementedError
|
|
157
274
|
|
|
158
275
|
def _create_query(self, **kwargs) -> Query:
|
|
159
|
-
return Query(from_table=self.
|
|
276
|
+
return Query(from_table=self.alias, **kwargs)
|
|
160
277
|
|
|
161
278
|
def query(self, query: Query, **kwargs) -> QueryResult:
|
|
162
279
|
"""
|
|
163
|
-
Run a query against the collection
|
|
280
|
+
Run a query against the collection.
|
|
281
|
+
|
|
282
|
+
First let's load a collection:
|
|
283
|
+
|
|
284
|
+
>>> from linkml_store import Client
|
|
285
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
286
|
+
>>> client = Client()
|
|
287
|
+
>>> db = client.attach_database("duckdb")
|
|
288
|
+
>>> collection = db.create_collection("Country")
|
|
289
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
290
|
+
>>> collection.insert(objs)
|
|
291
|
+
|
|
292
|
+
Now let's run a query:
|
|
293
|
+
|
|
294
|
+
TODO
|
|
164
295
|
|
|
165
296
|
:param query:
|
|
166
297
|
:param kwargs:
|
|
@@ -193,7 +324,7 @@ class Collection:
|
|
|
193
324
|
"""
|
|
194
325
|
raise NotImplementedError
|
|
195
326
|
|
|
196
|
-
def get(self, ids: Optional[IDENTIFIER], **kwargs) -> QueryResult:
|
|
327
|
+
def get(self, ids: Optional[List[IDENTIFIER]], **kwargs) -> QueryResult:
|
|
197
328
|
"""
|
|
198
329
|
Get one or more objects by ID.
|
|
199
330
|
|
|
@@ -201,14 +332,60 @@ class Collection:
|
|
|
201
332
|
:param kwargs:
|
|
202
333
|
:return:
|
|
203
334
|
"""
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
335
|
+
# TODO
|
|
336
|
+
id_field = self.identifier_attribute_name
|
|
337
|
+
if not id_field:
|
|
338
|
+
raise ValueError(f"No identifier for {self.name}")
|
|
339
|
+
return self.find({id_field: ids})
|
|
340
|
+
|
|
341
|
+
def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
|
|
342
|
+
"""
|
|
343
|
+
Get one object by ID.
|
|
344
|
+
|
|
345
|
+
:param id:
|
|
346
|
+
:param kwargs:
|
|
347
|
+
:return:
|
|
348
|
+
"""
|
|
349
|
+
if not id:
|
|
350
|
+
raise ValueError("Must pass an ID")
|
|
351
|
+
id_field = self.identifier_attribute_name
|
|
352
|
+
if not id_field:
|
|
353
|
+
raise ValueError(f"No identifier for {self.name}")
|
|
354
|
+
w = {id_field: id}
|
|
355
|
+
qr = self.find(w)
|
|
356
|
+
if qr.num_rows == 1:
|
|
357
|
+
return qr.rows[0]
|
|
358
|
+
return None
|
|
207
359
|
|
|
208
360
|
def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
|
|
209
361
|
"""
|
|
210
362
|
Find objects in the collection using a where query.
|
|
211
363
|
|
|
364
|
+
As an example, first load a collection:
|
|
365
|
+
|
|
366
|
+
>>> from linkml_store import Client
|
|
367
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
368
|
+
>>> client = Client()
|
|
369
|
+
>>> db = client.attach_database("duckdb")
|
|
370
|
+
>>> collection = db.create_collection("Country")
|
|
371
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
372
|
+
>>> collection.insert(objs)
|
|
373
|
+
|
|
374
|
+
Now let's find all objects:
|
|
375
|
+
|
|
376
|
+
>>> qr = collection.find({})
|
|
377
|
+
>>> qr.num_rows
|
|
378
|
+
20
|
|
379
|
+
|
|
380
|
+
We can do a more restrictive query:
|
|
381
|
+
|
|
382
|
+
>>> qr = collection.find({"code": "FR"})
|
|
383
|
+
>>> qr.num_rows
|
|
384
|
+
1
|
|
385
|
+
>>> qr.rows[0]["name"]
|
|
386
|
+
'France'
|
|
387
|
+
|
|
388
|
+
|
|
212
389
|
:param where:
|
|
213
390
|
:param kwargs:
|
|
214
391
|
:return:
|
|
@@ -216,6 +393,18 @@ class Collection:
|
|
|
216
393
|
query = self._create_query(where_clause=where)
|
|
217
394
|
return self.query(query, **kwargs)
|
|
218
395
|
|
|
396
|
+
def find_iter(self, where: Optional[Any] = None, **kwargs) -> Iterator[OBJECT]:
|
|
397
|
+
"""
|
|
398
|
+
Find objects in the collection using a where query.
|
|
399
|
+
|
|
400
|
+
:param where:
|
|
401
|
+
:param kwargs:
|
|
402
|
+
:return:
|
|
403
|
+
"""
|
|
404
|
+
qr = self.find(where=where, limit=-1, **kwargs)
|
|
405
|
+
for row in qr.rows:
|
|
406
|
+
yield row
|
|
407
|
+
|
|
219
408
|
def search(
|
|
220
409
|
self,
|
|
221
410
|
query: str,
|
|
@@ -245,6 +434,7 @@ class Collection:
|
|
|
245
434
|
raise ValueError(f"No index named {index_name}")
|
|
246
435
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
247
436
|
index_col = ix.index_field
|
|
437
|
+
# TODO: optimize this for large indexes
|
|
248
438
|
vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
|
|
249
439
|
results = ix.search(query, vector_pairs, limit=limit)
|
|
250
440
|
for r in results:
|
|
@@ -260,11 +450,15 @@ class Collection:
|
|
|
260
450
|
|
|
261
451
|
:return:
|
|
262
452
|
"""
|
|
263
|
-
if not self.
|
|
264
|
-
raise ValueError(f"Collection has no
|
|
265
|
-
return self.
|
|
453
|
+
if not self.alias:
|
|
454
|
+
raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
|
|
455
|
+
return self.alias.startswith("internal__")
|
|
266
456
|
|
|
267
|
-
def
|
|
457
|
+
def load_from_source(self):
|
|
458
|
+
objects = load_objects(self.metadata.source_location)
|
|
459
|
+
self.insert(objects)
|
|
460
|
+
|
|
461
|
+
def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
|
|
268
462
|
"""
|
|
269
463
|
Attach an index to the collection.
|
|
270
464
|
|
|
@@ -288,6 +482,7 @@ class Collection:
|
|
|
288
482
|
self._indexers[index_name] = index
|
|
289
483
|
if auto_index:
|
|
290
484
|
all_objs = self.find(limit=-1).rows
|
|
485
|
+
logger.info(f"Auto-indexing {len(all_objs)} objects")
|
|
291
486
|
self.index_objects(all_objs, index_name, replace=True, **kwargs)
|
|
292
487
|
|
|
293
488
|
def _index_collection_name(self, index_name: str) -> str:
|
|
@@ -295,6 +490,7 @@ class Collection:
|
|
|
295
490
|
Create a name for a special collection that holds index data
|
|
296
491
|
|
|
297
492
|
:param index_name:
|
|
493
|
+
:param indexer:
|
|
298
494
|
:return:
|
|
299
495
|
"""
|
|
300
496
|
return f"internal__index__{self.name}__{index_name}"
|
|
@@ -325,6 +521,7 @@ class Collection:
|
|
|
325
521
|
logger.info(f"Checking if {ix_coll_name} is in {schema.classes.keys()}")
|
|
326
522
|
if ix_coll_name in schema.classes:
|
|
327
523
|
ix_coll.delete_where()
|
|
524
|
+
|
|
328
525
|
ix_coll.insert(objects_with_ix, **kwargs)
|
|
329
526
|
|
|
330
527
|
def list_index_names(self) -> List[str]:
|
|
@@ -362,10 +559,11 @@ class Collection:
|
|
|
362
559
|
"""
|
|
363
560
|
sv = self.parent.schema_view
|
|
364
561
|
if sv:
|
|
365
|
-
cls = sv.get_class(self.
|
|
562
|
+
cls = sv.get_class(self.target_class_name)
|
|
366
563
|
return cls
|
|
367
564
|
return None
|
|
368
565
|
|
|
566
|
+
@property
|
|
369
567
|
def identifier_attribute_name(self) -> Optional[str]:
|
|
370
568
|
"""
|
|
371
569
|
Return the name of the identifier attribute for the collection.
|
|
@@ -376,7 +574,7 @@ class Collection:
|
|
|
376
574
|
"""
|
|
377
575
|
cd = self.class_definition()
|
|
378
576
|
if cd:
|
|
379
|
-
for att in
|
|
577
|
+
for att in self.parent.schema_view.class_induced_slots(cd.name):
|
|
380
578
|
if att.identifier:
|
|
381
579
|
return att.name
|
|
382
580
|
return None
|
|
@@ -411,7 +609,9 @@ class Collection:
|
|
|
411
609
|
:param max_sample_size:
|
|
412
610
|
:return:
|
|
413
611
|
"""
|
|
414
|
-
|
|
612
|
+
if not self.target_class_name:
|
|
613
|
+
raise ValueError(f"No target_class_name for {self.alias}")
|
|
614
|
+
cd = ClassDefinition(self.target_class_name)
|
|
415
615
|
keys = defaultdict(list)
|
|
416
616
|
for obj in objs[0:max_sample_size]:
|
|
417
617
|
if isinstance(obj, BaseModel):
|
|
@@ -474,7 +674,7 @@ class Collection:
|
|
|
474
674
|
array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
|
|
475
675
|
cd.attributes[k].array = array_expr
|
|
476
676
|
sv = self.parent.schema_view
|
|
477
|
-
sv.schema.classes[self.
|
|
677
|
+
sv.schema.classes[self.target_class_name] = cd
|
|
478
678
|
sv.set_modified()
|
|
479
679
|
return cd
|
|
480
680
|
|
|
@@ -511,8 +711,9 @@ class Collection:
|
|
|
511
711
|
validator = Validator(self.parent.schema_view.schema, validation_plugins=validation_plugins)
|
|
512
712
|
cd = self.class_definition()
|
|
513
713
|
if not cd:
|
|
514
|
-
raise ValueError(f"Cannot find class definition for {self.
|
|
714
|
+
raise ValueError(f"Cannot find class definition for {self.target_class_name}")
|
|
515
715
|
class_name = cd.name
|
|
516
716
|
result = self.find(**kwargs)
|
|
517
717
|
for obj in result.rows:
|
|
718
|
+
obj = clean_empties(obj)
|
|
518
719
|
yield from validator.iter_results(obj, class_name)
|
|
@@ -16,7 +16,7 @@ class CollectionConfig(BaseModel):
|
|
|
16
16
|
default=None,
|
|
17
17
|
description="The type of object in the collection. TODO; use this instead of name",
|
|
18
18
|
)
|
|
19
|
-
|
|
19
|
+
additional_properties: Optional[Dict] = Field(
|
|
20
20
|
default=None,
|
|
21
21
|
description="Optional metadata for the collection",
|
|
22
22
|
)
|
|
@@ -36,6 +36,10 @@ class CollectionConfig(BaseModel):
|
|
|
36
36
|
default=False,
|
|
37
37
|
description="Whether the collection is prepopulated",
|
|
38
38
|
)
|
|
39
|
+
source_location: Optional[str] = Field(
|
|
40
|
+
default=None,
|
|
41
|
+
description="Filesystem or remote URL that stores the data",
|
|
42
|
+
)
|
|
39
43
|
|
|
40
44
|
|
|
41
45
|
class DatabaseConfig(BaseModel):
|
|
@@ -55,7 +59,7 @@ class DatabaseConfig(BaseModel):
|
|
|
55
59
|
default=None,
|
|
56
60
|
description="The LinkML schema as a dictionary",
|
|
57
61
|
)
|
|
58
|
-
collections: Dict[str, CollectionConfig] = Field(
|
|
62
|
+
collections: Optional[Dict[str, CollectionConfig]] = Field(
|
|
59
63
|
default={},
|
|
60
64
|
description="A dictionary of collection configurations",
|
|
61
65
|
)
|
|
@@ -76,6 +80,10 @@ class DatabaseConfig(BaseModel):
|
|
|
76
80
|
default=None,
|
|
77
81
|
description="Optional configuration for search fields",
|
|
78
82
|
)
|
|
83
|
+
ensure_referential_integrity: bool = Field(
|
|
84
|
+
default=False,
|
|
85
|
+
description="Whether to ensure referential integrity",
|
|
86
|
+
)
|
|
79
87
|
|
|
80
88
|
|
|
81
89
|
class ClientConfig(BaseModel):
|