linkml-store 0.1.7__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- {linkml_store-0.1.7 → linkml_store-0.1.8}/PKG-INFO +4 -1
- {linkml_store-0.1.7 → linkml_store-0.1.8}/README.md +2 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/pyproject.toml +3 -1
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/client.py +30 -5
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/collection.py +175 -21
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/config.py +6 -2
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/database.py +230 -18
- linkml_store-0.1.8/src/linkml_store/api/stores/chromadb/__init__.py +7 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/duckdb/__init__.py +9 -0
- linkml_store-0.1.8/src/linkml_store/api/stores/duckdb/duckdb_collection.py +144 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/duckdb/duckdb_database.py +19 -5
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/duckdb/mappings.py +1 -0
- linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/__init__.py +16 -0
- linkml_store-0.1.7/src/linkml_store/api/stores/duckdb/duckdb_collection.py → linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_collection.py +5 -5
- linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_database.py +36 -0
- linkml_store-0.1.8/src/linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store-0.1.8/src/linkml_store/api/stores/mongodb/__init__.py +25 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +21 -6
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/cli.py +64 -10
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/index/__init__.py +6 -2
- linkml_store-0.1.8/src/linkml_store/index/implementations/llm_indexer.py +122 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/index/implementations/simple_indexer.py +2 -2
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/index/indexer.py +32 -8
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/utils/format_utils.py +52 -2
- linkml_store-0.1.7/src/linkml_store/api/stores/chromadb/__init__.py +0 -3
- linkml_store-0.1.7/src/linkml_store/index/implementations/__init__.py +0 -0
- linkml_store-0.1.7/src/linkml_store/index/implementations/llm_indexer.py +0 -44
- linkml_store-0.1.7/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/LICENSE +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.7/src/linkml_store/api/stores/hdf5 → linkml_store-0.1.8/src/linkml_store/index/implementations}/__init__.py +0 -0
- {linkml_store-0.1.7/src/linkml_store/api/stores/mongodb → linkml_store-0.1.8/src/linkml_store/utils}/__init__.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/utils/object_utils.py +0 -0
- {linkml_store-0.1.7 → linkml_store-0.1.8}/src/linkml_store/utils/sql_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: linkml-store
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: linkml-store
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Author 1
|
|
@@ -27,6 +27,7 @@ Requires-Dist: click
|
|
|
27
27
|
Requires-Dist: duckdb (>=0.10.1,<0.11.0)
|
|
28
28
|
Requires-Dist: duckdb-engine (>=0.11.2)
|
|
29
29
|
Requires-Dist: h5py ; extra == "h5py"
|
|
30
|
+
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
30
31
|
Requires-Dist: linkml ; extra == "validation"
|
|
31
32
|
Requires-Dist: linkml-runtime (>=1.7.5,<2.0.0)
|
|
32
33
|
Requires-Dist: linkml_map ; extra == "map"
|
|
@@ -53,3 +54,5 @@ There is also experimental support for vector-based indexing using OpenAI test e
|
|
|
53
54
|
The goals of this project are to provide high level access to data stored in heterogeneous databases,
|
|
54
55
|
with optional schema management using LinkML.
|
|
55
56
|
|
|
57
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
58
|
+
|
|
@@ -8,3 +8,5 @@ There is also experimental support for vector-based indexing using OpenAI test e
|
|
|
8
8
|
|
|
9
9
|
The goals of this project are to provide high level access to data stored in heterogeneous databases,
|
|
10
10
|
with optional schema management using LinkML.
|
|
11
|
+
|
|
12
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.1.7"
|
|
3
|
+
version = "0.1.8"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -27,6 +27,7 @@ h5py = { version="*", optional = true }
|
|
|
27
27
|
linkml = { version="*", optional = true }
|
|
28
28
|
linkml_map = { version="*", optional = true }
|
|
29
29
|
pandas = ">=2.2.1"
|
|
30
|
+
jinja2 = "^3.1.4"
|
|
30
31
|
|
|
31
32
|
[tool.poetry.group.dev.dependencies]
|
|
32
33
|
pytest = {version = ">=7.1.2"}
|
|
@@ -41,6 +42,7 @@ myst-parser = {version = ">=0.18.1"}
|
|
|
41
42
|
furo = {version = "*"}
|
|
42
43
|
nbsphinx = "*"
|
|
43
44
|
jupyter = "*"
|
|
45
|
+
jupysql = "*"
|
|
44
46
|
|
|
45
47
|
[tool.poetry.group.tests.dependencies]
|
|
46
48
|
pytest = "^7.4.0"
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import Dict, Optional, Union
|
|
3
4
|
|
|
@@ -11,6 +12,9 @@ from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
|
11
12
|
from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
|
|
12
13
|
from linkml_store.api.stores.solr.solr_database import SolrDatabase
|
|
13
14
|
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
14
18
|
HANDLE_MAP = {
|
|
15
19
|
"duckdb": DuckDBDatabase,
|
|
16
20
|
"solr": SolrDatabase,
|
|
@@ -23,14 +27,27 @@ class Client:
|
|
|
23
27
|
"""
|
|
24
28
|
A client is the top-level object for interacting with databases.
|
|
25
29
|
|
|
26
|
-
A client has access to one or more :class
|
|
27
|
-
|
|
28
|
-
Each database consists of a number of :class:`.Collection` objects.
|
|
30
|
+
* A client has access to one or more :class:`.Database` objects.
|
|
31
|
+
* Each database consists of a number of :class:`.Collection` objects.
|
|
29
32
|
|
|
30
|
-
|
|
31
|
-
|
|
33
|
+
Creating a client
|
|
34
|
+
-----------------
|
|
32
35
|
>>> client = Client()
|
|
36
|
+
|
|
37
|
+
Attaching a database
|
|
38
|
+
--------------------
|
|
33
39
|
>>> db = client.attach_database("duckdb", alias="test")
|
|
40
|
+
|
|
41
|
+
Note that normally a handle would be specified by a locator such as ``duckdb:///<PATH>``, but
|
|
42
|
+
for convenience, an in-memory duckdb object can be specified without a full locator
|
|
43
|
+
|
|
44
|
+
We can check the actual handle:
|
|
45
|
+
|
|
46
|
+
>>> db.handle
|
|
47
|
+
'duckdb:///:memory:'
|
|
48
|
+
|
|
49
|
+
Creating a new collection
|
|
50
|
+
-------------------------
|
|
34
51
|
>>> collection = db.create_collection("Person")
|
|
35
52
|
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
36
53
|
>>> collection.insert(objs)
|
|
@@ -151,6 +168,8 @@ class Client:
|
|
|
151
168
|
if ":" not in handle:
|
|
152
169
|
scheme = handle
|
|
153
170
|
handle = None
|
|
171
|
+
if alias is None:
|
|
172
|
+
alias = scheme
|
|
154
173
|
else:
|
|
155
174
|
scheme, _ = handle.split(":", 1)
|
|
156
175
|
if scheme not in HANDLE_MAP:
|
|
@@ -165,6 +184,11 @@ class Client:
|
|
|
165
184
|
self._databases = {}
|
|
166
185
|
self._databases[alias] = db
|
|
167
186
|
db.parent = self
|
|
187
|
+
if db.alias:
|
|
188
|
+
if db.alias != alias:
|
|
189
|
+
raise AssertionError(f"Inconsistent alias: {db.alias} != {alias}")
|
|
190
|
+
else:
|
|
191
|
+
db.metadata.alias = alias
|
|
168
192
|
return db
|
|
169
193
|
|
|
170
194
|
def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
|
|
@@ -195,6 +219,7 @@ class Client:
|
|
|
195
219
|
self._databases = {}
|
|
196
220
|
if name not in self._databases:
|
|
197
221
|
if create_if_not_exists:
|
|
222
|
+
logger.info(f"Creating database: {name}")
|
|
198
223
|
self.attach_database(name, **kwargs)
|
|
199
224
|
else:
|
|
200
225
|
raise ValueError(f"Database {name} does not exist")
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""A structure for representing collections of similar objects."""
|
|
2
|
+
|
|
1
3
|
import hashlib
|
|
2
4
|
import logging
|
|
3
5
|
from collections import defaultdict
|
|
@@ -10,6 +12,7 @@ from linkml_runtime.linkml_model.meta import ArrayExpression
|
|
|
10
12
|
from pydantic import BaseModel
|
|
11
13
|
|
|
12
14
|
from linkml_store.index import get_indexer
|
|
15
|
+
from linkml_store.utils.format_utils import load_objects
|
|
13
16
|
from linkml_store.utils.object_utils import clean_empties
|
|
14
17
|
|
|
15
18
|
try:
|
|
@@ -39,7 +42,17 @@ class Collection:
|
|
|
39
42
|
|
|
40
43
|
- For relational databases, a collection is typically a table
|
|
41
44
|
- For document databases such as MongoDB, a collection is the native type
|
|
42
|
-
- For a file system, a collection could be a single tabular file such as Parquet or CSV
|
|
45
|
+
- For a file system, a collection could be a single tabular file such as Parquet or CSV.
|
|
46
|
+
|
|
47
|
+
Collection objects are typically not created directly - instead they are generated
|
|
48
|
+
from a parent :class:`.Database` object:
|
|
49
|
+
|
|
50
|
+
>>> from linkml_store import Client
|
|
51
|
+
>>> client = Client()
|
|
52
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
53
|
+
>>> collection = db.create_collection("Person")
|
|
54
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
55
|
+
>>> collection.insert(objs)
|
|
43
56
|
"""
|
|
44
57
|
|
|
45
58
|
# name: str
|
|
@@ -57,15 +70,21 @@ class Collection:
|
|
|
57
70
|
self.metadata = metadata
|
|
58
71
|
else:
|
|
59
72
|
self.metadata = CollectionConfig(name=name, **kwargs)
|
|
60
|
-
|
|
61
|
-
|
|
73
|
+
if not self.metadata.alias:
|
|
74
|
+
self.metadata.alias = name
|
|
75
|
+
if not self.metadata.type:
|
|
76
|
+
self.metadata.type = name
|
|
77
|
+
# if name is not None and self.metadata.name is not None and name != self.metadata.name:
|
|
78
|
+
# raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
|
|
62
79
|
|
|
63
80
|
@property
|
|
64
81
|
def name(self) -> str:
|
|
65
82
|
"""
|
|
66
|
-
Return the name of the collection
|
|
83
|
+
Return the name of the collection.
|
|
67
84
|
|
|
68
|
-
:
|
|
85
|
+
TODO: deprecate in favor of Type
|
|
86
|
+
|
|
87
|
+
:return: name of the collection
|
|
69
88
|
"""
|
|
70
89
|
return self.metadata.name
|
|
71
90
|
|
|
@@ -79,7 +98,7 @@ class Collection:
|
|
|
79
98
|
|
|
80
99
|
:return: True if the collection is hidden
|
|
81
100
|
"""
|
|
82
|
-
return self.metadata.hidden
|
|
101
|
+
# return self.metadata.hidden
|
|
83
102
|
|
|
84
103
|
@property
|
|
85
104
|
def target_class_name(self):
|
|
@@ -88,7 +107,14 @@ class Collection:
|
|
|
88
107
|
|
|
89
108
|
This MUST be a LinkML class name
|
|
90
109
|
|
|
91
|
-
|
|
110
|
+
>>> from linkml_store import Client
|
|
111
|
+
>>> client = Client()
|
|
112
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
113
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
114
|
+
>>> collection.target_class_name
|
|
115
|
+
'Person'
|
|
116
|
+
|
|
117
|
+
:return: name of the class which members of this collection instantiate
|
|
92
118
|
"""
|
|
93
119
|
# TODO: this is a shim layer until we can normalize on this
|
|
94
120
|
if self.metadata.type:
|
|
@@ -104,15 +130,34 @@ class Collection:
|
|
|
104
130
|
to have an alias, for example "persons" which collects all instances
|
|
105
131
|
of class Person.
|
|
106
132
|
|
|
107
|
-
|
|
133
|
+
>>> from linkml_store import Client
|
|
134
|
+
>>> client = Client()
|
|
135
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
136
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
137
|
+
>>> collection.alias
|
|
138
|
+
'persons'
|
|
139
|
+
|
|
140
|
+
If no explicit alias is provided, then the target class name is used:
|
|
141
|
+
|
|
142
|
+
>>> from linkml_store import Client
|
|
143
|
+
>>> client = Client()
|
|
144
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
145
|
+
>>> collection = db.create_collection("Person")
|
|
146
|
+
>>> collection.alias
|
|
147
|
+
'Person'
|
|
148
|
+
|
|
149
|
+
The alias SHOULD be used for Table names in SQL.
|
|
108
150
|
|
|
109
151
|
For nested data, the alias SHOULD be used as the key; e.g
|
|
110
152
|
|
|
111
|
-
|
|
153
|
+
.. code-block:: json
|
|
154
|
+
|
|
155
|
+
{ "persons": [ { "name": "Alice" }, { "name": "Bob" } ] }
|
|
112
156
|
|
|
113
157
|
:return:
|
|
114
158
|
"""
|
|
115
159
|
# TODO: this is a shim layer until we can normalize on this
|
|
160
|
+
# TODO: this is a shim layer until we can normalize on this
|
|
116
161
|
if self.metadata.alias:
|
|
117
162
|
return self.metadata.alias
|
|
118
163
|
return self.name
|
|
@@ -121,6 +166,13 @@ class Collection:
|
|
|
121
166
|
"""
|
|
122
167
|
Replace entire collection with objects.
|
|
123
168
|
|
|
169
|
+
>>> from linkml_store import Client
|
|
170
|
+
>>> client = Client()
|
|
171
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
172
|
+
>>> collection = db.create_collection("Person")
|
|
173
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
174
|
+
>>> collection.insert(objs)
|
|
175
|
+
|
|
124
176
|
:param objs:
|
|
125
177
|
:param kwargs:
|
|
126
178
|
:return:
|
|
@@ -130,7 +182,14 @@ class Collection:
|
|
|
130
182
|
|
|
131
183
|
def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
132
184
|
"""
|
|
133
|
-
Add one or more objects to the collection
|
|
185
|
+
Add one or more objects to the collection.
|
|
186
|
+
|
|
187
|
+
>>> from linkml_store import Client
|
|
188
|
+
>>> client = Client()
|
|
189
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
190
|
+
>>> collection = db.create_collection("Person")
|
|
191
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
192
|
+
>>> collection.insert(objs)
|
|
134
193
|
|
|
135
194
|
:param objs:
|
|
136
195
|
:param kwargs:
|
|
@@ -138,9 +197,32 @@ class Collection:
|
|
|
138
197
|
"""
|
|
139
198
|
raise NotImplementedError
|
|
140
199
|
|
|
141
|
-
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> int:
|
|
200
|
+
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
142
201
|
"""
|
|
143
|
-
Delete one or more objects from the collection
|
|
202
|
+
Delete one or more objects from the collection.
|
|
203
|
+
|
|
204
|
+
First let's set up a collection:
|
|
205
|
+
|
|
206
|
+
>>> from linkml_store import Client
|
|
207
|
+
>>> client = Client()
|
|
208
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
209
|
+
>>> collection = db.create_collection("Person")
|
|
210
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
211
|
+
>>> collection.insert(objs)
|
|
212
|
+
>>> collection.find({}).num_rows
|
|
213
|
+
2
|
|
214
|
+
|
|
215
|
+
Now let's delete an object:
|
|
216
|
+
|
|
217
|
+
>>> collection.delete(objs[0])
|
|
218
|
+
>>> collection.find({}).num_rows
|
|
219
|
+
1
|
|
220
|
+
|
|
221
|
+
Deleting the same object again should have no effect:
|
|
222
|
+
|
|
223
|
+
>>> collection.delete(objs[0])
|
|
224
|
+
>>> collection.find({}).num_rows
|
|
225
|
+
1
|
|
144
226
|
|
|
145
227
|
:param objs:
|
|
146
228
|
:param kwargs:
|
|
@@ -148,9 +230,30 @@ class Collection:
|
|
|
148
230
|
"""
|
|
149
231
|
raise NotImplementedError
|
|
150
232
|
|
|
151
|
-
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
|
|
233
|
+
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
|
|
152
234
|
"""
|
|
153
|
-
Delete objects that match a query
|
|
235
|
+
Delete objects that match a query.
|
|
236
|
+
|
|
237
|
+
First let's set up a collection:
|
|
238
|
+
|
|
239
|
+
>>> from linkml_store import Client
|
|
240
|
+
>>> client = Client()
|
|
241
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
242
|
+
>>> collection = db.create_collection("Person")
|
|
243
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
244
|
+
>>> collection.insert(objs)
|
|
245
|
+
|
|
246
|
+
Now let's delete an object:
|
|
247
|
+
|
|
248
|
+
>>> collection.delete_where({"id": "P1"})
|
|
249
|
+
>>> collection.find({}).num_rows
|
|
250
|
+
1
|
|
251
|
+
|
|
252
|
+
Match everything:
|
|
253
|
+
|
|
254
|
+
>>> collection.delete_where({})
|
|
255
|
+
>>> collection.find({}).num_rows
|
|
256
|
+
0
|
|
154
257
|
|
|
155
258
|
:param where: where conditions
|
|
156
259
|
:param missing_ok: if True, do not raise an error if the collection does not exist
|
|
@@ -161,7 +264,7 @@ class Collection:
|
|
|
161
264
|
|
|
162
265
|
def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
163
266
|
"""
|
|
164
|
-
Update one or more objects in the collection
|
|
267
|
+
Update one or more objects in the collection.
|
|
165
268
|
|
|
166
269
|
:param objs:
|
|
167
270
|
:param kwargs:
|
|
@@ -174,7 +277,21 @@ class Collection:
|
|
|
174
277
|
|
|
175
278
|
def query(self, query: Query, **kwargs) -> QueryResult:
|
|
176
279
|
"""
|
|
177
|
-
Run a query against the collection
|
|
280
|
+
Run a query against the collection.
|
|
281
|
+
|
|
282
|
+
First let's load a collection:
|
|
283
|
+
|
|
284
|
+
>>> from linkml_store import Client
|
|
285
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
286
|
+
>>> client = Client()
|
|
287
|
+
>>> db = client.attach_database("duckdb")
|
|
288
|
+
>>> collection = db.create_collection("Country")
|
|
289
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
290
|
+
>>> collection.insert(objs)
|
|
291
|
+
|
|
292
|
+
Now let's run a query:
|
|
293
|
+
|
|
294
|
+
TODO
|
|
178
295
|
|
|
179
296
|
:param query:
|
|
180
297
|
:param kwargs:
|
|
@@ -207,7 +324,7 @@ class Collection:
|
|
|
207
324
|
"""
|
|
208
325
|
raise NotImplementedError
|
|
209
326
|
|
|
210
|
-
def get(self, ids: Optional[IDENTIFIER], **kwargs) -> QueryResult:
|
|
327
|
+
def get(self, ids: Optional[List[IDENTIFIER]], **kwargs) -> QueryResult:
|
|
211
328
|
"""
|
|
212
329
|
Get one or more objects by ID.
|
|
213
330
|
|
|
@@ -217,6 +334,8 @@ class Collection:
|
|
|
217
334
|
"""
|
|
218
335
|
# TODO
|
|
219
336
|
id_field = self.identifier_attribute_name
|
|
337
|
+
if not id_field:
|
|
338
|
+
raise ValueError(f"No identifier for {self.name}")
|
|
220
339
|
return self.find({id_field: ids})
|
|
221
340
|
|
|
222
341
|
def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
|
|
@@ -242,6 +361,31 @@ class Collection:
|
|
|
242
361
|
"""
|
|
243
362
|
Find objects in the collection using a where query.
|
|
244
363
|
|
|
364
|
+
As an example, first load a collection:
|
|
365
|
+
|
|
366
|
+
>>> from linkml_store import Client
|
|
367
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
368
|
+
>>> client = Client()
|
|
369
|
+
>>> db = client.attach_database("duckdb")
|
|
370
|
+
>>> collection = db.create_collection("Country")
|
|
371
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
372
|
+
>>> collection.insert(objs)
|
|
373
|
+
|
|
374
|
+
Now let's find all objects:
|
|
375
|
+
|
|
376
|
+
>>> qr = collection.find({})
|
|
377
|
+
>>> qr.num_rows
|
|
378
|
+
20
|
|
379
|
+
|
|
380
|
+
We can do a more restrictive query:
|
|
381
|
+
|
|
382
|
+
>>> qr = collection.find({"code": "FR"})
|
|
383
|
+
>>> qr.num_rows
|
|
384
|
+
1
|
|
385
|
+
>>> qr.rows[0]["name"]
|
|
386
|
+
'France'
|
|
387
|
+
|
|
388
|
+
|
|
245
389
|
:param where:
|
|
246
390
|
:param kwargs:
|
|
247
391
|
:return:
|
|
@@ -290,6 +434,7 @@ class Collection:
|
|
|
290
434
|
raise ValueError(f"No index named {index_name}")
|
|
291
435
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
292
436
|
index_col = ix.index_field
|
|
437
|
+
# TODO: optimize this for large indexes
|
|
293
438
|
vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
|
|
294
439
|
results = ix.search(query, vector_pairs, limit=limit)
|
|
295
440
|
for r in results:
|
|
@@ -305,11 +450,15 @@ class Collection:
|
|
|
305
450
|
|
|
306
451
|
:return:
|
|
307
452
|
"""
|
|
308
|
-
if not self.
|
|
309
|
-
raise ValueError(f"Collection has no
|
|
310
|
-
return self.
|
|
453
|
+
if not self.alias:
|
|
454
|
+
raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
|
|
455
|
+
return self.alias.startswith("internal__")
|
|
311
456
|
|
|
312
|
-
def
|
|
457
|
+
def load_from_source(self):
|
|
458
|
+
objects = load_objects(self.metadata.source_location)
|
|
459
|
+
self.insert(objects)
|
|
460
|
+
|
|
461
|
+
def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
|
|
313
462
|
"""
|
|
314
463
|
Attach an index to the collection.
|
|
315
464
|
|
|
@@ -333,6 +482,7 @@ class Collection:
|
|
|
333
482
|
self._indexers[index_name] = index
|
|
334
483
|
if auto_index:
|
|
335
484
|
all_objs = self.find(limit=-1).rows
|
|
485
|
+
logger.info(f"Auto-indexing {len(all_objs)} objects")
|
|
336
486
|
self.index_objects(all_objs, index_name, replace=True, **kwargs)
|
|
337
487
|
|
|
338
488
|
def _index_collection_name(self, index_name: str) -> str:
|
|
@@ -340,6 +490,7 @@ class Collection:
|
|
|
340
490
|
Create a name for a special collection that holds index data
|
|
341
491
|
|
|
342
492
|
:param index_name:
|
|
493
|
+
:param indexer:
|
|
343
494
|
:return:
|
|
344
495
|
"""
|
|
345
496
|
return f"internal__index__{self.name}__{index_name}"
|
|
@@ -370,6 +521,7 @@ class Collection:
|
|
|
370
521
|
logger.info(f"Checking if {ix_coll_name} is in {schema.classes.keys()}")
|
|
371
522
|
if ix_coll_name in schema.classes:
|
|
372
523
|
ix_coll.delete_where()
|
|
524
|
+
|
|
373
525
|
ix_coll.insert(objects_with_ix, **kwargs)
|
|
374
526
|
|
|
375
527
|
def list_index_names(self) -> List[str]:
|
|
@@ -457,6 +609,8 @@ class Collection:
|
|
|
457
609
|
:param max_sample_size:
|
|
458
610
|
:return:
|
|
459
611
|
"""
|
|
612
|
+
if not self.target_class_name:
|
|
613
|
+
raise ValueError(f"No target_class_name for {self.alias}")
|
|
460
614
|
cd = ClassDefinition(self.target_class_name)
|
|
461
615
|
keys = defaultdict(list)
|
|
462
616
|
for obj in objs[0:max_sample_size]:
|
|
@@ -16,7 +16,7 @@ class CollectionConfig(BaseModel):
|
|
|
16
16
|
default=None,
|
|
17
17
|
description="The type of object in the collection. TODO; use this instead of name",
|
|
18
18
|
)
|
|
19
|
-
|
|
19
|
+
additional_properties: Optional[Dict] = Field(
|
|
20
20
|
default=None,
|
|
21
21
|
description="Optional metadata for the collection",
|
|
22
22
|
)
|
|
@@ -36,6 +36,10 @@ class CollectionConfig(BaseModel):
|
|
|
36
36
|
default=False,
|
|
37
37
|
description="Whether the collection is prepopulated",
|
|
38
38
|
)
|
|
39
|
+
source_location: Optional[str] = Field(
|
|
40
|
+
default=None,
|
|
41
|
+
description="Filesystem or remote URL that stores the data",
|
|
42
|
+
)
|
|
39
43
|
|
|
40
44
|
|
|
41
45
|
class DatabaseConfig(BaseModel):
|
|
@@ -55,7 +59,7 @@ class DatabaseConfig(BaseModel):
|
|
|
55
59
|
default=None,
|
|
56
60
|
description="The LinkML schema as a dictionary",
|
|
57
61
|
)
|
|
58
|
-
collections: Dict[str, CollectionConfig] = Field(
|
|
62
|
+
collections: Optional[Dict[str, CollectionConfig]] = Field(
|
|
59
63
|
default={},
|
|
60
64
|
description="A dictionary of collection configurations",
|
|
61
65
|
)
|