linkml-store 0.1.10__tar.gz → 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- {linkml_store-0.1.10 → linkml_store-0.1.11}/PKG-INFO +36 -3
- {linkml_store-0.1.10 → linkml_store-0.1.11}/README.md +31 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/pyproject.toml +6 -3
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/client.py +63 -7
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/collection.py +138 -30
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/config.py +48 -6
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/database.py +45 -27
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +16 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/duckdb/duckdb_database.py +16 -2
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +11 -4
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +6 -2
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/mongodb/mongodb_database.py +1 -36
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/solr/solr_collection.py +4 -4
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/cli.py +35 -17
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/index/__init__.py +16 -2
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/index/implementations/llm_indexer.py +2 -1
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/index/indexer.py +13 -2
- linkml_store-0.1.11/src/linkml_store/utils/file_utils.py +37 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/format_utils.py +68 -7
- linkml_store-0.1.11/src/linkml_store/utils/pandas_utils.py +40 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/sql_utils.py +2 -1
- {linkml_store-0.1.10 → linkml_store-0.1.11}/LICENSE +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/object_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/webapi/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.11}/src/linkml_store/webapi/main.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: linkml-store
|
|
3
|
-
Version: 0.1.10
|
|
3
|
+
Version: 0.1.11
|
|
4
4
|
Summary: linkml-store
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Author 1
|
|
@@ -16,6 +16,7 @@ Provides-Extra: analytics
|
|
|
16
16
|
Provides-Extra: app
|
|
17
17
|
Provides-Extra: chromadb
|
|
18
18
|
Provides-Extra: fastapi
|
|
19
|
+
Provides-Extra: frictionless
|
|
19
20
|
Provides-Extra: h5py
|
|
20
21
|
Provides-Extra: llm
|
|
21
22
|
Provides-Extra: map
|
|
@@ -30,11 +31,12 @@ Requires-Dist: click
|
|
|
30
31
|
Requires-Dist: duckdb (>=0.10.1,<0.11.0)
|
|
31
32
|
Requires-Dist: duckdb-engine (>=0.11.2)
|
|
32
33
|
Requires-Dist: fastapi ; extra == "fastapi"
|
|
34
|
+
Requires-Dist: frictionless ; extra == "frictionless"
|
|
33
35
|
Requires-Dist: h5py ; extra == "h5py"
|
|
34
36
|
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
35
37
|
Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
|
|
36
|
-
Requires-Dist: linkml ; extra == "validation"
|
|
37
|
-
Requires-Dist: linkml-runtime (>=1.8.
|
|
38
|
+
Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
|
|
39
|
+
Requires-Dist: linkml-runtime (>=1.8.0)
|
|
38
40
|
Requires-Dist: linkml_map ; extra == "map"
|
|
39
41
|
Requires-Dist: linkml_renderer ; extra == "renderer"
|
|
40
42
|
Requires-Dist: llm ; extra == "llm"
|
|
@@ -122,6 +124,10 @@ need to have a vector database to run embedding search!
|
|
|
122
124
|
|
|
123
125
|
See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
|
|
124
126
|
|
|
127
|
+
### Use with LLMs
|
|
128
|
+
|
|
129
|
+
TODO - docs
|
|
130
|
+
|
|
125
131
|
### Validation
|
|
126
132
|
|
|
127
133
|
LinkML-Store is backed by [LinkML](https://linkml.io), which allows
|
|
@@ -131,6 +137,33 @@ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.htm
|
|
|
131
137
|
|
|
132
138
|
and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
|
|
133
139
|
|
|
140
|
+
## Web API
|
|
141
|
+
|
|
142
|
+
There is a preliminary API following HATEOAS principles implemented using FastAPI.
|
|
143
|
+
|
|
144
|
+
To start you should first create a config file, e.g. `db/conf.yaml`:
|
|
145
|
+
|
|
146
|
+
Then run:
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
export LINKML_STORE_CONFIG=./db/conf.yaml
|
|
150
|
+
make api
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
The API returns links as well as data objects, it's recommended to use a Chrome plugin for JSON viewing
|
|
154
|
+
for exploring the API. TODO: add docs here.
|
|
155
|
+
|
|
156
|
+
The main endpoints are:
|
|
157
|
+
|
|
158
|
+
* `http://localhost:8000/` - the root of the API
|
|
159
|
+
* `http://localhost:8000/pages/` - browse the API via HTML
|
|
160
|
+
* `http://localhost:8000/docs` - the Swagger UI
|
|
161
|
+
|
|
162
|
+
## Streamlit app
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
make app
|
|
166
|
+
```
|
|
134
167
|
|
|
135
168
|
## Background
|
|
136
169
|
|
|
@@ -69,6 +69,10 @@ need to have a vector database to run embedding search!
|
|
|
69
69
|
|
|
70
70
|
See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
|
|
71
71
|
|
|
72
|
+
### Use with LLMs
|
|
73
|
+
|
|
74
|
+
TODO - docs
|
|
75
|
+
|
|
72
76
|
### Validation
|
|
73
77
|
|
|
74
78
|
LinkML-Store is backed by [LinkML](https://linkml.io), which allows
|
|
@@ -78,6 +82,33 @@ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.htm
|
|
|
78
82
|
|
|
79
83
|
and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
|
|
80
84
|
|
|
85
|
+
## Web API
|
|
86
|
+
|
|
87
|
+
There is a preliminary API following HATEOAS principles implemented using FastAPI.
|
|
88
|
+
|
|
89
|
+
To start you should first create a config file, e.g. `db/conf.yaml`:
|
|
90
|
+
|
|
91
|
+
Then run:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
export LINKML_STORE_CONFIG=./db/conf.yaml
|
|
95
|
+
make api
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
The API returns links as well as data objects, it's recommended to use a Chrome plugin for JSON viewing
|
|
99
|
+
for exploring the API. TODO: add docs here.
|
|
100
|
+
|
|
101
|
+
The main endpoints are:
|
|
102
|
+
|
|
103
|
+
* `http://localhost:8000/` - the root of the API
|
|
104
|
+
* `http://localhost:8000/pages/` - browse the API via HTML
|
|
105
|
+
* `http://localhost:8000/docs` - the Swagger UI
|
|
106
|
+
|
|
107
|
+
## Streamlit app
|
|
108
|
+
|
|
109
|
+
```
|
|
110
|
+
make app
|
|
111
|
+
```
|
|
81
112
|
|
|
82
113
|
## Background
|
|
83
114
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.1.10"
|
|
3
|
+
version = "0.1.11"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -10,7 +10,7 @@ readme = "README.md"
|
|
|
10
10
|
python = "^3.9, !=3.9.7"
|
|
11
11
|
click = "*"
|
|
12
12
|
pydantic = "^2.0.0"
|
|
13
|
-
linkml-runtime = ">=1.8.
|
|
13
|
+
linkml-runtime = ">=1.8.0"
|
|
14
14
|
streamlit = { version = "^1.32.2", optional = true }
|
|
15
15
|
sqlalchemy = "*"
|
|
16
16
|
duckdb = "^0.10.1"
|
|
@@ -25,9 +25,10 @@ pymongo = { version="*", optional = true }
|
|
|
25
25
|
chromadb = { version="*", optional = true }
|
|
26
26
|
pyarrow = { version="*", optional = true }
|
|
27
27
|
h5py = { version="*", optional = true }
|
|
28
|
-
linkml = { version="*", optional = true }
|
|
28
|
+
linkml = { version=">=1.8.0", optional = true }
|
|
29
29
|
linkml_map = { version="*", optional = true }
|
|
30
30
|
linkml_renderer = { version="*", optional = true }
|
|
31
|
+
frictionless = { version="*", optional = true }
|
|
31
32
|
pandas = ">=2.2.1"
|
|
32
33
|
jinja2 = "^3.1.4"
|
|
33
34
|
jsonlines = "^4.0.0"
|
|
@@ -49,6 +50,7 @@ nbsphinx = "*"
|
|
|
49
50
|
jupyter = "*"
|
|
50
51
|
jupysql = "*"
|
|
51
52
|
papermill = "*"
|
|
53
|
+
nbdime = "*"
|
|
52
54
|
|
|
53
55
|
[tool.poetry.group.tests.dependencies]
|
|
54
56
|
pytest = "^7.4.0"
|
|
@@ -71,6 +73,7 @@ validation = ["linkml"]
|
|
|
71
73
|
map = ["linkml_map"]
|
|
72
74
|
renderer = ["linkml_renderer"]
|
|
73
75
|
fastapi = ["fastapi", "uvicorn"]
|
|
76
|
+
frictionless = ["frictionless"]
|
|
74
77
|
|
|
75
78
|
[tool.poetry.scripts]
|
|
76
79
|
linkml-store = "linkml_store.cli:cli"
|
|
@@ -242,7 +242,7 @@ class Client:
|
|
|
242
242
|
Return all attached databases
|
|
243
243
|
|
|
244
244
|
Examples
|
|
245
|
-
|
|
245
|
+
|
|
246
246
|
>>> client = Client()
|
|
247
247
|
>>> _ = client.attach_database("duckdb", alias="test1")
|
|
248
248
|
>>> _ = client.attach_database("duckdb", alias="test2")
|
|
@@ -268,25 +268,81 @@ class Client:
|
|
|
268
268
|
"""
|
|
269
269
|
Drop a database.
|
|
270
270
|
|
|
271
|
+
Example (in-memory):
|
|
272
|
+
|
|
273
|
+
>>> client = Client()
|
|
274
|
+
>>> db1 = client.attach_database("duckdb", alias="test1")
|
|
275
|
+
>>> db2 = client.attach_database("duckdb", alias="test2")
|
|
276
|
+
>>> len(client.databases)
|
|
277
|
+
2
|
|
278
|
+
>>> client.drop_database("test1")
|
|
279
|
+
>>> len(client.databases)
|
|
280
|
+
1
|
|
281
|
+
|
|
282
|
+
Databases that persist on disk:
|
|
283
|
+
|
|
284
|
+
>>> client = Client()
|
|
285
|
+
>>> path = Path("tmp/test.db")
|
|
286
|
+
>>> path.parent.mkdir(parents=True, exist_ok=True)
|
|
287
|
+
>>> db = client.attach_database(f"duckdb:///{path}", alias="test")
|
|
288
|
+
>>> len(client.databases)
|
|
289
|
+
1
|
|
290
|
+
>>> db.store({"persons": [{"id": "P1", "name": "John"}]})
|
|
291
|
+
>>> db.commit()
|
|
292
|
+
>>> Path("tmp/test.db").exists()
|
|
293
|
+
True
|
|
294
|
+
>>> client.drop_database("test")
|
|
295
|
+
>>> len(client.databases)
|
|
296
|
+
0
|
|
297
|
+
>>> Path("tmp/test.db").exists()
|
|
298
|
+
False
|
|
299
|
+
|
|
300
|
+
Dropping a non-existent database:
|
|
301
|
+
|
|
302
|
+
>>> client = Client()
|
|
303
|
+
>>> client.drop_database("duckdb:///tmp/made-up1", missing_ok=True)
|
|
304
|
+
>>> client.drop_database("duckdb:///tmp/made-up2", missing_ok=False)
|
|
305
|
+
Traceback (most recent call last):
|
|
306
|
+
...
|
|
307
|
+
ValueError: Database duckdb:///tmp/made-up2 not found
|
|
308
|
+
|
|
271
309
|
:param name:
|
|
272
310
|
:param missing_ok:
|
|
273
311
|
:return:
|
|
274
312
|
"""
|
|
275
|
-
if
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
313
|
+
if self._databases:
|
|
314
|
+
if name in self._databases:
|
|
315
|
+
db = self._databases[name]
|
|
316
|
+
db.drop(**kwargs)
|
|
317
|
+
del self._databases[name]
|
|
318
|
+
else:
|
|
319
|
+
if not missing_ok:
|
|
320
|
+
raise ValueError(f"Database {name} not found")
|
|
279
321
|
else:
|
|
280
|
-
|
|
281
|
-
|
|
322
|
+
db = self.get_database(name, create_if_not_exists=True)
|
|
323
|
+
db.drop(**kwargs)
|
|
282
324
|
|
|
283
325
|
def drop_all_databases(self, **kwargs):
|
|
284
326
|
"""
|
|
285
327
|
Drop all databases.
|
|
286
328
|
|
|
329
|
+
Example (in-memory):
|
|
330
|
+
|
|
331
|
+
>>> client = Client()
|
|
332
|
+
>>> db1 = client.attach_database("duckdb", alias="test1")
|
|
333
|
+
>>> assert "test1" in client.databases
|
|
334
|
+
>>> db2 = client.attach_database("duckdb", alias="test2")
|
|
335
|
+
>>> assert "test2" in client.databases
|
|
336
|
+
>>> client.drop_all_databases()
|
|
337
|
+
>>> len(client.databases)
|
|
338
|
+
0
|
|
339
|
+
|
|
340
|
+
|
|
287
341
|
:param missing_ok:
|
|
288
342
|
:return:
|
|
289
343
|
"""
|
|
344
|
+
if not self._databases:
|
|
345
|
+
return
|
|
290
346
|
for name in list(self._databases.keys()):
|
|
291
347
|
self.drop_database(name, missing_ok=False, **kwargs)
|
|
292
348
|
self._databases = {}
|
|
@@ -14,7 +14,7 @@ from pydantic import BaseModel
|
|
|
14
14
|
|
|
15
15
|
from linkml_store.api.types import DatabaseType
|
|
16
16
|
from linkml_store.index import get_indexer
|
|
17
|
-
from linkml_store.utils.format_utils import load_objects
|
|
17
|
+
from linkml_store.utils.format_utils import load_objects, load_objects_from_url
|
|
18
18
|
from linkml_store.utils.object_utils import clean_empties
|
|
19
19
|
from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
|
|
20
20
|
|
|
@@ -61,6 +61,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
61
61
|
# name: str
|
|
62
62
|
parent: Optional[DatabaseType] = None
|
|
63
63
|
_indexers: Optional[Dict[str, Indexer]] = None
|
|
64
|
+
_initialized: Optional[bool] = None
|
|
64
65
|
# hidden: Optional[bool] = False
|
|
65
66
|
|
|
66
67
|
metadata: Optional[CollectionConfig] = None
|
|
@@ -73,7 +74,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
73
74
|
if metadata:
|
|
74
75
|
self.metadata = metadata
|
|
75
76
|
else:
|
|
76
|
-
self.metadata = CollectionConfig(
|
|
77
|
+
self.metadata = CollectionConfig(type=name, **kwargs)
|
|
77
78
|
if not self.metadata.alias:
|
|
78
79
|
self.metadata.alias = name
|
|
79
80
|
if not self.metadata.type:
|
|
@@ -81,17 +82,6 @@ class Collection(Generic[DatabaseType]):
|
|
|
81
82
|
# if name is not None and self.metadata.name is not None and name != self.metadata.name:
|
|
82
83
|
# raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
|
|
83
84
|
|
|
84
|
-
@property
|
|
85
|
-
def name(self) -> str:
|
|
86
|
-
"""
|
|
87
|
-
Return the name of the collection.
|
|
88
|
-
|
|
89
|
-
TODO: deprecate in favor of Type
|
|
90
|
-
|
|
91
|
-
:return: name of the collection
|
|
92
|
-
"""
|
|
93
|
-
return self.metadata.name
|
|
94
|
-
|
|
95
85
|
@property
|
|
96
86
|
def hidden(self) -> bool:
|
|
97
87
|
"""
|
|
@@ -118,12 +108,18 @@ class Collection(Generic[DatabaseType]):
|
|
|
118
108
|
>>> collection.target_class_name
|
|
119
109
|
'Person'
|
|
120
110
|
|
|
111
|
+
>>> collection = db.create_collection("Organization")
|
|
112
|
+
>>> collection.target_class_name
|
|
113
|
+
'Organization'
|
|
114
|
+
>>> collection.alias
|
|
115
|
+
'Organization'
|
|
116
|
+
|
|
121
117
|
:return: name of the class which members of this collection instantiate
|
|
122
118
|
"""
|
|
123
119
|
# TODO: this is a shim layer until we can normalize on this
|
|
124
120
|
if self.metadata.type:
|
|
125
121
|
return self.metadata.type
|
|
126
|
-
return self.
|
|
122
|
+
return self.alias
|
|
127
123
|
|
|
128
124
|
@property
|
|
129
125
|
def alias(self):
|
|
@@ -161,10 +157,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
161
157
|
:return:
|
|
162
158
|
"""
|
|
163
159
|
# TODO: this is a shim layer until we can normalize on this
|
|
164
|
-
# TODO: this is a shim layer until we can normalize on this
|
|
165
160
|
if self.metadata.alias:
|
|
166
161
|
return self.metadata.alias
|
|
167
|
-
return self.
|
|
162
|
+
return self.target_class_name
|
|
168
163
|
|
|
169
164
|
def replace(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
170
165
|
"""
|
|
@@ -201,7 +196,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
201
196
|
"""
|
|
202
197
|
raise NotImplementedError
|
|
203
198
|
|
|
199
|
+
def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
|
|
200
|
+
logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
|
|
201
|
+
if not self._initialized:
|
|
202
|
+
self._materialize_derivations()
|
|
203
|
+
self._initialized = True
|
|
204
|
+
|
|
204
205
|
def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
|
|
206
|
+
self._initialized = True
|
|
205
207
|
patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
|
|
206
208
|
self._broadcast(patches, **kwargs)
|
|
207
209
|
|
|
@@ -305,6 +307,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
305
307
|
:param kwargs:
|
|
306
308
|
:return:
|
|
307
309
|
"""
|
|
310
|
+
self._pre_query_hook()
|
|
308
311
|
return self.parent.query(query, **kwargs)
|
|
309
312
|
|
|
310
313
|
def query_facets(
|
|
@@ -340,7 +343,6 @@ class Collection(Generic[DatabaseType]):
|
|
|
340
343
|
:param kwargs:
|
|
341
344
|
:return:
|
|
342
345
|
"""
|
|
343
|
-
# TODO
|
|
344
346
|
id_field = self.identifier_attribute_name
|
|
345
347
|
if not id_field:
|
|
346
348
|
raise ValueError(f"No identifier for {self.name}")
|
|
@@ -399,9 +401,10 @@ class Collection(Generic[DatabaseType]):
|
|
|
399
401
|
:return:
|
|
400
402
|
"""
|
|
401
403
|
query = self._create_query(where_clause=where)
|
|
404
|
+
self._pre_query_hook(query)
|
|
402
405
|
return self.query(query, **kwargs)
|
|
403
406
|
|
|
404
|
-
def find_iter(self, where: Optional[Any] = None, **kwargs) -> Iterator[OBJECT]:
|
|
407
|
+
def find_iter(self, where: Optional[Any] = None, page_size=100, **kwargs) -> Iterator[OBJECT]:
|
|
405
408
|
"""
|
|
406
409
|
Find objects in the collection using a where query.
|
|
407
410
|
|
|
@@ -409,9 +412,22 @@ class Collection(Generic[DatabaseType]):
|
|
|
409
412
|
:param kwargs:
|
|
410
413
|
:return:
|
|
411
414
|
"""
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
+
total_rows = None
|
|
416
|
+
offset = 0
|
|
417
|
+
if page_size < 1:
|
|
418
|
+
raise ValueError(f"Invalid page size: {page_size}")
|
|
419
|
+
while True:
|
|
420
|
+
qr = self.find(where=where, offset=offset, limit=page_size, **kwargs)
|
|
421
|
+
if total_rows is None:
|
|
422
|
+
total_rows = qr.num_rows
|
|
423
|
+
if not qr.rows:
|
|
424
|
+
return
|
|
425
|
+
for row in qr.rows:
|
|
426
|
+
yield row
|
|
427
|
+
offset += page_size
|
|
428
|
+
if offset >= total_rows:
|
|
429
|
+
break
|
|
430
|
+
return
|
|
415
431
|
|
|
416
432
|
def search(
|
|
417
433
|
self,
|
|
@@ -454,6 +470,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
454
470
|
:param kwargs:
|
|
455
471
|
:return:
|
|
456
472
|
"""
|
|
473
|
+
self._pre_query_hook()
|
|
457
474
|
if index_name is None:
|
|
458
475
|
if len(self.indexers) == 1:
|
|
459
476
|
index_name = list(self.indexers.keys())[0]
|
|
@@ -494,10 +511,93 @@ class Collection(Generic[DatabaseType]):
|
|
|
494
511
|
raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
|
|
495
512
|
return self.alias.startswith("internal__")
|
|
496
513
|
|
|
497
|
-
def
|
|
498
|
-
|
|
514
|
+
def exists(self) -> Optional[bool]:
|
|
515
|
+
"""
|
|
516
|
+
Check if the collection exists.
|
|
517
|
+
|
|
518
|
+
:return:
|
|
519
|
+
"""
|
|
520
|
+
cd = self.class_definition()
|
|
521
|
+
return cd is not None
|
|
522
|
+
|
|
523
|
+
def load_from_source(self, load_if_exists=False):
|
|
524
|
+
"""
|
|
525
|
+
Load objects from the source location.
|
|
526
|
+
|
|
527
|
+
:param load_if_exists:
|
|
528
|
+
:return:
|
|
529
|
+
"""
|
|
530
|
+
if not load_if_exists and self.exists():
|
|
531
|
+
return
|
|
532
|
+
metadata = self.metadata
|
|
533
|
+
if metadata.source:
|
|
534
|
+
source = metadata.source
|
|
535
|
+
kwargs = source.arguments or {}
|
|
536
|
+
if source.local_path:
|
|
537
|
+
objects = load_objects(
|
|
538
|
+
metadata.source.local_path, format=source.format, expected_type=source.expected_type, **kwargs
|
|
539
|
+
)
|
|
540
|
+
elif metadata.source.url:
|
|
541
|
+
objects = load_objects_from_url(
|
|
542
|
+
metadata.source.url, format=source.format, expected_type=source.expected_type, **kwargs
|
|
543
|
+
)
|
|
499
544
|
self.insert(objects)
|
|
500
545
|
|
|
546
|
+
def _check_if_initialized(self) -> bool:
|
|
547
|
+
return self._initialized
|
|
548
|
+
|
|
549
|
+
def _materialize_derivations(self, **kwargs):
|
|
550
|
+
metadata = self.metadata
|
|
551
|
+
if not metadata.derived_from:
|
|
552
|
+
logger.info(f"No metadata for {self.alias}; no derivations")
|
|
553
|
+
return
|
|
554
|
+
if self._check_if_initialized():
|
|
555
|
+
logger.info(f"Already initialized {self.alias}; no derivations")
|
|
556
|
+
return
|
|
557
|
+
parent_db = self.parent
|
|
558
|
+
client = parent_db.parent
|
|
559
|
+
# cd = self.class_definition()
|
|
560
|
+
for derivation in metadata.derived_from:
|
|
561
|
+
# TODO: optimize this; utilize underlying engine
|
|
562
|
+
logger.info(f"Deriving from {derivation}")
|
|
563
|
+
if derivation.database:
|
|
564
|
+
db = client.get_database(derivation.database)
|
|
565
|
+
else:
|
|
566
|
+
db = parent_db
|
|
567
|
+
if derivation.collection:
|
|
568
|
+
coll = db.get_collection(derivation.collection)
|
|
569
|
+
else:
|
|
570
|
+
coll = self
|
|
571
|
+
coll.class_definition()
|
|
572
|
+
source_obj_iter = coll.find_iter(derivation.where or {})
|
|
573
|
+
mappings = derivation.mappings
|
|
574
|
+
if not mappings:
|
|
575
|
+
raise ValueError(f"No mappings for {self.name}")
|
|
576
|
+
target_class_name = self.target_class_name
|
|
577
|
+
from linkml_map.session import Session
|
|
578
|
+
|
|
579
|
+
session = Session()
|
|
580
|
+
session.set_source_schema(db.schema_view.schema)
|
|
581
|
+
session.set_object_transformer(
|
|
582
|
+
{
|
|
583
|
+
"class_derivations": {
|
|
584
|
+
target_class_name: {
|
|
585
|
+
"populated_from": coll.target_class_name,
|
|
586
|
+
"slot_derivations": mappings,
|
|
587
|
+
},
|
|
588
|
+
}
|
|
589
|
+
},
|
|
590
|
+
)
|
|
591
|
+
logger.debug(f"Session Spec: {session.object_transformer}")
|
|
592
|
+
tr_objs = []
|
|
593
|
+
for source_obj in source_obj_iter:
|
|
594
|
+
tr_obj = session.transform(source_obj, source_type=coll.target_class_name)
|
|
595
|
+
tr_objs.append(tr_obj)
|
|
596
|
+
if not tr_objs:
|
|
597
|
+
raise ValueError(f"No objects derived from {coll.name}")
|
|
598
|
+
self.insert(tr_objs)
|
|
599
|
+
self.commit()
|
|
600
|
+
|
|
501
601
|
def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
|
|
502
602
|
"""
|
|
503
603
|
Attach an index to the collection.
|
|
@@ -572,7 +672,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
572
672
|
:param indexer:
|
|
573
673
|
:return:
|
|
574
674
|
"""
|
|
575
|
-
return f"internal__index__{self.
|
|
675
|
+
return f"internal__index__{self.alias}__{index_name}"
|
|
576
676
|
|
|
577
677
|
def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
|
|
578
678
|
"""
|
|
@@ -638,6 +738,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
638
738
|
"""
|
|
639
739
|
Return the class definition for the collection.
|
|
640
740
|
|
|
741
|
+
If no schema has been explicitly set, and the native database does not
|
|
742
|
+
have a schema, then a schema will be induced from the objects in the collection.
|
|
743
|
+
|
|
641
744
|
:return:
|
|
642
745
|
"""
|
|
643
746
|
sv: SchemaView = self.parent.schema_view
|
|
@@ -722,7 +825,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
722
825
|
else:
|
|
723
826
|
return None
|
|
724
827
|
|
|
725
|
-
def induce_class_definition_from_objects(
|
|
828
|
+
def induce_class_definition_from_objects(
|
|
829
|
+
self, objs: List[OBJECT], max_sample_size: Optional[int] = None
|
|
830
|
+
) -> ClassDefinition:
|
|
726
831
|
"""
|
|
727
832
|
Induce a class definition from a list of objects.
|
|
728
833
|
|
|
@@ -733,6 +838,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
733
838
|
:param max_sample_size:
|
|
734
839
|
:return:
|
|
735
840
|
"""
|
|
841
|
+
# TODO: use schemaview
|
|
842
|
+
if max_sample_size is None:
|
|
843
|
+
max_sample_size = 10
|
|
736
844
|
if not self.target_class_name:
|
|
737
845
|
raise ValueError(f"No target_class_name for {self.alias}")
|
|
738
846
|
cd = ClassDefinition(self.target_class_name)
|
|
@@ -795,6 +903,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
795
903
|
for other_rng in rngs:
|
|
796
904
|
if rng != other_rng:
|
|
797
905
|
raise ValueError(f"Conflict: {rng} != {other_rng} for {vs}")
|
|
906
|
+
logger.debug(f"Inducing {k} as {rng} {multivalued} {inlined}")
|
|
798
907
|
cd.attributes[k] = SlotDefinition(k, range=rng, multivalued=multivalued, inlined=inlined)
|
|
799
908
|
if exact_dimensions_list:
|
|
800
909
|
array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
|
|
@@ -828,7 +937,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
828
937
|
"""
|
|
829
938
|
Apply a patch to the collection.
|
|
830
939
|
|
|
831
|
-
Patches conform to the JSON Patch format
|
|
940
|
+
Patches conform to the JSON Patch format.
|
|
832
941
|
|
|
833
942
|
:param patches:
|
|
834
943
|
:param kwargs:
|
|
@@ -841,11 +950,11 @@ class Collection(Generic[DatabaseType]):
|
|
|
841
950
|
new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
|
|
842
951
|
self.replace(new_objs)
|
|
843
952
|
|
|
844
|
-
def diff(self, other: "Collection", **kwargs):
|
|
953
|
+
def diff(self, other: "Collection", **kwargs) -> List[PatchDict]:
|
|
845
954
|
"""
|
|
846
955
|
Diff two collections.
|
|
847
956
|
|
|
848
|
-
:param other:
|
|
957
|
+
:param other: The collection to diff against
|
|
849
958
|
:param kwargs:
|
|
850
959
|
:return:
|
|
851
960
|
"""
|
|
@@ -872,8 +981,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
872
981
|
if not cd:
|
|
873
982
|
raise ValueError(f"Cannot find class definition for {self.target_class_name}")
|
|
874
983
|
class_name = cd.name
|
|
875
|
-
|
|
876
|
-
for obj in result.rows:
|
|
984
|
+
for obj in self.find_iter(**kwargs):
|
|
877
985
|
obj = clean_empties(obj)
|
|
878
986
|
yield from validator.iter_results(obj, class_name)
|
|
879
987
|
|
|
@@ -4,14 +4,43 @@ from pydantic import BaseModel, Field
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class ConfiguredBaseModel(BaseModel, extra="forbid"):
|
|
7
|
+
"""
|
|
8
|
+
Base class for all configuration models.
|
|
9
|
+
"""
|
|
10
|
+
|
|
7
11
|
pass
|
|
8
12
|
|
|
9
13
|
|
|
14
|
+
class DerivationConfiguration(ConfiguredBaseModel):
|
|
15
|
+
"""
|
|
16
|
+
Configuration for a derivation
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
database: Optional[str] = None
|
|
20
|
+
collection: Optional[str] = None
|
|
21
|
+
mappings: Optional[Dict[str, Any]] = None
|
|
22
|
+
where: Optional[Dict[str, Any]] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CollectionSource(ConfiguredBaseModel):
|
|
26
|
+
"""
|
|
27
|
+
Metadata about a source
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
url: Optional[str] = None
|
|
31
|
+
local_path: Optional[str] = None
|
|
32
|
+
source_location: Optional[str] = None
|
|
33
|
+
refresh_interval_days: Optional[float] = None
|
|
34
|
+
expected_type: Optional[str] = None
|
|
35
|
+
format: Optional[str] = None
|
|
36
|
+
arguments: Optional[Dict[str, Any]] = None
|
|
37
|
+
|
|
38
|
+
|
|
10
39
|
class CollectionConfig(ConfiguredBaseModel):
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
40
|
+
"""
|
|
41
|
+
Configuration for a collection
|
|
42
|
+
"""
|
|
43
|
+
|
|
15
44
|
alias: Optional[str] = Field(
|
|
16
45
|
default=None,
|
|
17
46
|
description="An optional alias for the collection",
|
|
@@ -40,13 +69,22 @@ class CollectionConfig(ConfiguredBaseModel):
|
|
|
40
69
|
default=False,
|
|
41
70
|
description="Whether the collection is prepopulated",
|
|
42
71
|
)
|
|
43
|
-
|
|
72
|
+
source: Optional[CollectionSource] = Field(
|
|
44
73
|
default=None,
|
|
45
|
-
description="
|
|
74
|
+
description="Metadata about the source",
|
|
75
|
+
)
|
|
76
|
+
# TODO: derived_from
|
|
77
|
+
derived_from: Optional[List[DerivationConfiguration]] = Field(
|
|
78
|
+
default=None,
|
|
79
|
+
description="LinkML-Map derivations",
|
|
46
80
|
)
|
|
47
81
|
|
|
48
82
|
|
|
49
83
|
class DatabaseConfig(ConfiguredBaseModel):
|
|
84
|
+
"""
|
|
85
|
+
Configuration for a database
|
|
86
|
+
"""
|
|
87
|
+
|
|
50
88
|
handle: str = Field(
|
|
51
89
|
default="duckdb:///:memory:",
|
|
52
90
|
description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
|
|
@@ -91,6 +129,10 @@ class DatabaseConfig(ConfiguredBaseModel):
|
|
|
91
129
|
|
|
92
130
|
|
|
93
131
|
class ClientConfig(ConfiguredBaseModel):
|
|
132
|
+
"""
|
|
133
|
+
Configuration for a client
|
|
134
|
+
"""
|
|
135
|
+
|
|
94
136
|
handle: Optional[str] = Field(
|
|
95
137
|
default=None,
|
|
96
138
|
description="The client handle",
|