linkml-store 0.1.10__tar.gz → 0.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- {linkml_store-0.1.10 → linkml_store-0.1.12}/PKG-INFO +36 -3
- {linkml_store-0.1.10 → linkml_store-0.1.12}/README.md +31 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/pyproject.toml +6 -3
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/client.py +63 -7
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/collection.py +152 -32
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/config.py +49 -6
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/database.py +77 -30
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +16 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/duckdb/duckdb_database.py +47 -5
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +11 -4
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +6 -2
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/mongodb/mongodb_database.py +30 -35
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/solr/solr_collection.py +4 -4
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/cli.py +64 -19
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/__init__.py +16 -2
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/implementations/llm_indexer.py +2 -1
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/indexer.py +13 -2
- linkml_store-0.1.12/src/linkml_store/utils/file_utils.py +37 -0
- linkml_store-0.1.12/src/linkml_store/utils/format_utils.py +403 -0
- linkml_store-0.1.12/src/linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store-0.1.12/src/linkml_store/utils/pandas_utils.py +40 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/sql_utils.py +9 -3
- linkml_store-0.1.12/src/linkml_store/webapi/html/generic.html.j2 +43 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/main.py +346 -63
- linkml_store-0.1.10/src/linkml_store/utils/format_utils.py +0 -224
- linkml_store-0.1.10/src/linkml_store/webapi/html/generic.html.j2 +0 -46
- {linkml_store-0.1.10 → linkml_store-0.1.12}/LICENSE +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/object_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.1.10 → linkml_store-0.1.12}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: linkml-store
|
|
3
|
-
Version: 0.1.10
|
|
3
|
+
Version: 0.1.12
|
|
4
4
|
Summary: linkml-store
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Author 1
|
|
@@ -16,6 +16,7 @@ Provides-Extra: analytics
|
|
|
16
16
|
Provides-Extra: app
|
|
17
17
|
Provides-Extra: chromadb
|
|
18
18
|
Provides-Extra: fastapi
|
|
19
|
+
Provides-Extra: frictionless
|
|
19
20
|
Provides-Extra: h5py
|
|
20
21
|
Provides-Extra: llm
|
|
21
22
|
Provides-Extra: map
|
|
@@ -30,11 +31,12 @@ Requires-Dist: click
|
|
|
30
31
|
Requires-Dist: duckdb (>=0.10.1,<0.11.0)
|
|
31
32
|
Requires-Dist: duckdb-engine (>=0.11.2)
|
|
32
33
|
Requires-Dist: fastapi ; extra == "fastapi"
|
|
34
|
+
Requires-Dist: frictionless ; extra == "frictionless"
|
|
33
35
|
Requires-Dist: h5py ; extra == "h5py"
|
|
34
36
|
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
35
37
|
Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
|
|
36
|
-
Requires-Dist: linkml ; extra == "validation"
|
|
37
|
-
Requires-Dist: linkml-runtime (>=1.8.
|
|
38
|
+
Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
|
|
39
|
+
Requires-Dist: linkml-runtime (>=1.8.0)
|
|
38
40
|
Requires-Dist: linkml_map ; extra == "map"
|
|
39
41
|
Requires-Dist: linkml_renderer ; extra == "renderer"
|
|
40
42
|
Requires-Dist: llm ; extra == "llm"
|
|
@@ -122,6 +124,10 @@ need to have a vector database to run embedding search!
|
|
|
122
124
|
|
|
123
125
|
See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
|
|
124
126
|
|
|
127
|
+
### Use with LLMs
|
|
128
|
+
|
|
129
|
+
TODO - docs
|
|
130
|
+
|
|
125
131
|
### Validation
|
|
126
132
|
|
|
127
133
|
LinkML-Store is backed by [LinkML](https://linkml.io), which allows
|
|
@@ -131,6 +137,33 @@ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.htm
|
|
|
131
137
|
|
|
132
138
|
and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
|
|
133
139
|
|
|
140
|
+
## Web API
|
|
141
|
+
|
|
142
|
+
There is a preliminary API following HATEOAS principles implemented using FastAPI.
|
|
143
|
+
|
|
144
|
+
To start you should first create a config file, e.g. `db/conf.yaml`:
|
|
145
|
+
|
|
146
|
+
Then run:
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
export LINKML_STORE_CONFIG=./db/conf.yaml
|
|
150
|
+
make api
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
The API returns links as well as data objects, it's recommended to use a Chrome plugin for JSON viewing
|
|
154
|
+
for exploring the API. TODO: add docs here.
|
|
155
|
+
|
|
156
|
+
The main endpoints are:
|
|
157
|
+
|
|
158
|
+
* `http://localhost:8000/` - the root of the API
|
|
159
|
+
* `http://localhost:8000/pages/` - browse the API via HTML
|
|
160
|
+
* `http://localhost:8000/docs` - the Swagger UI
|
|
161
|
+
|
|
162
|
+
## Streamlit app
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
make app
|
|
166
|
+
```
|
|
134
167
|
|
|
135
168
|
## Background
|
|
136
169
|
|
|
@@ -69,6 +69,10 @@ need to have a vector database to run embedding search!
|
|
|
69
69
|
|
|
70
70
|
See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
|
|
71
71
|
|
|
72
|
+
### Use with LLMs
|
|
73
|
+
|
|
74
|
+
TODO - docs
|
|
75
|
+
|
|
72
76
|
### Validation
|
|
73
77
|
|
|
74
78
|
LinkML-Store is backed by [LinkML](https://linkml.io), which allows
|
|
@@ -78,6 +82,33 @@ See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.htm
|
|
|
78
82
|
|
|
79
83
|
and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
|
|
80
84
|
|
|
85
|
+
## Web API
|
|
86
|
+
|
|
87
|
+
There is a preliminary API following HATEOAS principles implemented using FastAPI.
|
|
88
|
+
|
|
89
|
+
To start you should first create a config file, e.g. `db/conf.yaml`:
|
|
90
|
+
|
|
91
|
+
Then run:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
export LINKML_STORE_CONFIG=./db/conf.yaml
|
|
95
|
+
make api
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
The API returns links as well as data objects, it's recommended to use a Chrome plugin for JSON viewing
|
|
99
|
+
for exploring the API. TODO: add docs here.
|
|
100
|
+
|
|
101
|
+
The main endpoints are:
|
|
102
|
+
|
|
103
|
+
* `http://localhost:8000/` - the root of the API
|
|
104
|
+
* `http://localhost:8000/pages/` - browse the API via HTML
|
|
105
|
+
* `http://localhost:8000/docs` - the Swagger UI
|
|
106
|
+
|
|
107
|
+
## Streamlit app
|
|
108
|
+
|
|
109
|
+
```
|
|
110
|
+
make app
|
|
111
|
+
```
|
|
81
112
|
|
|
82
113
|
## Background
|
|
83
114
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.1.10"
|
|
3
|
+
version = "0.1.12"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -10,7 +10,7 @@ readme = "README.md"
|
|
|
10
10
|
python = "^3.9, !=3.9.7"
|
|
11
11
|
click = "*"
|
|
12
12
|
pydantic = "^2.0.0"
|
|
13
|
-
linkml-runtime = ">=1.8.
|
|
13
|
+
linkml-runtime = ">=1.8.0"
|
|
14
14
|
streamlit = { version = "^1.32.2", optional = true }
|
|
15
15
|
sqlalchemy = "*"
|
|
16
16
|
duckdb = "^0.10.1"
|
|
@@ -25,9 +25,10 @@ pymongo = { version="*", optional = true }
|
|
|
25
25
|
chromadb = { version="*", optional = true }
|
|
26
26
|
pyarrow = { version="*", optional = true }
|
|
27
27
|
h5py = { version="*", optional = true }
|
|
28
|
-
linkml = { version="*", optional = true }
|
|
28
|
+
linkml = { version=">=1.8.0", optional = true }
|
|
29
29
|
linkml_map = { version="*", optional = true }
|
|
30
30
|
linkml_renderer = { version="*", optional = true }
|
|
31
|
+
frictionless = { version="*", optional = true }
|
|
31
32
|
pandas = ">=2.2.1"
|
|
32
33
|
jinja2 = "^3.1.4"
|
|
33
34
|
jsonlines = "^4.0.0"
|
|
@@ -49,6 +50,7 @@ nbsphinx = "*"
|
|
|
49
50
|
jupyter = "*"
|
|
50
51
|
jupysql = "*"
|
|
51
52
|
papermill = "*"
|
|
53
|
+
nbdime = "*"
|
|
52
54
|
|
|
53
55
|
[tool.poetry.group.tests.dependencies]
|
|
54
56
|
pytest = "^7.4.0"
|
|
@@ -71,6 +73,7 @@ validation = ["linkml"]
|
|
|
71
73
|
map = ["linkml_map"]
|
|
72
74
|
renderer = ["linkml_renderer"]
|
|
73
75
|
fastapi = ["fastapi", "uvicorn"]
|
|
76
|
+
frictionless = ["frictionless"]
|
|
74
77
|
|
|
75
78
|
[tool.poetry.scripts]
|
|
76
79
|
linkml-store = "linkml_store.cli:cli"
|
|
@@ -242,7 +242,7 @@ class Client:
|
|
|
242
242
|
Return all attached databases
|
|
243
243
|
|
|
244
244
|
Examples
|
|
245
|
-
|
|
245
|
+
|
|
246
246
|
>>> client = Client()
|
|
247
247
|
>>> _ = client.attach_database("duckdb", alias="test1")
|
|
248
248
|
>>> _ = client.attach_database("duckdb", alias="test2")
|
|
@@ -268,25 +268,81 @@ class Client:
|
|
|
268
268
|
"""
|
|
269
269
|
Drop a database.
|
|
270
270
|
|
|
271
|
+
Example (in-memory):
|
|
272
|
+
|
|
273
|
+
>>> client = Client()
|
|
274
|
+
>>> db1 = client.attach_database("duckdb", alias="test1")
|
|
275
|
+
>>> db2 = client.attach_database("duckdb", alias="test2")
|
|
276
|
+
>>> len(client.databases)
|
|
277
|
+
2
|
|
278
|
+
>>> client.drop_database("test1")
|
|
279
|
+
>>> len(client.databases)
|
|
280
|
+
1
|
|
281
|
+
|
|
282
|
+
Databases that persist on disk:
|
|
283
|
+
|
|
284
|
+
>>> client = Client()
|
|
285
|
+
>>> path = Path("tmp/test.db")
|
|
286
|
+
>>> path.parent.mkdir(parents=True, exist_ok=True)
|
|
287
|
+
>>> db = client.attach_database(f"duckdb:///{path}", alias="test")
|
|
288
|
+
>>> len(client.databases)
|
|
289
|
+
1
|
|
290
|
+
>>> db.store({"persons": [{"id": "P1", "name": "John"}]})
|
|
291
|
+
>>> db.commit()
|
|
292
|
+
>>> Path("tmp/test.db").exists()
|
|
293
|
+
True
|
|
294
|
+
>>> client.drop_database("test")
|
|
295
|
+
>>> len(client.databases)
|
|
296
|
+
0
|
|
297
|
+
>>> Path("tmp/test.db").exists()
|
|
298
|
+
False
|
|
299
|
+
|
|
300
|
+
Dropping a non-existent database:
|
|
301
|
+
|
|
302
|
+
>>> client = Client()
|
|
303
|
+
>>> client.drop_database("duckdb:///tmp/made-up1", missing_ok=True)
|
|
304
|
+
>>> client.drop_database("duckdb:///tmp/made-up2", missing_ok=False)
|
|
305
|
+
Traceback (most recent call last):
|
|
306
|
+
...
|
|
307
|
+
ValueError: Database duckdb:///tmp/made-up2 not found
|
|
308
|
+
|
|
271
309
|
:param name:
|
|
272
310
|
:param missing_ok:
|
|
273
311
|
:return:
|
|
274
312
|
"""
|
|
275
|
-
if
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
313
|
+
if self._databases:
|
|
314
|
+
if name in self._databases:
|
|
315
|
+
db = self._databases[name]
|
|
316
|
+
db.drop(**kwargs)
|
|
317
|
+
del self._databases[name]
|
|
318
|
+
else:
|
|
319
|
+
if not missing_ok:
|
|
320
|
+
raise ValueError(f"Database {name} not found")
|
|
279
321
|
else:
|
|
280
|
-
|
|
281
|
-
|
|
322
|
+
db = self.get_database(name, create_if_not_exists=True)
|
|
323
|
+
db.drop(**kwargs)
|
|
282
324
|
|
|
283
325
|
def drop_all_databases(self, **kwargs):
|
|
284
326
|
"""
|
|
285
327
|
Drop all databases.
|
|
286
328
|
|
|
329
|
+
Example (in-memory):
|
|
330
|
+
|
|
331
|
+
>>> client = Client()
|
|
332
|
+
>>> db1 = client.attach_database("duckdb", alias="test1")
|
|
333
|
+
>>> assert "test1" in client.databases
|
|
334
|
+
>>> db2 = client.attach_database("duckdb", alias="test2")
|
|
335
|
+
>>> assert "test2" in client.databases
|
|
336
|
+
>>> client.drop_all_databases()
|
|
337
|
+
>>> len(client.databases)
|
|
338
|
+
0
|
|
339
|
+
|
|
340
|
+
|
|
287
341
|
:param missing_ok:
|
|
288
342
|
:return:
|
|
289
343
|
"""
|
|
344
|
+
if not self._databases:
|
|
345
|
+
return
|
|
290
346
|
for name in list(self._databases.keys()):
|
|
291
347
|
self.drop_database(name, missing_ok=False, **kwargs)
|
|
292
348
|
self._databases = {}
|
|
@@ -14,7 +14,7 @@ from pydantic import BaseModel
|
|
|
14
14
|
|
|
15
15
|
from linkml_store.api.types import DatabaseType
|
|
16
16
|
from linkml_store.index import get_indexer
|
|
17
|
-
from linkml_store.utils.format_utils import load_objects
|
|
17
|
+
from linkml_store.utils.format_utils import load_objects, load_objects_from_url
|
|
18
18
|
from linkml_store.utils.object_utils import clean_empties
|
|
19
19
|
from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
|
|
20
20
|
|
|
@@ -61,6 +61,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
61
61
|
# name: str
|
|
62
62
|
parent: Optional[DatabaseType] = None
|
|
63
63
|
_indexers: Optional[Dict[str, Indexer]] = None
|
|
64
|
+
_initialized: Optional[bool] = None
|
|
64
65
|
# hidden: Optional[bool] = False
|
|
65
66
|
|
|
66
67
|
metadata: Optional[CollectionConfig] = None
|
|
@@ -73,7 +74,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
73
74
|
if metadata:
|
|
74
75
|
self.metadata = metadata
|
|
75
76
|
else:
|
|
76
|
-
self.metadata = CollectionConfig(
|
|
77
|
+
self.metadata = CollectionConfig(type=name, **kwargs)
|
|
77
78
|
if not self.metadata.alias:
|
|
78
79
|
self.metadata.alias = name
|
|
79
80
|
if not self.metadata.type:
|
|
@@ -81,17 +82,6 @@ class Collection(Generic[DatabaseType]):
|
|
|
81
82
|
# if name is not None and self.metadata.name is not None and name != self.metadata.name:
|
|
82
83
|
# raise ValueError(f"Name mismatch: {name} != {self.metadata.name}")
|
|
83
84
|
|
|
84
|
-
@property
|
|
85
|
-
def name(self) -> str:
|
|
86
|
-
"""
|
|
87
|
-
Return the name of the collection.
|
|
88
|
-
|
|
89
|
-
TODO: deprecate in favor of Type
|
|
90
|
-
|
|
91
|
-
:return: name of the collection
|
|
92
|
-
"""
|
|
93
|
-
return self.metadata.name
|
|
94
|
-
|
|
95
85
|
@property
|
|
96
86
|
def hidden(self) -> bool:
|
|
97
87
|
"""
|
|
@@ -118,12 +108,18 @@ class Collection(Generic[DatabaseType]):
|
|
|
118
108
|
>>> collection.target_class_name
|
|
119
109
|
'Person'
|
|
120
110
|
|
|
111
|
+
>>> collection = db.create_collection("Organization")
|
|
112
|
+
>>> collection.target_class_name
|
|
113
|
+
'Organization'
|
|
114
|
+
>>> collection.alias
|
|
115
|
+
'Organization'
|
|
116
|
+
|
|
121
117
|
:return: name of the class which members of this collection instantiate
|
|
122
118
|
"""
|
|
123
119
|
# TODO: this is a shim layer until we can normalize on this
|
|
124
120
|
if self.metadata.type:
|
|
125
121
|
return self.metadata.type
|
|
126
|
-
return self.
|
|
122
|
+
return self.alias
|
|
127
123
|
|
|
128
124
|
@property
|
|
129
125
|
def alias(self):
|
|
@@ -161,10 +157,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
161
157
|
:return:
|
|
162
158
|
"""
|
|
163
159
|
# TODO: this is a shim layer until we can normalize on this
|
|
164
|
-
# TODO: this is a shim layer until we can normalize on this
|
|
165
160
|
if self.metadata.alias:
|
|
166
161
|
return self.metadata.alias
|
|
167
|
-
return self.
|
|
162
|
+
return self.target_class_name
|
|
168
163
|
|
|
169
164
|
def replace(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
170
165
|
"""
|
|
@@ -201,7 +196,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
201
196
|
"""
|
|
202
197
|
raise NotImplementedError
|
|
203
198
|
|
|
199
|
+
def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
|
|
200
|
+
logger.info(f"Pre-query hook (state: {self._initialized}; Q= {query}")
|
|
201
|
+
if not self._initialized:
|
|
202
|
+
self._materialize_derivations()
|
|
203
|
+
self._initialized = True
|
|
204
|
+
|
|
204
205
|
def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
|
|
206
|
+
self._initialized = True
|
|
205
207
|
patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
|
|
206
208
|
self._broadcast(patches, **kwargs)
|
|
207
209
|
|
|
@@ -305,6 +307,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
305
307
|
:param kwargs:
|
|
306
308
|
:return:
|
|
307
309
|
"""
|
|
310
|
+
self._pre_query_hook()
|
|
308
311
|
return self.parent.query(query, **kwargs)
|
|
309
312
|
|
|
310
313
|
def query_facets(
|
|
@@ -340,11 +343,13 @@ class Collection(Generic[DatabaseType]):
|
|
|
340
343
|
:param kwargs:
|
|
341
344
|
:return:
|
|
342
345
|
"""
|
|
343
|
-
# TODO
|
|
344
346
|
id_field = self.identifier_attribute_name
|
|
345
347
|
if not id_field:
|
|
346
348
|
raise ValueError(f"No identifier for {self.name}")
|
|
347
|
-
|
|
349
|
+
if len(ids) == 1:
|
|
350
|
+
return self.find({id_field: ids[0]})
|
|
351
|
+
else:
|
|
352
|
+
return self.find({id_field: {"$in": ids}})
|
|
348
353
|
|
|
349
354
|
def get_one(self, id: IDENTIFIER, **kwargs) -> Optional[OBJECT]:
|
|
350
355
|
"""
|
|
@@ -399,9 +404,10 @@ class Collection(Generic[DatabaseType]):
|
|
|
399
404
|
:return:
|
|
400
405
|
"""
|
|
401
406
|
query = self._create_query(where_clause=where)
|
|
407
|
+
self._pre_query_hook(query)
|
|
402
408
|
return self.query(query, **kwargs)
|
|
403
409
|
|
|
404
|
-
def find_iter(self, where: Optional[Any] = None, **kwargs) -> Iterator[OBJECT]:
|
|
410
|
+
def find_iter(self, where: Optional[Any] = None, page_size=100, **kwargs) -> Iterator[OBJECT]:
|
|
405
411
|
"""
|
|
406
412
|
Find objects in the collection using a where query.
|
|
407
413
|
|
|
@@ -409,9 +415,22 @@ class Collection(Generic[DatabaseType]):
|
|
|
409
415
|
:param kwargs:
|
|
410
416
|
:return:
|
|
411
417
|
"""
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
418
|
+
total_rows = None
|
|
419
|
+
offset = 0
|
|
420
|
+
if page_size < 1:
|
|
421
|
+
raise ValueError(f"Invalid page size: {page_size}")
|
|
422
|
+
while True:
|
|
423
|
+
qr = self.find(where=where, offset=offset, limit=page_size, **kwargs)
|
|
424
|
+
if total_rows is None:
|
|
425
|
+
total_rows = qr.num_rows
|
|
426
|
+
if not qr.rows:
|
|
427
|
+
return
|
|
428
|
+
for row in qr.rows:
|
|
429
|
+
yield row
|
|
430
|
+
offset += page_size
|
|
431
|
+
if offset >= total_rows:
|
|
432
|
+
break
|
|
433
|
+
return
|
|
415
434
|
|
|
416
435
|
def search(
|
|
417
436
|
self,
|
|
@@ -454,6 +473,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
454
473
|
:param kwargs:
|
|
455
474
|
:return:
|
|
456
475
|
"""
|
|
476
|
+
self._pre_query_hook()
|
|
457
477
|
if index_name is None:
|
|
458
478
|
if len(self.indexers) == 1:
|
|
459
479
|
index_name = list(self.indexers.keys())[0]
|
|
@@ -494,10 +514,101 @@ class Collection(Generic[DatabaseType]):
|
|
|
494
514
|
raise ValueError(f"Collection has no alias: {self} // {self.metadata}")
|
|
495
515
|
return self.alias.startswith("internal__")
|
|
496
516
|
|
|
497
|
-
def
|
|
498
|
-
|
|
517
|
+
def exists(self) -> Optional[bool]:
|
|
518
|
+
"""
|
|
519
|
+
Check if the collection exists.
|
|
520
|
+
|
|
521
|
+
:return:
|
|
522
|
+
"""
|
|
523
|
+
cd = self.class_definition()
|
|
524
|
+
return cd is not None and cd.attributes
|
|
525
|
+
|
|
526
|
+
def load_from_source(self, load_if_exists=False):
|
|
527
|
+
"""
|
|
528
|
+
Load objects from the source location.
|
|
529
|
+
|
|
530
|
+
:param load_if_exists:
|
|
531
|
+
:return:
|
|
532
|
+
"""
|
|
533
|
+
if not load_if_exists and self.exists():
|
|
534
|
+
return
|
|
535
|
+
metadata = self.metadata
|
|
536
|
+
if metadata.source:
|
|
537
|
+
source = metadata.source
|
|
538
|
+
kwargs = source.arguments or {}
|
|
539
|
+
if source.local_path:
|
|
540
|
+
objects = load_objects(
|
|
541
|
+
metadata.source.local_path,
|
|
542
|
+
format=source.format,
|
|
543
|
+
expected_type=source.expected_type,
|
|
544
|
+
compression=source.compression,
|
|
545
|
+
**kwargs,
|
|
546
|
+
)
|
|
547
|
+
elif metadata.source.url:
|
|
548
|
+
objects = load_objects_from_url(
|
|
549
|
+
metadata.source.url,
|
|
550
|
+
format=source.format,
|
|
551
|
+
expected_type=source.expected_type,
|
|
552
|
+
compression=source.compression,
|
|
553
|
+
**kwargs,
|
|
554
|
+
)
|
|
499
555
|
self.insert(objects)
|
|
500
556
|
|
|
557
|
+
def _check_if_initialized(self) -> bool:
|
|
558
|
+
return self._initialized
|
|
559
|
+
|
|
560
|
+
def _materialize_derivations(self, **kwargs):
|
|
561
|
+
metadata = self.metadata
|
|
562
|
+
if not metadata.derived_from:
|
|
563
|
+
logger.info(f"No metadata for {self.alias}; no derivations")
|
|
564
|
+
return
|
|
565
|
+
if self._check_if_initialized():
|
|
566
|
+
logger.info(f"Already initialized {self.alias}; no derivations")
|
|
567
|
+
return
|
|
568
|
+
parent_db = self.parent
|
|
569
|
+
client = parent_db.parent
|
|
570
|
+
# cd = self.class_definition()
|
|
571
|
+
for derivation in metadata.derived_from:
|
|
572
|
+
# TODO: optimize this; utilize underlying engine
|
|
573
|
+
logger.info(f"Deriving from {derivation}")
|
|
574
|
+
if derivation.database:
|
|
575
|
+
db = client.get_database(derivation.database)
|
|
576
|
+
else:
|
|
577
|
+
db = parent_db
|
|
578
|
+
if derivation.collection:
|
|
579
|
+
coll = db.get_collection(derivation.collection)
|
|
580
|
+
else:
|
|
581
|
+
coll = self
|
|
582
|
+
coll.class_definition()
|
|
583
|
+
source_obj_iter = coll.find_iter(derivation.where or {})
|
|
584
|
+
mappings = derivation.mappings
|
|
585
|
+
if not mappings:
|
|
586
|
+
raise ValueError(f"No mappings for {self.name}")
|
|
587
|
+
target_class_name = self.target_class_name
|
|
588
|
+
from linkml_map.session import Session
|
|
589
|
+
|
|
590
|
+
session = Session()
|
|
591
|
+
session.set_source_schema(db.schema_view.schema)
|
|
592
|
+
session.set_object_transformer(
|
|
593
|
+
{
|
|
594
|
+
"class_derivations": {
|
|
595
|
+
target_class_name: {
|
|
596
|
+
"populated_from": coll.target_class_name,
|
|
597
|
+
"slot_derivations": mappings,
|
|
598
|
+
},
|
|
599
|
+
}
|
|
600
|
+
},
|
|
601
|
+
)
|
|
602
|
+
logger.debug(f"Session Spec: {session.object_transformer}")
|
|
603
|
+
tr_objs = []
|
|
604
|
+
for source_obj in source_obj_iter:
|
|
605
|
+
tr_obj = session.transform(source_obj, source_type=coll.target_class_name)
|
|
606
|
+
tr_objs.append(tr_obj)
|
|
607
|
+
if not tr_objs:
|
|
608
|
+
raise ValueError(f"No objects derived from {coll.name}")
|
|
609
|
+
self.insert(tr_objs)
|
|
610
|
+
self.commit()
|
|
611
|
+
|
|
501
612
|
def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
|
|
502
613
|
"""
|
|
503
614
|
Attach an index to the collection.
|
|
@@ -572,7 +683,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
572
683
|
:param indexer:
|
|
573
684
|
:return:
|
|
574
685
|
"""
|
|
575
|
-
return f"internal__index__{self.
|
|
686
|
+
return f"internal__index__{self.alias}__{index_name}"
|
|
576
687
|
|
|
577
688
|
def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
|
|
578
689
|
"""
|
|
@@ -638,11 +749,15 @@ class Collection(Generic[DatabaseType]):
|
|
|
638
749
|
"""
|
|
639
750
|
Return the class definition for the collection.
|
|
640
751
|
|
|
752
|
+
If no schema has been explicitly set, and the native database does not
|
|
753
|
+
have a schema, then a schema will be induced from the objects in the collection.
|
|
754
|
+
|
|
641
755
|
:return:
|
|
642
756
|
"""
|
|
643
757
|
sv: SchemaView = self.parent.schema_view
|
|
644
758
|
if sv:
|
|
645
759
|
cls = sv.get_class(self.target_class_name)
|
|
760
|
+
# cls = sv.schema.classes[self.target_class_name]
|
|
646
761
|
if cls and not cls.attributes:
|
|
647
762
|
if not sv.class_induced_slots(cls.name):
|
|
648
763
|
for att in self._induce_attributes():
|
|
@@ -722,7 +837,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
722
837
|
else:
|
|
723
838
|
return None
|
|
724
839
|
|
|
725
|
-
def induce_class_definition_from_objects(
|
|
840
|
+
def induce_class_definition_from_objects(
|
|
841
|
+
self, objs: List[OBJECT], max_sample_size: Optional[int] = None
|
|
842
|
+
) -> ClassDefinition:
|
|
726
843
|
"""
|
|
727
844
|
Induce a class definition from a list of objects.
|
|
728
845
|
|
|
@@ -733,6 +850,9 @@ class Collection(Generic[DatabaseType]):
|
|
|
733
850
|
:param max_sample_size:
|
|
734
851
|
:return:
|
|
735
852
|
"""
|
|
853
|
+
# TODO: use schemaview
|
|
854
|
+
if max_sample_size is None:
|
|
855
|
+
max_sample_size = 10
|
|
736
856
|
if not self.target_class_name:
|
|
737
857
|
raise ValueError(f"No target_class_name for {self.alias}")
|
|
738
858
|
cd = ClassDefinition(self.target_class_name)
|
|
@@ -760,7 +880,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
760
880
|
exact_dimensions_list.append(v.shape)
|
|
761
881
|
break
|
|
762
882
|
if isinstance(v, list):
|
|
763
|
-
v = v[0]
|
|
883
|
+
v = v[0] if v else None
|
|
764
884
|
multivalueds.append(True)
|
|
765
885
|
elif isinstance(v, dict):
|
|
766
886
|
v = list(v.values())[0]
|
|
@@ -795,6 +915,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
795
915
|
for other_rng in rngs:
|
|
796
916
|
if rng != other_rng:
|
|
797
917
|
raise ValueError(f"Conflict: {rng} != {other_rng} for {vs}")
|
|
918
|
+
logger.debug(f"Inducing {k} as {rng} {multivalued} {inlined}")
|
|
798
919
|
cd.attributes[k] = SlotDefinition(k, range=rng, multivalued=multivalued, inlined=inlined)
|
|
799
920
|
if exact_dimensions_list:
|
|
800
921
|
array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
|
|
@@ -828,7 +949,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
828
949
|
"""
|
|
829
950
|
Apply a patch to the collection.
|
|
830
951
|
|
|
831
|
-
Patches conform to the JSON Patch format
|
|
952
|
+
Patches conform to the JSON Patch format.
|
|
832
953
|
|
|
833
954
|
:param patches:
|
|
834
955
|
:param kwargs:
|
|
@@ -841,11 +962,11 @@ class Collection(Generic[DatabaseType]):
|
|
|
841
962
|
new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
|
|
842
963
|
self.replace(new_objs)
|
|
843
964
|
|
|
844
|
-
def diff(self, other: "Collection", **kwargs):
|
|
965
|
+
def diff(self, other: "Collection", **kwargs) -> List[PatchDict]:
|
|
845
966
|
"""
|
|
846
967
|
Diff two collections.
|
|
847
968
|
|
|
848
|
-
:param other:
|
|
969
|
+
:param other: The collection to diff against
|
|
849
970
|
:param kwargs:
|
|
850
971
|
:return:
|
|
851
972
|
"""
|
|
@@ -872,8 +993,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
872
993
|
if not cd:
|
|
873
994
|
raise ValueError(f"Cannot find class definition for {self.target_class_name}")
|
|
874
995
|
class_name = cd.name
|
|
875
|
-
|
|
876
|
-
for obj in result.rows:
|
|
996
|
+
for obj in self.find_iter(**kwargs):
|
|
877
997
|
obj = clean_empties(obj)
|
|
878
998
|
yield from validator.iter_results(obj, class_name)
|
|
879
999
|
|