linkml-store 0.2.5__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {linkml_store-0.2.5 → linkml_store-0.2.9}/PKG-INFO +4 -3
- {linkml_store-0.2.5 → linkml_store-0.2.9}/pyproject.toml +4 -2
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/client.py +9 -6
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/collection.py +118 -5
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/database.py +45 -14
- linkml_store-0.2.9/src/linkml_store/api/stores/duckdb/duckdb_collection.py +337 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/duckdb/duckdb_database.py +52 -19
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/filesystem/__init__.py +1 -1
- linkml_store-0.2.9/src/linkml_store/api/stores/mongodb/mongodb_collection.py +365 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/mongodb/mongodb_database.py +8 -3
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/solr/solr_collection.py +7 -1
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/cli.py +202 -21
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/implementations/llm_indexer.py +14 -6
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/indexer.py +7 -4
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/llm_inference_engine.py +13 -9
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/rag_inference_engine.py +13 -10
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +7 -1
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/inference_config.py +1 -0
- linkml_store-0.2.9/src/linkml_store/utils/dat_parser.py +95 -0
- linkml_store-0.2.9/src/linkml_store/utils/enrichment_analyzer.py +217 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/format_utils.py +183 -3
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/llm_utils.py +3 -1
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/pandas_utils.py +1 -1
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/sql_utils.py +7 -1
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/vector_utils.py +4 -11
- linkml_store-0.2.5/src/linkml_store/api/stores/duckdb/duckdb_collection.py +0 -169
- linkml_store-0.2.5/src/linkml_store/api/stores/mongodb/mongodb_collection.py +0 -179
- {linkml_store-0.2.5 → linkml_store-0.2.9}/LICENSE +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/README.md +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/config.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/neo4j/neo4j_database.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/graphs/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/graphs/graph_map.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/evaluation.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/inference_engine.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/inference/inference_engine_registry.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/file_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/mongodb_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/neo4j_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/object_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/sklearn_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/utils/stats_utils.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.2.5 → linkml_store-0.2.9}/src/linkml_store/webapi/main.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: linkml-store
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.9
|
|
4
4
|
Summary: linkml-store
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Author 1
|
|
@@ -24,6 +24,7 @@ Provides-Extra: map
|
|
|
24
24
|
Provides-Extra: mongodb
|
|
25
25
|
Provides-Extra: neo4j
|
|
26
26
|
Provides-Extra: pyarrow
|
|
27
|
+
Provides-Extra: rdf
|
|
27
28
|
Provides-Extra: renderer
|
|
28
29
|
Provides-Extra: scipy
|
|
29
30
|
Provides-Extra: tests
|
|
@@ -34,12 +35,12 @@ Requires-Dist: duckdb (>=0.10.1)
|
|
|
34
35
|
Requires-Dist: duckdb-engine (>=0.11.2)
|
|
35
36
|
Requires-Dist: fastapi ; extra == "fastapi"
|
|
36
37
|
Requires-Dist: frictionless ; extra == "frictionless"
|
|
37
|
-
Requires-Dist: gcsfs
|
|
38
38
|
Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
|
|
39
39
|
Requires-Dist: h5py ; extra == "h5py"
|
|
40
40
|
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
41
41
|
Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
|
|
42
42
|
Requires-Dist: jsonpatch (>=1.33)
|
|
43
|
+
Requires-Dist: lightrdf ; extra == "rdf"
|
|
43
44
|
Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
|
|
44
45
|
Requires-Dist: linkml-runtime (>=1.8.0)
|
|
45
46
|
Requires-Dist: linkml_map ; extra == "map"
|
|
@@ -54,7 +55,7 @@ Requires-Dist: plotly ; extra == "analytics"
|
|
|
54
55
|
Requires-Dist: py2neo ; extra == "neo4j"
|
|
55
56
|
Requires-Dist: pyarrow ; extra == "pyarrow"
|
|
56
57
|
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
|
57
|
-
Requires-Dist: pymongo ; extra == "mongodb"
|
|
58
|
+
Requires-Dist: pymongo (>=4.11,<5.0) ; extra == "mongodb"
|
|
58
59
|
Requires-Dist: pystow (>=0.5.4,<0.6.0)
|
|
59
60
|
Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
|
|
60
61
|
Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.2.5"
|
|
3
|
+
version = "0.2.9"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -23,8 +23,9 @@ pystow = "^0.5.4"
|
|
|
23
23
|
black = { version=">=24.0.0", optional = true }
|
|
24
24
|
ruff = { version=">=0.6.2", optional = true }
|
|
25
25
|
llm = { version="*", optional = true }
|
|
26
|
+
lightrdf = { version="*", optional = true }
|
|
26
27
|
tiktoken = { version="*", optional = true }
|
|
27
|
-
pymongo =
|
|
28
|
+
pymongo = "^4.11"
|
|
28
29
|
neo4j = { version="*", optional = true }
|
|
29
30
|
py2neo = { version="*", optional = true }
|
|
30
31
|
networkx = { version="*", optional = true }
|
|
@@ -91,6 +92,7 @@ renderer = ["linkml_renderer"]
|
|
|
91
92
|
fastapi = ["fastapi", "uvicorn"]
|
|
92
93
|
frictionless = ["frictionless"]
|
|
93
94
|
scipy = ["scipy", "scikit-learn"]
|
|
95
|
+
rdf = ["lightrdf"]
|
|
94
96
|
#ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
|
|
95
97
|
bigquery = ["google-cloud-bigquery"]
|
|
96
98
|
all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "bigquery"]
|
|
@@ -12,9 +12,9 @@ from linkml_store.api.config import ClientConfig
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
HANDLE_MAP = {
|
|
17
16
|
"duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
|
|
17
|
+
"sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
|
|
18
18
|
"solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
|
|
19
19
|
"mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
|
|
20
20
|
"chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
|
|
@@ -24,6 +24,8 @@ HANDLE_MAP = {
|
|
|
24
24
|
|
|
25
25
|
SUFFIX_MAP = {
|
|
26
26
|
"ddb": "duckdb:///{path}",
|
|
27
|
+
"duckdb": "duckdb:///{path}",
|
|
28
|
+
"db": "duckdb:///{path}",
|
|
27
29
|
}
|
|
28
30
|
|
|
29
31
|
|
|
@@ -204,9 +206,10 @@ class Client:
|
|
|
204
206
|
if ":" not in handle:
|
|
205
207
|
if alias is None:
|
|
206
208
|
alias = handle
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
209
|
+
if "." in handle:
|
|
210
|
+
suffix = handle.split(".")[-1]
|
|
211
|
+
if suffix in SUFFIX_MAP:
|
|
212
|
+
handle = SUFFIX_MAP[suffix].format(path=handle)
|
|
210
213
|
if ":" not in handle:
|
|
211
214
|
scheme = handle
|
|
212
215
|
handle = None
|
|
@@ -216,14 +219,14 @@ class Client:
|
|
|
216
219
|
scheme, _ = handle.split(":", 1)
|
|
217
220
|
if scheme not in HANDLE_MAP:
|
|
218
221
|
raise ValueError(f"Unknown scheme: {scheme}")
|
|
219
|
-
module_path, class_name = HANDLE_MAP[scheme].rsplit(
|
|
222
|
+
module_path, class_name = HANDLE_MAP[scheme].rsplit(".", 1)
|
|
220
223
|
try:
|
|
221
224
|
module = importlib.import_module(module_path)
|
|
222
225
|
cls = getattr(module, class_name)
|
|
223
226
|
except ImportError as e:
|
|
224
227
|
raise ImportError(f"Failed to import {scheme} database. Make sure the correct extras are installed: {e}")
|
|
225
228
|
|
|
226
|
-
#cls = HANDLE_MAP[scheme]
|
|
229
|
+
# cls = HANDLE_MAP[scheme]
|
|
227
230
|
db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
|
|
228
231
|
if schema_view:
|
|
229
232
|
db.set_schema_view(schema_view)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""A structure for representing collections of similar objects."""
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
|
+
import json
|
|
4
5
|
import logging
|
|
5
6
|
from collections import defaultdict
|
|
6
7
|
from pathlib import Path
|
|
@@ -210,8 +211,62 @@ class Collection(Generic[DatabaseType]):
|
|
|
210
211
|
"""
|
|
211
212
|
raise NotImplementedError
|
|
212
213
|
|
|
214
|
+
def index(
|
|
215
|
+
self,
|
|
216
|
+
objs: Union[OBJECT, List[OBJECT]],
|
|
217
|
+
index_name: Optional[str] = None,
|
|
218
|
+
replace: bool = False,
|
|
219
|
+
unique: bool = False,
|
|
220
|
+
**kwargs,
|
|
221
|
+
) -> None:
|
|
222
|
+
"""
|
|
223
|
+
Index objects in the collection.
|
|
224
|
+
|
|
225
|
+
:param objs:
|
|
226
|
+
:param index_name:
|
|
227
|
+
:param replace: replace the index, or not
|
|
228
|
+
:param unique: boolean used to declare the index unique or not
|
|
229
|
+
:param kwargs:
|
|
230
|
+
:return:
|
|
231
|
+
"""
|
|
232
|
+
raise NotImplementedError
|
|
233
|
+
|
|
234
|
+
def upsert(
|
|
235
|
+
self,
|
|
236
|
+
objs: Union[OBJECT, List[OBJECT]],
|
|
237
|
+
filter_fields: List[str],
|
|
238
|
+
update_fields: Union[List[str], None] = None,
|
|
239
|
+
**kwargs,
|
|
240
|
+
):
|
|
241
|
+
"""
|
|
242
|
+
Add one or more objects to the collection.
|
|
243
|
+
|
|
244
|
+
>>> from linkml_store import Client
|
|
245
|
+
>>> client = Client()
|
|
246
|
+
>>> db = client.attach_database("mongodb", alias="test")
|
|
247
|
+
>>> collection = db.create_collection("Person")
|
|
248
|
+
>>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
|
|
249
|
+
>>> collection.upsert(objs)
|
|
250
|
+
|
|
251
|
+
:param objs:
|
|
252
|
+
:param filter_fields: List of field names to use as the filter for matching existing collections.
|
|
253
|
+
:param update_fields: List of field names to include in the update. If None, all fields are updated.
|
|
254
|
+
:param kwargs:
|
|
255
|
+
|
|
256
|
+
:return:
|
|
257
|
+
"""
|
|
258
|
+
raise NotImplementedError
|
|
259
|
+
|
|
213
260
|
def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
|
|
214
|
-
|
|
261
|
+
"""
|
|
262
|
+
Pre-query hook.
|
|
263
|
+
|
|
264
|
+
This is called before a query is executed. It is used to materialize derivations and indexes.
|
|
265
|
+
:param query:
|
|
266
|
+
:param kwargs:
|
|
267
|
+
:return:
|
|
268
|
+
"""
|
|
269
|
+
logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}") # if logging.info, this is very noisy.
|
|
215
270
|
if not self._initialized:
|
|
216
271
|
self._materialize_derivations()
|
|
217
272
|
self._initialized = True
|
|
@@ -402,7 +457,12 @@ class Collection(Generic[DatabaseType]):
|
|
|
402
457
|
return qr.rows[0]
|
|
403
458
|
return None
|
|
404
459
|
|
|
405
|
-
def find(self, where: Optional[Any] = None, **kwargs) -> QueryResult:
|
|
460
|
+
def find(
|
|
461
|
+
self,
|
|
462
|
+
where: Optional[Any] = None,
|
|
463
|
+
select_cols: Optional[List[str]] = None,
|
|
464
|
+
**kwargs,
|
|
465
|
+
) -> QueryResult:
|
|
406
466
|
"""
|
|
407
467
|
Find objects in the collection using a where query.
|
|
408
468
|
|
|
@@ -432,10 +492,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
432
492
|
|
|
433
493
|
|
|
434
494
|
:param where:
|
|
495
|
+
:param select_cols:
|
|
435
496
|
:param kwargs:
|
|
436
497
|
:return:
|
|
437
498
|
"""
|
|
438
|
-
query = self._create_query(where_clause=where)
|
|
499
|
+
query = self._create_query(
|
|
500
|
+
where_clause=where,
|
|
501
|
+
select_cols=select_cols,
|
|
502
|
+
)
|
|
439
503
|
self._pre_query_hook(query)
|
|
440
504
|
return self.query(query, **kwargs)
|
|
441
505
|
|
|
@@ -535,8 +599,16 @@ class Collection(Generic[DatabaseType]):
|
|
|
535
599
|
assert ix_coll.size() > 0
|
|
536
600
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
537
601
|
index_col = ix.index_field
|
|
602
|
+
|
|
538
603
|
# TODO: optimize this for large indexes
|
|
539
|
-
|
|
604
|
+
def row2array(row):
|
|
605
|
+
v = row[index_col]
|
|
606
|
+
if isinstance(v, str):
|
|
607
|
+
# sqlite stores arrays as strings
|
|
608
|
+
v = json.loads(v)
|
|
609
|
+
return np.array(v, dtype=float)
|
|
610
|
+
|
|
611
|
+
vector_pairs = [(row, row2array(row)) for row in qr.rows]
|
|
540
612
|
results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
|
|
541
613
|
for r in results:
|
|
542
614
|
del r[1][index_col]
|
|
@@ -550,6 +622,47 @@ class Collection(Generic[DatabaseType]):
|
|
|
550
622
|
new_qr.rows = [r[1] for r in results]
|
|
551
623
|
return new_qr
|
|
552
624
|
|
|
625
|
+
def group_by(
|
|
626
|
+
self,
|
|
627
|
+
group_by_fields: List[str],
|
|
628
|
+
inlined_field="objects",
|
|
629
|
+
agg_map: Optional[Dict[str, str]] = None,
|
|
630
|
+
where: Optional[Dict] = None,
|
|
631
|
+
**kwargs,
|
|
632
|
+
) -> QueryResult:
|
|
633
|
+
"""
|
|
634
|
+
Group objects in the collection by a column.
|
|
635
|
+
|
|
636
|
+
:param group_by:
|
|
637
|
+
:param where:
|
|
638
|
+
:param kwargs:
|
|
639
|
+
:return:
|
|
640
|
+
"""
|
|
641
|
+
if isinstance(group_by_fields, str):
|
|
642
|
+
group_by_fields = [group_by_fields]
|
|
643
|
+
df = self.find(where=where, limit=-1).rows_dataframe
|
|
644
|
+
|
|
645
|
+
# Handle the case where agg_map is None
|
|
646
|
+
if agg_map is None:
|
|
647
|
+
agg_map = {}
|
|
648
|
+
|
|
649
|
+
pk_fields = agg_map.get("first", []) + group_by_fields
|
|
650
|
+
list_fields = agg_map.get("list", [])
|
|
651
|
+
if not list_fields:
|
|
652
|
+
list_fields = [a for a in df.columns if a not in pk_fields]
|
|
653
|
+
|
|
654
|
+
grouped_objs = defaultdict(list)
|
|
655
|
+
for _, row in df.iterrows():
|
|
656
|
+
pk = tuple(row[pk_fields])
|
|
657
|
+
grouped_objs[pk].append({k: row[k] for k in list_fields})
|
|
658
|
+
results = []
|
|
659
|
+
for pk, objs in grouped_objs.items():
|
|
660
|
+
top_obj = {k: v for k, v in zip(pk_fields, pk)}
|
|
661
|
+
top_obj[inlined_field] = objs
|
|
662
|
+
results.append(top_obj)
|
|
663
|
+
r = QueryResult(num_rows=len(results), rows=results)
|
|
664
|
+
return r
|
|
665
|
+
|
|
553
666
|
@property
|
|
554
667
|
def is_internal(self) -> bool:
|
|
555
668
|
"""
|
|
@@ -1004,7 +1117,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
1004
1117
|
multivalued = any(multivalueds)
|
|
1005
1118
|
inlined = any(inlineds)
|
|
1006
1119
|
if multivalued and False in multivalueds:
|
|
1007
|
-
|
|
1120
|
+
logger.info(f"Mixed list non list: {vs} // inferred= {multivalueds}")
|
|
1008
1121
|
# if not rngs:
|
|
1009
1122
|
# raise AssertionError(f"Empty rngs for {k} = {vs}")
|
|
1010
1123
|
rng = rngs[0] if rngs else None
|
|
@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
276
276
|
|
|
277
277
|
Examples:
|
|
278
278
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
279
|
+
>>> from linkml_store.api.client import Client
|
|
280
|
+
>>> client = Client()
|
|
281
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
282
|
+
>>> collection = db.create_collection("Person", alias="persons")
|
|
283
|
+
>>> collection.alias
|
|
284
|
+
'persons'
|
|
285
|
+
|
|
286
|
+
>>> collection.target_class_name
|
|
287
|
+
'Person'
|
|
287
288
|
|
|
288
289
|
If alias is not provided, it defaults to the name of the type.
|
|
289
290
|
|
|
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
419
420
|
>>> from linkml_store.api.client import Client
|
|
420
421
|
>>> from linkml_store.api.queries import Query
|
|
421
422
|
>>> client = Client()
|
|
422
|
-
>>> db = client.attach_database("duckdb", alias="test")
|
|
423
|
+
>>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
|
|
423
424
|
>>> collection = db.create_collection("Person")
|
|
424
425
|
>>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
|
|
425
426
|
>>> query = Query(from_table="Person", where_clause={"name": "John"})
|
|
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
451
452
|
|
|
452
453
|
>>> from linkml_store.api.client import Client
|
|
453
454
|
>>> client = Client()
|
|
454
|
-
>>> db = client.attach_database("duckdb", alias="test")
|
|
455
|
+
>>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
|
|
455
456
|
>>> collection = db.create_collection("Person", alias="persons")
|
|
456
457
|
>>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
|
|
457
458
|
>>> schema_view = db.schema_view
|
|
@@ -594,7 +595,31 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
594
595
|
sb.add_class(coll.target_class_name)
|
|
595
596
|
return SchemaView(sb.schema)
|
|
596
597
|
|
|
597
|
-
def iter_validate_database(self, **kwargs) -> Iterator["ValidationResult"]:
|
|
598
|
+
def validate_database(self, **kwargs) -> List["ValidationResult"]:
|
|
599
|
+
"""
|
|
600
|
+
Validate the contents of the database.
|
|
601
|
+
|
|
602
|
+
As `iter_validate_database`, but returns a list of validation results.
|
|
603
|
+
|
|
604
|
+
:param kwargs:
|
|
605
|
+
:return:
|
|
606
|
+
"""
|
|
607
|
+
return list(self.iter_validate_database(**kwargs))
|
|
608
|
+
|
|
609
|
+
def validate_database(self, **kwargs) -> List["ValidationResult"]:
|
|
610
|
+
"""
|
|
611
|
+
Validate the contents of the database.
|
|
612
|
+
|
|
613
|
+
As `iter_validate_database`, but returns a list of validation results.
|
|
614
|
+
|
|
615
|
+
:param kwargs:
|
|
616
|
+
:return:
|
|
617
|
+
"""
|
|
618
|
+
return list(self.iter_validate_database(**kwargs))
|
|
619
|
+
|
|
620
|
+
def iter_validate_database(
|
|
621
|
+
self, ensure_referential_integrity: bool = None, **kwargs
|
|
622
|
+
) -> Iterator["ValidationResult"]:
|
|
598
623
|
"""
|
|
599
624
|
Validate the contents of the database.
|
|
600
625
|
|
|
@@ -634,12 +659,14 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
634
659
|
'capital' is a required property
|
|
635
660
|
'continent' is a required proper
|
|
636
661
|
|
|
662
|
+
:param ensure_referential_integrity: ensure referential integrity
|
|
637
663
|
:param kwargs:
|
|
638
664
|
:return: iterator over validation results
|
|
639
665
|
"""
|
|
640
666
|
for collection in self.list_collections():
|
|
641
667
|
yield from collection.iter_validate_collection(**kwargs)
|
|
642
|
-
if self.metadata.ensure_referential_integrity:
|
|
668
|
+
if self.metadata.ensure_referential_integrity or ensure_referential_integrity:
|
|
669
|
+
logger.info(f"Validating referential integrity on {self.alias}")
|
|
643
670
|
yield from self._validate_referential_integrity(**kwargs)
|
|
644
671
|
|
|
645
672
|
def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResult"]:
|
|
@@ -660,7 +687,9 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
660
687
|
induced_slots = sv.class_induced_slots(cd.name)
|
|
661
688
|
slot_map = {s.name: s for s in induced_slots}
|
|
662
689
|
# rmap = {s.name: s.range for s in induced_slots}
|
|
690
|
+
# map slot ranges to a collection where that range is stored
|
|
663
691
|
sr_to_coll = {s.name: cmap.get(s.range, []) for s in induced_slots if s.range}
|
|
692
|
+
logger.debug(f"Validating referential integrity for {collection.target_class_name} // {sr_to_coll}")
|
|
664
693
|
for obj in collection.find_iter():
|
|
665
694
|
for k, v in obj.items():
|
|
666
695
|
if k not in sr_to_coll:
|
|
@@ -721,7 +750,7 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
721
750
|
|
|
722
751
|
>>> from linkml_store.api.client import Client
|
|
723
752
|
>>> client = Client()
|
|
724
|
-
>>> db = client.attach_database("duckdb", alias="test")
|
|
753
|
+
>>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
|
|
725
754
|
>>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
|
|
726
755
|
>>> db.list_collection_names()
|
|
727
756
|
['iris']
|
|
@@ -741,7 +770,9 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
741
770
|
# import into a test instance
|
|
742
771
|
tmp_handle = source_format.value
|
|
743
772
|
client = self.parent
|
|
744
|
-
|
|
773
|
+
tmp_alias = "tmp"
|
|
774
|
+
client.drop_database(tmp_alias, missing_ok=True)
|
|
775
|
+
tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
|
|
745
776
|
# TODO: check for infinite recursion
|
|
746
777
|
tmp_db.import_database(location, source_format=source_format)
|
|
747
778
|
obj = {}
|