linkml-store 0.2.6__tar.gz → 0.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic.
- {linkml_store-0.2.6 → linkml_store-0.2.10}/PKG-INFO +3 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/pyproject.toml +7 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/client.py +2 -3
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/collection.py +63 -8
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/database.py +20 -3
- linkml_store-0.2.10/src/linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/duckdb/duckdb_database.py +5 -5
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/filesystem/__init__.py +1 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +132 -15
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/mongodb/mongodb_database.py +2 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/neo4j/neo4j_database.py +1 -1
- linkml_store-0.2.10/src/linkml_store/api/stores/solr/solr_collection.py +222 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/cli.py +201 -21
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/index/implementations/llm_indexer.py +13 -6
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/index/indexer.py +9 -5
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/implementations/llm_inference_engine.py +15 -13
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/implementations/rag_inference_engine.py +13 -10
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +7 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/inference_config.py +2 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/inference_engine.py +1 -1
- linkml_store-0.2.10/src/linkml_store/plotting/__init__.py +5 -0
- linkml_store-0.2.10/src/linkml_store/plotting/cli.py +172 -0
- linkml_store-0.2.10/src/linkml_store/plotting/heatmap.py +356 -0
- linkml_store-0.2.10/src/linkml_store/utils/dat_parser.py +95 -0
- linkml_store-0.2.10/src/linkml_store/utils/enrichment_analyzer.py +217 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/format_utils.py +124 -3
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/llm_utils.py +4 -2
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/object_utils.py +9 -3
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/pandas_utils.py +1 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/sql_utils.py +1 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/vector_utils.py +3 -10
- linkml_store-0.2.6/src/linkml_store/api/stores/duckdb/duckdb_collection.py +0 -175
- linkml_store-0.2.6/src/linkml_store/api/stores/solr/solr_collection.py +0 -133
- {linkml_store-0.2.6 → linkml_store-0.2.10}/LICENSE +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/README.md +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/config.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/filesystem/filesystem_database.py +1 -1
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/graphs/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/graphs/graph_map.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/index/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/evaluation.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/implementations/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/inference/inference_engine_registry.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/file_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/mongodb_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/neo4j_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/sklearn_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/utils/stats_utils.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/webapi/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/webapi/main.py +0 -0
{linkml_store-0.2.6 → linkml_store-0.2.10}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.6
+Version: 0.2.10
 Summary: linkml-store
 License: MIT
 Author: Author 1
@@ -24,6 +24,7 @@ Provides-Extra: map
 Provides-Extra: mongodb
 Provides-Extra: neo4j
 Provides-Extra: pyarrow
+Provides-Extra: rdf
 Provides-Extra: renderer
 Provides-Extra: scipy
 Provides-Extra: tests
@@ -39,6 +40,7 @@ Requires-Dist: h5py ; extra == "h5py"
 Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
 Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
 Requires-Dist: jsonpatch (>=1.33)
+Requires-Dist: lightrdf ; extra == "rdf"
 Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
 Requires-Dist: linkml-runtime (>=1.8.0)
 Requires-Dist: linkml_map ; extra == "map"
{linkml_store-0.2.6 → linkml_store-0.2.10}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "linkml-store"
-version = "0.2.6"
+version = "0.2.10"
 description = "linkml-store"
 authors = ["Author 1 <author@org.org>"]
 license = "MIT"
@@ -23,6 +23,7 @@ pystow = "^0.5.4"
 black = { version=">=24.0.0", optional = true }
 ruff = { version=">=0.6.2", optional = true }
 llm = { version="*", optional = true }
+lightrdf = { version="*", optional = true }
 tiktoken = { version="*", optional = true }
 pymongo = "^4.11"
 neo4j = { version="*", optional = true }
@@ -66,6 +67,10 @@ jupyter = "*"
 jupysql = "*"
 papermill = "*"
 nbdime = "*"
+codespell = {version = ">=2.3.0"}
+tomli = {version = ">=2.0.1"}
+black = {version = ">=24.0.0"}
+ruff = {version = ">=0.6.2"}
 
 [tool.poetry.group.tests.dependencies]
 pytest = "^7.4.0"
@@ -91,6 +96,7 @@ renderer = ["linkml_renderer"]
 fastapi = ["fastapi", "uvicorn"]
 frictionless = ["frictionless"]
 scipy = ["scipy", "scikit-learn"]
+rdf = ["lightrdf"]
 #ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
 bigquery = ["google-cloud-bigquery"]
 all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "bigquery"]
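The new rdf extra (installed with pip install "linkml-store[rdf]") pulls in lightrdf, presumably backing the RDF additions to format_utils.py in this release. A minimal sketch of lightrdf on its own, not part of the diff; the file name data.nt is illustrative and the Parser API is quoted from lightrdf's documentation to the best of our knowledge:

    import lightrdf

    # stream triples from an N-Triples file (path is illustrative)
    parser = lightrdf.Parser()
    for s, p, o in parser.parse("data.nt", base_iri=None):
        print(s, p, o)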
{linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/client.py
RENAMED
@@ -12,7 +12,6 @@ from linkml_store.api.config import ClientConfig
 logger = logging.getLogger(__name__)
 
 
-
 HANDLE_MAP = {
     "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
     "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
@@ -220,14 +219,14 @@ class Client:
         scheme, _ = handle.split(":", 1)
         if scheme not in HANDLE_MAP:
             raise ValueError(f"Unknown scheme: {scheme}")
-        module_path, class_name = HANDLE_MAP[scheme].rsplit(
+        module_path, class_name = HANDLE_MAP[scheme].rsplit(".", 1)
         try:
             module = importlib.import_module(module_path)
             cls = getattr(module, class_name)
         except ImportError as e:
             raise ImportError(f"Failed to import {scheme} database. Make sure the correct extras are installed: {e}")
 
-        #cls = HANDLE_MAP[scheme]
+        # cls = HANDLE_MAP[scheme]
         db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
         if schema_view:
             db.set_schema_view(schema_view)
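The HANDLE_MAP hunk above is part of lazy backend loading: the scheme prefix of a handle selects a database class, which is imported only on demand so that a missing optional extra fails with a pointed ImportError. A usage sketch, not part of the diff (the alias, handle, and collection name are illustrative):

    from linkml_store.api.client import Client

    client = Client()
    # "duckdb" is looked up in HANDLE_MAP; an unknown scheme raises ValueError,
    # an uninstalled backend raises ImportError naming the missing extras
    db = client.attach_database("duckdb:///:memory:", alias="demo")
    coll = db.create_collection("Person")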
{linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/collection.py
RENAMED
@@ -211,7 +211,7 @@ class Collection(Generic[DatabaseType]):
         """
         raise NotImplementedError
 
-    def index
+    def index(
         self,
         objs: Union[OBJECT, List[OBJECT]],
         index_name: Optional[str] = None,
@@ -231,10 +231,13 @@ class Collection(Generic[DatabaseType]):
         """
         raise NotImplementedError
 
-    def upsert(
-
-
-
+    def upsert(
+        self,
+        objs: Union[OBJECT, List[OBJECT]],
+        filter_fields: List[str],
+        update_fields: Union[List[str], None] = None,
+        **kwargs,
+    ):
         """
         Add one or more objects to the collection.
 
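The upsert signature is now explicit: callers name the fields that identify an existing row (filter_fields) and, optionally, the fields to overwrite (update_fields). A usage sketch, not part of the diff (field names are illustrative):

    # match on "id"; update only "age" if a row with id=P1 exists,
    # otherwise insert the whole object
    coll.upsert(
        {"id": "P1", "name": "Alice", "age": 31},
        filter_fields=["id"],
        update_fields=["age"],
    )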
@@ -454,7 +457,12 @@ class Collection(Generic[DatabaseType]):
             return qr.rows[0]
         return None
 
-    def find(
+    def find(
+        self,
+        where: Optional[Any] = None,
+        select_cols: Optional[List[str]] = None,
+        **kwargs,
+    ) -> QueryResult:
         """
         Find objects in the collection using a where query.
 
@@ -484,10 +492,14 @@ class Collection(Generic[DatabaseType]):
 
 
         :param where:
+        :param select_cols:
         :param kwargs:
         :return:
         """
-        query = self._create_query(
+        query = self._create_query(
+            where_clause=where,
+            select_cols=select_cols,
+        )
         self._pre_query_hook(query)
         return self.query(query, **kwargs)
 
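find now accepts select_cols, threaded into _create_query so backends can project a subset of columns instead of returning whole objects. A usage sketch, not part of the diff (field names are illustrative):

    qr = coll.find(
        where={"country": "US"},
        select_cols=["id", "name"],
    )
    for row in qr.rows:
        print(row["id"], row["name"])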
@@ -587,6 +599,7 @@ class Collection(Generic[DatabaseType]):
         assert ix_coll.size() > 0
         qr = ix_coll.find(where=where, limit=-1, **kwargs)
         index_col = ix.index_field
+
         # TODO: optimize this for large indexes
         def row2array(row):
             v = row[index_col]
@@ -594,6 +607,7 @@ class Collection(Generic[DatabaseType]):
             # sqlite stores arrays as strings
             v = json.loads(v)
             return np.array(v, dtype=float)
+
         vector_pairs = [(row, row2array(row)) for row in qr.rows]
         results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
         for r in results:
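These blank-line insertions sit in the vector search path, where each indexed row is decoded into a numpy array (JSON-decoded first for sqlite) and ranked by the indexer, optionally diversified via mmr_relevance_factor. A usage sketch, not part of the diff, assuming this hunk lies in Collection.search and an index has already been attached to the collection:

    qr = coll.search(
        "european countries with coastlines",
        limit=5,
        mmr_relevance_factor=0.8,
    )
    for row in qr.rows:
        print(row)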
@@ -608,6 +622,47 @@ class Collection(Generic[DatabaseType]):
         new_qr.rows = [r[1] for r in results]
         return new_qr
 
+    def group_by(
+        self,
+        group_by_fields: List[str],
+        inlined_field="objects",
+        agg_map: Optional[Dict[str, str]] = None,
+        where: Optional[Dict] = None,
+        **kwargs,
+    ) -> QueryResult:
+        """
+        Group objects in the collection by a column.
+
+        :param group_by:
+        :param where:
+        :param kwargs:
+        :return:
+        """
+        if isinstance(group_by_fields, str):
+            group_by_fields = [group_by_fields]
+        df = self.find(where=where, limit=-1).rows_dataframe
+
+        # Handle the case where agg_map is None
+        if agg_map is None:
+            agg_map = {}
+
+        pk_fields = agg_map.get("first", []) + group_by_fields
+        list_fields = agg_map.get("list", [])
+        if not list_fields:
+            list_fields = [a for a in df.columns if a not in pk_fields]
+
+        grouped_objs = defaultdict(list)
+        for _, row in df.iterrows():
+            pk = tuple(row[pk_fields])
+            grouped_objs[pk].append({k: row[k] for k in list_fields})
+        results = []
+        for pk, objs in grouped_objs.items():
+            top_obj = {k: v for k, v in zip(pk_fields, pk)}
+            top_obj[inlined_field] = objs
+            results.append(top_obj)
+        r = QueryResult(num_rows=len(results), rows=results)
+        return r
+
     @property
     def is_internal(self) -> bool:
         """
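The new base group_by materializes matching rows into a pandas DataFrame, then nests the non-key columns under inlined_field per distinct key tuple; agg_map's "first" entries join the grouping key and its "list" entries pick the nested columns. A usage sketch, not part of the diff (field names are illustrative):

    qr = coll.group_by(
        ["country"],
        inlined_field="people",
        agg_map={"first": ["country_code"], "list": ["id", "name"]},
    )
    for group in qr.rows:
        print(group["country"], len(group["people"]))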
@@ -1062,7 +1117,7 @@ class Collection(Generic[DatabaseType]):
         multivalued = any(multivalueds)
         inlined = any(inlineds)
         if multivalued and False in multivalueds:
-
+            logger.info(f"Mixed list non list: {vs} // inferred= {multivalueds}")
         # if not rngs:
         #     raise AssertionError(f"Empty rngs for {k} = {vs}")
         rng = rngs[0] if rngs else None
{linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/database.py
RENAMED
@@ -595,11 +595,24 @@ class Database(ABC, Generic[CollectionType]):
             sb.add_class(coll.target_class_name)
         return SchemaView(sb.schema)
 
-    def
+    def validate_database(self, **kwargs) -> List["ValidationResult"]:
         """
         Validate the contents of the database.
 
-
+        As `iter_validate_database`, but returns a list of validation results.
+
+        :param kwargs:
+        :return:
+        """
+        return list(self.iter_validate_database(**kwargs))
+
+    def iter_validate_database(
+        self, ensure_referential_integrity: bool = None, **kwargs
+    ) -> Iterator["ValidationResult"]:
+        """
+        Validate the contents of the database.
+
+        An example, let's create a database with a predefined schema
         from the countries.linkml.yaml file:
 
         >>> from linkml_store.api.client import Client
@@ -635,12 +648,14 @@ class Database(ABC, Generic[CollectionType]):
         'capital' is a required property
         'continent' is a required proper
 
+        :param ensure_referential_integrity: ensure referential integrity
         :param kwargs:
         :return: iterator over validation results
         """
         for collection in self.list_collections():
             yield from collection.iter_validate_collection(**kwargs)
-        if self.metadata.ensure_referential_integrity:
+        if self.metadata.ensure_referential_integrity or ensure_referential_integrity:
+            logger.info(f"Validating referential integrity on {self.alias}")
             yield from self._validate_referential_integrity(**kwargs)
 
     def _validate_referential_integrity(self, **kwargs) -> Iterator["ValidationResult"]:
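validate_database is now a thin list() wrapper over the new iter_validate_database, and the referential-integrity pass can be forced per call instead of only via database metadata. A usage sketch, not part of the diff:

    # stream validation results, forcing the referential-integrity pass
    for result in db.iter_validate_database(ensure_referential_integrity=True):
        print(result)

    # or collect everything eagerly
    all_results = db.validate_database()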
@@ -661,7 +676,9 @@ class Database(ABC, Generic[CollectionType]):
             induced_slots = sv.class_induced_slots(cd.name)
             slot_map = {s.name: s for s in induced_slots}
             # rmap = {s.name: s.range for s in induced_slots}
+            # map slot ranges to a collection where that range is stored
             sr_to_coll = {s.name: cmap.get(s.range, []) for s in induced_slots if s.range}
+            logger.debug(f"Validating referential integrity for {collection.target_class_name} // {sr_to_coll}")
             for obj in collection.find_iter():
                 for k, v in obj.items():
                     if k not in sr_to_coll:
linkml_store-0.2.10/src/linkml_store/api/stores/duckdb/duckdb_collection.py
ADDED
@@ -0,0 +1,339 @@
+import logging
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import sqlalchemy as sqla
+from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
+from sqlalchemy import Column, Table, delete, insert, inspect, text
+from sqlalchemy.sql.ddl import CreateTable
+
+from linkml_store.api import Collection
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+from linkml_store.api.queries import Query, QueryResult
+from linkml_store.api.stores.duckdb.mappings import TMAP
+from linkml_store.utils.sql_utils import facet_count_sql
+
+logger = logging.getLogger(__name__)
+
+
+class DuckDBCollection(Collection):
+    _table_created: bool = None
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+        logger.debug(f"Inserting {len(objs)}")
+        if not isinstance(objs, list):
+            objs = [objs]
+        if not objs:
+            return
+        cd = self.class_definition()
+        if not cd:
+            logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}; will induce")
+            cd = self.induce_class_definition_from_objects(objs)
+        self._create_table(cd)
+        table = self._sqla_table(cd)
+        logger.info(f"Inserting into: {self.alias} // T={table.name}")
+        engine = self.parent.engine
+        col_names = [c.name for c in table.columns]
+        bad_objs = [obj for obj in objs if not isinstance(obj, dict)]
+        if bad_objs:
+            logger.error(f"Bad objects: {bad_objs}")
+        objs = [{k: obj.get(k, None) for k in col_names} for obj in objs]
+        with engine.connect() as conn:
+            with conn.begin():
+                conn.execute(insert(table), objs)
+            conn.commit()
+        self._post_insert_hook(objs)
+
+    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
+        if not isinstance(objs, list):
+            objs = [objs]
+        cd = self.class_definition()
+        if not cd or not cd.attributes:
+            cd = self.induce_class_definition_from_objects(objs)
+        assert cd.attributes
+        table = self._sqla_table(cd)
+        engine = self.parent.engine
+        with engine.connect() as conn:
+            for obj in objs:
+                conditions = [table.c[k] == v for k, v in obj.items() if k in cd.attributes]
+                stmt = delete(table).where(*conditions)
+                stmt = stmt.compile(engine)
+                conn.execute(stmt)
+                conn.commit()
+        self._post_delete_hook()
+        return None
+
+    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
+        logger.info(f"Deleting from {self.target_class_name} where: {where}")
+        if where is None:
+            where = {}
+        cd = self.class_definition()
+        if not cd:
+            logger.info(f"No class definition found for {self.target_class_name}, assuming not prepopulated")
+            return 0
+        table = self._sqla_table(cd)
+        engine = self.parent.engine
+        inspector = inspect(engine)
+        table_exists = table.name in inspector.get_table_names()
+        if not table_exists:
+            logger.info(f"Table {table.name} does not exist, assuming no data")
+            return 0
+        with engine.connect() as conn:
+            conditions = [table.c[k] == v for k, v in where.items()]
+            stmt = delete(table).where(*conditions)
+            stmt = stmt.compile(engine)
+            result = conn.execute(stmt)
+            deleted_rows_count = result.rowcount
+            if deleted_rows_count == 0 and not missing_ok:
+                raise ValueError(f"No rows found for {where}")
+            conn.commit()
+        self._post_delete_hook()
+        return deleted_rows_count if deleted_rows_count > -1 else None
+
+    def query_facets(
+        self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
+    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
+        if facet_limit is None:
+            facet_limit = DEFAULT_FACET_LIMIT
+        results = {}
+        cd = self.class_definition()
+        with self.parent.engine.connect() as conn:
+            if not facet_columns:
+                if not cd:
+                    raise ValueError(f"No class definition found for {self.target_class_name}")
+                facet_columns = list(cd.attributes.keys())
+            for col in facet_columns:
+                logger.debug(f"Faceting on {col}")
+                if isinstance(col, tuple):
+                    sd = SlotDefinition(name="PLACEHOLDER")
+                else:
+                    sd = cd.attributes[col]
+                facet_query = self._create_query(where_clause=where)
+                facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
+                logger.debug(f"Facet query: {facet_query_str}")
+                rows = list(conn.execute(text(facet_query_str)))
+                results[col] = [tuple(row) for row in rows]
+        return results
+
+    def _sqla_table(self, cd: ClassDefinition) -> Table:
+        schema_view = self.parent.schema_view
+        metadata_obj = sqla.MetaData()
+        cols = []
+        for att in schema_view.class_induced_slots(cd.name):
+            typ = TMAP.get(att.range, sqla.String)
+            if att.inlined or att.inlined_as_list:
+                typ = sqla.JSON
+            if att.multivalued:
+                typ = sqla.ARRAY(typ, dimensions=1)
+            if att.array:
+                typ = sqla.ARRAY(typ, dimensions=1)
+            col = Column(att.name, typ)
+            cols.append(col)
+        t = Table(self.alias, metadata_obj, *cols)
+        return t
+
+    def _check_if_initialized(self) -> bool:
+        # if self._initialized:
+        #     return True
+        query = Query(
+            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        )
+        qr = self.parent.query(query)
+        if qr.num_rows > 0:
+            return True
+        return False
+
+    def group_by(
+        self,
+        group_by_fields: List[str],
+        inlined_field="objects",
+        agg_map: Optional[Dict[str, str]] = None,
+        where: Optional[Dict] = None,
+        **kwargs,
+    ) -> QueryResult:
+        """
+        Group objects in the collection by specified fields using SQLAlchemy.
+
+        This implementation leverages DuckDB's SQL capabilities for more efficient grouping.
+
+        :param group_by_fields: List of fields to group by
+        :param inlined_field: Field name to store aggregated objects
+        :param agg_map: Dictionary mapping aggregation types to fields
+        :param where: Filter conditions
+        :param kwargs: Additional arguments
+        :return: Query result containing grouped data
+        """
+        if isinstance(group_by_fields, str):
+            group_by_fields = [group_by_fields]
+
+        cd = self.class_definition()
+        if not cd:
+            logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}")
+            return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
+
+        # Check if the table exists
+        if not self.parent._table_exists(self.alias):
+            logger.debug(f"Table {self.alias} doesn't exist, falling back to parent implementation")
+            return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
+
+        # Get table definition
+        table = self._sqla_table(cd)
+        engine = self.parent.engine
+
+        # Create a SQLAlchemy select statement for groups
+        from sqlalchemy import select
+
+        group_cols = [table.c[field] for field in group_by_fields if field in table.columns.keys()]
+
+        if not group_cols:
+            logger.warning(f"None of the group_by fields {group_by_fields} found in table columns")
+            return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
+
+        stmt = select(*group_cols).distinct()
+
+        # Add where conditions if specified
+        if where:
+            conditions = []
+            for k, v in where.items():
+                if k in table.columns.keys():
+                    # Handle different operator types (dict values for operators)
+                    if isinstance(v, dict):
+                        for op, val in v.items():
+                            if op == "$gt":
+                                conditions.append(table.c[k] > val)
+                            elif op == "$gte":
+                                conditions.append(table.c[k] >= val)
+                            elif op == "$lt":
+                                conditions.append(table.c[k] < val)
+                            elif op == "$lte":
+                                conditions.append(table.c[k] <= val)
+                            elif op == "$ne":
+                                conditions.append(table.c[k] != val)
+                            elif op == "$in":
+                                conditions.append(table.c[k].in_(val))
+                            else:
+                                # Default to equality for unknown operators
+                                logger.warning(f"Unknown operator {op}, using equality")
+                                conditions.append(table.c[k] == val)
+                    else:
+                        # Direct equality comparison
+                        conditions.append(table.c[k] == v)
+
+            if conditions:
+                for condition in conditions:
+                    stmt = stmt.where(condition)
+
+        results = []
+        try:
+            with engine.connect() as conn:
+                # Get all distinct groups
+                group_result = conn.execute(stmt)
+                group_rows = list(group_result)
+
+                # For each group, get all objects
+                for group_row in group_rows:
+                    # Build conditions for this group
+                    group_conditions = []
+                    group_dict = {}
+
+                    for i, field in enumerate(group_by_fields):
+                        if field in table.columns.keys():
+                            value = group_row[i]
+                            group_dict[field] = value
+                            if value is None:
+                                group_conditions.append(table.c[field].is_(None))
+                            else:
+                                group_conditions.append(table.c[field] == value)
+
+                    # Get all rows for this group
+                    row_stmt = select(*table.columns)
+                    for condition in group_conditions:
+                        row_stmt = row_stmt.where(condition)
+
+                    # Add original where conditions
+                    if where:
+                        for k, v in where.items():
+                            if k in table.columns.keys():
+                                # Handle different operator types for the row query as well
+                                if isinstance(v, dict):
+                                    for op, val in v.items():
+                                        if op == "$gt":
+                                            row_stmt = row_stmt.where(table.c[k] > val)
+                                        elif op == "$gte":
+                                            row_stmt = row_stmt.where(table.c[k] >= val)
+                                        elif op == "$lt":
+                                            row_stmt = row_stmt.where(table.c[k] < val)
+                                        elif op == "$lte":
+                                            row_stmt = row_stmt.where(table.c[k] <= val)
+                                        elif op == "$ne":
+                                            row_stmt = row_stmt.where(table.c[k] != val)
+                                        elif op == "$in":
+                                            row_stmt = row_stmt.where(table.c[k].in_(val))
+                                        else:
+                                            # Default to equality for unknown operators
+                                            row_stmt = row_stmt.where(table.c[k] == val)
+                                else:
+                                    # Direct equality comparison
+                                    row_stmt = row_stmt.where(table.c[k] == v)
+
+                    row_result = conn.execute(row_stmt)
+                    rows = list(row_result)
+
+                    # Convert rows to dictionaries
+                    objects = []
+                    for row in rows:
+                        obj = {}
+                        for i, col in enumerate(row._fields):
+                            obj[col] = row[i]
+                        objects.append(obj)
+
+                    # Apply agg_map to filter fields if specified
+                    if agg_map and "list" in agg_map:
+                        list_fields = agg_map["list"]
+                        if list_fields:
+                            objects = [{k: obj.get(k) for k in list_fields if k in obj} for obj in objects]
+
+                    # Create the result object
+                    result_obj = group_dict.copy()
+                    result_obj[inlined_field] = objects
+                    results.append(result_obj)
+
+            return QueryResult(num_rows=len(results), rows=results)
+        except Exception as e:
+            logger.warning(f"Error in DuckDB group_by: {e}")
+            # Fall back to parent implementation
+            return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
+
+    def _create_table(self, cd: ClassDefinition):
+        if self._table_created or self.metadata.is_prepopulated:
+            logger.info(f"Already have table for: {cd.name}")
+            return
+        if self.parent._table_exists(self.alias):
+            logger.info(f"Table already exists for {cd.name}")
+            self._table_created = True
+            self._initialized = True
+            self.metadata.is_prepopulated = True
+            return
+        # query = Query(
+        #     from_table="information_schema.tables",
+        #     where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        # )
+        # qr = self.parent.query(query)
+        # if qr.num_rows > 0:
+        #     logger.info(f"Table already exists for {cd.name}")
+        #     self._table_created = True
+        #     self._initialized = True
+        #     self.metadata.is_prepopulated = True
+        #     return
+        logger.info(f"Creating table for {cd.name}")
+        t = self._sqla_table(cd)
+        ct = CreateTable(t)
+        ddl = str(ct.compile(self.parent.engine))
+        with self.parent.engine.connect() as conn:
+            conn.execute(text(ddl))
+            conn.commit()
+        self._table_created = True
+        self._initialized = True
+        self.metadata.is_prepopulated = True
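Note the Mongo-style operator vocabulary ($gt, $gte, $lt, $lte, $ne, $in) accepted by the new DuckDB group_by; unknown operators degrade to equality with a warning, and any failure falls back to the generic pandas implementation. A usage sketch, not part of the diff (field names are illustrative):

    qr = coll.group_by(
        ["species"],
        where={"mass": {"$gte": 10}, "habitat": {"$in": ["forest", "savanna"]}},
    )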
{linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/duckdb/duckdb_database.py
RENAMED
@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import List, Optional, Union
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
@@ -100,9 +100,9 @@ class DuckDBDatabase(Database):
             meta_query = Query(
                 from_table="sqlite_master",
                 where_clause={
-                    #"type": "table",
+                    # "type": "table",
                     "name": table,
-                }
+                },
             )
         else:
             if table.startswith("information_schema"):
@@ -112,7 +112,7 @@ class DuckDBDatabase(Database):
                 where_clause={
                     "table_type": "BASE TABLE",
                     "table_name": table,
-                }
+                },
             )
 
         qr = self.query(meta_query)
{linkml_store-0.2.6 → linkml_store-0.2.10}/src/linkml_store/api/stores/filesystem/__init__.py
RENAMED
@@ -4,7 +4,7 @@ Adapter for FileSystem wrapper
 Handles have the form:
 
 - ``file:<path>`` for a local file
-
+"""
 
 from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
 from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase