linkml-store 0.1.12__tar.gz → 0.1.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- {linkml_store-0.1.12 → linkml_store-0.1.14}/PKG-INFO +30 -3
- {linkml_store-0.1.12 → linkml_store-0.1.14}/README.md +20 -1
- {linkml_store-0.1.12 → linkml_store-0.1.14}/pyproject.toml +11 -3
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/client.py +37 -8
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/collection.py +81 -9
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/config.py +28 -1
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/database.py +26 -3
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +4 -0
- linkml_store-0.1.14/src/linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
- linkml_store-0.1.14/src/linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/cli.py +140 -13
- linkml_store-0.1.14/src/linkml_store/graphs/graph_map.py +24 -0
- linkml_store-0.1.14/src/linkml_store/inference/__init__.py +13 -0
- linkml_store-0.1.14/src/linkml_store/inference/implementations/__init__.py +0 -0
- linkml_store-0.1.14/src/linkml_store/inference/implementations/rag_inference_engine.py +145 -0
- linkml_store-0.1.14/src/linkml_store/inference/implementations/rule_based_inference_engine.py +158 -0
- linkml_store-0.1.14/src/linkml_store/inference/implementations/sklearn_inference_engine.py +290 -0
- linkml_store-0.1.14/src/linkml_store/inference/inference_config.py +62 -0
- linkml_store-0.1.14/src/linkml_store/inference/inference_engine.py +173 -0
- linkml_store-0.1.14/src/linkml_store/inference/inference_engine_registry.py +74 -0
- linkml_store-0.1.14/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/format_utils.py +21 -90
- linkml_store-0.1.14/src/linkml_store/utils/llm_utils.py +95 -0
- linkml_store-0.1.14/src/linkml_store/utils/neo4j_utils.py +42 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/object_utils.py +3 -1
- linkml_store-0.1.14/src/linkml_store/utils/pandas_utils.py +93 -0
- linkml_store-0.1.14/src/linkml_store/utils/sklearn_utils.py +193 -0
- linkml_store-0.1.14/src/linkml_store/utils/stats_utils.py +53 -0
- linkml_store-0.1.14/src/linkml_store/webapi/__init__.py +0 -0
- linkml_store-0.1.12/src/linkml_store/utils/pandas_utils.py +0 -40
- {linkml_store-0.1.12 → linkml_store-0.1.14}/LICENSE +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/duckdb/duckdb_database.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
- {linkml_store-0.1.12/src/linkml_store/index/implementations → linkml_store-0.1.14/src/linkml_store/api/stores/neo4j}/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.12/src/linkml_store/utils → linkml_store-0.1.14/src/linkml_store/graphs}/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/index/__init__.py +0 -0
- {linkml_store-0.1.12/src/linkml_store/webapi → linkml_store-0.1.14/src/linkml_store/index/implementations}/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/index/indexer.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/file_utils.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/mongodb_utils.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/utils/sql_utils.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.1.12 → linkml_store-0.1.14}/src/linkml_store/webapi/main.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: linkml-store
|
|
3
|
-
Version: 0.1.12
|
|
3
|
+
Version: 0.1.14
|
|
4
4
|
Summary: linkml-store
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Author 1
|
|
@@ -21,14 +21,16 @@ Provides-Extra: h5py
|
|
|
21
21
|
Provides-Extra: llm
|
|
22
22
|
Provides-Extra: map
|
|
23
23
|
Provides-Extra: mongodb
|
|
24
|
+
Provides-Extra: neo4j
|
|
24
25
|
Provides-Extra: pyarrow
|
|
25
26
|
Provides-Extra: renderer
|
|
27
|
+
Provides-Extra: scipy
|
|
26
28
|
Provides-Extra: tests
|
|
27
29
|
Provides-Extra: validation
|
|
28
30
|
Requires-Dist: black (>=24.0.0) ; extra == "tests"
|
|
29
31
|
Requires-Dist: chromadb ; extra == "chromadb"
|
|
30
32
|
Requires-Dist: click
|
|
31
|
-
Requires-Dist: duckdb (>=0.10.1
|
|
33
|
+
Requires-Dist: duckdb (>=0.10.1)
|
|
32
34
|
Requires-Dist: duckdb-engine (>=0.11.2)
|
|
33
35
|
Requires-Dist: fastapi ; extra == "fastapi"
|
|
34
36
|
Requires-Dist: frictionless ; extra == "frictionless"
|
|
@@ -41,15 +43,21 @@ Requires-Dist: linkml_map ; extra == "map"
|
|
|
41
43
|
Requires-Dist: linkml_renderer ; extra == "renderer"
|
|
42
44
|
Requires-Dist: llm ; extra == "llm"
|
|
43
45
|
Requires-Dist: matplotlib ; extra == "analytics"
|
|
46
|
+
Requires-Dist: neo4j ; extra == "neo4j"
|
|
47
|
+
Requires-Dist: networkx ; extra == "neo4j"
|
|
44
48
|
Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
|
|
45
49
|
Requires-Dist: plotly ; extra == "analytics"
|
|
50
|
+
Requires-Dist: py2neo ; extra == "neo4j"
|
|
46
51
|
Requires-Dist: pyarrow ; extra == "pyarrow"
|
|
47
52
|
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
|
48
53
|
Requires-Dist: pymongo ; extra == "mongodb"
|
|
49
54
|
Requires-Dist: pystow (>=0.5.4,<0.6.0)
|
|
55
|
+
Requires-Dist: scikit-learn ; extra == "scipy"
|
|
56
|
+
Requires-Dist: scipy ; extra == "scipy"
|
|
50
57
|
Requires-Dist: seaborn ; extra == "analytics"
|
|
51
58
|
Requires-Dist: sqlalchemy
|
|
52
59
|
Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
|
|
60
|
+
Requires-Dist: tiktoken ; extra == "llm"
|
|
53
61
|
Requires-Dist: uvicorn ; extra == "fastapi"
|
|
54
62
|
Description-Content-Type: text/markdown
|
|
55
63
|
|
|
@@ -57,7 +65,7 @@ Description-Content-Type: text/markdown
|
|
|
57
65
|
|
|
58
66
|
An AI-ready data management and integration platform. LinkML-Store
|
|
59
67
|
provides an abstraction layer over multiple different backends
|
|
60
|
-
(including DuckDB, MongoDB, and local filesystems), allowing for
|
|
68
|
+
(including DuckDB, MongoDB, Neo4j, and local filesystems), allowing for
|
|
61
69
|
common query, index, and storage operations.
|
|
62
70
|
|
|
63
71
|
For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
|
|
@@ -95,6 +103,23 @@ linkml-store -d duckdb:///db/my.db -c persons validate
|
|
|
95
103
|
* API
|
|
96
104
|
* Streamlit applications
|
|
97
105
|
|
|
106
|
+
## The CRUDSI pattern
|
|
107
|
+
|
|
108
|
+
Most database APIs implement the **CRUD** pattern: Create, Read, Update, Delete.
|
|
109
|
+
LinkML-Store adds **Search** and **Inference** to this pattern, making it **CRUDSI**.
|
|
110
|
+
|
|
111
|
+
The notion of "Search" and "Inference" is intended to be flexible and extensible,
|
|
112
|
+
including:
|
|
113
|
+
|
|
114
|
+
* Search
|
|
115
|
+
* Traditional keyword search
|
|
116
|
+
* Search using LLM Vector embeddings (*without* a dedicated vector database)
|
|
117
|
+
* Pluggable specialized search, e.g. genomic sequence (not yet implemented)
|
|
118
|
+
* Inference (encompassing *validation*, *repair*, and inference of missing data)
|
|
119
|
+
* Classic rule-based inference
|
|
120
|
+
* Inference using LLM Retrieval Augmented Generation (RAG)
|
|
121
|
+
* Statistical/ML inference
|
|
122
|
+
|
|
98
123
|
## Features
|
|
99
124
|
|
|
100
125
|
### Multiple Adapters
|
|
@@ -104,6 +129,8 @@ LinkML-Store is designed to work with multiple backends, giving a common abstrac
|
|
|
104
129
|
* [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
|
|
105
130
|
* [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
106
131
|
* [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
|
|
132
|
+
* [Neo4j](https://linkml.io/linkml-store/how-to/Use-Neo4j.html)
|
|
133
|
+
|
|
107
134
|
* Filesystem
|
|
108
135
|
|
|
109
136
|
Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
An AI-ready data management and integration platform. LinkML-Store
|
|
4
4
|
provides an abstraction layer over multiple different backends
|
|
5
|
-
(including DuckDB, MongoDB, and local filesystems), allowing for
|
|
5
|
+
(including DuckDB, MongoDB, Neo4j, and local filesystems), allowing for
|
|
6
6
|
common query, index, and storage operations.
|
|
7
7
|
|
|
8
8
|
For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
|
|
@@ -40,6 +40,23 @@ linkml-store -d duckdb:///db/my.db -c persons validate
|
|
|
40
40
|
* API
|
|
41
41
|
* Streamlit applications
|
|
42
42
|
|
|
43
|
+
## The CRUDSI pattern
|
|
44
|
+
|
|
45
|
+
Most database APIs implement the **CRUD** pattern: Create, Read, Update, Delete.
|
|
46
|
+
LinkML-Store adds **Search** and **Inference** to this pattern, making it **CRUDSI**.
|
|
47
|
+
|
|
48
|
+
The notion of "Search" and "Inference" is intended to be flexible and extensible,
|
|
49
|
+
including:
|
|
50
|
+
|
|
51
|
+
* Search
|
|
52
|
+
* Traditional keyword search
|
|
53
|
+
* Search using LLM Vector embeddings (*without* a dedicated vector database)
|
|
54
|
+
* Pluggable specialized search, e.g. genomic sequence (not yet implemented)
|
|
55
|
+
* Inference (encompassing *validation*, *repair*, and inference of missing data)
|
|
56
|
+
* Classic rule-based inference
|
|
57
|
+
* Inference using LLM Retrieval Augmented Generation (RAG)
|
|
58
|
+
* Statistical/ML inference
|
|
59
|
+
|
|
43
60
|
## Features
|
|
44
61
|
|
|
45
62
|
### Multiple Adapters
|
|
@@ -49,6 +66,8 @@ LinkML-Store is designed to work with multiple backends, giving a common abstrac
|
|
|
49
66
|
* [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
|
|
50
67
|
* [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
51
68
|
* [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
|
|
69
|
+
* [Neo4j](https://linkml.io/linkml-store/how-to/Use-Neo4j.html)
|
|
70
|
+
|
|
52
71
|
* Filesystem
|
|
53
72
|
|
|
54
73
|
Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.1.12"
|
|
3
|
+
version = "0.1.14"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -13,7 +13,7 @@ pydantic = "^2.0.0"
|
|
|
13
13
|
linkml-runtime = ">=1.8.0"
|
|
14
14
|
streamlit = { version = "^1.32.2", optional = true }
|
|
15
15
|
sqlalchemy = "*"
|
|
16
|
-
duckdb = "
|
|
16
|
+
duckdb = ">=0.10.1"
|
|
17
17
|
duckdb-engine = ">=0.11.2"
|
|
18
18
|
matplotlib = { version = "*", optional = true }
|
|
19
19
|
seaborn = { version = "*", optional = true }
|
|
@@ -21,10 +21,16 @@ plotly = { version = "*", optional = true }
|
|
|
21
21
|
pystow = "^0.5.4"
|
|
22
22
|
black = { version=">=24.0.0", optional = true }
|
|
23
23
|
llm = { version="*", optional = true }
|
|
24
|
+
tiktoken = { version="*", optional = true }
|
|
24
25
|
pymongo = { version="*", optional = true }
|
|
26
|
+
neo4j = { version="*", optional = true }
|
|
27
|
+
py2neo = { version="*", optional = true }
|
|
28
|
+
networkx = { version="*", optional = true }
|
|
25
29
|
chromadb = { version="*", optional = true }
|
|
26
30
|
pyarrow = { version="*", optional = true }
|
|
27
31
|
h5py = { version="*", optional = true }
|
|
32
|
+
scipy = { version="*", optional = true }
|
|
33
|
+
scikit-learn = { version="*", optional = true }
|
|
28
34
|
linkml = { version=">=1.8.0", optional = true }
|
|
29
35
|
linkml_map = { version="*", optional = true }
|
|
30
36
|
linkml_renderer = { version="*", optional = true }
|
|
@@ -64,8 +70,9 @@ numpy = [
|
|
|
64
70
|
analytics = ["pandas", "matplotlib", "seaborn", "plotly"]
|
|
65
71
|
app = ["streamlit"]
|
|
66
72
|
tests = ["black"]
|
|
67
|
-
llm = ["llm"]
|
|
73
|
+
llm = ["llm", "tiktoken"]
|
|
68
74
|
mongodb = ["pymongo"]
|
|
75
|
+
neo4j = ["neo4j", "py2neo", "networkx"]
|
|
69
76
|
chromadb = ["chromadb"]
|
|
70
77
|
h5py = ["h5py"]
|
|
71
78
|
pyarrow = ["pyarrow"]
|
|
@@ -74,6 +81,7 @@ map = ["linkml_map"]
|
|
|
74
81
|
renderer = ["linkml_renderer"]
|
|
75
82
|
fastapi = ["fastapi", "uvicorn"]
|
|
76
83
|
frictionless = ["frictionless"]
|
|
84
|
+
scipy = ["scipy", "scikit-learn"]
|
|
77
85
|
|
|
78
86
|
[tool.poetry.scripts]
|
|
79
87
|
linkml-store = "linkml_store.cli:cli"
|
|
@@ -11,6 +11,7 @@ from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
|
|
|
11
11
|
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
12
12
|
from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
|
|
13
13
|
from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
|
|
14
|
+
from linkml_store.api.stores.neo4j.neo4j_database import Neo4jDatabase
|
|
14
15
|
from linkml_store.api.stores.solr.solr_database import SolrDatabase
|
|
15
16
|
|
|
16
17
|
logger = logging.getLogger(__name__)
|
|
@@ -21,6 +22,7 @@ HANDLE_MAP = {
|
|
|
21
22
|
"solr": SolrDatabase,
|
|
22
23
|
"mongodb": MongoDBDatabase,
|
|
23
24
|
"chromadb": ChromaDBDatabase,
|
|
25
|
+
"neo4j": Neo4jDatabase,
|
|
24
26
|
"file": FileSystemDatabase,
|
|
25
27
|
}
|
|
26
28
|
|
|
@@ -98,7 +100,7 @@ class Client:
|
|
|
98
100
|
"""
|
|
99
101
|
return self.metadata.base_dir
|
|
100
102
|
|
|
101
|
-
def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
|
|
103
|
+
def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, auto_attach=False, **kwargs):
|
|
102
104
|
"""
|
|
103
105
|
Create a client from a configuration.
|
|
104
106
|
|
|
@@ -107,6 +109,10 @@ class Client:
|
|
|
107
109
|
>>> from linkml_store.api.config import ClientConfig
|
|
108
110
|
>>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}))
|
|
109
111
|
>>> len(client.databases)
|
|
112
|
+
0
|
|
113
|
+
>>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}),
|
|
114
|
+
... auto_attach=True)
|
|
115
|
+
>>> len(client.databases)
|
|
110
116
|
1
|
|
111
117
|
>>> "test" in client.databases
|
|
112
118
|
True
|
|
@@ -114,6 +120,8 @@ class Client:
|
|
|
114
120
|
'duckdb:///:memory:'
|
|
115
121
|
|
|
116
122
|
:param config:
|
|
123
|
+
:param base_dir:
|
|
124
|
+
:param auto_attach:
|
|
117
125
|
:param kwargs:
|
|
118
126
|
:return:
|
|
119
127
|
|
|
@@ -123,17 +131,17 @@ class Client:
|
|
|
123
131
|
if isinstance(config, Path):
|
|
124
132
|
config = str(config)
|
|
125
133
|
if isinstance(config, str):
|
|
126
|
-
|
|
127
|
-
|
|
134
|
+
if not base_dir:
|
|
135
|
+
base_dir = Path(config).parent
|
|
128
136
|
parsed_obj = yaml.safe_load(open(config))
|
|
129
137
|
config = ClientConfig(**parsed_obj)
|
|
130
138
|
self.metadata = config
|
|
131
139
|
if base_dir:
|
|
132
140
|
self.metadata.base_dir = base_dir
|
|
133
|
-
self._initialize_databases(**kwargs)
|
|
141
|
+
self._initialize_databases(auto_attach=auto_attach, **kwargs)
|
|
134
142
|
return self
|
|
135
143
|
|
|
136
|
-
def _initialize_databases(self, **kwargs):
|
|
144
|
+
def _initialize_databases(self, auto_attach=False, **kwargs):
|
|
137
145
|
for name, db_config in self.metadata.databases.items():
|
|
138
146
|
base_dir = self.base_dir
|
|
139
147
|
logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
|
|
@@ -144,8 +152,22 @@ class Client:
|
|
|
144
152
|
db_config.handle = handle
|
|
145
153
|
if db_config.schema_location:
|
|
146
154
|
db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
|
|
147
|
-
|
|
148
|
-
|
|
155
|
+
if auto_attach:
|
|
156
|
+
db = self.attach_database(handle, alias=name, **kwargs)
|
|
157
|
+
db.from_config(db_config)
|
|
158
|
+
|
|
159
|
+
def _set_database_config(self, db: Database):
|
|
160
|
+
"""
|
|
161
|
+
Set the configuration for a database.
|
|
162
|
+
|
|
163
|
+
:param name:
|
|
164
|
+
:param config:
|
|
165
|
+
:return:
|
|
166
|
+
"""
|
|
167
|
+
if not self.metadata:
|
|
168
|
+
return
|
|
169
|
+
if db.alias in self.metadata.databases:
|
|
170
|
+
db.from_config(self.metadata.databases[db.alias])
|
|
149
171
|
|
|
150
172
|
def attach_database(
|
|
151
173
|
self,
|
|
@@ -200,6 +222,7 @@ class Client:
|
|
|
200
222
|
raise AssertionError(f"Inconsistent alias: {db.alias} != {alias}")
|
|
201
223
|
else:
|
|
202
224
|
db.metadata.alias = alias
|
|
225
|
+
self._set_database_config(db)
|
|
203
226
|
return db
|
|
204
227
|
|
|
205
228
|
def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
|
|
@@ -228,13 +251,19 @@ class Client:
|
|
|
228
251
|
return list(self._databases.values())[0]
|
|
229
252
|
if not self._databases:
|
|
230
253
|
self._databases = {}
|
|
254
|
+
if name not in self._databases and name in self.metadata.databases:
|
|
255
|
+
db_config = self.metadata.databases[name]
|
|
256
|
+
db = self.attach_database(db_config.handle, alias=name, **kwargs)
|
|
257
|
+
self._databases[name] = db
|
|
231
258
|
if name not in self._databases:
|
|
232
259
|
if create_if_not_exists:
|
|
233
260
|
logger.info(f"Creating database: {name}")
|
|
234
261
|
self.attach_database(name, **kwargs)
|
|
235
262
|
else:
|
|
236
263
|
raise ValueError(f"Database {name} does not exist")
|
|
237
|
-
|
|
264
|
+
db = self._databases[name]
|
|
265
|
+
self._set_database_config(db)
|
|
266
|
+
return db
|
|
238
267
|
|
|
239
268
|
@property
|
|
240
269
|
def databases(self) -> Dict[str, Database]:
|
|
@@ -4,7 +4,21 @@ import hashlib
|
|
|
4
4
|
import logging
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import (
|
|
8
|
+
TYPE_CHECKING,
|
|
9
|
+
Any,
|
|
10
|
+
ClassVar,
|
|
11
|
+
Dict,
|
|
12
|
+
Generic,
|
|
13
|
+
Iterable,
|
|
14
|
+
Iterator,
|
|
15
|
+
List,
|
|
16
|
+
Optional,
|
|
17
|
+
TextIO,
|
|
18
|
+
Tuple,
|
|
19
|
+
Type,
|
|
20
|
+
Union,
|
|
21
|
+
)
|
|
8
22
|
|
|
9
23
|
import numpy as np
|
|
10
24
|
from linkml_runtime import SchemaView
|
|
@@ -202,6 +216,12 @@ class Collection(Generic[DatabaseType]):
|
|
|
202
216
|
self._materialize_derivations()
|
|
203
217
|
self._initialized = True
|
|
204
218
|
|
|
219
|
+
def _pre_insert_hook(self, objs: List[OBJECT], **kwargs):
|
|
220
|
+
if self.metadata.validate_modifications:
|
|
221
|
+
errors = list(self.iter_validate_collection(objs))
|
|
222
|
+
if errors:
|
|
223
|
+
raise ValueError(f"Validation errors: {errors}")
|
|
224
|
+
|
|
205
225
|
def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
|
|
206
226
|
self._initialized = True
|
|
207
227
|
patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
|
|
@@ -482,6 +502,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
482
502
|
index_name = self.default_index_name
|
|
483
503
|
ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
|
|
484
504
|
if index_name not in self.indexers:
|
|
505
|
+
logger.debug(f"Indexer not found: {index_name} -- creating")
|
|
485
506
|
ix = get_indexer(index_name)
|
|
486
507
|
if not self._indexers:
|
|
487
508
|
self._indexers = {}
|
|
@@ -489,6 +510,11 @@ class Collection(Generic[DatabaseType]):
|
|
|
489
510
|
ix = self.indexers.get(index_name)
|
|
490
511
|
if not ix:
|
|
491
512
|
raise ValueError(f"No index named {index_name}")
|
|
513
|
+
logger.debug(f"Using indexer {type(ix)} with name {index_name}")
|
|
514
|
+
if ix_coll.size() == 0:
|
|
515
|
+
logger.info(f"Index {index_name} is empty; indexing all objects")
|
|
516
|
+
all_objs = self.find(limit=-1).rows
|
|
517
|
+
self.index_objects(all_objs, index_name, replace=True, **kwargs)
|
|
492
518
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
493
519
|
index_col = ix.index_field
|
|
494
520
|
# TODO: optimize this for large indexes
|
|
@@ -498,6 +524,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
498
524
|
del r[1][index_col]
|
|
499
525
|
new_qr = QueryResult(num_rows=len(results))
|
|
500
526
|
new_qr.ranked_rows = results
|
|
527
|
+
new_qr.rows = [r[1] for r in results]
|
|
501
528
|
return new_qr
|
|
502
529
|
|
|
503
530
|
@property
|
|
@@ -542,6 +569,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
542
569
|
format=source.format,
|
|
543
570
|
expected_type=source.expected_type,
|
|
544
571
|
compression=source.compression,
|
|
572
|
+
select_query=source.select_query,
|
|
545
573
|
**kwargs,
|
|
546
574
|
)
|
|
547
575
|
elif metadata.source.url:
|
|
@@ -550,9 +578,12 @@ class Collection(Generic[DatabaseType]):
|
|
|
550
578
|
format=source.format,
|
|
551
579
|
expected_type=source.expected_type,
|
|
552
580
|
compression=source.compression,
|
|
581
|
+
select_query=source.select_query,
|
|
553
582
|
**kwargs,
|
|
554
583
|
)
|
|
555
|
-
|
|
584
|
+
else:
|
|
585
|
+
raise ValueError("No source local_path or url provided")
|
|
586
|
+
self.insert(objects)
|
|
556
587
|
|
|
557
588
|
def _check_if_initialized(self) -> bool:
|
|
558
589
|
return self._initialized
|
|
@@ -609,6 +640,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
609
640
|
self.insert(tr_objs)
|
|
610
641
|
self.commit()
|
|
611
642
|
|
|
643
|
+
def size(self) -> int:
|
|
644
|
+
"""
|
|
645
|
+
Return the number of objects in the collection.
|
|
646
|
+
|
|
647
|
+
:return: The number of objects in the collection.
|
|
648
|
+
"""
|
|
649
|
+
return self.find({}, limit=1).num_rows
|
|
650
|
+
|
|
612
651
|
def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
|
|
613
652
|
"""
|
|
614
653
|
Attach an index to the collection.
|
|
@@ -757,6 +796,8 @@ class Collection(Generic[DatabaseType]):
|
|
|
757
796
|
sv: SchemaView = self.parent.schema_view
|
|
758
797
|
if sv:
|
|
759
798
|
cls = sv.get_class(self.target_class_name)
|
|
799
|
+
# if not cls:
|
|
800
|
+
# logger.warning(f"{self.target_class_name} not in {sv.all_classes().keys()} ")
|
|
760
801
|
# cls = sv.schema.classes[self.target_class_name]
|
|
761
802
|
if cls and not cls.attributes:
|
|
762
803
|
if not sv.class_induced_slots(cls.name):
|
|
@@ -880,11 +921,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
880
921
|
exact_dimensions_list.append(v.shape)
|
|
881
922
|
break
|
|
882
923
|
if isinstance(v, list):
|
|
924
|
+
# sample first item. TODO: more robust strategy
|
|
883
925
|
v = v[0] if v else None
|
|
884
926
|
multivalueds.append(True)
|
|
885
927
|
elif isinstance(v, dict):
|
|
886
|
-
|
|
887
|
-
|
|
928
|
+
pass
|
|
929
|
+
# TODO: check if this is a nested object or key-value list
|
|
930
|
+
# v = list(v.values())[0]
|
|
931
|
+
# multivalueds.append(True)
|
|
888
932
|
else:
|
|
889
933
|
multivalueds.append(False)
|
|
890
934
|
if not v:
|
|
@@ -913,10 +957,21 @@ class Collection(Generic[DatabaseType]):
|
|
|
913
957
|
# raise AssertionError(f"Empty rngs for {k} = {vs}")
|
|
914
958
|
rng = rngs[0] if rngs else None
|
|
915
959
|
for other_rng in rngs:
|
|
960
|
+
coercions = {
|
|
961
|
+
("integer", "float"): "float",
|
|
962
|
+
}
|
|
916
963
|
if rng != other_rng:
|
|
917
|
-
|
|
964
|
+
if (rng, other_rng) in coercions:
|
|
965
|
+
rng = coercions[(rng, other_rng)]
|
|
966
|
+
elif (other_rng, rng) in coercions:
|
|
967
|
+
rng = coercions[(other_rng, rng)]
|
|
968
|
+
else:
|
|
969
|
+
raise ValueError(f"Conflict: {rng} != {other_rng} for {vs}")
|
|
918
970
|
logger.debug(f"Inducing {k} as {rng} {multivalued} {inlined}")
|
|
919
|
-
|
|
971
|
+
inlined_as_list = inlined and multivalued
|
|
972
|
+
cd.attributes[k] = SlotDefinition(
|
|
973
|
+
k, range=rng, multivalued=multivalued, inlined=inlined, inlined_as_list=inlined_as_list
|
|
974
|
+
)
|
|
920
975
|
if exact_dimensions_list:
|
|
921
976
|
array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
|
|
922
977
|
cd.attributes[k].array = array_expr
|
|
@@ -978,11 +1033,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
978
1033
|
patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
|
|
979
1034
|
return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
|
|
980
1035
|
|
|
981
|
-
def iter_validate_collection(
|
|
1036
|
+
def iter_validate_collection(
|
|
1037
|
+
self, objects: Optional[Iterable[OBJECT]] = None, **kwargs
|
|
1038
|
+
) -> Iterator["ValidationResult"]:
|
|
982
1039
|
"""
|
|
983
1040
|
Validate the contents of the collection
|
|
984
1041
|
|
|
985
1042
|
:param kwargs:
|
|
1043
|
+
:param objects: objects to validate
|
|
986
1044
|
:return: iterator over validation results
|
|
987
1045
|
"""
|
|
988
1046
|
from linkml.validator import JsonschemaValidationPlugin, Validator
|
|
@@ -992,10 +1050,24 @@ class Collection(Generic[DatabaseType]):
|
|
|
992
1050
|
cd = self.class_definition()
|
|
993
1051
|
if not cd:
|
|
994
1052
|
raise ValueError(f"Cannot find class definition for {self.target_class_name}")
|
|
1053
|
+
type_designator = None
|
|
1054
|
+
for att in self.parent.schema_view.class_induced_slots(cd.name):
|
|
1055
|
+
if att.designates_type:
|
|
1056
|
+
type_designator = att.name
|
|
995
1057
|
class_name = cd.name
|
|
996
|
-
|
|
1058
|
+
if objects is None:
|
|
1059
|
+
objects = self.find_iter(**kwargs)
|
|
1060
|
+
for obj in objects:
|
|
997
1061
|
obj = clean_empties(obj)
|
|
998
|
-
|
|
1062
|
+
v_class_name = class_name
|
|
1063
|
+
if type_designator is not None:
|
|
1064
|
+
# TODO: move type designator logic to core linkml
|
|
1065
|
+
this_class_name = obj.get(type_designator)
|
|
1066
|
+
if this_class_name:
|
|
1067
|
+
if ":" in this_class_name:
|
|
1068
|
+
this_class_name = this_class_name.split(":")[-1]
|
|
1069
|
+
v_class_name = this_class_name
|
|
1070
|
+
yield from validator.iter_results(obj, v_class_name)
|
|
999
1071
|
|
|
1000
1072
|
def commit(self):
|
|
1001
1073
|
"""
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Optional
|
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
|
2
2
|
|
|
3
3
|
from pydantic import BaseModel, Field
|
|
4
4
|
|
|
5
|
+
from linkml_store.graphs.graph_map import EdgeProjection, NodeProjection
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class ConfiguredBaseModel(BaseModel, extra="forbid"):
|
|
7
9
|
"""
|
|
@@ -28,13 +30,30 @@ class CollectionSource(ConfiguredBaseModel):
|
|
|
28
30
|
"""
|
|
29
31
|
|
|
30
32
|
url: Optional[str] = None
|
|
33
|
+
"""Remote URL to fetch data from"""
|
|
34
|
+
|
|
31
35
|
local_path: Optional[str] = None
|
|
36
|
+
"""Local path to fetch data from"""
|
|
37
|
+
|
|
32
38
|
source_location: Optional[str] = None
|
|
39
|
+
|
|
33
40
|
refresh_interval_days: Optional[float] = None
|
|
41
|
+
"""How often to refresh the data, in days"""
|
|
42
|
+
|
|
34
43
|
expected_type: Optional[str] = None
|
|
44
|
+
"""The expected type of the data, e.g list"""
|
|
45
|
+
|
|
35
46
|
format: Optional[str] = None
|
|
47
|
+
"""The format of the data, e.g., json, yaml, csv"""
|
|
48
|
+
|
|
36
49
|
compression: Optional[str] = None
|
|
50
|
+
"""The compression of the data, e.g., tgz, gzip, zip"""
|
|
51
|
+
|
|
52
|
+
select_query: Optional[str] = None
|
|
53
|
+
"""A jsonpath query to preprocess the objects with"""
|
|
54
|
+
|
|
37
55
|
arguments: Optional[Dict[str, Any]] = None
|
|
56
|
+
"""Optional arguments to pass to the source"""
|
|
38
57
|
|
|
39
58
|
|
|
40
59
|
class CollectionConfig(ConfiguredBaseModel):
|
|
@@ -79,6 +98,14 @@ class CollectionConfig(ConfiguredBaseModel):
|
|
|
79
98
|
description="LinkML-Map derivations",
|
|
80
99
|
)
|
|
81
100
|
page_size: Optional[int] = Field(default=None, description="Suggested page size (items per page) in apps and APIs")
|
|
101
|
+
graph_projection: Optional[Union[EdgeProjection, NodeProjection]] = Field(
|
|
102
|
+
default=None,
|
|
103
|
+
description="Optional graph projection configuration",
|
|
104
|
+
)
|
|
105
|
+
validate_modifications: Optional[bool] = Field(
|
|
106
|
+
default=False,
|
|
107
|
+
description="Whether to validate inserts, updates, and deletes",
|
|
108
|
+
)
|
|
82
109
|
|
|
83
110
|
|
|
84
111
|
class DatabaseConfig(ConfiguredBaseModel):
|
|
@@ -505,8 +505,10 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
505
505
|
if isinstance(schema_view, str):
|
|
506
506
|
schema_view = SchemaView(schema_view)
|
|
507
507
|
self._schema_view = schema_view
|
|
508
|
+
# self._schema_view = SchemaView(schema_view.materialize_derived_schema())
|
|
508
509
|
if not self._collections:
|
|
509
510
|
return
|
|
511
|
+
|
|
510
512
|
# align with induced schema
|
|
511
513
|
roots = [c for c in schema_view.all_classes().values() if c.tree_root]
|
|
512
514
|
if len(roots) == 0:
|
|
@@ -705,12 +707,29 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
705
707
|
"""
|
|
706
708
|
raise NotImplementedError()
|
|
707
709
|
|
|
708
|
-
def import_database(
|
|
710
|
+
def import_database(
|
|
711
|
+
self,
|
|
712
|
+
location: str,
|
|
713
|
+
source_format: Optional[Union[str, Format]] = None,
|
|
714
|
+
collection_name: Optional[str] = None,
|
|
715
|
+
**kwargs,
|
|
716
|
+
):
|
|
709
717
|
"""
|
|
710
718
|
Import a database from a file or location.
|
|
711
719
|
|
|
720
|
+
>>> from linkml_store.api.client import Client
|
|
721
|
+
>>> client = Client()
|
|
722
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
723
|
+
>>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
|
|
724
|
+
>>> db.list_collection_names()
|
|
725
|
+
['iris']
|
|
726
|
+
>>> collection = db.get_collection("iris")
|
|
727
|
+
>>> collection.find({}).num_rows
|
|
728
|
+
150
|
|
729
|
+
|
|
712
730
|
:param location: location of the file
|
|
713
731
|
:param source_format: source format
|
|
732
|
+
:param collection_name: (Optional) name of the collection, for data that is flat
|
|
714
733
|
:param kwargs: additional arguments
|
|
715
734
|
"""
|
|
716
735
|
if isinstance(source_format, str):
|
|
@@ -730,8 +749,12 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
730
749
|
self.store(obj)
|
|
731
750
|
return
|
|
732
751
|
objects = load_objects(location, format=source_format)
|
|
733
|
-
|
|
734
|
-
self.
|
|
752
|
+
if collection_name:
|
|
753
|
+
collection = self.get_collection(collection_name, create_if_not_exists=True)
|
|
754
|
+
collection.insert(objects)
|
|
755
|
+
else:
|
|
756
|
+
for obj in objects:
|
|
757
|
+
self.store(obj)
|
|
735
758
|
|
|
736
759
|
def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
|
|
737
760
|
"""
|
|
@@ -51,9 +51,13 @@ class MongoDBCollection(Collection):
|
|
|
51
51
|
if offset and offset >= 0:
|
|
52
52
|
cursor = cursor.skip(offset)
|
|
53
53
|
|
|
54
|
+
select_cols = query.select_cols
|
|
55
|
+
|
|
54
56
|
def _as_row(row: dict):
|
|
55
57
|
row = copy(row)
|
|
56
58
|
del row["_id"]
|
|
59
|
+
if select_cols:
|
|
60
|
+
row = {k: row[k] for k in select_cols if k in row}
|
|
57
61
|
return row
|
|
58
62
|
|
|
59
63
|
rows = [_as_row(row) for row in cursor]
|