linkml-store 0.1.13__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- {linkml_store-0.1.13 → linkml_store-0.2.0}/PKG-INFO +28 -2
- {linkml_store-0.1.13 → linkml_store-0.2.0}/README.md +23 -1
- {linkml_store-0.1.13 → linkml_store-0.2.0}/pyproject.toml +6 -2
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/client.py +35 -8
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/collection.py +40 -5
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/config.py +20 -3
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/database.py +24 -3
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +3 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +4 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/cli.py +149 -13
- linkml_store-0.2.0/src/linkml_store/inference/__init__.py +13 -0
- linkml_store-0.2.0/src/linkml_store/inference/evaluation.py +189 -0
- linkml_store-0.2.0/src/linkml_store/inference/implementations/rag_inference_engine.py +145 -0
- linkml_store-0.2.0/src/linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
- linkml_store-0.2.0/src/linkml_store/inference/implementations/sklearn_inference_engine.py +308 -0
- linkml_store-0.2.0/src/linkml_store/inference/inference_config.py +62 -0
- linkml_store-0.2.0/src/linkml_store/inference/inference_engine.py +200 -0
- linkml_store-0.2.0/src/linkml_store/inference/inference_engine_registry.py +74 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/format_utils.py +27 -90
- linkml_store-0.2.0/src/linkml_store/utils/llm_utils.py +96 -0
- linkml_store-0.2.0/src/linkml_store/utils/object_utils.py +182 -0
- linkml_store-0.2.0/src/linkml_store/utils/pandas_utils.py +93 -0
- linkml_store-0.2.0/src/linkml_store/utils/sklearn_utils.py +193 -0
- linkml_store-0.2.0/src/linkml_store/utils/stats_utils.py +53 -0
- linkml_store-0.2.0/src/linkml_store/webapi/__init__.py +0 -0
- linkml_store-0.1.13/src/linkml_store/utils/object_utils.py +0 -81
- linkml_store-0.1.13/src/linkml_store/utils/pandas_utils.py +0 -40
- {linkml_store-0.1.13 → linkml_store-0.2.0}/LICENSE +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/duckdb_database.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/neo4j/neo4j_database.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/graphs/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/graphs/graph_map.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/index/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/index/indexer.py +0 -0
- {linkml_store-0.1.13/src/linkml_store/utils → linkml_store-0.2.0/src/linkml_store/inference/implementations}/__init__.py +0 -0
- {linkml_store-0.1.13/src/linkml_store/webapi → linkml_store-0.2.0/src/linkml_store/utils}/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/file_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/mongodb_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/neo4j_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/utils/sql_utils.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/webapi/main.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: linkml-store
|
|
3
|
-
Version: 0.1.13
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: linkml-store
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Author 1
|
|
@@ -24,6 +24,7 @@ Provides-Extra: mongodb
|
|
|
24
24
|
Provides-Extra: neo4j
|
|
25
25
|
Provides-Extra: pyarrow
|
|
26
26
|
Provides-Extra: renderer
|
|
27
|
+
Provides-Extra: scipy
|
|
27
28
|
Provides-Extra: tests
|
|
28
29
|
Provides-Extra: validation
|
|
29
30
|
Requires-Dist: black (>=24.0.0) ; extra == "tests"
|
|
@@ -51,9 +52,12 @@ Requires-Dist: pyarrow ; extra == "pyarrow"
|
|
|
51
52
|
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
|
52
53
|
Requires-Dist: pymongo ; extra == "mongodb"
|
|
53
54
|
Requires-Dist: pystow (>=0.5.4,<0.6.0)
|
|
55
|
+
Requires-Dist: scikit-learn ; extra == "scipy"
|
|
56
|
+
Requires-Dist: scipy ; extra == "scipy"
|
|
54
57
|
Requires-Dist: seaborn ; extra == "analytics"
|
|
55
58
|
Requires-Dist: sqlalchemy
|
|
56
59
|
Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
|
|
60
|
+
Requires-Dist: tiktoken ; extra == "llm"
|
|
57
61
|
Requires-Dist: uvicorn ; extra == "fastapi"
|
|
58
62
|
Description-Content-Type: text/markdown
|
|
59
63
|
|
|
@@ -61,11 +65,13 @@ Description-Content-Type: text/markdown
|
|
|
61
65
|
|
|
62
66
|
An AI-ready data management and integration platform. LinkML-Store
|
|
63
67
|
provides an abstraction layer over multiple different backends
|
|
64
|
-
(including DuckDB, MongoDB, and local filesystems), allowing for
|
|
68
|
+
(including DuckDB, MongoDB, Neo4j, and local filesystems), allowing for
|
|
65
69
|
common query, index, and storage operations.
|
|
66
70
|
|
|
67
71
|
For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
|
|
68
72
|
|
|
73
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for a high level overview.
|
|
74
|
+
|
|
69
75
|
__Warning__ LinkML-Store is still undergoing changes and refactoring,
|
|
70
76
|
APIs and command line options are subject to change!
|
|
71
77
|
|
|
@@ -99,6 +105,23 @@ linkml-store -d duckdb:///db/my.db -c persons validate
|
|
|
99
105
|
* API
|
|
100
106
|
* Streamlit applications
|
|
101
107
|
|
|
108
|
+
## The CRUDSI pattern
|
|
109
|
+
|
|
110
|
+
Most database APIs implement the **CRUD** pattern: Create, Read, Update, Delete.
|
|
111
|
+
LinkML-Store adds **Search** and **Inference** to this pattern, making it **CRUDSI**.
|
|
112
|
+
|
|
113
|
+
The notion of "Search" and "Inference" is intended to be flexible and extensible,
|
|
114
|
+
including:
|
|
115
|
+
|
|
116
|
+
* Search
|
|
117
|
+
* Traditional keyword search
|
|
118
|
+
* Search using LLM Vector embeddings (*without* a dedicated vector database)
|
|
119
|
+
* Pluggable specialized search, e.g. genomic sequence (not yet implemented)
|
|
120
|
+
* Inference (encompassing *validation*, *repair*, and inference of missing data)
|
|
121
|
+
* Classic rule-based inference
|
|
122
|
+
* Inference using LLM Retrieval Augmented Generation (RAG)
|
|
123
|
+
* Statistical/ML inference
|
|
124
|
+
|
|
102
125
|
## Features
|
|
103
126
|
|
|
104
127
|
### Multiple Adapters
|
|
@@ -108,6 +131,8 @@ LinkML-Store is designed to work with multiple backends, giving a common abstrac
|
|
|
108
131
|
* [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
|
|
109
132
|
* [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
110
133
|
* [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
|
|
134
|
+
* [Neo4j](https://linkml.io/linkml-store/how-to/Use-Neo4j.html)
|
|
135
|
+
|
|
111
136
|
* Filesystem
|
|
112
137
|
|
|
113
138
|
Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
|
|
@@ -173,3 +198,4 @@ make app
|
|
|
173
198
|
|
|
174
199
|
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
175
200
|
|
|
201
|
+
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
An AI-ready data management and integration platform. LinkML-Store
|
|
4
4
|
provides an abstraction layer over multiple different backends
|
|
5
|
-
(including DuckDB, MongoDB, and local filesystems), allowing for
|
|
5
|
+
(including DuckDB, MongoDB, Neo4j, and local filesystems), allowing for
|
|
6
6
|
common query, index, and storage operations.
|
|
7
7
|
|
|
8
8
|
For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
|
|
9
9
|
|
|
10
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for a high level overview.
|
|
11
|
+
|
|
10
12
|
__Warning__ LinkML-Store is still undergoing changes and refactoring,
|
|
11
13
|
APIs and command line options are subject to change!
|
|
12
14
|
|
|
@@ -40,6 +42,23 @@ linkml-store -d duckdb:///db/my.db -c persons validate
|
|
|
40
42
|
* API
|
|
41
43
|
* Streamlit applications
|
|
42
44
|
|
|
45
|
+
## The CRUDSI pattern
|
|
46
|
+
|
|
47
|
+
Most database APIs implement the **CRUD** pattern: Create, Read, Update, Delete.
|
|
48
|
+
LinkML-Store adds **Search** and **Inference** to this pattern, making it **CRUDSI**.
|
|
49
|
+
|
|
50
|
+
The notion of "Search" and "Inference" is intended to be flexible and extensible,
|
|
51
|
+
including:
|
|
52
|
+
|
|
53
|
+
* Search
|
|
54
|
+
* Traditional keyword search
|
|
55
|
+
* Search using LLM Vector embeddings (*without* a dedicated vector database)
|
|
56
|
+
* Pluggable specialized search, e.g. genomic sequence (not yet implemented)
|
|
57
|
+
* Inference (encompassing *validation*, *repair*, and inference of missing data)
|
|
58
|
+
* Classic rule-based inference
|
|
59
|
+
* Inference using LLM Retrieval Augmented Generation (RAG)
|
|
60
|
+
* Statistical/ML inference
|
|
61
|
+
|
|
43
62
|
## Features
|
|
44
63
|
|
|
45
64
|
### Multiple Adapters
|
|
@@ -49,6 +68,8 @@ LinkML-Store is designed to work with multiple backends, giving a common abstrac
|
|
|
49
68
|
* [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
|
|
50
69
|
* [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
51
70
|
* [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
|
|
71
|
+
* [Neo4j](https://linkml.io/linkml-store/how-to/Use-Neo4j.html)
|
|
72
|
+
|
|
52
73
|
* Filesystem
|
|
53
74
|
|
|
54
75
|
Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
|
|
@@ -113,3 +134,4 @@ make app
|
|
|
113
134
|
## Background
|
|
114
135
|
|
|
115
136
|
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
137
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.1.13"
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -21,6 +21,7 @@ plotly = { version = "*", optional = true }
|
|
|
21
21
|
pystow = "^0.5.4"
|
|
22
22
|
black = { version=">=24.0.0", optional = true }
|
|
23
23
|
llm = { version="*", optional = true }
|
|
24
|
+
tiktoken = { version="*", optional = true }
|
|
24
25
|
pymongo = { version="*", optional = true }
|
|
25
26
|
neo4j = { version="*", optional = true }
|
|
26
27
|
py2neo = { version="*", optional = true }
|
|
@@ -28,6 +29,8 @@ networkx = { version="*", optional = true }
|
|
|
28
29
|
chromadb = { version="*", optional = true }
|
|
29
30
|
pyarrow = { version="*", optional = true }
|
|
30
31
|
h5py = { version="*", optional = true }
|
|
32
|
+
scipy = { version="*", optional = true }
|
|
33
|
+
scikit-learn = { version="*", optional = true }
|
|
31
34
|
linkml = { version=">=1.8.0", optional = true }
|
|
32
35
|
linkml_map = { version="*", optional = true }
|
|
33
36
|
linkml_renderer = { version="*", optional = true }
|
|
@@ -67,7 +70,7 @@ numpy = [
|
|
|
67
70
|
analytics = ["pandas", "matplotlib", "seaborn", "plotly"]
|
|
68
71
|
app = ["streamlit"]
|
|
69
72
|
tests = ["black"]
|
|
70
|
-
llm = ["llm"]
|
|
73
|
+
llm = ["llm", "tiktoken"]
|
|
71
74
|
mongodb = ["pymongo"]
|
|
72
75
|
neo4j = ["neo4j", "py2neo", "networkx"]
|
|
73
76
|
chromadb = ["chromadb"]
|
|
@@ -78,6 +81,7 @@ map = ["linkml_map"]
|
|
|
78
81
|
renderer = ["linkml_renderer"]
|
|
79
82
|
fastapi = ["fastapi", "uvicorn"]
|
|
80
83
|
frictionless = ["frictionless"]
|
|
84
|
+
scipy = ["scipy", "scikit-learn"]
|
|
81
85
|
|
|
82
86
|
[tool.poetry.scripts]
|
|
83
87
|
linkml-store = "linkml_store.cli:cli"
|
|
@@ -100,7 +100,7 @@ class Client:
|
|
|
100
100
|
"""
|
|
101
101
|
return self.metadata.base_dir
|
|
102
102
|
|
|
103
|
-
def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
|
|
103
|
+
def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, auto_attach=False, **kwargs):
|
|
104
104
|
"""
|
|
105
105
|
Create a client from a configuration.
|
|
106
106
|
|
|
@@ -109,6 +109,10 @@ class Client:
|
|
|
109
109
|
>>> from linkml_store.api.config import ClientConfig
|
|
110
110
|
>>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}))
|
|
111
111
|
>>> len(client.databases)
|
|
112
|
+
0
|
|
113
|
+
>>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}),
|
|
114
|
+
... auto_attach=True)
|
|
115
|
+
>>> len(client.databases)
|
|
112
116
|
1
|
|
113
117
|
>>> "test" in client.databases
|
|
114
118
|
True
|
|
@@ -116,6 +120,8 @@ class Client:
|
|
|
116
120
|
'duckdb:///:memory:'
|
|
117
121
|
|
|
118
122
|
:param config:
|
|
123
|
+
:param base_dir:
|
|
124
|
+
:param auto_attach:
|
|
119
125
|
:param kwargs:
|
|
120
126
|
:return:
|
|
121
127
|
|
|
@@ -125,17 +131,17 @@ class Client:
|
|
|
125
131
|
if isinstance(config, Path):
|
|
126
132
|
config = str(config)
|
|
127
133
|
if isinstance(config, str):
|
|
128
|
-
|
|
129
|
-
|
|
134
|
+
if not base_dir:
|
|
135
|
+
base_dir = Path(config).parent
|
|
130
136
|
parsed_obj = yaml.safe_load(open(config))
|
|
131
137
|
config = ClientConfig(**parsed_obj)
|
|
132
138
|
self.metadata = config
|
|
133
139
|
if base_dir:
|
|
134
140
|
self.metadata.base_dir = base_dir
|
|
135
|
-
self._initialize_databases(**kwargs)
|
|
141
|
+
self._initialize_databases(auto_attach=auto_attach, **kwargs)
|
|
136
142
|
return self
|
|
137
143
|
|
|
138
|
-
def _initialize_databases(self, **kwargs):
|
|
144
|
+
def _initialize_databases(self, auto_attach=False, **kwargs):
|
|
139
145
|
for name, db_config in self.metadata.databases.items():
|
|
140
146
|
base_dir = self.base_dir
|
|
141
147
|
logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
|
|
@@ -146,8 +152,22 @@ class Client:
|
|
|
146
152
|
db_config.handle = handle
|
|
147
153
|
if db_config.schema_location:
|
|
148
154
|
db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
|
|
149
|
-
|
|
150
|
-
|
|
155
|
+
if auto_attach:
|
|
156
|
+
db = self.attach_database(handle, alias=name, **kwargs)
|
|
157
|
+
db.from_config(db_config)
|
|
158
|
+
|
|
159
|
+
def _set_database_config(self, db: Database):
|
|
160
|
+
"""
|
|
161
|
+
Set the configuration for a database.
|
|
162
|
+
|
|
163
|
+
:param name:
|
|
164
|
+
:param config:
|
|
165
|
+
:return:
|
|
166
|
+
"""
|
|
167
|
+
if not self.metadata:
|
|
168
|
+
return
|
|
169
|
+
if db.alias in self.metadata.databases:
|
|
170
|
+
db.from_config(self.metadata.databases[db.alias])
|
|
151
171
|
|
|
152
172
|
def attach_database(
|
|
153
173
|
self,
|
|
@@ -202,6 +222,7 @@ class Client:
|
|
|
202
222
|
raise AssertionError(f"Inconsistent alias: {db.alias} != {alias}")
|
|
203
223
|
else:
|
|
204
224
|
db.metadata.alias = alias
|
|
225
|
+
self._set_database_config(db)
|
|
205
226
|
return db
|
|
206
227
|
|
|
207
228
|
def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
|
|
@@ -230,13 +251,19 @@ class Client:
|
|
|
230
251
|
return list(self._databases.values())[0]
|
|
231
252
|
if not self._databases:
|
|
232
253
|
self._databases = {}
|
|
254
|
+
if name not in self._databases and name in self.metadata.databases:
|
|
255
|
+
db_config = self.metadata.databases[name]
|
|
256
|
+
db = self.attach_database(db_config.handle, alias=name, **kwargs)
|
|
257
|
+
self._databases[name] = db
|
|
233
258
|
if name not in self._databases:
|
|
234
259
|
if create_if_not_exists:
|
|
235
260
|
logger.info(f"Creating database: {name}")
|
|
236
261
|
self.attach_database(name, **kwargs)
|
|
237
262
|
else:
|
|
238
263
|
raise ValueError(f"Database {name} does not exist")
|
|
239
|
-
|
|
264
|
+
db = self._databases[name]
|
|
265
|
+
self._set_database_config(db)
|
|
266
|
+
return db
|
|
240
267
|
|
|
241
268
|
@property
|
|
242
269
|
def databases(self) -> Dict[str, Database]:
|
|
@@ -502,6 +502,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
502
502
|
index_name = self.default_index_name
|
|
503
503
|
ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
|
|
504
504
|
if index_name not in self.indexers:
|
|
505
|
+
logger.debug(f"Indexer not found: {index_name} -- creating")
|
|
505
506
|
ix = get_indexer(index_name)
|
|
506
507
|
if not self._indexers:
|
|
507
508
|
self._indexers = {}
|
|
@@ -509,6 +510,11 @@ class Collection(Generic[DatabaseType]):
|
|
|
509
510
|
ix = self.indexers.get(index_name)
|
|
510
511
|
if not ix:
|
|
511
512
|
raise ValueError(f"No index named {index_name}")
|
|
513
|
+
logger.debug(f"Using indexer {type(ix)} with name {index_name}")
|
|
514
|
+
if ix_coll.size() == 0:
|
|
515
|
+
logger.info(f"Index {index_name} is empty; indexing all objects")
|
|
516
|
+
all_objs = self.find(limit=-1).rows
|
|
517
|
+
self.index_objects(all_objs, index_name, replace=True, **kwargs)
|
|
512
518
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
513
519
|
index_col = ix.index_field
|
|
514
520
|
# TODO: optimize this for large indexes
|
|
@@ -518,6 +524,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
518
524
|
del r[1][index_col]
|
|
519
525
|
new_qr = QueryResult(num_rows=len(results))
|
|
520
526
|
new_qr.ranked_rows = results
|
|
527
|
+
new_qr.rows = [r[1] for r in results]
|
|
521
528
|
return new_qr
|
|
522
529
|
|
|
523
530
|
@property
|
|
@@ -562,6 +569,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
562
569
|
format=source.format,
|
|
563
570
|
expected_type=source.expected_type,
|
|
564
571
|
compression=source.compression,
|
|
572
|
+
select_query=source.select_query,
|
|
565
573
|
**kwargs,
|
|
566
574
|
)
|
|
567
575
|
elif metadata.source.url:
|
|
@@ -570,9 +578,12 @@ class Collection(Generic[DatabaseType]):
|
|
|
570
578
|
format=source.format,
|
|
571
579
|
expected_type=source.expected_type,
|
|
572
580
|
compression=source.compression,
|
|
581
|
+
select_query=source.select_query,
|
|
573
582
|
**kwargs,
|
|
574
583
|
)
|
|
575
|
-
|
|
584
|
+
else:
|
|
585
|
+
raise ValueError("No source local_path or url provided")
|
|
586
|
+
self.insert(objects)
|
|
576
587
|
|
|
577
588
|
def _check_if_initialized(self) -> bool:
|
|
578
589
|
return self._initialized
|
|
@@ -629,6 +640,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
629
640
|
self.insert(tr_objs)
|
|
630
641
|
self.commit()
|
|
631
642
|
|
|
643
|
+
def size(self) -> int:
|
|
644
|
+
"""
|
|
645
|
+
Return the number of objects in the collection.
|
|
646
|
+
|
|
647
|
+
:return: The number of objects in the collection.
|
|
648
|
+
"""
|
|
649
|
+
return self.find({}, limit=1).num_rows
|
|
650
|
+
|
|
632
651
|
def attach_indexer(self, index: Union[Indexer, str], name: Optional[str] = None, auto_index=True, **kwargs):
|
|
633
652
|
"""
|
|
634
653
|
Attach an index to the collection.
|
|
@@ -777,6 +796,8 @@ class Collection(Generic[DatabaseType]):
|
|
|
777
796
|
sv: SchemaView = self.parent.schema_view
|
|
778
797
|
if sv:
|
|
779
798
|
cls = sv.get_class(self.target_class_name)
|
|
799
|
+
# if not cls:
|
|
800
|
+
# logger.warning(f"{self.target_class_name} not in {sv.all_classes().keys()} ")
|
|
780
801
|
# cls = sv.schema.classes[self.target_class_name]
|
|
781
802
|
if cls and not cls.attributes:
|
|
782
803
|
if not sv.class_induced_slots(cls.name):
|
|
@@ -900,11 +921,14 @@ class Collection(Generic[DatabaseType]):
|
|
|
900
921
|
exact_dimensions_list.append(v.shape)
|
|
901
922
|
break
|
|
902
923
|
if isinstance(v, list):
|
|
924
|
+
# sample first item. TODO: more robust strategy
|
|
903
925
|
v = v[0] if v else None
|
|
904
926
|
multivalueds.append(True)
|
|
905
927
|
elif isinstance(v, dict):
|
|
906
|
-
|
|
907
|
-
|
|
928
|
+
pass
|
|
929
|
+
# TODO: check if this is a nested object or key-value list
|
|
930
|
+
# v = list(v.values())[0]
|
|
931
|
+
# multivalueds.append(True)
|
|
908
932
|
else:
|
|
909
933
|
multivalueds.append(False)
|
|
910
934
|
if not v:
|
|
@@ -933,10 +957,21 @@ class Collection(Generic[DatabaseType]):
|
|
|
933
957
|
# raise AssertionError(f"Empty rngs for {k} = {vs}")
|
|
934
958
|
rng = rngs[0] if rngs else None
|
|
935
959
|
for other_rng in rngs:
|
|
960
|
+
coercions = {
|
|
961
|
+
("integer", "float"): "float",
|
|
962
|
+
}
|
|
936
963
|
if rng != other_rng:
|
|
937
|
-
|
|
964
|
+
if (rng, other_rng) in coercions:
|
|
965
|
+
rng = coercions[(rng, other_rng)]
|
|
966
|
+
elif (other_rng, rng) in coercions:
|
|
967
|
+
rng = coercions[(other_rng, rng)]
|
|
968
|
+
else:
|
|
969
|
+
raise ValueError(f"Conflict: {rng} != {other_rng} for {vs}")
|
|
938
970
|
logger.debug(f"Inducing {k} as {rng} {multivalued} {inlined}")
|
|
939
|
-
|
|
971
|
+
inlined_as_list = inlined and multivalued
|
|
972
|
+
cd.attributes[k] = SlotDefinition(
|
|
973
|
+
k, range=rng, multivalued=multivalued, inlined=inlined, inlined_as_list=inlined_as_list
|
|
974
|
+
)
|
|
940
975
|
if exact_dimensions_list:
|
|
941
976
|
array_expr = ArrayExpression(exact_number_dimensions=len(exact_dimensions_list[0]))
|
|
942
977
|
cd.attributes[k].array = array_expr
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Optional
|
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
|
2
2
|
|
|
3
3
|
from pydantic import BaseModel, Field
|
|
4
4
|
|
|
5
|
-
from linkml_store.graphs.graph_map import
|
|
5
|
+
from linkml_store.graphs.graph_map import EdgeProjection, NodeProjection
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class ConfiguredBaseModel(BaseModel, extra="forbid"):
|
|
@@ -30,13 +30,30 @@ class CollectionSource(ConfiguredBaseModel):
|
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
url: Optional[str] = None
|
|
33
|
+
"""Remote URL to fetch data from"""
|
|
34
|
+
|
|
33
35
|
local_path: Optional[str] = None
|
|
36
|
+
"""Local path to fetch data from"""
|
|
37
|
+
|
|
34
38
|
source_location: Optional[str] = None
|
|
39
|
+
|
|
35
40
|
refresh_interval_days: Optional[float] = None
|
|
41
|
+
"""How often to refresh the data, in days"""
|
|
42
|
+
|
|
36
43
|
expected_type: Optional[str] = None
|
|
44
|
+
"""The expected type of the data, e.g list"""
|
|
45
|
+
|
|
37
46
|
format: Optional[str] = None
|
|
47
|
+
"""The format of the data, e.g., json, yaml, csv"""
|
|
48
|
+
|
|
38
49
|
compression: Optional[str] = None
|
|
50
|
+
"""The compression of the data, e.g., tgz, gzip, zip"""
|
|
51
|
+
|
|
52
|
+
select_query: Optional[str] = None
|
|
53
|
+
"""A jsonpath query to preprocess the objects with"""
|
|
54
|
+
|
|
39
55
|
arguments: Optional[Dict[str, Any]] = None
|
|
56
|
+
"""Optional arguments to pass to the source"""
|
|
40
57
|
|
|
41
58
|
|
|
42
59
|
class CollectionConfig(ConfiguredBaseModel):
|
|
@@ -81,7 +98,7 @@ class CollectionConfig(ConfiguredBaseModel):
|
|
|
81
98
|
description="LinkML-Map derivations",
|
|
82
99
|
)
|
|
83
100
|
page_size: Optional[int] = Field(default=None, description="Suggested page size (items per page) in apps and APIs")
|
|
84
|
-
graph_projection: Optional[
|
|
101
|
+
graph_projection: Optional[Union[EdgeProjection, NodeProjection]] = Field(
|
|
85
102
|
default=None,
|
|
86
103
|
description="Optional graph projection configuration",
|
|
87
104
|
)
|
|
@@ -707,12 +707,29 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
707
707
|
"""
|
|
708
708
|
raise NotImplementedError()
|
|
709
709
|
|
|
710
|
-
def import_database(
|
|
710
|
+
def import_database(
|
|
711
|
+
self,
|
|
712
|
+
location: str,
|
|
713
|
+
source_format: Optional[Union[str, Format]] = None,
|
|
714
|
+
collection_name: Optional[str] = None,
|
|
715
|
+
**kwargs,
|
|
716
|
+
):
|
|
711
717
|
"""
|
|
712
718
|
Import a database from a file or location.
|
|
713
719
|
|
|
720
|
+
>>> from linkml_store.api.client import Client
|
|
721
|
+
>>> client = Client()
|
|
722
|
+
>>> db = client.attach_database("duckdb", alias="test")
|
|
723
|
+
>>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
|
|
724
|
+
>>> db.list_collection_names()
|
|
725
|
+
['iris']
|
|
726
|
+
>>> collection = db.get_collection("iris")
|
|
727
|
+
>>> collection.find({}).num_rows
|
|
728
|
+
150
|
|
729
|
+
|
|
714
730
|
:param location: location of the file
|
|
715
731
|
:param source_format: source format
|
|
732
|
+
:param collection_name: (Optional) name of the collection, for data that is flat
|
|
716
733
|
:param kwargs: additional arguments
|
|
717
734
|
"""
|
|
718
735
|
if isinstance(source_format, str):
|
|
@@ -732,8 +749,12 @@ class Database(ABC, Generic[CollectionType]):
|
|
|
732
749
|
self.store(obj)
|
|
733
750
|
return
|
|
734
751
|
objects = load_objects(location, format=source_format)
|
|
735
|
-
|
|
736
|
-
self.
|
|
752
|
+
if collection_name:
|
|
753
|
+
collection = self.get_collection(collection_name, create_if_not_exists=True)
|
|
754
|
+
collection.insert(objects)
|
|
755
|
+
else:
|
|
756
|
+
for obj in objects:
|
|
757
|
+
self.store(obj)
|
|
737
758
|
|
|
738
759
|
def export_database(self, location: str, target_format: Optional[Union[str, Format]] = None, **kwargs):
|
|
739
760
|
"""
|
{linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/duckdb/duckdb_collection.py
RENAMED
|
@@ -36,6 +36,9 @@ class DuckDBCollection(Collection):
|
|
|
36
36
|
logger.info(f"Inserting into: {self.alias} // T={table.name}")
|
|
37
37
|
engine = self.parent.engine
|
|
38
38
|
col_names = [c.name for c in table.columns]
|
|
39
|
+
bad_objs = [obj for obj in objs if not isinstance(obj, dict)]
|
|
40
|
+
if bad_objs:
|
|
41
|
+
logger.error(f"Bad objects: {bad_objs}")
|
|
39
42
|
objs = [{k: obj.get(k, None) for k in col_names} for obj in objs]
|
|
40
43
|
with engine.connect() as conn:
|
|
41
44
|
with conn.begin():
|
{linkml_store-0.1.13 → linkml_store-0.2.0}/src/linkml_store/api/stores/mongodb/mongodb_collection.py
RENAMED
|
@@ -51,9 +51,13 @@ class MongoDBCollection(Collection):
|
|
|
51
51
|
if offset and offset >= 0:
|
|
52
52
|
cursor = cursor.skip(offset)
|
|
53
53
|
|
|
54
|
+
select_cols = query.select_cols
|
|
55
|
+
|
|
54
56
|
def _as_row(row: dict):
|
|
55
57
|
row = copy(row)
|
|
56
58
|
del row["_id"]
|
|
59
|
+
if select_cols:
|
|
60
|
+
row = {k: row[k] for k in select_cols if k in row}
|
|
57
61
|
return row
|
|
58
62
|
|
|
59
63
|
rows = [_as_row(row) for row in cursor]
|