linkml-store 0.1.9__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store-0.1.10/PKG-INFO +138 -0
- linkml_store-0.1.10/README.md +84 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/pyproject.toml +8 -2
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/client.py +13 -4
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/collection.py +85 -10
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/config.py +11 -3
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +5 -3
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_database.py +20 -1
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +2 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +74 -32
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/cli.py +9 -1
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/__init__.py +5 -3
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/indexer.py +7 -2
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/format_utils.py +1 -1
- linkml_store-0.1.10/src/linkml_store/utils/schema_utils.py +23 -0
- linkml_store-0.1.10/src/linkml_store/webapi/__init__.py +0 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/__init__.py +3 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/generic.html.j2 +46 -0
- linkml_store-0.1.10/src/linkml_store/webapi/main.py +572 -0
- linkml_store-0.1.9/PKG-INFO +0 -61
- linkml_store-0.1.9/README.md +0 -12
- {linkml_store-0.1.9 → linkml_store-0.1.10}/LICENSE +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/database.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/object_utils.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/utils/sql_utils.py +0 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: linkml-store
|
|
3
|
+
Version: 0.1.10
|
|
4
|
+
Summary: linkml-store
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Author 1
|
|
7
|
+
Author-email: author@org.org
|
|
8
|
+
Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Provides-Extra: analytics
|
|
16
|
+
Provides-Extra: app
|
|
17
|
+
Provides-Extra: chromadb
|
|
18
|
+
Provides-Extra: fastapi
|
|
19
|
+
Provides-Extra: h5py
|
|
20
|
+
Provides-Extra: llm
|
|
21
|
+
Provides-Extra: map
|
|
22
|
+
Provides-Extra: mongodb
|
|
23
|
+
Provides-Extra: pyarrow
|
|
24
|
+
Provides-Extra: renderer
|
|
25
|
+
Provides-Extra: tests
|
|
26
|
+
Provides-Extra: validation
|
|
27
|
+
Requires-Dist: black (>=24.0.0) ; extra == "tests"
|
|
28
|
+
Requires-Dist: chromadb ; extra == "chromadb"
|
|
29
|
+
Requires-Dist: click
|
|
30
|
+
Requires-Dist: duckdb (>=0.10.1,<0.11.0)
|
|
31
|
+
Requires-Dist: duckdb-engine (>=0.11.2)
|
|
32
|
+
Requires-Dist: fastapi ; extra == "fastapi"
|
|
33
|
+
Requires-Dist: h5py ; extra == "h5py"
|
|
34
|
+
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
35
|
+
Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
|
|
36
|
+
Requires-Dist: linkml ; extra == "validation"
|
|
37
|
+
Requires-Dist: linkml-runtime (>=1.8.0rc2)
|
|
38
|
+
Requires-Dist: linkml_map ; extra == "map"
|
|
39
|
+
Requires-Dist: linkml_renderer ; extra == "renderer"
|
|
40
|
+
Requires-Dist: llm ; extra == "llm"
|
|
41
|
+
Requires-Dist: matplotlib ; extra == "analytics"
|
|
42
|
+
Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
|
|
43
|
+
Requires-Dist: plotly ; extra == "analytics"
|
|
44
|
+
Requires-Dist: pyarrow ; extra == "pyarrow"
|
|
45
|
+
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
|
46
|
+
Requires-Dist: pymongo ; extra == "mongodb"
|
|
47
|
+
Requires-Dist: pystow (>=0.5.4,<0.6.0)
|
|
48
|
+
Requires-Dist: seaborn ; extra == "analytics"
|
|
49
|
+
Requires-Dist: sqlalchemy
|
|
50
|
+
Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
|
|
51
|
+
Requires-Dist: uvicorn ; extra == "fastapi"
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
# linkml-store
|
|
55
|
+
|
|
56
|
+
An AI-ready data management and integration platform. LinkML-Store
|
|
57
|
+
provides an abstraction layer over multiple different backends
|
|
58
|
+
(including DuckDB, MongoDB, and local filesystems), allowing for
|
|
59
|
+
common query, index, and storage operations.
|
|
60
|
+
|
|
61
|
+
For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
|
|
62
|
+
|
|
63
|
+
__Warning__ LinkML-Store is still undergoing changes and refactoring,
|
|
64
|
+
APIs and command line options are subject to change!
|
|
65
|
+
|
|
66
|
+
## Quick Start
|
|
67
|
+
|
|
68
|
+
Install, add data, query it:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
pip install linkml-store[all]
|
|
72
|
+
linkml-store -d duckdb:///db/my.db -c persons insert data/*.json
|
|
73
|
+
linkml-store -d duckdb:///db/my.db -c persons query -w "occupation: Bricklayer"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Index it, search it:
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
linkml-store -d duckdb:///db/my.db -c persons index -t llm
|
|
80
|
+
linkml-store -d duckdb:///db/my.db -c persons search "all persons employed in construction"
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Validate it:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
linkml-store -d duckdb:///db/my.db -c persons validate
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Basic usage
|
|
90
|
+
|
|
91
|
+
* [Command Line](https://linkml.io/linkml-store/tutorials/Command-Line-Tutorial.html)
|
|
92
|
+
* [Python](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
93
|
+
* API
|
|
94
|
+
* Streamlit applications
|
|
95
|
+
|
|
96
|
+
## Features
|
|
97
|
+
|
|
98
|
+
### Multiple Adapters
|
|
99
|
+
|
|
100
|
+
LinkML-Store is designed to work with multiple backends, giving a common abstraction layer
|
|
101
|
+
|
|
102
|
+
* [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
|
|
103
|
+
* [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
104
|
+
* [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
|
|
105
|
+
* Filesystem
|
|
106
|
+
|
|
107
|
+
Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
|
|
108
|
+
|
|
109
|
+
The intent is to give a union of all features of each backend. For
|
|
110
|
+
example, analytic faceted queries are provided for *all* backends, not
|
|
111
|
+
just Solr.
|
|
112
|
+
|
|
113
|
+
### Composable indexes
|
|
114
|
+
|
|
115
|
+
Many backends come with their own indexing and search
|
|
116
|
+
schemes. Classically these were Lucene-based indexes; now it is semantic
|
|
117
|
+
search using LLM embeddings.
|
|
118
|
+
|
|
119
|
+
LinkML-Store treats indexing as an orthogonal concern - you can
|
|
120
|
+
compose different indexing schemes with different backends. You don't
|
|
121
|
+
need to have a vector database to run embedding search!
|
|
122
|
+
|
|
123
|
+
See [How to Use Semantic Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
|
|
124
|
+
|
|
125
|
+
### Validation
|
|
126
|
+
|
|
127
|
+
LinkML-Store is backed by [LinkML](https://linkml.io), which allows
|
|
128
|
+
for powerful expressive structural and semantic constraints.
|
|
129
|
+
|
|
130
|
+
See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.html)
|
|
131
|
+
|
|
132
|
+
and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
## Background
|
|
136
|
+
|
|
137
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
138
|
+
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# linkml-store
|
|
2
|
+
|
|
3
|
+
An AI-ready data management and integration platform. LinkML-Store
|
|
4
|
+
provides an abstraction layer over multiple different backends
|
|
5
|
+
(including DuckDB, MongoDB, and local filesystems), allowing for
|
|
6
|
+
common query, index, and storage operations.
|
|
7
|
+
|
|
8
|
+
For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
|
|
9
|
+
|
|
10
|
+
__Warning__ LinkML-Store is still undergoing changes and refactoring,
|
|
11
|
+
APIs and command line options are subject to change!
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
Install, add data, query it:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
pip install linkml-store[all]
|
|
19
|
+
linkml-store -d duckdb:///db/my.db -c persons insert data/*.json
|
|
20
|
+
linkml-store -d duckdb:///db/my.db -c persons query -w "occupation: Bricklayer"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Index it, search it:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
linkml-store -d duckdb:///db/my.db -c persons index -t llm
|
|
27
|
+
linkml-store -d duckdb:///db/my.db -c persons search "all persons employed in construction"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Validate it:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
linkml-store -d duckdb:///db/my.db -c persons validate
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Basic usage
|
|
37
|
+
|
|
38
|
+
* [Command Line](https://linkml.io/linkml-store/tutorials/Command-Line-Tutorial.html)
|
|
39
|
+
* [Python](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
40
|
+
* API
|
|
41
|
+
* Streamlit applications
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
### Multiple Adapters
|
|
46
|
+
|
|
47
|
+
LinkML-Store is designed to work with multiple backends, giving a common abstraction layer
|
|
48
|
+
|
|
49
|
+
* [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
|
|
50
|
+
* [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
51
|
+
* [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
|
|
52
|
+
* Filesystem
|
|
53
|
+
|
|
54
|
+
Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
|
|
55
|
+
|
|
56
|
+
The intent is to give a union of all features of each backend. For
|
|
57
|
+
example, analytic faceted queries are provided for *all* backends, not
|
|
58
|
+
just Solr.
|
|
59
|
+
|
|
60
|
+
### Composable indexes
|
|
61
|
+
|
|
62
|
+
Many backends come with their own indexing and search
|
|
63
|
+
schemes. Classically these were Lucene-based indexes; now it is semantic
|
|
64
|
+
search using LLM embeddings.
|
|
65
|
+
|
|
66
|
+
LinkML-Store treats indexing as an orthogonal concern - you can
|
|
67
|
+
compose different indexing schemes with different backends. You don't
|
|
68
|
+
need to have a vector database to run embedding search!
|
|
69
|
+
|
|
70
|
+
See [How to Use Semantic Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
|
|
71
|
+
|
|
72
|
+
### Validation
|
|
73
|
+
|
|
74
|
+
LinkML-Store is backed by [LinkML](https://linkml.io), which allows
|
|
75
|
+
for powerful expressive structural and semantic constraints.
|
|
76
|
+
|
|
77
|
+
See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.html)
|
|
78
|
+
|
|
79
|
+
and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
## Background
|
|
83
|
+
|
|
84
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.1.
|
|
3
|
+
version = "0.1.10"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -10,7 +10,7 @@ readme = "README.md"
|
|
|
10
10
|
python = "^3.9, !=3.9.7"
|
|
11
11
|
click = "*"
|
|
12
12
|
pydantic = "^2.0.0"
|
|
13
|
-
linkml-runtime = "
|
|
13
|
+
linkml-runtime = ">=1.8.0rc2"
|
|
14
14
|
streamlit = { version = "^1.32.2", optional = true }
|
|
15
15
|
sqlalchemy = "*"
|
|
16
16
|
duckdb = "^0.10.1"
|
|
@@ -27,9 +27,12 @@ pyarrow = { version="*", optional = true }
|
|
|
27
27
|
h5py = { version="*", optional = true }
|
|
28
28
|
linkml = { version="*", optional = true }
|
|
29
29
|
linkml_map = { version="*", optional = true }
|
|
30
|
+
linkml_renderer = { version="*", optional = true }
|
|
30
31
|
pandas = ">=2.2.1"
|
|
31
32
|
jinja2 = "^3.1.4"
|
|
32
33
|
jsonlines = "^4.0.0"
|
|
34
|
+
fastapi = { version="*", optional = true }
|
|
35
|
+
uvicorn = { version="*", optional = true }
|
|
33
36
|
|
|
34
37
|
[tool.poetry.group.dev.dependencies]
|
|
35
38
|
pytest = {version = ">=7.1.2"}
|
|
@@ -66,9 +69,12 @@ h5py = ["h5py"]
|
|
|
66
69
|
pyarrow = ["pyarrow"]
|
|
67
70
|
validation = ["linkml"]
|
|
68
71
|
map = ["linkml_map"]
|
|
72
|
+
renderer = ["linkml_renderer"]
|
|
73
|
+
fastapi = ["fastapi", "uvicorn"]
|
|
69
74
|
|
|
70
75
|
[tool.poetry.scripts]
|
|
71
76
|
linkml-store = "linkml_store.cli:cli"
|
|
77
|
+
linkml-store-api = "linkml_store.webapi.main:start"
|
|
72
78
|
|
|
73
79
|
[tool.poetry-dynamic-versioning]
|
|
74
80
|
enable = false
|
|
@@ -98,7 +98,7 @@ class Client:
|
|
|
98
98
|
"""
|
|
99
99
|
return self.metadata.base_dir
|
|
100
100
|
|
|
101
|
-
def from_config(self, config: Union[ClientConfig, str, Path], base_dir=None, **kwargs):
|
|
101
|
+
def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
|
|
102
102
|
"""
|
|
103
103
|
Create a client from a configuration.
|
|
104
104
|
|
|
@@ -118,11 +118,13 @@ class Client:
|
|
|
118
118
|
:return:
|
|
119
119
|
|
|
120
120
|
"""
|
|
121
|
+
if isinstance(config, dict):
|
|
122
|
+
config = ClientConfig(**config)
|
|
121
123
|
if isinstance(config, Path):
|
|
122
124
|
config = str(config)
|
|
123
125
|
if isinstance(config, str):
|
|
124
|
-
if not base_dir:
|
|
125
|
-
|
|
126
|
+
# if not base_dir:
|
|
127
|
+
# base_dir = Path(config).parent
|
|
126
128
|
parsed_obj = yaml.safe_load(open(config))
|
|
127
129
|
config = ClientConfig(**parsed_obj)
|
|
128
130
|
self.metadata = config
|
|
@@ -133,8 +135,15 @@ class Client:
|
|
|
133
135
|
|
|
134
136
|
def _initialize_databases(self, **kwargs):
|
|
135
137
|
for name, db_config in self.metadata.databases.items():
|
|
136
|
-
|
|
138
|
+
base_dir = self.base_dir
|
|
139
|
+
logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
|
|
140
|
+
if not base_dir:
|
|
141
|
+
base_dir = Path.cwd()
|
|
142
|
+
logger.info(f"Using current working directory: {base_dir}")
|
|
143
|
+
handle = db_config.handle.format(base_dir=base_dir)
|
|
137
144
|
db_config.handle = handle
|
|
145
|
+
if db_config.schema_location:
|
|
146
|
+
db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
|
|
138
147
|
db = self.attach_database(handle, alias=name, **kwargs)
|
|
139
148
|
db.from_config(db_config)
|
|
140
149
|
|
|
@@ -4,7 +4,7 @@ import hashlib
|
|
|
4
4
|
import logging
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
|
|
7
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
from linkml_runtime import SchemaView
|
|
@@ -64,6 +64,7 @@ class Collection(Generic[DatabaseType]):
|
|
|
64
64
|
# hidden: Optional[bool] = False
|
|
65
65
|
|
|
66
66
|
metadata: Optional[CollectionConfig] = None
|
|
67
|
+
default_index_name: ClassVar[str] = "simple"
|
|
67
68
|
|
|
68
69
|
def __init__(
|
|
69
70
|
self, name: str, parent: Optional["Database"] = None, metadata: Optional[CollectionConfig] = None, **kwargs
|
|
@@ -421,7 +422,30 @@ class Collection(Generic[DatabaseType]):
|
|
|
421
422
|
**kwargs,
|
|
422
423
|
) -> QueryResult:
|
|
423
424
|
"""
|
|
424
|
-
Search the collection using a
|
|
425
|
+
Search the collection using a text-based index.
|
|
426
|
+
|
|
427
|
+
Example:
|
|
428
|
+
|
|
429
|
+
>>> from linkml_store import Client
|
|
430
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
431
|
+
>>> client = Client()
|
|
432
|
+
>>> db = client.attach_database("duckdb")
|
|
433
|
+
>>> collection = db.create_collection("Country")
|
|
434
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
435
|
+
>>> collection.insert(objs)
|
|
436
|
+
|
|
437
|
+
Now let's index, using the simple trigram-based index
|
|
438
|
+
|
|
439
|
+
>>> index = get_indexer("simple")
|
|
440
|
+
>>> collection.attach_indexer(index)
|
|
441
|
+
|
|
442
|
+
Now let's find all objects:
|
|
443
|
+
|
|
444
|
+
>>> qr = collection.search("France")
|
|
445
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
446
|
+
>>> assert score > 0.1
|
|
447
|
+
>>> top_obj["code"]
|
|
448
|
+
'FR'
|
|
425
449
|
|
|
426
450
|
:param query:
|
|
427
451
|
:param where:
|
|
@@ -431,12 +455,18 @@ class Collection(Generic[DatabaseType]):
|
|
|
431
455
|
:return:
|
|
432
456
|
"""
|
|
433
457
|
if index_name is None:
|
|
434
|
-
if len(self.
|
|
435
|
-
index_name = list(self.
|
|
458
|
+
if len(self.indexers) == 1:
|
|
459
|
+
index_name = list(self.indexers.keys())[0]
|
|
436
460
|
else:
|
|
437
|
-
|
|
461
|
+
logger.warning("Multiple indexes found. Using default index.")
|
|
462
|
+
index_name = self.default_index_name
|
|
438
463
|
ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
|
|
439
|
-
|
|
464
|
+
if index_name not in self.indexers:
|
|
465
|
+
ix = get_indexer(index_name)
|
|
466
|
+
if not self._indexers:
|
|
467
|
+
self._indexers = {}
|
|
468
|
+
self._indexers[index_name] = ix
|
|
469
|
+
ix = self.indexers.get(index_name)
|
|
440
470
|
if not ix:
|
|
441
471
|
raise ValueError(f"No index named {index_name}")
|
|
442
472
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
@@ -453,7 +483,10 @@ class Collection(Generic[DatabaseType]):
|
|
|
453
483
|
@property
|
|
454
484
|
def is_internal(self) -> bool:
|
|
455
485
|
"""
|
|
456
|
-
Check if the collection is internal
|
|
486
|
+
Check if the collection is internal.
|
|
487
|
+
|
|
488
|
+
Internal collections are hidden by default. Examples of internal collections
|
|
489
|
+
include shadow "index" collections
|
|
457
490
|
|
|
458
491
|
:return:
|
|
459
492
|
"""
|
|
@@ -469,6 +502,45 @@ class Collection(Generic[DatabaseType]):
|
|
|
469
502
|
"""
|
|
470
503
|
Attach an index to the collection.
|
|
471
504
|
|
|
505
|
+
As an example, first let's create a collection in a database:
|
|
506
|
+
|
|
507
|
+
>>> from linkml_store import Client
|
|
508
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
509
|
+
>>> client = Client()
|
|
510
|
+
>>> db = client.attach_database("duckdb")
|
|
511
|
+
>>> collection = db.create_collection("Country")
|
|
512
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
513
|
+
>>> collection.insert(objs)
|
|
514
|
+
|
|
515
|
+
We will create two indexes - one that indexes the whole object
|
|
516
|
+
(default behavior), the other one indexes the name only
|
|
517
|
+
|
|
518
|
+
>>> full_index = get_indexer("simple")
|
|
519
|
+
>>> full_index.name = "full"
|
|
520
|
+
>>> name_index = get_indexer("simple", text_template="{name}")
|
|
521
|
+
>>> name_index.name = "name"
|
|
522
|
+
>>> collection.attach_indexer(full_index)
|
|
523
|
+
>>> collection.attach_indexer(name_index)
|
|
524
|
+
|
|
525
|
+
Now let's find objects using the full index, using the string "France".
|
|
526
|
+
We expect the country France to be the top hit, but the score will
|
|
527
|
+
be well below 1 because we did not match all fields in the object.
|
|
528
|
+
|
|
529
|
+
>>> qr = collection.search("France", index_name="full")
|
|
530
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
531
|
+
>>> assert score > 0.1
|
|
532
|
+
>>> assert score < 0.5
|
|
533
|
+
>>> top_obj["code"]
|
|
534
|
+
'FR'
|
|
535
|
+
|
|
536
|
+
Now using the name index
|
|
537
|
+
|
|
538
|
+
>>> qr = collection.search("France", index_name="name")
|
|
539
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
540
|
+
>>> assert score > 0.99
|
|
541
|
+
>>> top_obj["code"]
|
|
542
|
+
'FR'
|
|
543
|
+
|
|
472
544
|
:param index:
|
|
473
545
|
:param name:
|
|
474
546
|
:param auto_index: Automatically index all objects in the collection
|
|
@@ -504,15 +576,18 @@ class Collection(Generic[DatabaseType]):
|
|
|
504
576
|
|
|
505
577
|
def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
|
|
506
578
|
"""
|
|
507
|
-
Index a list of objects
|
|
579
|
+
Index a list of objects using a specified index.
|
|
580
|
+
|
|
581
|
+
By default, the indexed objects will be stored in a shadow
|
|
582
|
+
collection in the same database, with additional fields for the index vector
|
|
508
583
|
|
|
509
584
|
:param objs:
|
|
510
|
-
:param index_name:
|
|
585
|
+
:param index_name: e.g. simple, llm
|
|
511
586
|
:param replace:
|
|
512
587
|
:param kwargs:
|
|
513
588
|
:return:
|
|
514
589
|
"""
|
|
515
|
-
ix = self._indexers.get(index_name)
|
|
590
|
+
ix = self._indexers.get(index_name, None)
|
|
516
591
|
if not ix:
|
|
517
592
|
raise ValueError(f"No index named {index_name}")
|
|
518
593
|
ix_coll_name = self._index_collection_name(index_name)
|
|
@@ -3,7 +3,11 @@ from typing import Any, Dict, List, Optional
|
|
|
3
3
|
from pydantic import BaseModel, Field
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class
|
|
6
|
+
class ConfiguredBaseModel(BaseModel, extra="forbid"):
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CollectionConfig(ConfiguredBaseModel):
|
|
7
11
|
name: Optional[str] = Field(
|
|
8
12
|
default=None,
|
|
9
13
|
description="An optional name for the collection",
|
|
@@ -42,7 +46,7 @@ class CollectionConfig(BaseModel):
|
|
|
42
46
|
)
|
|
43
47
|
|
|
44
48
|
|
|
45
|
-
class DatabaseConfig(
|
|
49
|
+
class DatabaseConfig(ConfiguredBaseModel):
|
|
46
50
|
handle: str = Field(
|
|
47
51
|
default="duckdb:///:memory:",
|
|
48
52
|
description="The database handle, e.g., 'duckdb:///:memory:' or 'mongodb://localhost:27017'",
|
|
@@ -86,7 +90,7 @@ class DatabaseConfig(BaseModel):
|
|
|
86
90
|
)
|
|
87
91
|
|
|
88
92
|
|
|
89
|
-
class ClientConfig(
|
|
93
|
+
class ClientConfig(ConfiguredBaseModel):
|
|
90
94
|
handle: Optional[str] = Field(
|
|
91
95
|
default=None,
|
|
92
96
|
description="The client handle",
|
|
@@ -95,6 +99,10 @@ class ClientConfig(BaseModel):
|
|
|
95
99
|
default={},
|
|
96
100
|
description="A dictionary of database configurations",
|
|
97
101
|
)
|
|
102
|
+
default_database: Optional[str] = Field(
|
|
103
|
+
default=None,
|
|
104
|
+
description="The default database",
|
|
105
|
+
)
|
|
98
106
|
schema_path: Optional[str] = Field(
|
|
99
107
|
default=None,
|
|
100
108
|
description="The path to the LinkML schema file",
|
{linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_collection.py
RENAMED
|
@@ -90,7 +90,9 @@ class DuckDBCollection(Collection):
|
|
|
90
90
|
cd = self.class_definition()
|
|
91
91
|
with self.parent.engine.connect() as conn:
|
|
92
92
|
if not facet_columns:
|
|
93
|
-
|
|
93
|
+
if not cd:
|
|
94
|
+
raise ValueError(f"No class definition found for {self.target_class_name}")
|
|
95
|
+
facet_columns = list(cd.attributes.keys())
|
|
94
96
|
for col in facet_columns:
|
|
95
97
|
logger.debug(f"Faceting on {col}")
|
|
96
98
|
if isinstance(col, tuple):
|
|
@@ -101,7 +103,7 @@ class DuckDBCollection(Collection):
|
|
|
101
103
|
facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
|
|
102
104
|
logger.debug(f"Facet query: {facet_query_str}")
|
|
103
105
|
rows = list(conn.execute(text(facet_query_str)))
|
|
104
|
-
results[col] = rows
|
|
106
|
+
results[col] = [tuple(row) for row in rows]
|
|
105
107
|
return results
|
|
106
108
|
|
|
107
109
|
def _sqla_table(self, cd: ClassDefinition) -> Table:
|
|
@@ -110,7 +112,7 @@ class DuckDBCollection(Collection):
|
|
|
110
112
|
cols = []
|
|
111
113
|
for att in schema_view.class_induced_slots(cd.name):
|
|
112
114
|
typ = TMAP.get(att.range, sqla.String)
|
|
113
|
-
if att.inlined:
|
|
115
|
+
if att.inlined or att.inlined_as_list:
|
|
114
116
|
typ = sqla.JSON
|
|
115
117
|
if att.multivalued:
|
|
116
118
|
typ = sqla.ARRAY(typ, dimensions=1)
|
{linkml_store-0.1.9 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_database.py
RENAMED
|
@@ -31,6 +31,18 @@ logger = logging.getLogger(__name__)
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class DuckDBDatabase(Database):
|
|
34
|
+
"""
|
|
35
|
+
An adapter for DuckDB databases.
|
|
36
|
+
|
|
37
|
+
Note that this adapter does not make use of a LinkML relational model transformation and
|
|
38
|
+
SQLAlchemy ORM layer. Instead, it attempts to map each collection (which is of type
|
|
39
|
+
some LinkML class) to a *single* DuckDB table. New tables are not created for nested references,
|
|
40
|
+
and linking tables are not created for many-to-many relationships.
|
|
41
|
+
|
|
42
|
+
Instead the native DuckDB ARRAY type is used to store multivalued attributes, and DuckDB JSON
|
|
43
|
+
types are used for nested inlined objects.
|
|
44
|
+
"""
|
|
45
|
+
|
|
34
46
|
_connection: DuckDBPyConnection = None
|
|
35
47
|
_engine: sqlalchemy.Engine = None
|
|
36
48
|
collection_class = DuckDBCollection
|
|
@@ -103,7 +115,14 @@ class DuckDBDatabase(Database):
|
|
|
103
115
|
if row[col]:
|
|
104
116
|
if isinstance(row[col], list):
|
|
105
117
|
for i in range(len(row[col])):
|
|
106
|
-
|
|
118
|
+
try:
|
|
119
|
+
parsed_val = json.loads(row[col][i])
|
|
120
|
+
except json.JSONDecodeError as e:
|
|
121
|
+
logger.error(f"Failed to parse col {col}[{i}] == {row[col][i]}")
|
|
122
|
+
raise e
|
|
123
|
+
row[col][i] = parsed_val
|
|
124
|
+
elif isinstance(row[col], dict):
|
|
125
|
+
pass
|
|
107
126
|
else:
|
|
108
127
|
row[col] = json.loads(row[col])
|
|
109
128
|
qr.set_rows(pd.DataFrame(rows))
|