linkml-store 0.1.8__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store-0.1.10/PKG-INFO +138 -0
- linkml_store-0.1.10/README.md +84 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/pyproject.toml +13 -3
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/client.py +15 -4
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/collection.py +185 -15
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/config.py +11 -3
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/database.py +36 -5
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +6 -3
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/duckdb_database.py +20 -1
- linkml_store-0.1.10/src/linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store-0.1.10/src/linkml_store/api/stores/filesystem/filesystem_collection.py +179 -0
- linkml_store-0.1.10/src/linkml_store/api/stores/filesystem/filesystem_database.py +72 -0
- linkml_store-0.1.10/src/linkml_store/api/stores/mongodb/mongodb_collection.py +171 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
- linkml_store-0.1.10/src/linkml_store/api/types.py +4 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/cli.py +97 -8
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/__init__.py +5 -3
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/indexer.py +7 -2
- linkml_store-0.1.10/src/linkml_store/utils/change_utils.py +17 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/format_utils.py +89 -8
- linkml_store-0.1.10/src/linkml_store/utils/patch_utils.py +126 -0
- linkml_store-0.1.10/src/linkml_store/utils/query_utils.py +89 -0
- linkml_store-0.1.10/src/linkml_store/utils/schema_utils.py +23 -0
- linkml_store-0.1.10/src/linkml_store/webapi/__init__.py +0 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/__init__.py +3 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store-0.1.10/src/linkml_store/webapi/html/generic.html.j2 +46 -0
- linkml_store-0.1.10/src/linkml_store/webapi/main.py +572 -0
- linkml_store-0.1.8/PKG-INFO +0 -58
- linkml_store-0.1.8/README.md +0 -12
- linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/__init__.py +0 -16
- linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -142
- linkml_store-0.1.8/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -36
- linkml_store-0.1.8/src/linkml_store/api/stores/mongodb/mongodb_collection.py +0 -123
- {linkml_store-0.1.8 → linkml_store-0.1.10}/LICENSE +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/object_utils.py +0 -0
- {linkml_store-0.1.8 → linkml_store-0.1.10}/src/linkml_store/utils/sql_utils.py +0 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: linkml-store
|
|
3
|
+
Version: 0.1.10
|
|
4
|
+
Summary: linkml-store
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Author 1
|
|
7
|
+
Author-email: author@org.org
|
|
8
|
+
Requires-Python: >=3.9, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*, !=3.8.*
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Provides-Extra: analytics
|
|
16
|
+
Provides-Extra: app
|
|
17
|
+
Provides-Extra: chromadb
|
|
18
|
+
Provides-Extra: fastapi
|
|
19
|
+
Provides-Extra: h5py
|
|
20
|
+
Provides-Extra: llm
|
|
21
|
+
Provides-Extra: map
|
|
22
|
+
Provides-Extra: mongodb
|
|
23
|
+
Provides-Extra: pyarrow
|
|
24
|
+
Provides-Extra: renderer
|
|
25
|
+
Provides-Extra: tests
|
|
26
|
+
Provides-Extra: validation
|
|
27
|
+
Requires-Dist: black (>=24.0.0) ; extra == "tests"
|
|
28
|
+
Requires-Dist: chromadb ; extra == "chromadb"
|
|
29
|
+
Requires-Dist: click
|
|
30
|
+
Requires-Dist: duckdb (>=0.10.1,<0.11.0)
|
|
31
|
+
Requires-Dist: duckdb-engine (>=0.11.2)
|
|
32
|
+
Requires-Dist: fastapi ; extra == "fastapi"
|
|
33
|
+
Requires-Dist: h5py ; extra == "h5py"
|
|
34
|
+
Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
|
|
35
|
+
Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
|
|
36
|
+
Requires-Dist: linkml ; extra == "validation"
|
|
37
|
+
Requires-Dist: linkml-runtime (>=1.8.0rc2)
|
|
38
|
+
Requires-Dist: linkml_map ; extra == "map"
|
|
39
|
+
Requires-Dist: linkml_renderer ; extra == "renderer"
|
|
40
|
+
Requires-Dist: llm ; extra == "llm"
|
|
41
|
+
Requires-Dist: matplotlib ; extra == "analytics"
|
|
42
|
+
Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
|
|
43
|
+
Requires-Dist: plotly ; extra == "analytics"
|
|
44
|
+
Requires-Dist: pyarrow ; extra == "pyarrow"
|
|
45
|
+
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
|
46
|
+
Requires-Dist: pymongo ; extra == "mongodb"
|
|
47
|
+
Requires-Dist: pystow (>=0.5.4,<0.6.0)
|
|
48
|
+
Requires-Dist: seaborn ; extra == "analytics"
|
|
49
|
+
Requires-Dist: sqlalchemy
|
|
50
|
+
Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
|
|
51
|
+
Requires-Dist: uvicorn ; extra == "fastapi"
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
# linkml-store
|
|
55
|
+
|
|
56
|
+
An AI-ready data management and integration platform. LinkML-Store
|
|
57
|
+
provides an abstraction layer over multiple different backends
|
|
58
|
+
(including DuckDB, MongoDB, and local filesystems), allowing for
|
|
59
|
+
common query, index, and storage operations.
|
|
60
|
+
|
|
61
|
+
For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
|
|
62
|
+
|
|
63
|
+
__Warning__ LinkML-Store is still undergoing changes and refactoring,
|
|
64
|
+
APIs and command line options are subject to change!
|
|
65
|
+
|
|
66
|
+
## Quick Start
|
|
67
|
+
|
|
68
|
+
Install, add data, query it:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
pip install linkml-store[all]
|
|
72
|
+
linkml-store -d duckdb:///db/my.db -c persons insert data/*.json
|
|
73
|
+
linkml-store -d duckdb:///db/my.db -c persons query -w "occupation: Bricklayer"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Index it, search it:
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
linkml-store -d duckdb:///db/my.db -c persons index -t llm
|
|
80
|
+
linkml-store -d duckdb:///db/my.db -c persons search "all persons employed in construction"
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Validate it:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
linkml-store -d duckdb:///db/my.db -c persons validate
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Basic usage
|
|
90
|
+
|
|
91
|
+
* [Command Line](https://linkml.io/linkml-store/tutorials/Command-Line-Tutorial.html)
|
|
92
|
+
* [Python](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
93
|
+
* API
|
|
94
|
+
* Streamlit applications
|
|
95
|
+
|
|
96
|
+
## Features
|
|
97
|
+
|
|
98
|
+
### Multiple Adapters
|
|
99
|
+
|
|
100
|
+
LinkML-Store is designed to work with multiple backends, giving a common abstraction layer
|
|
101
|
+
|
|
102
|
+
* [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
|
|
103
|
+
* [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
104
|
+
* [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
|
|
105
|
+
* Filesystem
|
|
106
|
+
|
|
107
|
+
Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
|
|
108
|
+
|
|
109
|
+
The intent is to give a union of all features of each backend. For
|
|
110
|
+
example, analytic faceted queries are provided for *all* backends, not
|
|
111
|
+
just Solr.
|
|
112
|
+
|
|
113
|
+
### Composable indexes
|
|
114
|
+
|
|
115
|
+
Many backends come with their own indexing and search
|
|
116
|
+
schemes. Classically this was Lucene-based indexes, now it is semantic
|
|
117
|
+
search using LLM embeddings.
|
|
118
|
+
|
|
119
|
+
LinkML-Store treats indexing as an orthogonal concern - you can
|
|
120
|
+
compose different indexing schemes with different backends. You don't
|
|
121
|
+
need to have a vector database to run embedding search!
|
|
122
|
+
|
|
123
|
+
See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
|
|
124
|
+
|
|
125
|
+
### Validation
|
|
126
|
+
|
|
127
|
+
LinkML-Store is backed by [LinkML](https://linkml.io), which allows
|
|
128
|
+
for powerful expressive structural and semantic constraints.
|
|
129
|
+
|
|
130
|
+
See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.html)
|
|
131
|
+
|
|
132
|
+
and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
## Background
|
|
136
|
+
|
|
137
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
138
|
+
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# linkml-store
|
|
2
|
+
|
|
3
|
+
An AI-ready data management and integration platform. LinkML-Store
|
|
4
|
+
provides an abstraction layer over multiple different backends
|
|
5
|
+
(including DuckDB, MongoDB, and local filesystems), allowing for
|
|
6
|
+
common query, index, and storage operations.
|
|
7
|
+
|
|
8
|
+
For full documentation, see [https://linkml.io/linkml-store/](https://linkml.io/linkml-store/)
|
|
9
|
+
|
|
10
|
+
__Warning__ LinkML-Store is still undergoing changes and refactoring,
|
|
11
|
+
APIs and command line options are subject to change!
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
Install, add data, query it:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
pip install linkml-store[all]
|
|
19
|
+
linkml-store -d duckdb:///db/my.db -c persons insert data/*.json
|
|
20
|
+
linkml-store -d duckdb:///db/my.db -c persons query -w "occupation: Bricklayer"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Index it, search it:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
linkml-store -d duckdb:///db/my.db -c persons index -t llm
|
|
27
|
+
linkml-store -d duckdb:///db/my.db -c persons search "all persons employed in construction"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Validate it:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
linkml-store -d duckdb:///db/my.db -c persons validate
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Basic usage
|
|
37
|
+
|
|
38
|
+
* [Command Line](https://linkml.io/linkml-store/tutorials/Command-Line-Tutorial.html)
|
|
39
|
+
* [Python](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
40
|
+
* API
|
|
41
|
+
* Streamlit applications
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
### Multiple Adapters
|
|
46
|
+
|
|
47
|
+
LinkML-Store is designed to work with multiple backends, giving a common abstraction layer
|
|
48
|
+
|
|
49
|
+
* [MongoDB](https://linkml.io/linkml-store/how-to/Use-MongoDB.html)
|
|
50
|
+
* [DuckDB](https://linkml.io/linkml-store/tutorials/Python-Tutorial.html)
|
|
51
|
+
* [Solr](https://linkml.io/linkml-store/how-to/Query-Solr-using-CLI.html)
|
|
52
|
+
* Filesystem
|
|
53
|
+
|
|
54
|
+
Coming soon: any RDBMS, any triplestore, Neo4J, HDF5-based stores, ChromaDB/Vector dbs ...
|
|
55
|
+
|
|
56
|
+
The intent is to give a union of all features of each backend. For
|
|
57
|
+
example, analytic faceted queries are provided for *all* backends, not
|
|
58
|
+
just Solr.
|
|
59
|
+
|
|
60
|
+
### Composable indexes
|
|
61
|
+
|
|
62
|
+
Many backends come with their own indexing and search
|
|
63
|
+
schemes. Classically this was Lucene-based indexes, now it is semantic
|
|
64
|
+
search using LLM embeddings.
|
|
65
|
+
|
|
66
|
+
LinkML-Store treats indexing as an orthogonal concern - you can
|
|
67
|
+
compose different indexing schemes with different backends. You don't
|
|
68
|
+
need to have a vector database to run embedding search!
|
|
69
|
+
|
|
70
|
+
See [How to Use-Semantic-Search](https://linkml.io/linkml-store/how-to/Use-Semantic-Search.html)
|
|
71
|
+
|
|
72
|
+
### Validation
|
|
73
|
+
|
|
74
|
+
LinkML-Store is backed by [LinkML](https://linkml.io), which allows
|
|
75
|
+
for powerful expressive structural and semantic constraints.
|
|
76
|
+
|
|
77
|
+
See [Indexing JSON](https://linkml.io/linkml-store/how-to/Index-Phenopackets.html)
|
|
78
|
+
|
|
79
|
+
and [Referential Integrity](https://linkml.io/linkml-store/how-to/Check-Referential-Integrity.html)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
## Background
|
|
83
|
+
|
|
84
|
+
See [these slides](https://docs.google.com/presentation/d/e/2PACX-1vSgtWUNUW0qNO_ZhMAGQ6fYhlXZJjBNMYT0OiZz8DDx8oj7iG9KofRs6SeaMXBBOICGknoyMG2zaHnm/embed?start=false&loop=false&delayms=3000) for more details
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "linkml-store"
|
|
3
|
-
version = "0.1.8"
|
|
3
|
+
version = "0.1.10"
|
|
4
4
|
description = "linkml-store"
|
|
5
5
|
authors = ["Author 1 <author@org.org>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -10,7 +10,7 @@ readme = "README.md"
|
|
|
10
10
|
python = "^3.9, !=3.9.7"
|
|
11
11
|
click = "*"
|
|
12
12
|
pydantic = "^2.0.0"
|
|
13
|
-
linkml-runtime = "
|
|
13
|
+
linkml-runtime = ">=1.8.0rc2"
|
|
14
14
|
streamlit = { version = "^1.32.2", optional = true }
|
|
15
15
|
sqlalchemy = "*"
|
|
16
16
|
duckdb = "^0.10.1"
|
|
@@ -23,11 +23,16 @@ black = { version=">=24.0.0", optional = true }
|
|
|
23
23
|
llm = { version="*", optional = true }
|
|
24
24
|
pymongo = { version="*", optional = true }
|
|
25
25
|
chromadb = { version="*", optional = true }
|
|
26
|
+
pyarrow = { version="*", optional = true }
|
|
26
27
|
h5py = { version="*", optional = true }
|
|
27
28
|
linkml = { version="*", optional = true }
|
|
28
29
|
linkml_map = { version="*", optional = true }
|
|
30
|
+
linkml_renderer = { version="*", optional = true }
|
|
29
31
|
pandas = ">=2.2.1"
|
|
30
32
|
jinja2 = "^3.1.4"
|
|
33
|
+
jsonlines = "^4.0.0"
|
|
34
|
+
fastapi = { version="*", optional = true }
|
|
35
|
+
uvicorn = { version="*", optional = true }
|
|
31
36
|
|
|
32
37
|
[tool.poetry.group.dev.dependencies]
|
|
33
38
|
pytest = {version = ">=7.1.2"}
|
|
@@ -43,6 +48,7 @@ furo = {version = "*"}
|
|
|
43
48
|
nbsphinx = "*"
|
|
44
49
|
jupyter = "*"
|
|
45
50
|
jupysql = "*"
|
|
51
|
+
papermill = "*"
|
|
46
52
|
|
|
47
53
|
[tool.poetry.group.tests.dependencies]
|
|
48
54
|
pytest = "^7.4.0"
|
|
@@ -60,11 +66,15 @@ llm = ["llm"]
|
|
|
60
66
|
mongodb = ["pymongo"]
|
|
61
67
|
chromadb = ["chromadb"]
|
|
62
68
|
h5py = ["h5py"]
|
|
69
|
+
pyarrow = ["pyarrow"]
|
|
63
70
|
validation = ["linkml"]
|
|
64
71
|
map = ["linkml_map"]
|
|
72
|
+
renderer = ["linkml_renderer"]
|
|
73
|
+
fastapi = ["fastapi", "uvicorn"]
|
|
65
74
|
|
|
66
75
|
[tool.poetry.scripts]
|
|
67
76
|
linkml-store = "linkml_store.cli:cli"
|
|
77
|
+
linkml-store-api = "linkml_store.webapi.main:start"
|
|
68
78
|
|
|
69
79
|
[tool.poetry-dynamic-versioning]
|
|
70
80
|
enable = false
|
|
@@ -127,7 +137,7 @@ skip = '.git,*.pdf,*.svg,./tests,pyproject.toml,*.dill,poetry.lock,*.ipynb'
|
|
|
127
137
|
# Ignore table where words could be split across rows
|
|
128
138
|
# Ignore shortcut specifications like [Ff]alse
|
|
129
139
|
ignore-regex = '(\|.*\|.*\|.*\||\[[A-Z][a-z]\][a-z][a-z])'
|
|
130
|
-
ignore-words-list = 'mater,connexion,infarction'
|
|
140
|
+
ignore-words-list = 'mater,connexion,infarction,nin'
|
|
131
141
|
count = ""
|
|
132
142
|
quiet-level = 3
|
|
133
143
|
|
|
@@ -9,6 +9,7 @@ from linkml_store.api import Database
|
|
|
9
9
|
from linkml_store.api.config import ClientConfig
|
|
10
10
|
from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
|
|
11
11
|
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
|
|
12
|
+
from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
|
|
12
13
|
from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
|
|
13
14
|
from linkml_store.api.stores.solr.solr_database import SolrDatabase
|
|
14
15
|
|
|
@@ -20,6 +21,7 @@ HANDLE_MAP = {
|
|
|
20
21
|
"solr": SolrDatabase,
|
|
21
22
|
"mongodb": MongoDBDatabase,
|
|
22
23
|
"chromadb": ChromaDBDatabase,
|
|
24
|
+
"file": FileSystemDatabase,
|
|
23
25
|
}
|
|
24
26
|
|
|
25
27
|
|
|
@@ -96,7 +98,7 @@ class Client:
|
|
|
96
98
|
"""
|
|
97
99
|
return self.metadata.base_dir
|
|
98
100
|
|
|
99
|
-
def from_config(self, config: Union[ClientConfig, str, Path], base_dir=None, **kwargs):
|
|
101
|
+
def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, **kwargs):
|
|
100
102
|
"""
|
|
101
103
|
Create a client from a configuration.
|
|
102
104
|
|
|
@@ -116,11 +118,13 @@ class Client:
|
|
|
116
118
|
:return:
|
|
117
119
|
|
|
118
120
|
"""
|
|
121
|
+
if isinstance(config, dict):
|
|
122
|
+
config = ClientConfig(**config)
|
|
119
123
|
if isinstance(config, Path):
|
|
120
124
|
config = str(config)
|
|
121
125
|
if isinstance(config, str):
|
|
122
|
-
if not base_dir:
|
|
123
|
-
|
|
126
|
+
# if not base_dir:
|
|
127
|
+
# base_dir = Path(config).parent
|
|
124
128
|
parsed_obj = yaml.safe_load(open(config))
|
|
125
129
|
config = ClientConfig(**parsed_obj)
|
|
126
130
|
self.metadata = config
|
|
@@ -131,8 +135,15 @@ class Client:
|
|
|
131
135
|
|
|
132
136
|
def _initialize_databases(self, **kwargs):
|
|
133
137
|
for name, db_config in self.metadata.databases.items():
|
|
134
|
-
|
|
138
|
+
base_dir = self.base_dir
|
|
139
|
+
logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
|
|
140
|
+
if not base_dir:
|
|
141
|
+
base_dir = Path.cwd()
|
|
142
|
+
logger.info(f"Using current working directory: {base_dir}")
|
|
143
|
+
handle = db_config.handle.format(base_dir=base_dir)
|
|
135
144
|
db_config.handle = handle
|
|
145
|
+
if db_config.schema_location:
|
|
146
|
+
db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
|
|
136
147
|
db = self.attach_database(handle, alias=name, **kwargs)
|
|
137
148
|
db.from_config(db_config)
|
|
138
149
|
|
|
@@ -4,16 +4,19 @@ import hashlib
|
|
|
4
4
|
import logging
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, TextIO, Type, Union
|
|
7
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Generic, Iterator, List, Optional, TextIO, Tuple, Type, Union
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
|
+
from linkml_runtime import SchemaView
|
|
10
11
|
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
|
11
12
|
from linkml_runtime.linkml_model.meta import ArrayExpression
|
|
12
13
|
from pydantic import BaseModel
|
|
13
14
|
|
|
15
|
+
from linkml_store.api.types import DatabaseType
|
|
14
16
|
from linkml_store.index import get_indexer
|
|
15
17
|
from linkml_store.utils.format_utils import load_objects
|
|
16
18
|
from linkml_store.utils.object_utils import clean_empties
|
|
19
|
+
from linkml_store.utils.patch_utils import PatchDict, apply_patches_to_list, patches_from_objects_lists
|
|
17
20
|
|
|
18
21
|
try:
|
|
19
22
|
from linkml.validator.report import ValidationResult
|
|
@@ -36,7 +39,7 @@ IDENTIFIER = str
|
|
|
36
39
|
FIELD_NAME = str
|
|
37
40
|
|
|
38
41
|
|
|
39
|
-
class Collection:
|
|
42
|
+
class Collection(Generic[DatabaseType]):
|
|
40
43
|
"""
|
|
41
44
|
A collection is an organized set of objects of the same or similar type.
|
|
42
45
|
|
|
@@ -56,11 +59,12 @@ class Collection:
|
|
|
56
59
|
"""
|
|
57
60
|
|
|
58
61
|
# name: str
|
|
59
|
-
parent: Optional[
|
|
62
|
+
parent: Optional[DatabaseType] = None
|
|
60
63
|
_indexers: Optional[Dict[str, Indexer]] = None
|
|
61
64
|
# hidden: Optional[bool] = False
|
|
62
65
|
|
|
63
66
|
metadata: Optional[CollectionConfig] = None
|
|
67
|
+
default_index_name: ClassVar[str] = "simple"
|
|
64
68
|
|
|
65
69
|
def __init__(
|
|
66
70
|
self, name: str, parent: Optional["Database"] = None, metadata: Optional[CollectionConfig] = None, **kwargs
|
|
@@ -197,6 +201,10 @@ class Collection:
|
|
|
197
201
|
"""
|
|
198
202
|
raise NotImplementedError
|
|
199
203
|
|
|
204
|
+
def _post_insert_hook(self, objs: List[OBJECT], **kwargs):
|
|
205
|
+
patches = [{"op": "add", "path": "/0", "value": obj} for obj in objs]
|
|
206
|
+
self._broadcast(patches, **kwargs)
|
|
207
|
+
|
|
200
208
|
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
201
209
|
"""
|
|
202
210
|
Delete one or more objects from the collection.
|
|
@@ -301,7 +309,7 @@ class Collection:
|
|
|
301
309
|
|
|
302
310
|
def query_facets(
|
|
303
311
|
self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
|
|
304
|
-
) -> Dict[str,
|
|
312
|
+
) -> Dict[str, List[Tuple[Any, int]]]:
|
|
305
313
|
"""
|
|
306
314
|
Run a query to get facet counts for one or more columns.
|
|
307
315
|
|
|
@@ -319,7 +327,7 @@ class Collection:
|
|
|
319
327
|
:param query: A Query object representing the base query.
|
|
320
328
|
:param facet_columns: A list of column names to get facet counts for.
|
|
321
329
|
:param facet_limit:
|
|
322
|
-
:return: A dictionary where keys are column names and values are
|
|
330
|
+
:return: A dictionary where keys are column names and values are lists of (value, count) tuples
|
|
323
331
|
containing the facet counts for each unique value in the respective column.
|
|
324
332
|
"""
|
|
325
333
|
raise NotImplementedError
|
|
@@ -414,7 +422,30 @@ class Collection:
|
|
|
414
422
|
**kwargs,
|
|
415
423
|
) -> QueryResult:
|
|
416
424
|
"""
|
|
417
|
-
Search the collection using a
|
|
425
|
+
Search the collection using a text-based index.
|
|
426
|
+
|
|
427
|
+
Example:
|
|
428
|
+
|
|
429
|
+
>>> from linkml_store import Client
|
|
430
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
431
|
+
>>> client = Client()
|
|
432
|
+
>>> db = client.attach_database("duckdb")
|
|
433
|
+
>>> collection = db.create_collection("Country")
|
|
434
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
435
|
+
>>> collection.insert(objs)
|
|
436
|
+
|
|
437
|
+
Now let's index, using the simple trigram-based index
|
|
438
|
+
|
|
439
|
+
>>> index = get_indexer("simple")
|
|
440
|
+
>>> collection.attach_indexer(index)
|
|
441
|
+
|
|
442
|
+
Now let's find all objects:
|
|
443
|
+
|
|
444
|
+
>>> qr = collection.search("France")
|
|
445
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
446
|
+
>>> assert score > 0.1
|
|
447
|
+
>>> top_obj["code"]
|
|
448
|
+
'FR'
|
|
418
449
|
|
|
419
450
|
:param query:
|
|
420
451
|
:param where:
|
|
@@ -424,12 +455,18 @@ class Collection:
|
|
|
424
455
|
:return:
|
|
425
456
|
"""
|
|
426
457
|
if index_name is None:
|
|
427
|
-
if len(self.
|
|
428
|
-
index_name = list(self.
|
|
458
|
+
if len(self.indexers) == 1:
|
|
459
|
+
index_name = list(self.indexers.keys())[0]
|
|
429
460
|
else:
|
|
430
|
-
|
|
461
|
+
logger.warning("Multiple indexes found. Using default index.")
|
|
462
|
+
index_name = self.default_index_name
|
|
431
463
|
ix_coll = self.parent.get_collection(self._index_collection_name(index_name))
|
|
432
|
-
|
|
464
|
+
if index_name not in self.indexers:
|
|
465
|
+
ix = get_indexer(index_name)
|
|
466
|
+
if not self._indexers:
|
|
467
|
+
self._indexers = {}
|
|
468
|
+
self._indexers[index_name] = ix
|
|
469
|
+
ix = self.indexers.get(index_name)
|
|
433
470
|
if not ix:
|
|
434
471
|
raise ValueError(f"No index named {index_name}")
|
|
435
472
|
qr = ix_coll.find(where=where, limit=-1, **kwargs)
|
|
@@ -446,7 +483,10 @@ class Collection:
|
|
|
446
483
|
@property
|
|
447
484
|
def is_internal(self) -> bool:
|
|
448
485
|
"""
|
|
449
|
-
Check if the collection is internal
|
|
486
|
+
Check if the collection is internal.
|
|
487
|
+
|
|
488
|
+
Internal collections are hidden by default. Examples of internal collections
|
|
489
|
+
include shadow "index" collections
|
|
450
490
|
|
|
451
491
|
:return:
|
|
452
492
|
"""
|
|
@@ -462,6 +502,45 @@ class Collection:
|
|
|
462
502
|
"""
|
|
463
503
|
Attach an index to the collection.
|
|
464
504
|
|
|
505
|
+
As an example, first let's create a collection in a database:
|
|
506
|
+
|
|
507
|
+
>>> from linkml_store import Client
|
|
508
|
+
>>> from linkml_store.utils.format_utils import load_objects
|
|
509
|
+
>>> client = Client()
|
|
510
|
+
>>> db = client.attach_database("duckdb")
|
|
511
|
+
>>> collection = db.create_collection("Country")
|
|
512
|
+
>>> objs = load_objects("tests/input/countries/countries.jsonl")
|
|
513
|
+
>>> collection.insert(objs)
|
|
514
|
+
|
|
515
|
+
We will create two indexes - one that indexes the whole object
|
|
516
|
+
(default behavior), the other one indexes the name only
|
|
517
|
+
|
|
518
|
+
>>> full_index = get_indexer("simple")
|
|
519
|
+
>>> full_index.name = "full"
|
|
520
|
+
>>> name_index = get_indexer("simple", text_template="{name}")
|
|
521
|
+
>>> name_index.name = "name"
|
|
522
|
+
>>> collection.attach_indexer(full_index)
|
|
523
|
+
>>> collection.attach_indexer(name_index)
|
|
524
|
+
|
|
525
|
+
Now let's find objects using the full index, using the string "France".
|
|
526
|
+
We expect the country France to be the top hit, but the score will
|
|
527
|
+
be less than one because we did not match all fields in the object.
|
|
528
|
+
|
|
529
|
+
>>> qr = collection.search("France", index_name="full")
|
|
530
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
531
|
+
>>> assert score > 0.1
|
|
532
|
+
>>> assert score < 0.5
|
|
533
|
+
>>> top_obj["code"]
|
|
534
|
+
'FR'
|
|
535
|
+
|
|
536
|
+
Now using the name index
|
|
537
|
+
|
|
538
|
+
>>> qr = collection.search("France", index_name="name")
|
|
539
|
+
>>> score, top_obj = qr.ranked_rows[0]
|
|
540
|
+
>>> assert score > 0.99
|
|
541
|
+
>>> top_obj["code"]
|
|
542
|
+
'FR'
|
|
543
|
+
|
|
465
544
|
:param index:
|
|
466
545
|
:param name:
|
|
467
546
|
:param auto_index: Automatically index all objects in the collection
|
|
@@ -497,15 +576,18 @@ class Collection:
|
|
|
497
576
|
|
|
498
577
|
def index_objects(self, objs: List[OBJECT], index_name: str, replace=False, **kwargs):
|
|
499
578
|
"""
|
|
500
|
-
Index a list of objects
|
|
579
|
+
Index a list of objects using a specified index.
|
|
580
|
+
|
|
581
|
+
By default, the indexed objects will be stored in a shadow
|
|
582
|
+
collection in the same database, with additional fields for the index vector
|
|
501
583
|
|
|
502
584
|
:param objs:
|
|
503
|
-
:param index_name:
|
|
585
|
+
:param index_name: e.g. simple, llm
|
|
504
586
|
:param replace:
|
|
505
587
|
:param kwargs:
|
|
506
588
|
:return:
|
|
507
589
|
"""
|
|
508
|
-
ix = self._indexers.get(index_name)
|
|
590
|
+
ix = self._indexers.get(index_name, None)
|
|
509
591
|
if not ix:
|
|
510
592
|
raise ValueError(f"No index named {index_name}")
|
|
511
593
|
ix_coll_name = self._index_collection_name(index_name)
|
|
@@ -523,6 +605,7 @@ class Collection:
|
|
|
523
605
|
ix_coll.delete_where()
|
|
524
606
|
|
|
525
607
|
ix_coll.insert(objects_with_ix, **kwargs)
|
|
608
|
+
ix_coll.commit()
|
|
526
609
|
|
|
527
610
|
def list_index_names(self) -> List[str]:
|
|
528
611
|
"""
|
|
@@ -557,12 +640,22 @@ class Collection:
|
|
|
557
640
|
|
|
558
641
|
:return:
|
|
559
642
|
"""
|
|
560
|
-
sv = self.parent.schema_view
|
|
643
|
+
sv: SchemaView = self.parent.schema_view
|
|
561
644
|
if sv:
|
|
562
645
|
cls = sv.get_class(self.target_class_name)
|
|
646
|
+
if cls and not cls.attributes:
|
|
647
|
+
if not sv.class_induced_slots(cls.name):
|
|
648
|
+
for att in self._induce_attributes():
|
|
649
|
+
cls.attributes[att.name] = att
|
|
650
|
+
sv.set_modified()
|
|
563
651
|
return cls
|
|
564
652
|
return None
|
|
565
653
|
|
|
654
|
+
def _induce_attributes(self) -> List[SlotDefinition]:
|
|
655
|
+
result = self.find({}, limit=-1)
|
|
656
|
+
cd = self.induce_class_definition_from_objects(result.rows, max_sample_size=None)
|
|
657
|
+
return list(cd.attributes.values())
|
|
658
|
+
|
|
566
659
|
@property
|
|
567
660
|
def identifier_attribute_name(self) -> Optional[str]:
|
|
568
661
|
"""
|
|
@@ -579,6 +672,37 @@ class Collection:
|
|
|
579
672
|
return att.name
|
|
580
673
|
return None
|
|
581
674
|
|
|
675
|
+
def set_identifier_attribute_name(self, name: str):
|
|
676
|
+
"""
|
|
677
|
+
Set the name of the identifier attribute for the collection.
|
|
678
|
+
|
|
679
|
+
AKA the primary key.
|
|
680
|
+
|
|
681
|
+
:param name: The name of the identifier attribute.
|
|
682
|
+
"""
|
|
683
|
+
cd = self.class_definition()
|
|
684
|
+
if not cd:
|
|
685
|
+
raise ValueError(f"Cannot find class definition for {self.target_class_name}")
|
|
686
|
+
id_att = None
|
|
687
|
+
candidates = []
|
|
688
|
+
sv: SchemaView = self.parent.schema_view
|
|
689
|
+
cls = sv.get_class(cd.name)
|
|
690
|
+
existing_id_slot = sv.get_identifier_slot(cls.name)
|
|
691
|
+
if existing_id_slot:
|
|
692
|
+
if existing_id_slot.name == name:
|
|
693
|
+
return
|
|
694
|
+
existing_id_slot.identifier = False
|
|
695
|
+
for att in cls.attributes.values():
|
|
696
|
+
candidates.append(att.name)
|
|
697
|
+
if att.name == name:
|
|
698
|
+
att.identifier = True
|
|
699
|
+
id_att = att
|
|
700
|
+
else:
|
|
701
|
+
att.identifier = False
|
|
702
|
+
if not id_att:
|
|
703
|
+
raise ValueError(f"No attribute found with name {name} in {candidates}")
|
|
704
|
+
sv.set_modified()
|
|
705
|
+
|
|
582
706
|
def object_identifier(self, obj: OBJECT, auto=True) -> Optional[IDENTIFIER]:
|
|
583
707
|
"""
|
|
584
708
|
Return the identifier for an object.
|
|
@@ -622,6 +746,8 @@ class Collection:
|
|
|
622
746
|
for k, v in obj.items():
|
|
623
747
|
keys[k].append(v)
|
|
624
748
|
for k, vs in keys.items():
|
|
749
|
+
if k == "_id":
|
|
750
|
+
continue
|
|
625
751
|
multivalueds = []
|
|
626
752
|
inlineds = []
|
|
627
753
|
rngs = []
|
|
@@ -698,6 +824,39 @@ class Collection:
|
|
|
698
824
|
"""
|
|
699
825
|
raise NotImplementedError
|
|
700
826
|
|
|
827
|
+
def apply_patches(self, patches: List[PatchDict], **kwargs):
|
|
828
|
+
"""
|
|
829
|
+
Apply a patch to the collection.
|
|
830
|
+
|
|
831
|
+
Patches conform to the JSON Patch format.
|
|
832
|
+
|
|
833
|
+
:param patches:
|
|
834
|
+
:param kwargs:
|
|
835
|
+
:return:
|
|
836
|
+
"""
|
|
837
|
+
all_objs = self.find(limit=-1).rows
|
|
838
|
+
primary_key = self.identifier_attribute_name
|
|
839
|
+
if not primary_key:
|
|
840
|
+
raise ValueError(f"No primary key for {self.target_class_name}")
|
|
841
|
+
new_objs = apply_patches_to_list(all_objs, patches, primary_key=primary_key, **kwargs)
|
|
842
|
+
self.replace(new_objs)
|
|
843
|
+
|
|
844
|
+
def diff(self, other: "Collection", **kwargs):
|
|
845
|
+
"""
|
|
846
|
+
Diff two collections.
|
|
847
|
+
|
|
848
|
+
:param other:
|
|
849
|
+
:param kwargs:
|
|
850
|
+
:return:
|
|
851
|
+
"""
|
|
852
|
+
src_objs = self.find(limit=-1).rows
|
|
853
|
+
tgt_objs = other.find(limit=-1).rows
|
|
854
|
+
primary_key = self.identifier_attribute_name
|
|
855
|
+
if not primary_key:
|
|
856
|
+
raise ValueError(f"No primary key for {self.target_class_name}")
|
|
857
|
+
patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
|
|
858
|
+
return patches_from_objects_lists(src_objs, tgt_objs, primary_key=primary_key)
|
|
859
|
+
|
|
701
860
|
def iter_validate_collection(self, **kwargs) -> Iterator["ValidationResult"]:
|
|
702
861
|
"""
|
|
703
862
|
Validate the contents of the collection
|
|
@@ -717,3 +876,14 @@ class Collection:
|
|
|
717
876
|
for obj in result.rows:
|
|
718
877
|
obj = clean_empties(obj)
|
|
719
878
|
yield from validator.iter_results(obj, class_name)
|
|
879
|
+
|
|
880
|
+
def commit(self):
|
|
881
|
+
"""
|
|
882
|
+
Commit changes to the collection.
|
|
883
|
+
|
|
884
|
+
:return:
|
|
885
|
+
"""
|
|
886
|
+
pass
|
|
887
|
+
|
|
888
|
+
def _broadcast(self, *args, **kwargs):
|
|
889
|
+
self.parent.broadcast(self, *args, **kwargs)
|