linkml-store 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {linkml_store-0.2.4 → linkml_store-0.2.6}/PKG-INFO +7 -10
- {linkml_store-0.2.4 → linkml_store-0.2.6}/pyproject.toml +9 -8
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/client.py +19 -2
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/collection.py +60 -2
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/database.py +17 -12
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +11 -5
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_database.py +52 -19
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +83 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_database.py +7 -3
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/cli.py +23 -5
- linkml_store-0.2.6/src/linkml_store/inference/implementations/llm_inference_engine.py +152 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/rag_inference_engine.py +20 -9
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/inference_engine.py +2 -2
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/format_utils.py +60 -1
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/llm_utils.py +15 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/object_utils.py +3 -1
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/sql_utils.py +7 -1
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/vector_utils.py +1 -1
- {linkml_store-0.2.4 → linkml_store-0.2.6}/LICENSE +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/README.md +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/config.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/filesystem/filesystem_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/neo4j/neo4j_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_collection.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/graphs/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/graphs/graph_map.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/implementations/llm_indexer.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/index/indexer.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/evaluation.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/inference_config.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/inference/inference_engine_registry.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/file_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/mongodb_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/neo4j_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/pandas_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/sklearn_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/utils/stats_utils.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/webapi/main.py +0 -0
{linkml_store-0.2.4 → linkml_store-0.2.6}/PKG-INFO
RENAMED
```diff
@@ -1,14 +1,13 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.4
+Version: 0.2.6
 Summary: linkml-store
 License: MIT
 Author: Author 1
 Author-email: author@org.org
-Requires-Python: >=3.9,<4.0
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -20,7 +19,6 @@ Provides-Extra: bigquery
 Provides-Extra: fastapi
 Provides-Extra: frictionless
 Provides-Extra: h5py
-Provides-Extra: ibis
 Provides-Extra: llm
 Provides-Extra: map
 Provides-Extra: mongodb
@@ -36,20 +34,18 @@ Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: duckdb-engine (>=0.11.2)
 Requires-Dist: fastapi ; extra == "fastapi"
 Requires-Dist: frictionless ; extra == "frictionless"
-Requires-Dist: gcsfs ; extra == "ibis"
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: h5py ; extra == "h5py"
-Requires-Dist: ibis-framework[duckdb,examples] (>=9.3.0) ; extra == "ibis"
 Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
 Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
-Requires-Dist: jsonpatch (>=1.33,<2.0)
+Requires-Dist: jsonpatch (>=1.33)
 Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
 Requires-Dist: linkml-runtime (>=1.8.0)
 Requires-Dist: linkml_map ; extra == "map"
 Requires-Dist: linkml_renderer ; extra == "renderer"
 Requires-Dist: llm ; extra == "llm" or extra == "all"
 Requires-Dist: matplotlib ; extra == "analytics"
-Requires-Dist: multipledispatch ; extra == "ibis"
+Requires-Dist: multipledispatch
 Requires-Dist: neo4j ; extra == "neo4j" or extra == "all"
 Requires-Dist: networkx ; extra == "neo4j"
 Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
@@ -57,8 +53,9 @@ Requires-Dist: plotly ; extra == "analytics"
 Requires-Dist: py2neo ; extra == "neo4j"
 Requires-Dist: pyarrow ; extra == "pyarrow"
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
-Requires-Dist: pymongo ; extra == "mongodb"
+Requires-Dist: pymongo (>=4.11,<5.0) ; extra == "mongodb"
 Requires-Dist: pystow (>=0.5.4,<0.6.0)
+Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
 Requires-Dist: ruff (>=0.6.2) ; extra == "tests"
 Requires-Dist: scikit-learn ; extra == "scipy"
 Requires-Dist: scipy ; extra == "scipy"
@@ -68,7 +65,7 @@ Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
 Requires-Dist: tabulate
 Requires-Dist: tiktoken ; extra == "llm"
 Requires-Dist: uvicorn ; extra == "fastapi"
-Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
+Requires-Dist: xmltodict (>=0.13.0)
 Description-Content-Type: text/markdown
 
 # linkml-store
```
{linkml_store-0.2.4 → linkml_store-0.2.6}/pyproject.toml
RENAMED
```diff
@@ -1,13 +1,13 @@
 [tool.poetry]
 name = "linkml-store"
-version = "0.2.4"
+version = "0.2.6"
 description = "linkml-store"
 authors = ["Author 1 <author@org.org>"]
 license = "MIT"
 readme = "README.md"
 
 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.10"
 click = "*"
 pydantic = "^2.0.0"
 linkml-runtime = ">=1.8.0"
@@ -24,7 +24,7 @@ black = { version=">=24.0.0", optional = true }
 ruff = { version=">=0.6.2", optional = true }
 llm = { version="*", optional = true }
 tiktoken = { version="*", optional = true }
-pymongo = { version="*", optional = true }
+pymongo = "^4.11"
 neo4j = { version="*", optional = true }
 py2neo = { version="*", optional = true }
 networkx = { version="*", optional = true }
@@ -37,7 +37,7 @@ linkml = { version=">=1.8.0", optional = true }
 linkml_map = { version="*", optional = true }
 linkml_renderer = { version="*", optional = true }
 frictionless = { version="*", optional = true }
-ibis-framework = { version=">=9.3.0", extras = ["duckdb", "examples"], optional = true }
+#ibis-framework = { version=">=9.3.0", extras = ["duckdb", "examples"], optional = true }
 gcsfs = { version="*", optional = true }
 multipledispatch = { version="*" }
 tabulate = "*"
@@ -46,8 +46,9 @@ jinja2 = "^3.1.4"
 jsonlines = "^4.0.0"
 fastapi = { version="*", optional = true }
 uvicorn = { version="*", optional = true }
-xmltodict = "^0.13.0"
-jsonpatch = "^1.33"
+xmltodict = ">=0.13.0"
+jsonpatch = ">=1.33"
+python-dotenv = "^1.0.1"
 
 [tool.poetry.group.dev.dependencies]
 pytest = {version = ">=7.1.2"}
@@ -90,9 +91,9 @@ renderer = ["linkml_renderer"]
 fastapi = ["fastapi", "uvicorn"]
 frictionless = ["frictionless"]
 scipy = ["scipy", "scikit-learn"]
-ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
+#ibis = ["ibis-framework", "multipledispatch", "gcsfs"]
 bigquery = ["google-cloud-bigquery"]
-all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "ibis", "bigquery"]
+all = ["llm", "mongodb", "neo4j", "validation", "map", "renderer", "bigquery"]
 
 [tool.poetry.scripts]
 linkml-store = "linkml_store.cli:cli"
```
{linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/client.py
RENAMED
```diff
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 
 HANDLE_MAP = {
     "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
+    "sqlite": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
     "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
     "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
     "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
@@ -22,6 +23,12 @@ HANDLE_MAP = {
     "file": "linkml_store.api.stores.filesystem.filesystem_database.FileSystemDatabase",
 }
 
+SUFFIX_MAP = {
+    "ddb": "duckdb:///{path}",
+    "duckdb": "duckdb:///{path}",
+    "db": "duckdb:///{path}",
+}
+
 
 class Client:
     """
@@ -197,6 +204,13 @@ class Client:
         :param kwargs:
         :return:
         """
+        if ":" not in handle:
+            if alias is None:
+                alias = handle
+            if "." in handle:
+                suffix = handle.split(".")[-1]
+                if suffix in SUFFIX_MAP:
+                    handle = SUFFIX_MAP[suffix].format(path=handle)
         if ":" not in handle:
             scheme = handle
             handle = None
@@ -220,7 +234,9 @@ class Client:
         if not alias:
             alias = handle
         if not self._databases:
+            logger.info("Initializing databases")
             self._databases = {}
+        logger.info(f"Attaching {alias}")
         self._databases[alias] = db
         db.parent = self
         if db.alias:
@@ -263,8 +279,9 @@ class Client:
             self._databases[name] = db
         if name not in self._databases:
             if create_if_not_exists:
-                logger.info(f"Creating database: {name}")
-                self.attach_database(name, **kwargs)
+                logger.info(f"Creating/attaching database: {name}")
+                db = self.attach_database(name, **kwargs)
+                name = db.alias
             else:
                 raise ValueError(f"Database {name} does not exist")
         db = self._databases[name]
```
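The practical effect of the `client.py` changes above: a scheme-less handle that looks like a filename (e.g. `mydata.ddb`) is now expanded to a DuckDB URI via `SUFFIX_MAP` before the usual scheme lookup. A standalone sketch of that resolution step; the mapping and format string are taken verbatim from the diff, while the helper name `resolve_handle` is illustrative:

```python
# Hypothetical standalone version of the suffix resolution added in client.py.
# SUFFIX_MAP is copied from the diff; resolve_handle is an illustrative name.
SUFFIX_MAP = {
    "ddb": "duckdb:///{path}",
    "duckdb": "duckdb:///{path}",
    "db": "duckdb:///{path}",
}

def resolve_handle(handle: str) -> str:
    # A handle with no scheme and a known file suffix is treated as a DuckDB path
    if ":" not in handle and "." in handle:
        suffix = handle.split(".")[-1]
        if suffix in SUFFIX_MAP:
            return SUFFIX_MAP[suffix].format(path=handle)
    return handle

assert resolve_handle("mydata.ddb") == "duckdb:///mydata.ddb"
assert resolve_handle("duckdb:///other.db") == "duckdb:///other.db"  # already a URI
```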
{linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/collection.py
RENAMED
```diff
@@ -1,6 +1,7 @@
 """A structure for representing collections of similar objects."""
 
 import hashlib
+import json
 import logging
 from collections import defaultdict
 from pathlib import Path
@@ -210,8 +211,59 @@ class Collection(Generic[DatabaseType]):
         """
         raise NotImplementedError
 
+    def index(
+        self,
+        objs: Union[OBJECT, List[OBJECT]],
+        index_name: Optional[str] = None,
+        replace: bool = False,
+        unique: bool = False,
+        **kwargs,
+    ) -> None:
+        """
+        Index objects in the collection.
+
+        :param objs:
+        :param index_name:
+        :param replace: replace the index, or not
+        :param unique: boolean used to declare the index unique or not
+        :param kwargs:
+        :return:
+        """
+        raise NotImplementedError
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Union[List[str], None] = None, **kwargs):
+        """
+        Add one or more objects to the collection.
+
+        >>> from linkml_store import Client
+        >>> client = Client()
+        >>> db = client.attach_database("mongodb", alias="test")
+        >>> collection = db.create_collection("Person")
+        >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
+        >>> collection.upsert(objs)
+
+        :param objs:
+        :param filter_fields: List of field names to use as the filter for matching existing collections.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        :param kwargs:
+
+        :return:
+        """
+        raise NotImplementedError
+
     def _pre_query_hook(self, query: Optional[Query] = None, **kwargs):
-
+        """
+        Pre-query hook.
+
+        This is called before a query is executed. It is used to materialize derivations and indexes.
+        :param query:
+        :param kwargs:
+        :return:
+        """
+        logger.debug(f"Pre-query hook (state: {self._initialized}; Q= {query}")  # if logging.info, this is very noisy.
         if not self._initialized:
             self._materialize_derivations()
             self._initialized = True
@@ -536,7 +588,13 @@ class Collection(Generic[DatabaseType]):
         qr = ix_coll.find(where=where, limit=-1, **kwargs)
         index_col = ix.index_field
         # TODO: optimize this for large indexes
-
+        def row2array(row):
+            v = row[index_col]
+            if isinstance(v, str):
+                # sqlite stores arrays as strings
+                v = json.loads(v)
+            return np.array(v, dtype=float)
+        vector_pairs = [(row, row2array(row)) for row in qr.rows]
         results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
         for r in results:
             del r[1][index_col]
```
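The new abstract `index` and `upsert` methods define a key-based update surface that backends override (see the MongoDB implementation further down). A usage sketch based on the doctest above, with the required `filter_fields` argument spelled out; it assumes a MongoDB instance is reachable behind the `mongodb` handle:

```python
# Sketch of the new upsert API, following the docstring's doctest; assumes a
# running MongoDB server for the "mongodb" handle.
from linkml_store import Client

client = Client()
db = client.attach_database("mongodb", alias="test")
collection = db.create_collection("Person")
objs = [{"id": "P1", "name": "John", "age_in_years": 30},
        {"id": "P2", "name": "Alice", "age_in_years": 25}]
collection.upsert(objs, filter_fields=["id"])  # no match yet, so both insert
# Only "name" is compared and updated; age_in_years is left untouched
collection.upsert({"id": "P1", "name": "Johnny"},
                  filter_fields=["id"], update_fields=["name"])
```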
{linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/database.py
RENAMED
```diff
@@ -276,14 +276,15 @@ class Database(ABC, Generic[CollectionType]):
 
         Examples:
 
-
-
-
-
-
-
-
-
+        >>> from linkml_store.api.client import Client
+        >>> client = Client()
+        >>> db = client.attach_database("duckdb", alias="test")
+        >>> collection = db.create_collection("Person", alias="persons")
+        >>> collection.alias
+        'persons'
+
+        >>> collection.target_class_name
+        'Person'
 
         If alias is not provided, it defaults to the name of the type.
 
@@ -419,7 +420,7 @@ class Database(ABC, Generic[CollectionType]):
         >>> from linkml_store.api.client import Client
         >>> from linkml_store.api.queries import Query
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person")
         >>> collection.insert([{"id": "P1", "name": "John"}, {"id": "P2", "name": "Alice"}])
         >>> query = Query(from_table="Person", where_clause={"name": "John"})
@@ -451,7 +452,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> collection = db.create_collection("Person", alias="persons")
         >>> collection.insert([{"id": "P1", "name": "John", "age_in_years": 25}])
         >>> schema_view = db.schema_view
@@ -470,6 +471,7 @@ class Database(ABC, Generic[CollectionType]):
         if not self._schema_view:
             self._initialize_schema()
         if not self._schema_view:
+            logger.info("Inducing schema view")
             self._schema_view = self.induce_schema_view()
         return self._schema_view
 
@@ -505,6 +507,7 @@ class Database(ABC, Generic[CollectionType]):
         if isinstance(schema_view, str):
             schema_view = SchemaView(schema_view)
         self._schema_view = schema_view
+        logger.info(f"Setting schema view for {self.handle}")
         # self._schema_view = SchemaView(schema_view.materialize_derived_schema())
         if not self._collections:
             return
@@ -719,7 +722,7 @@ class Database(ABC, Generic[CollectionType]):
 
         >>> from linkml_store.api.client import Client
         >>> client = Client()
-        >>> db = client.attach_database("duckdb", alias="test")
+        >>> db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
         >>> db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
         >>> db.list_collection_names()
         ['iris']
@@ -739,7 +742,9 @@ class Database(ABC, Generic[CollectionType]):
         # import into a test instance
         tmp_handle = source_format.value
         client = self.parent
-
+        tmp_alias = "tmp"
+        client.drop_database(tmp_alias, missing_ok=True)
+        tmp_db = client.attach_database(tmp_handle, alias=tmp_alias, recreate_if_exists=True)
         # TODO: check for infinite recursion
         tmp_db.import_database(location, source_format=source_format)
         obj = {}
```
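The doctest updates above all add `recreate_if_exists=True` so each example starts from a clean database, and `import_database` now stages imports through a disposable `tmp` database that is dropped first. The pattern, pieced together from the doctests in this file (the CSV path is the one the doctests use and assumes a repository checkout):

```python
# Doctest pattern from database.py: recreate the target, then import a CSV.
from linkml_store.api.client import Client
from linkml_store.utils.format_utils import Format

client = Client()
db = client.attach_database("duckdb", alias="test", recreate_if_exists=True)
db.import_database("tests/input/iris.csv", Format.CSV, collection_name="iris")
print(db.list_collection_names())  # ['iris']
```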
{linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_collection.py
RENAMED
```diff
@@ -147,16 +147,22 @@ class DuckDBCollection(Collection):
         if self._table_created or self.metadata.is_prepopulated:
             logger.info(f"Already have table for: {cd.name}")
             return
-        query = Query(
-            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
-        )
-        qr = self.parent.query(query)
-        if qr.num_rows > 0:
+        if self.parent._table_exists(self.alias):
             logger.info(f"Table already exists for {cd.name}")
             self._table_created = True
             self._initialized = True
             self.metadata.is_prepopulated = True
             return
+        # query = Query(
+        #     from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        # )
+        # qr = self.parent.query(query)
+        # if qr.num_rows > 0:
+        #     logger.info(f"Table already exists for {cd.name}")
+        #     self._table_created = True
+        #     self._initialized = True
+        #     self.metadata.is_prepopulated = True
+        #     return
         logger.info(f"Creating table for {cd.name}")
         t = self._sqla_table(cd)
         ct = CreateTable(t)
```
{linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/duckdb/duckdb_database.py
RENAMED
```diff
@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import Optional, Union, List
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql, where_clause_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
@@ -62,7 +62,7 @@ class DuckDBDatabase(Database):
     def engine(self) -> sqlalchemy.Engine:
         if not self._engine:
             handle = self.handle
-            if not handle.startswith("duckdb://") and not handle.startswith(":"):
+            if not handle.startswith("duckdb://") and not handle.startswith(":") and "://" not in handle:
                 handle = f"duckdb:///{handle}"
             if ":memory:" not in handle:
                 # TODO: investigate this; duckdb appears to be prematurely caching
@@ -71,6 +71,10 @@
             self._engine = sqlalchemy.create_engine(handle)
         return self._engine
 
+    @property
+    def _is_sqlite(self) -> bool:
+        return self.handle and self.handle.startswith("sqlite:")
+
     def commit(self, **kwargs):
         with self.engine.connect() as conn:
             conn.commit()
@@ -89,34 +93,60 @@
             if not missing_ok:
                 raise FileNotFoundError(f"Database file not found: {path}")
 
-    def query(self, query: Query, **kwargs) -> QueryResult:
+    def _table_exists(self, table: str) -> bool:
+        if self._is_sqlite:
+            if table == "sqlite_master":
+                return True
+            meta_query = Query(
+                from_table="sqlite_master",
+                where_clause={
+                    #"type": "table",
+                    "name": table,
+                }
+            )
+        else:
+            if table.startswith("information_schema"):
+                return True
+            meta_query = Query(
+                from_table="information_schema.tables",
+                where_clause={
+                    "table_type": "BASE TABLE",
+                    "table_name": table,
+                }
+            )
+
+        qr = self.query(meta_query)
+        if qr.num_rows == 0:
+            logger.debug(f"Table {self.alias} not created yet")
+            return False
+        return True
+
+    def _json_encoded_cols(self, table_name: str) -> Optional[List[str]]:
         json_encoded_cols = []
-        if
-        if
-
-
-        )
-        qr = self.query(meta_query)
-        if qr.num_rows == 0:
-            logger.debug(f"Table {query.from_table} not created yet")
-            return QueryResult(query=query, num_rows=0, rows=[])
-        if not query.from_table.startswith("information_schema"):
-            sv = self.schema_view
-        else:
-            sv = None
+        if table_name:
+            if table_name.startswith("information_schema") or table_name.startswith("sqlite"):
+                return []
+        sv = self.schema_view
         if sv:
             cd = None
             for c in self._collections.values():
-
-                if c.alias == query.from_table or c.target_class_name == query.from_table:
+                if c.alias == table_name or c.target_class_name == table_name:
                     cd = c.class_definition()
                     break
             if cd:
                 for att in sv.class_induced_slots(cd.name):
                     if att.inlined or att.inlined_as_list:
                         json_encoded_cols.append(att.name)
+        return json_encoded_cols
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        if not self._table_exists(query.from_table):
+            return QueryResult(query=query, num_rows=0, rows=[])
+        json_encoded_cols = self._json_encoded_cols(query.from_table)
+
         with self.engine.connect() as conn:
             count_query_str = text(query_to_sql(query, count=True))
+            logger.debug(f"count_query_str: {count_query_str}")
             num_rows = list(conn.execute(count_query_str))[0][0]
             logger.debug(f"num_rows: {num_rows}")
             query_str = query_to_sql(query, **kwargs)  # include offset, limit
@@ -167,6 +197,9 @@
         logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
         sb = SchemaBuilder()
         schema = sb.schema
+        logger.info(f"Checking if {self.metadata.handle} is sqlite: {self._is_sqlite}")
+        if self._is_sqlite:
+            return SchemaView(schema)
         query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})
         qr = self.query(query)
         logger.info(f"Found {qr.num_rows} information_schema.tables // {qr.rows}")
```
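The heart of the new `_table_exists` method is a dialect split: DuckDB exposes its catalog through `information_schema.tables`, while SQLite (now routed through the same backend) exposes it through `sqlite_master`. A self-contained illustration of the SQLite branch using the stdlib driver; the table names are illustrative:

```python
# Minimal demonstration of the sqlite_master catalog check that _table_exists
# performs for SQLite handles; uses stdlib sqlite3, table names illustrative.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE Person (id TEXT, name TEXT)")

def table_exists(conn: sqlite3.Connection, table: str) -> bool:
    # SQLite has no information_schema; its catalog lives in sqlite_master
    cur = conn.execute("SELECT name FROM sqlite_master WHERE name = ?", (table,))
    return cur.fetchone() is not None

assert table_exists(conn, "Person")
assert not table_exists(conn, "Address")
```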
{linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_collection.py
RENAMED
```diff
@@ -41,6 +41,89 @@ class MongoDBCollection(Collection):
             del obj["_id"]
         self._post_insert_hook(objs)
 
+
+    def index(self,
+              objs: Union[OBJECT, List[OBJECT]],
+              index_name: Optional[str] = None,
+              replace: bool = False,
+              unique: bool = False,
+              **kwargs):
+        """
+        Create indexes on the collection.
+
+        :param objs: Field(s) to index.
+        :param index_name: Optional name for the index.
+        :param replace: If True, the index will be dropped and recreated.
+        :param unique: If True, creates a unique index (default: False).
+        """
+
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        existing_indexes = self.mongo_collection.index_information()
+
+        for obj in objs:
+            field_exists = False
+            index_to_drop = None
+
+            # Extract existing index details
+            for index_name_existing, index_details in existing_indexes.items():
+                indexed_fields = [field[0] for field in index_details.get("key", [])]  # Extract field names
+
+                if obj in indexed_fields:  # If this field is already indexed
+                    field_exists = True
+                    index_to_drop = index_name_existing if replace else None
+
+            # Drop the index if replace=True and index_to_drop is valid
+            if index_to_drop:
+                self.mongo_collection.drop_index(index_to_drop)
+                logging.debug(f"Dropped existing index: {index_to_drop}")
+
+            # Create the new index only if it doesn't exist or was dropped
+            if not field_exists or replace:
+                self.mongo_collection.create_index(obj, name=index_name, unique=unique)
+                logging.debug(f"Created new index: {index_name} on field {obj}, unique={unique}")
+            else:
+                logging.debug(f"Index already exists for field {obj}, skipping creation.")
+
+    def upsert(self,
+               objs: Union[OBJECT, List[OBJECT]],
+               filter_fields: List[str],
+               update_fields: Optional[List[str]] = None,
+               **kwargs):
+        """
+        Upsert one or more documents into the MongoDB collection.
+
+        :param objs: The document(s) to insert or update.
+        :param filter_fields: List of field names to use as the filter for matching existing documents.
+        :param update_fields: List of field names to include in the update. If None, all fields are updated.
+        """
+        if not isinstance(objs, list):
+            objs = [objs]
+
+        for obj in objs:
+            # Ensure filter fields exist in the object
+            filter_criteria = {field: obj[field] for field in filter_fields if field in obj}
+            if not filter_criteria:
+                raise ValueError("At least one valid filter field must be present in each object.")
+
+            # Check if a document already exists
+            existing_doc = self.mongo_collection.find_one(filter_criteria)
+
+            if existing_doc:
+                # Update only changed fields
+                updates = {key: obj[key] for key in update_fields if key in obj and obj[key] != existing_doc.get(key)}
+
+                if updates:
+                    self.mongo_collection.update_one(filter_criteria, {"$set": updates})
+                    logging.debug(f"Updated existing document: {filter_criteria} with {updates}")
+                else:
+                    logging.debug(f"No changes detected for document: {filter_criteria}. Skipping update.")
+            else:
+                # Insert a new document
+                self.mongo_collection.insert_one(obj)
+                logging.debug(f"Inserted new document: {obj}")
+
     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
```
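A usage sketch for the two new MongoDB methods, mirroring their docstrings; it assumes a reachable MongoDB server behind the `mongodb` handle, and the field name `"id"` is illustrative. Note that `index` takes the field(s) to index as its first argument, and that the update path reads `update_fields`, so it is passed explicitly here:

```python
# Hypothetical usage of MongoDBCollection.index/upsert; assumes a running
# MongoDB server for the "mongodb" handle. Field names are illustrative.
from linkml_store import Client

client = Client()
db = client.attach_database("mongodb", alias="test")
collection = db.create_collection("Person")
collection.index("id", unique=True)  # unique index on the "id" field
# No matching document yet, so this inserts
collection.upsert({"id": "P1", "name": "John"}, filter_fields=["id"])
# Matching document exists; only the changed "name" field is written
collection.upsert({"id": "P1", "name": "Johnny"},
                  filter_fields=["id"], update_fields=["name"])
```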
{linkml_store-0.2.4 → linkml_store-0.2.6}/src/linkml_store/api/stores/mongodb/mongodb_database.py
RENAMED
```diff
@@ -3,6 +3,7 @@
 import logging
 from pathlib import Path
 from typing import Optional, Union
+from urllib.parse import urlparse
 
 from pymongo import MongoClient
 from pymongo.database import Database as NativeDatabase
@@ -38,10 +39,13 @@ class MongoDBDatabase(Database):
     @property
     def _db_name(self) -> str:
         if self.handle:
-
+            parsed_url = urlparse(self.handle)
+            path_parts = parsed_url.path.lstrip("/").split("?")[0].split("/")
+            print(path_parts)
+            db_name = path_parts[0] if path_parts else "default"
        else:
-
-            return
+            db_name = "default"
+        return db_name
 
     @property
     def native_client(self) -> MongoClient:
```