linkml-store 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
- linkml_store/api/client.py +32 -3
- linkml_store/api/collection.py +231 -30
- linkml_store/api/config.py +10 -2
- linkml_store/api/database.py +305 -19
- linkml_store/api/stores/chromadb/__init__.py +7 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +8 -1
- linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +11 -9
- linkml_store/api/stores/duckdb/duckdb_database.py +22 -8
- linkml_store/api/stores/duckdb/mappings.py +1 -0
- linkml_store/api/stores/filesystem/__init__.py +16 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +142 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +36 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +1 -1
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +29 -8
- linkml_store/api/stores/solr/__init__.py +3 -0
- linkml_store/api/stores/solr/solr_collection.py +2 -1
- linkml_store/api/stores/solr/solr_database.py +1 -0
- linkml_store/cli.py +64 -10
- linkml_store/index/__init__.py +6 -2
- linkml_store/index/implementations/llm_indexer.py +83 -5
- linkml_store/index/implementations/simple_indexer.py +2 -2
- linkml_store/index/indexer.py +32 -8
- linkml_store/utils/format_utils.py +52 -2
- linkml_store/utils/object_utils.py +9 -1
- {linkml_store-0.1.6.dist-info → linkml_store-0.1.8.dist-info}/METADATA +4 -1
- linkml_store-0.1.8.dist-info/RECORD +45 -0
- linkml_store-0.1.6.dist-info/RECORD +0 -41
- {linkml_store-0.1.6.dist-info → linkml_store-0.1.8.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.6.dist-info → linkml_store-0.1.8.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.6.dist-info → linkml_store-0.1.8.dist-info}/entry_points.txt +0 -0
linkml_store/api/stores/duckdb/duckdb_database.py
CHANGED

@@ -1,5 +1,6 @@
 import json
 import logging
+from pathlib import Path
 from typing import Optional

 import pandas as pd

@@ -22,6 +23,7 @@ TYPE_MAP = {
     "DATE": "date",
     "DOUBLE": "float",
     "INTEGER": "integer",
+    "JSON": "Any",
 }


@@ -33,9 +35,13 @@ class DuckDBDatabase(Database):
     _engine: sqlalchemy.Engine = None
     collection_class = DuckDBCollection

-    def __init__(self, handle: Optional[str] = None, **kwargs):
+    def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
         if handle is None:
             handle = "duckdb:///:memory:"
+        if recreate_if_exists:
+            path = Path(handle.replace("duckdb:///", ""))
+            if path.exists():
+                path.unlink()
         super().__init__(handle=handle, **kwargs)

     @property
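The new recreate_if_exists flag unlinks an existing file-backed database before connecting. A minimal usage sketch (the file path is illustrative):

    from linkml_store.api.stores.duckdb import DuckDBDatabase

    # any existing file at this path is deleted before the engine connects
    db = DuckDBDatabase("duckdb:///tmp/demo.db", recreate_if_exists=True)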
@@ -69,16 +75,19 @@ class DuckDBDatabase(Database):
         if qr.num_rows == 0:
             logger.debug(f"Table {query.from_table} not created yet")
             return QueryResult(query=query, num_rows=0, rows=[])
-        sv = self.schema_view
+        if not query.from_table.startswith("information_schema"):
+            sv = self.schema_view
+        else:
+            sv = None
         if sv:
             cd = None
             for c in self._collections.values():
-                if c.name == query.from_table:
+                if c.name == query.from_table or c.metadata.alias == query.from_table:
                     cd = c.class_definition()
                     break
             if cd:
-                for att in cd.
-                    if att.inlined:
+                for att in sv.class_induced_slots(cd.name):
+                    if att.inlined or att.inlined_as_list:
                         json_encoded_cols.append(att.name)
         with self.engine.connect() as conn:
             count_query_str = text(query_to_sql(query, count=True))

@@ -107,7 +116,10 @@ class DuckDBDatabase(Database):

     def init_collections(self):
         # TODO: unify schema introspection
-        schema = introspect_schema(self.engine)
+        if not self.schema_view:
+            schema = introspect_schema(self.engine)
+        else:
+            schema = self.schema_view.schema
         table_names = schema.classes.keys()
         if self._collections is None:
             self._collections = {}

@@ -119,7 +131,7 @@ class DuckDBDatabase(Database):
     def induce_schema_view(self) -> SchemaView:
         # TODO: unify schema introspection
         # TODO: handle case where schema is provided in advance
-        logger.info(f"Inducing schema view for {self.metadata.handle}")
+        logger.info(f"Inducing schema view for {self.metadata.handle} // {self}")
         sb = SchemaBuilder()
         schema = sb.schema
         query = Query(from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE"})

@@ -144,8 +156,10 @@ class DuckDBDatabase(Database):
             sd = SlotDefinition(
                 row["column_name"], required=row["is_nullable"] == "NO", multivalued=multivalued, range=rng
             )
+            if dt == "JSON":
+                sd.inlined_as_list = True
             sb.schema.classes[tbl_name].attributes[sd.name] = sd
-            logger.info(f"Introspected slot: {tbl_name}.{sd.name}: {sd.range}")
+            logger.info(f"Introspected slot: {tbl_name}.{sd.name}: {sd.range} FROM {dt}")
         sb.add_defaults()
         for cls_name in schema.classes:
             if cls_name in self.metadata.collections:
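Taken together, these changes round-trip DuckDB JSON columns: introspection maps a JSON column to range "Any" and marks it inlined_as_list, and the query path then JSON-decodes exactly the columns so marked. A sketch of the slot that induce_schema_view would build (the column name "metadata" is illustrative):

    from linkml_runtime.linkml_model import SlotDefinition

    # what introspection produces for a DuckDB column of type JSON
    sd = SlotDefinition("metadata", range="Any")  # via TYPE_MAP["JSON"]
    sd.inlined_as_list = True  # later triggers JSON decoding in query()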
linkml_store/api/stores/duckdb/__init__.py
ADDED

@@ -0,0 +1,16 @@
+"""
+Adapter for DuckDB embedded database.
+
+Handles have the form:
+
+- ``duckdb:///<path>`` for a file-based database
+- ``duckdb:///:memory:`` for an in-memory database
+"""
+
+from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
+from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
+
+__all__ = [
+    "DuckDBCollection",
+    "DuckDBDatabase",
+]
linkml_store/api/stores/filesystem/filesystem_collection.py
ADDED

@@ -0,0 +1,142 @@
+import logging
+from typing import Any, Dict, List, Optional, Union
+
+import sqlalchemy as sqla
+from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
+from sqlalchemy import Column, Table, delete, insert, inspect, text
+from sqlalchemy.sql.ddl import CreateTable
+
+from linkml_store.api import Collection
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+from linkml_store.api.queries import Query
+from linkml_store.api.stores.duckdb.mappings import TMAP
+from linkml_store.utils.sql_utils import facet_count_sql
+
+logger = logging.getLogger(__name__)
+
+
+class FileSystemCollection(Collection):
+    _table_created: bool = None
+
+    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+        if not isinstance(objs, list):
+            objs = [objs]
+        if not objs:
+            return
+        cd = self.class_definition()
+        if not cd:
+            cd = self.induce_class_definition_from_objects(objs)
+        self._create_table(cd)
+        table = self._sqla_table(cd)
+        logger.info(f"Inserting into: {self.alias} // T={table.name}")
+        engine = self.parent.engine
+        col_names = [c.name for c in table.columns]
+        objs = [{k: obj.get(k, None) for k in col_names} for obj in objs]
+        with engine.connect() as conn:
+            with conn.begin():
+                conn.execute(insert(table), objs)
+            conn.commit()
+
+    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
+        if not isinstance(objs, list):
+            objs = [objs]
+        cd = self.class_definition()
+        if not cd:
+            cd = self.induce_class_definition_from_objects(objs)
+        table = self._sqla_table(cd)
+        engine = self.parent.engine
+        with engine.connect() as conn:
+            for obj in objs:
+                conditions = [table.c[k] == v for k, v in obj.items() if k in cd.attributes]
+                stmt = delete(table).where(*conditions)
+                stmt = stmt.compile(engine)
+                conn.execute(stmt)
+                conn.commit()
+        return
+
+    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
+        logger.info(f"Deleting from {self.target_class_name} where: {where}")
+        if where is None:
+            where = {}
+        cd = self.class_definition()
+        if not cd:
+            logger.info(f"No class definition found for {self.target_class_name}, assuming not prepopulated")
+            return 0
+        table = self._sqla_table(cd)
+        engine = self.parent.engine
+        inspector = inspect(engine)
+        table_exists = table.name in inspector.get_table_names()
+        if not table_exists:
+            logger.info(f"Table {table.name} does not exist, assuming no data")
+            return 0
+        with engine.connect() as conn:
+            conditions = [table.c[k] == v for k, v in where.items()]
+            stmt = delete(table).where(*conditions)
+            stmt = stmt.compile(engine)
+            result = conn.execute(stmt)
+            deleted_rows_count = result.rowcount
+            if deleted_rows_count == 0 and not missing_ok:
+                raise ValueError(f"No rows found for {where}")
+            conn.commit()
+            return deleted_rows_count if deleted_rows_count > -1 else None
+
+    def query_facets(
+        self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
+    ) -> Dict[str, Dict[str, int]]:
+        results = {}
+        cd = self.class_definition()
+        with self.parent.engine.connect() as conn:
+            if not facet_columns:
+                facet_columns = list(self.class_definition().attributes.keys())
+            for col in facet_columns:
+                logger.debug(f"Faceting on {col}")
+                if isinstance(col, tuple):
+                    sd = SlotDefinition(name="PLACEHOLDER")
+                else:
+                    sd = cd.attributes[col]
+                facet_query = self._create_query(where_clause=where)
+                facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
+                logger.debug(f"Facet query: {facet_query_str}")
+                rows = list(conn.execute(text(facet_query_str)))
+                results[col] = rows
+        return results
+
+    def _sqla_table(self, cd: ClassDefinition) -> Table:
+        schema_view = self.parent.schema_view
+        metadata_obj = sqla.MetaData()
+        cols = []
+        for att in schema_view.class_induced_slots(cd.name):
+            typ = TMAP.get(att.range, sqla.String)
+            if att.inlined:
+                typ = sqla.JSON
+            if att.multivalued:
+                typ = sqla.ARRAY(typ, dimensions=1)
+            if att.array:
+                typ = sqla.ARRAY(typ, dimensions=1)
+            col = Column(att.name, typ)
+            cols.append(col)
+        t = Table(self.alias, metadata_obj, *cols)
+        return t
+
+    def _create_table(self, cd: ClassDefinition):
+        if self._table_created or self.metadata.is_prepopulated:
+            logger.info(f"Already have table for: {cd.name}")
+            return
+        query = Query(
+            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        )
+        qr = self.parent.query(query)
+        if qr.num_rows > 0:
+            logger.info(f"Table already exists for {cd.name}")
+            self._table_created = True
+            self.metadata.is_prepopulated = True
+            return
+        logger.info(f"Creating table for {cd.name}")
+        t = self._sqla_table(cd)
+        ct = CreateTable(t)
+        ddl = str(ct.compile(self.parent.engine))
+        with self.parent.engine.connect() as conn:
+            conn.execute(text(ddl))
+            conn.commit()
+        self._table_created = True
+        self.metadata.is_prepopulated = True
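FileSystemCollection reuses the SQLAlchemy machinery above: _create_table lazily issues a CREATE TABLE on first insert, and insert/delete_where compile plain SQLAlchemy statements. A hedged sketch of the collection-level API these methods implement, shown against an in-memory DuckDB database since both backends share the Collection interface (the class and field names are illustrative):

    from linkml_store.api.stores.duckdb import DuckDBDatabase

    db = DuckDBDatabase("duckdb:///:memory:")
    coll = db.create_collection("Person", alias="persons")
    coll.insert([{"id": "P1", "name": "Alice"}, {"id": "P2", "name": "Bob"}])
    n = coll.delete_where({"name": "Bob"})  # returns the number of deleted rows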
linkml_store/api/stores/filesystem/filesystem_database.py
ADDED

@@ -0,0 +1,36 @@
+import logging
+from typing import Optional
+
+from linkml_store.api import Collection, Database
+from linkml_store.api.config import CollectionConfig
+from linkml_store.api.stores.duckdb import DuckDBDatabase
+from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
+
+logger = logging.getLogger(__name__)
+
+
+class FileSystemDatabase(Database):
+    collection_class = FileSystemCollection
+    wrapped_database: Database = None
+
+    def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
+        self.wrapped_database = DuckDBDatabase("duckdb:///:memory:")
+        super().__init__(handle=handle, **kwargs)
+
+    def commit(self, **kwargs):
+        # TODO: sync
+        pass
+
+    def close(self, **kwargs):
+        self.wrapped_database.close()
+
+    def create_collection(
+        self,
+        name: str,
+        alias: Optional[str] = None,
+        metadata: Optional[CollectionConfig] = None,
+        recreate_if_exists=False,
+        **kwargs,
+    ) -> Collection:
+        wd = self.wrapped_database
+        wd.create_collection()
linkml_store/api/stores/hdf5/hdf5_collection.py
CHANGED

@@ -46,7 +46,7 @@ class HDF5Collection(Collection):
         return count

     def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
-        logger.info(f"Deleting from {self.
+        logger.info(f"Deleting from {self.target_class_name} where: {where}")
         if where is None:
             where = {}
         results = self.query(Query(where_clause=where)).rows
linkml_store/api/stores/mongodb/__init__.py
ADDED

@@ -0,0 +1,25 @@
+"""
+Adapter for MongoDB document store.
+
+Handles have the form: ``mongodb://<host>:<port>/<database>``
+
+To use this, you must have the `pymongo` extra installed.
+
+.. code-block:: bash
+
+    pip install linkml-store[mongodb]
+
+or
+
+.. code-block:: bash
+
+    pip install linkml-store[all]
+"""
+
+from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
+from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
+
+__all__ = [
+    "MongoDBCollection",
+    "MongoDBDatabase",
+]
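A hedged connection sketch, assuming a MongoDB server on localhost and that the top-level Client accepts such handles via attach_database (the database and collection names are illustrative):

    from linkml_store import Client

    client = Client()
    db = client.attach_database("mongodb://localhost:27017/mydb")
    coll = db.create_collection("Person", alias="persons")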
linkml_store/api/stores/mongodb/mongodb_collection.py
CHANGED

@@ -1,4 +1,5 @@
 import logging
+from copy import copy
 from typing import Any, Dict, List, Optional, Tuple, Union

 from linkml_runtime.linkml_model import SlotDefinition

@@ -12,6 +13,14 @@ logger = logging.getLogger(__name__)


 class MongoDBCollection(Collection):
+    """
+    Adapter for collections in a MongoDB database.
+
+    .. note::
+
+        You should not use or manipulate this class directly.
+        Instead, use the general :class:`linkml_store.api.Collection`
+    """

     @property
     def mongo_collection(self) -> MongoCollection:

@@ -31,7 +40,12 @@ class MongoDBCollection(Collection):
         else:
             cursor = self.mongo_collection.find(mongo_filter)

-        rows = list(cursor)
+        def _as_row(row: dict):
+            row = copy(row)
+            del row["_id"]
+            return row
+
+        rows = [_as_row(row) for row in cursor]
         count = self.mongo_collection.count_documents(mongo_filter)

         return QueryResult(query=query, num_rows=count, rows=rows)
@@ -56,24 +70,31 @@ class MongoDBCollection(Collection):
             if isinstance(col, tuple):
                 sd = SlotDefinition(name="PLACEHOLDER")
             else:
-
-
-
+                if col in cd.attributes:
+                    sd = cd.attributes[col]
+                else:
+                    logger.info(f"No schema metadata for {col}")
+                    sd = SlotDefinition(name=col)
+            group = {"$group": {"_id": f"${col}", "count": {"$sum": 1}}}
+            if isinstance(col, tuple):
+                q = {k.replace(".", ""): f"${k}" for k in col}
+                group["$group"]["_id"] = q
             if sd and sd.multivalued:
                 facet_pipeline = [
                     {"$match": where} if where else {"$match": {}},
                     {"$unwind": f"${col}"},
-                    {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
+                    group,
                     {"$sort": {"count": -1}},
                     {"$limit": facet_limit},
                 ]
             else:
                 facet_pipeline = [
                     {"$match": where} if where else {"$match": {}},
-                    {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
+                    group,
                     {"$sort": {"count": -1}},
                     {"$limit": facet_limit},
                 ]
-
+            logger.info(f"Facet pipeline: {facet_pipeline}")
             facet_results = list(self.mongo_collection.aggregate(facet_pipeline))
             results[col] = [(result["_id"], result["count"]) for result in facet_results]

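For a multivalued column, the pipeline built above unwinds the array before grouping, so each list element is counted once. What it would look like for a hypothetical "aliases" slot with no where clause and an illustrative limit:

    facet_pipeline = [
        {"$match": {}},
        {"$unwind": "$aliases"},  # one document per array element
        {"$group": {"_id": "$aliases", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
        {"$limit": 100},  # stand-in for facet_limit
    ]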
@@ -92,7 +113,7 @@ class MongoDBCollection(Collection):
         return result.deleted_count

     def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> int:
-        logger.info(f"Deleting from {self.
+        logger.info(f"Deleting from {self.target_class_name} where: {where}")
         if where is None:
             where = {}
         result = self.mongo_collection.delete_many(where)
linkml_store/api/stores/solr/solr_collection.py
CHANGED

@@ -5,6 +5,7 @@ from copy import copy
 from typing import Any, Dict, List, Optional, Union

 import requests
+
 from linkml_store.api import Collection
 from linkml_store.api.collection import DEFAULT_FACET_LIMIT
 from linkml_store.api.queries import Query, QueryResult

@@ -119,7 +120,7 @@ class SolrCollection(Collection):
         conditions = []
         if self.parent.metadata.collection_type_slot:
             where_clause = copy(where_clause)
-            where_clause[self.parent.metadata.collection_type_slot] = self.
+            where_clause[self.parent.metadata.collection_type_slot] = self.alias
         for field, value in where_clause.items():
             if not isinstance(value, (list, tuple)):
                 value = [value]
linkml_store/cli.py
CHANGED
@@ -11,12 +11,19 @@ from pydantic import BaseModel
 from linkml_store import Client
 from linkml_store.api import Collection, Database
 from linkml_store.api.queries import Query
+from linkml_store.index import get_indexer
 from linkml_store.index.implementations.simple_indexer import SimpleIndexer
 from linkml_store.index.indexer import Indexer
-from linkml_store.utils.format_utils import Format, load_objects, render_output
+from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output
 from linkml_store.utils.object_utils import object_path_update

-index_type_option = click.option(
+index_type_option = click.option(
+    "--index-type",
+    "-t",
+    default="simple",
+    show_default=True,
+    help="Type of index to create. Values: simple, llm",
+)

 logger = logging.getLogger(__name__)

@@ -70,6 +77,9 @@ class ContextSettings(BaseModel):
 format_choice = click.Choice([f.value for f in Format])


+include_internal_option = click.option("--include-internal/--no-include-internal", default=False, show_default=True)
+
+
 @click.group()
 @click.option("--database", "-d", help="Database name")
 @click.option("--collection", "-c", help="Collection name")
@@ -89,6 +99,15 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     if not stacktrace:
         sys.tracebacklimit = 0
     logger = logging.getLogger()
+    # Set handler for the root logger to output to the console
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
+
+    # Clear existing handlers to avoid duplicate messages if function runs multiple times
+    logger.handlers = []
+
+    # Add the newly created console handler to the logger
+    logger.addHandler(console_handler)
     if verbose >= 2:
         logger.setLevel(logging.DEBUG)
     elif verbose == 1:
@@ -193,6 +212,35 @@ def store(ctx, files, object, format):
     click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{db.name}'.")


+@cli.command(name="import")
+@click.argument("files", type=click.Path(exists=True), nargs=-1)
+@click.option("--format", "-f", help="Input format")
+@click.pass_context
+def import_database(ctx, files, format):
+    """Imports a database from a dump."""
+    settings = ctx.obj["settings"]
+    db = settings.database
+    if not files and not object:
+        files = ["-"]
+    for file_path in files:
+        db.import_database(file_path, source_format=format)
+
+
+@cli.command()
+@click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+@click.option("--output", "-o", required=True, type=click.Path(), help="Output file path")
+@click.pass_context
+def export(ctx, output_type, output):
+    """Exports a database to a dump."""
+    settings = ctx.obj["settings"]
+    db = settings.database
+    if output_type is None:
+        output_type = guess_format(output)
+    if output_type is None:
+        raise ValueError(f"Output format must be specified can't be inferred from {output}.")
+    db.export_database(output, target_format=output_type)
+
+
 @cli.command()
 @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
 @click.option("--limit", "-l", type=click.INT, help="Maximum number of results to return")
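Both commands delegate to the corresponding Database methods, so the same round trip can be scripted directly (the file name and format are illustrative):

    # equivalent of running `export -o dump.json` and then `import dump.json`
    db.export_database("dump.json", target_format="json")
    db.import_database("dump.json", source_format="json")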
@@ -216,9 +264,10 @@ def query(ctx, where, limit, output_type, output):

 @cli.command()
 @click.pass_context
-def list_collections(ctx):
+@include_internal_option
+def list_collections(ctx, **kwargs):
     db = ctx.obj["settings"].database
-    for collection in db.list_collections():
+    for collection in db.list_collections(**kwargs):
         click.echo(collection.name)
         click.echo(render_output(collection.metadata))

@@ -254,7 +303,7 @@ def fq(ctx, where, limit, columns, output_type, output):

     def _untuple(key):
         if isinstance(key, tuple):
-            return "+".join(key)
+            return "+".join([str(x) for x in key])
         return key

     count_dict = {}
@@ -279,8 +328,10 @@ def _get_index(index_type=None, **kwargs) -> Indexer:

 @cli.command()
 @index_type_option
+@click.option("--cached-embeddings-database", "-E", help="Path to the database where embeddings are cached")
+@click.option("--text-template", "-T", help="Template for text embeddings")
 @click.pass_context
-def index(ctx, index_type):
+def index(ctx, index_type, **kwargs):
     """
     Create an index over a collection.

@@ -289,7 +340,7 @@ def index(ctx, index_type):
     :return:
     """
     collection = ctx.obj["settings"].collection
-    ix =
+    ix = get_indexer(index_type, **kwargs)
     collection.attach_indexer(ix)


@@ -322,14 +373,17 @@ def schema(ctx, output_type, output):
 @click.option("--limit", "-l", type=click.INT, help="Maximum number of search results")
 @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
+@click.option(
+    "--auto-index/--no-auto-index", default=False, show_default=True, help="Automatically index the collection"
+)
 @index_type_option
 @click.pass_context
-def search(ctx, search_term, where, limit, index_type, output_type, output):
+def search(ctx, search_term, where, limit, index_type, output_type, output, auto_index):
     """Search objects in the specified collection."""
     collection = ctx.obj["settings"].collection
-    ix =
+    ix = get_indexer(index_type)
     logger.info(f"Attaching index to collection {collection.name}: {ix.model_dump()}")
-    collection.attach_indexer(ix, auto_index=
+    collection.attach_indexer(ix, auto_index=auto_index)
     result = collection.search(search_term, where=where, limit=limit)
     output_data = render_output([{"score": row[0], **row[1]} for row in result.ranked_rows], output_type)
     if output:
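The --auto-index flag maps onto the auto_index keyword of attach_indexer. In Python, the same flow looks roughly like this (the search term is illustrative, and the collection is assumed to exist):

    from linkml_store.index import get_indexer

    ix = get_indexer("simple")
    collection.attach_indexer(ix, auto_index=True)  # index existing objects up front
    result = collection.search("crispr", limit=5)
    for score, obj in result.ranked_rows:
        print(score, obj)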
linkml_store/index/__init__.py
CHANGED
@@ -22,7 +22,7 @@ def get_indexer_class(name: str) -> Type[Indexer]:
     return INDEXER_CLASSES[name]


-def get_indexer(name: str, *args, **kwargs) -> Indexer:
+def get_indexer(name: str, **kwargs) -> Indexer:
     """
     Get an indexer by name.

@@ -30,4 +30,8 @@ def get_indexer(name: str, *args, **kwargs) -> Indexer:
     :param kwargs: additional arguments to pass to the indexer
     :return: the indexer
     """
-    return get_indexer_class(name)(*args, **kwargs)
+    kwargs = {k: v for k, v in kwargs.items() if v is not None}
+    cls = get_indexer_class(name)
+    kwargs["name"] = name
+    indexer = cls(**kwargs)
+    return indexer
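Dropping None-valued kwargs lets CLI options that were not supplied fall through to each indexer's defaults. A minimal sketch (text_template is assumed to be an Indexer field, mirroring the --text-template option above):

    from linkml_store.index import get_indexer

    # text_template=None is filtered out, so the indexer default applies
    ix = get_indexer("simple", text_template=None)
    assert ix.name == "simple"  # get_indexer injects the name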