linkml-store 0.0.0__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linkml-store might be problematic. Click here for more details.
- linkml_store/api/__init__.py +2 -2
- linkml_store/api/client.py +108 -7
- linkml_store/api/collection.py +221 -30
- linkml_store/api/config.py +97 -0
- linkml_store/api/database.py +207 -17
- linkml_store/api/queries.py +12 -1
- linkml_store/api/stores/chromadb/__init__.py +0 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +114 -0
- linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +47 -14
- linkml_store/api/stores/duckdb/duckdb_database.py +35 -44
- linkml_store/api/stores/hdf5/__init__.py +0 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
- linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +86 -40
- linkml_store/api/stores/mongodb/mongodb_database.py +58 -67
- linkml_store/api/stores/solr/solr_collection.py +132 -0
- linkml_store/api/stores/solr/solr_database.py +82 -0
- linkml_store/api/stores/solr/solr_utils.py +0 -0
- linkml_store/cli.py +369 -0
- linkml_store/index/__init__.py +33 -0
- linkml_store/index/implementations/{llm_index.py → llm_indexer.py} +2 -2
- linkml_store/index/implementations/{simple_index.py → simple_indexer.py} +6 -3
- linkml_store/index/{index.py → indexer.py} +7 -4
- linkml_store/utils/format_utils.py +93 -0
- linkml_store/utils/object_utils.py +73 -0
- linkml_store/utils/sql_utils.py +46 -7
- {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/METADATA +17 -6
- linkml_store-0.1.6.dist-info/RECORD +41 -0
- linkml_store-0.1.6.dist-info/entry_points.txt +3 -0
- linkml_store/api/metadata.py +0 -5
- linkml_store-0.0.0.dist-info/RECORD +0 -29
- linkml_store-0.0.0.dist-info/entry_points.txt +0 -3
- {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/LICENSE +0 -0
- {linkml_store-0.0.0.dist-info → linkml_store-0.1.6.dist-info}/WHEEL +0 -0
|
@@ -1,112 +1,103 @@
|
|
|
1
|
-
|
|
1
|
+
# mongodb_database.py
|
|
2
|
+
|
|
3
|
+
import logging
|
|
2
4
|
from typing import Optional
|
|
3
5
|
|
|
4
6
|
from linkml_runtime import SchemaView
|
|
5
|
-
from linkml_runtime.linkml_model import SlotDefinition
|
|
7
|
+
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
|
6
8
|
from linkml_runtime.utils.schema_builder import SchemaBuilder
|
|
7
9
|
from pymongo import MongoClient
|
|
10
|
+
from pymongo.database import Database as NativeDatabase
|
|
8
11
|
|
|
9
12
|
from linkml_store.api import Database
|
|
10
13
|
from linkml_store.api.queries import Query, QueryResult
|
|
11
14
|
from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
|
|
12
15
|
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
13
18
|
|
|
14
|
-
@dataclass
|
|
15
19
|
class MongoDBDatabase(Database):
|
|
16
20
|
"""
|
|
17
|
-
|
|
21
|
+
An adapter for MongoDB databases.
|
|
22
|
+
|
|
23
|
+
The LinkML-Store Database abstraction combines mongodb Client and Database.
|
|
18
24
|
"""
|
|
19
25
|
|
|
20
|
-
|
|
21
|
-
|
|
26
|
+
_native_client: MongoClient = None
|
|
27
|
+
_native_db = None
|
|
28
|
+
collection_class = MongoDBCollection
|
|
22
29
|
|
|
23
|
-
def
|
|
24
|
-
if
|
|
25
|
-
|
|
30
|
+
def __init__(self, handle: Optional[str] = None, **kwargs):
    """
    Create the adapter, falling back to a local MongoDB server.

    :param handle: MongoDB connection URI; ``mongodb://localhost:27017`` is
        used when no handle is given
    :param kwargs: forwarded unchanged to the parent initialiser
    """
    resolved_handle = "mongodb://localhost:27017" if handle is None else handle
    super().__init__(handle=resolved_handle, **kwargs)
|
|
26
34
|
|
|
27
35
|
@property
|
|
28
|
-
def
|
|
29
|
-
if
|
|
30
|
-
self.
|
|
31
|
-
return self.
|
|
36
|
+
def native_client(self) -> MongoClient:
    """
    Return the underlying ``MongoClient``, creating it on first access.

    The client is constructed from ``self.handle`` and cached on the
    instance, so repeated accesses return the same object.
    """
    client = self._native_client
    if client is None:
        client = MongoClient(self.handle)
        self._native_client = client
    return client
|
|
32
40
|
|
|
33
41
|
@property
|
|
34
|
-
def
|
|
35
|
-
if
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
42
|
+
def native_db(self) -> NativeDatabase:
    """
    Return the native MongoDB database handle, resolving it on first access.

    The database name is the alias from this adapter's metadata; when no
    alias is set the name ``"default"`` is used. The handle is cached.
    """
    if self._native_db is not None:
        return self._native_db
    db_name = self.metadata.alias or "default"
    self._native_db = self.native_client[db_name]
    return self._native_db
|
|
39
49
|
|
|
40
50
|
def commit(self, **kwargs):
    """
    Do nothing; this adapter has no pending state to commit.

    :param kwargs: accepted for interface compatibility and ignored
    """
    return None
|
|
42
52
|
|
|
43
53
|
def close(self, **kwargs):
    """
    Close the MongoDB client connection, if one was opened.

    The cached client and database handles are cleared afterwards, so a
    later access to ``native_client`` / ``native_db`` builds a fresh
    connection instead of reusing a client that has already been closed.

    :param kwargs: accepted for interface compatibility and ignored
    """
    client = self._native_client
    # Compare against None explicitly rather than relying on the truth value
    # of a driver object.
    if client is not None:
        client.close()
    self._native_client = None
    self._native_db = None
|
|
56
|
+
|
|
57
|
+
def drop(self, **kwargs):
    """
    Drop the MongoDB database backing this adapter.

    The database name is resolved the same way ``native_db`` resolves it:
    the alias from the metadata, or ``"default"`` when no alias is set.
    Passing the raw alias would hand ``None`` to ``drop_database`` for an
    adapter without an alias.

    :param kwargs: accepted for interface compatibility and ignored
    """
    db_name = self.metadata.alias or "default"
    self.native_client.drop_database(db_name)
|
|
45
59
|
|
|
46
60
|
def query(self, query: Query, **kwargs) -> QueryResult:
    """
    Run a query by delegating to the collection named in ``query.from_table``.

    :param query: query to execute; ``from_table`` selects the collection
    :param kwargs: passed through to the collection's ``query`` method
    :return: the collection's result, or ``None`` when ``query.from_table``
        is not set
    """
    table_name = query.from_table
    if not table_name:
        # NOTE(review): without a target collection nothing is executed and
        # the caller receives None despite the QueryResult annotation.
        return None
    return self.get_collection(table_name).query(query, **kwargs)
|
|
61
64
|
|
|
62
65
|
def init_collections(self):
    """
    Register a ``MongoDBCollection`` wrapper for every native collection.

    Collection names are read from the native database; names that are
    already registered in ``self._collections`` are left untouched.
    """
    if self._collections is None:
        self._collections = {}

    for native_name in self.native_db.list_collection_names():
        if native_name in self._collections:
            continue
        self._collections[native_name] = MongoDBCollection(name=native_name, parent=self)
|
|
69
73
|
|
|
70
|
-
def create_collection(self, name: str, alias: Optional[str] = None, **kwargs) -> MongoDBCollection:
|
|
71
|
-
collection = MongoDBCollection(name=name, parent=self)
|
|
72
|
-
if not self._collections:
|
|
73
|
-
self._collections = {}
|
|
74
|
-
if not alias:
|
|
75
|
-
alias = name
|
|
76
|
-
self._collections[alias] = collection
|
|
77
|
-
return collection
|
|
78
|
-
|
|
79
74
|
def induce_schema_view(self) -> SchemaView:
    """
    Build a schema by inspecting the collections of the native database.

    Each native collection becomes a class. One sample document per
    collection (``find_one``) supplies the attribute names: ``_id`` is
    skipped, list values mark the attribute as multivalued and dict values
    mark it as inlined. A class whose collection metadata declares explicit
    attributes is then replaced by a class built from that metadata.

    :return: a ``SchemaView`` over the induced schema
    """
    logger.info(f"Inducing schema view for {self.handle}")
    sb = SchemaBuilder()
    schema = sb.schema

    for collection_name in self.native_db.list_collection_names():
        sb.add_class(collection_name)
        sample_doc = self.native_db[collection_name].find_one()
        if not sample_doc:
            continue
        for field, value in sample_doc.items():
            if field == "_id":
                continue
            sd = SlotDefinition(field)
            if isinstance(value, list):
                sd.multivalued = True
            if isinstance(value, dict):
                sd.inlined = True
            sb.schema.classes[collection_name].attributes[sd.name] = sd

    sb.add_defaults()
    # Iterate over a snapshot of the class names: the loop body deletes and
    # inserts entries, which is not allowed while iterating the live mapping.
    for cls_name in list(schema.classes):
        if cls_name not in self.metadata.collections:
            continue
        collection_metadata = self.metadata.collections[cls_name]
        if collection_metadata.attributes:
            del schema.classes[cls_name]
            cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
            schema.classes[cls.name] = cls

    return SchemaView(schema)
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# solr_collection.py
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from copy import copy
|
|
5
|
+
from typing import Any, Dict, List, Optional, Union
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
from linkml_store.api import Collection
|
|
9
|
+
from linkml_store.api.collection import DEFAULT_FACET_LIMIT
|
|
10
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SolrCollection(Collection):
|
|
16
|
+
|
|
17
|
+
@property
|
|
18
|
+
def _collection_base(self) -> str:
|
|
19
|
+
if self.parent.use_cores:
|
|
20
|
+
base_url = f"{self.parent.base_url}/{self.name}"
|
|
21
|
+
else:
|
|
22
|
+
base_url = self.parent.base_url
|
|
23
|
+
return base_url
|
|
24
|
+
|
|
25
|
+
def search(
    self,
    query: str,
    where: Optional[Any] = None,
    index_name: Optional[str] = None,
    limit: Optional[int] = None,
    **kwargs,
) -> QueryResult:
    """
    Run a text search against the Solr collection.

    :param query: search text, sent as the Solr ``q`` parameter
    :param where: optional filter (``Query`` or dict) turned into ``fq``
    :param index_name: value for Solr's ``defType``; ``"edismax"`` if omitted
    :param limit: maximum number of documents to return; sent as ``rows``
        when it is a non-negative number, otherwise not forwarded
    :param kwargs: accepted for interface compatibility and ignored
    :return: result whose ``ranked_rows`` pair every document with the
        constant score ``1.0``
    :raises ValueError: if the parent database declares no searchable slots
    """
    if index_name is None:
        index_name = "edismax"
    qfs = self.parent.metadata.searchable_slots
    if not qfs:
        raise ValueError("No searchable slots configured for Solr collection")

    extra = {"defType": index_name, "qf": qfs}
    if limit is not None and limit >= 0:
        # Previously the limit argument was accepted but never reached Solr.
        extra["rows"] = limit
    solr_query = self._build_solr_query(where, search_term=query, extra=extra)
    logger.info(f"Querying Solr collection {self.name} with query: {solr_query}")

    response = requests.get(f"{self._collection_base}/select", params=solr_query)
    response.raise_for_status()

    data = response.json()
    num_rows = data["response"]["numFound"]
    rows = data["response"]["docs"]
    ranked_rows = [(1.0, row) for row in rows]
    return QueryResult(query=where, search_term=query, num_rows=num_rows, rows=rows, ranked_rows=ranked_rows)
|
|
49
|
+
|
|
50
|
+
def query(self, query: Query, **kwargs) -> QueryResult:
    """
    Execute a structured query against the Solr ``select`` endpoint.

    :param query: query to translate into Solr request parameters
    :param kwargs: accepted for interface compatibility and ignored
    :return: result carrying Solr's ``numFound`` as ``num_rows`` and the
        returned documents as ``rows``
    """
    solr_query = self._build_solr_query(query)
    logger.info(f"Querying Solr collection {self.name} with query: {solr_query}")

    select_url = f"{self._collection_base}/select"
    response = requests.get(select_url, params=solr_query)
    response.raise_for_status()

    payload = response.json()["response"]
    return QueryResult(query=query, num_rows=payload["numFound"], rows=payload["docs"])
|
|
62
|
+
|
|
63
|
+
def query_facets(
    self, where: Optional[Dict] = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
) -> Dict[str, Dict[str, int]]:
    """
    Fetch facet counts for the given columns from Solr.

    :param where: optional filter turned into the ``fq`` parameter
    :param facet_columns: fields to facet on, sent as ``facet.field``
    :param facet_limit: sent as ``facet.limit``
    :param kwargs: accepted for interface compatibility and ignored
    :return: mapping from facet field to its counts.
        NOTE(review): despite the annotation, each field maps to a list of
        ``(value, count)`` tuples, not to a dict.
    """
    solr_query = self._build_solr_query(where)
    solr_query.update({"facet": "true", "facet.field": facet_columns, "facet.limit": facet_limit})

    logger.info(f"Querying Solr collection {self.name} for facets with query: {solr_query}")

    response = requests.get(f"{self._collection_base}/select", params=solr_query)
    response.raise_for_status()

    facet_fields = response.json()["facet_counts"]["facet_fields"]
    # Each field's counts arrive as one flat sequence; even positions and odd
    # positions are paired up into tuples.
    return {name: list(zip(flat[::2], flat[1::2])) for name, flat in facet_fields.items()}
|
|
84
|
+
|
|
85
|
+
def _build_solr_query(
    self, query: Union[Query, Dict], search_term="*:*", extra: Optional[Dict] = None
) -> Dict[str, Any]:
    """
    Translate a query or a plain filter dict into Solr request parameters.

    :param query: a ``Query`` (where clause, selected columns, limit and
        offset are used) or a dict of field filters; ``None`` means no filter
    :param search_term: value for the ``q`` parameter
    :param extra: additional parameters merged in last, overriding any
        parameter of the same name
    :return: parameter dict for the Solr ``select`` endpoint
    """
    if query is None:
        query = {}
    solr_query = {}

    if isinstance(query, Query):
        solr_query["fq"] = self._build_solr_where_clause(query.where_clause)
        if query.select_cols:
            solr_query["fl"] = ",".join(query.select_cols)
        if query.limit:
            solr_query["rows"] = query.limit
        if query.offset:
            solr_query["start"] = query.offset
    elif isinstance(query, dict):
        solr_query["fq"] = self._build_solr_where_clause(query)

    solr_query["wt"] = "json"
    solr_query.setdefault("q", search_term)
    if extra:
        solr_query.update(extra)
    logger.info(f"Built Solr query: {solr_query}")
    return solr_query
|
|
115
|
+
|
|
116
|
+
def _build_solr_where_clause(self, where_clause: Dict) -> str:
|
|
117
|
+
if where_clause is None:
|
|
118
|
+
where_clause = {}
|
|
119
|
+
conditions = []
|
|
120
|
+
if self.parent.metadata.collection_type_slot:
|
|
121
|
+
where_clause = copy(where_clause)
|
|
122
|
+
where_clause[self.parent.metadata.collection_type_slot] = self._alias
|
|
123
|
+
for field, value in where_clause.items():
|
|
124
|
+
if not isinstance(value, (list, tuple)):
|
|
125
|
+
value = [value]
|
|
126
|
+
value = [f'"{v}"' if isinstance(v, str) else str(v) for v in value]
|
|
127
|
+
if len(value) > 1:
|
|
128
|
+
conditions.append(f"{field}:({' '.join(value)})")
|
|
129
|
+
else:
|
|
130
|
+
conditions.append(f"{field}:{value[0]}")
|
|
131
|
+
|
|
132
|
+
return " AND ".join(conditions)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
from linkml_store.api import Collection, Database
|
|
6
|
+
from linkml_store.api.config import CollectionConfig
|
|
7
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
8
|
+
from linkml_store.api.stores.solr.solr_collection import SolrCollection
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SolrDatabase(Database):
|
|
14
|
+
base_url: str
|
|
15
|
+
collection_class = SolrCollection
|
|
16
|
+
use_cores: bool = False
|
|
17
|
+
|
|
18
|
+
def __init__(self, handle: Optional[str] = None, **kwargs):
    """
    Create the adapter and derive the Solr base URL from the handle.

    :param handle: Solr base URL, optionally prefixed with ``solr:``
    :param kwargs: forwarded unchanged to the parent initialiser
    :raises ValueError: if no handle is given
    """
    if handle is None:
        raise ValueError("A handle (Solr base URL, optionally prefixed with 'solr:') is required")
    prefix = "solr:"
    # Strip only the leading marker. Replacing every occurrence would also
    # corrupt URLs that contain "solr:" themselves, e.g. http://solr:8983/solr.
    self.base_url = handle[len(prefix):] if handle.startswith(prefix) else handle
    super().__init__(handle=handle, **kwargs)
|
|
24
|
+
|
|
25
|
+
def get_collection(self, name: str, create_if_not_exists=True, **kwargs) -> "Collection":
    """
    Look up a collection by name, optionally creating it.

    Collections are initialised first when none are registered yet.

    :param name: name of the collection to return
    :param create_if_not_exists: create and register the collection when it
        is not registered yet
    :param kwargs: accepted for interface compatibility and ignored
    :return: the registered collection
    :raises KeyError: if the collection is unknown and creation is disabled
    """
    if not self._collections:
        self.init_collections()

    if name in self._collections:
        return self._collections[name]
    if not create_if_not_exists:
        raise KeyError(f"Collection {name} does not exist")

    created = self.create_collection(name)
    self._collections[name] = created
    return self._collections[name]
|
|
36
|
+
|
|
37
|
+
def create_collection(
    self, name: str, alias: Optional[str] = None, metadata: Optional[CollectionConfig] = None, **kwargs
) -> Collection:
    """
    Instantiate a collection and register it with this database.

    :param name: collection name; must be non-empty
    :param alias: key to register the collection under; ``name`` is used
        when no alias is given
    :param metadata: optional configuration passed to the collection
    :param kwargs: accepted for interface compatibility and ignored
    :return: the newly created collection
    :raises ValueError: if ``name`` is empty
    """
    if not name:
        raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")

    collection = self.collection_class(name=name, alias=alias, parent=self, metadata=metadata)

    if not self._collections:
        self._collections = {}

    self._collections[alias or name] = collection
    return collection
|
|
54
|
+
|
|
55
|
+
def init_collections(self):
    """
    Populate the collection registry.

    When the metadata names a ``collection_type_slot``, Solr is asked for
    the facet values of that field (no documents, unlimited facet values)
    and one collection is created per value. Otherwise a single collection
    called ``"default"`` is created.
    """
    if self._collections is None:
        self._collections = {}

    type_slot = self.metadata.collection_type_slot
    if not type_slot:
        self.create_collection("default")
        return

    params = {
        "q": "*:*",
        "wt": "json",
        "rows": 0,
        "facet": "true",
        "facet.field": type_slot,
        "facet.limit": -1,
    }
    response = requests.get(f"{self.base_url}/select", params=params)
    response.raise_for_status()
    facet_fields = response.json()["facet_counts"]["facet_fields"]
    # Every second item, starting at the first, is taken as a collection name.
    for coll_name in facet_fields[type_slot][::2]:
        self.create_collection(coll_name)
|
|
78
|
+
|
|
79
|
+
def query(self, query: Query, **kwargs) -> QueryResult:
    """
    Run a query against the collection named in ``query.from_table``.

    :param query: query to execute; ``from_table`` selects the collection
    :param kwargs: passed through to the collection's ``query`` method
    :return: the result produced by the collection
    """
    target = self.get_collection(query.from_table)
    return target.query(query, **kwargs)
|
|
File without changes
|