linkml-store 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/__init__.py +7 -0
- linkml_store/api/__init__.py +8 -0
- linkml_store/api/client.py +414 -0
- linkml_store/api/collection.py +1280 -0
- linkml_store/api/config.py +187 -0
- linkml_store/api/database.py +862 -0
- linkml_store/api/queries.py +69 -0
- linkml_store/api/stores/__init__.py +0 -0
- linkml_store/api/stores/chromadb/__init__.py +7 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
- linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
- linkml_store/api/stores/dremio/__init__.py +10 -0
- linkml_store/api/stores/dremio/dremio_collection.py +555 -0
- linkml_store/api/stores/dremio/dremio_database.py +1052 -0
- linkml_store/api/stores/dremio/mappings.py +105 -0
- linkml_store/api/stores/dremio_rest/__init__.py +11 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
- linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
- linkml_store/api/stores/duckdb/mappings.py +8 -0
- linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
- linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
- linkml_store/api/stores/ibis/__init__.py +5 -0
- linkml_store/api/stores/ibis/ibis_collection.py +488 -0
- linkml_store/api/stores/ibis/ibis_database.py +328 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
- linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
- linkml_store/api/stores/neo4j/__init__.py +0 -0
- linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
- linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
- linkml_store/api/stores/solr/__init__.py +3 -0
- linkml_store/api/stores/solr/solr_collection.py +224 -0
- linkml_store/api/stores/solr/solr_database.py +83 -0
- linkml_store/api/stores/solr/solr_utils.py +0 -0
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +1147 -0
- linkml_store/constants.py +7 -0
- linkml_store/graphs/__init__.py +0 -0
- linkml_store/graphs/graph_map.py +24 -0
- linkml_store/index/__init__.py +53 -0
- linkml_store/index/implementations/__init__.py +0 -0
- linkml_store/index/implementations/llm_indexer.py +174 -0
- linkml_store/index/implementations/simple_indexer.py +43 -0
- linkml_store/index/indexer.py +211 -0
- linkml_store/inference/__init__.py +13 -0
- linkml_store/inference/evaluation.py +195 -0
- linkml_store/inference/implementations/__init__.py +0 -0
- linkml_store/inference/implementations/llm_inference_engine.py +154 -0
- linkml_store/inference/implementations/rag_inference_engine.py +276 -0
- linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
- linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
- linkml_store/inference/inference_config.py +66 -0
- linkml_store/inference/inference_engine.py +209 -0
- linkml_store/inference/inference_engine_registry.py +74 -0
- linkml_store/plotting/__init__.py +5 -0
- linkml_store/plotting/cli.py +826 -0
- linkml_store/plotting/dimensionality_reduction.py +453 -0
- linkml_store/plotting/embedding_plot.py +489 -0
- linkml_store/plotting/facet_chart.py +73 -0
- linkml_store/plotting/heatmap.py +383 -0
- linkml_store/utils/__init__.py +0 -0
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/dat_parser.py +95 -0
- linkml_store/utils/embedding_matcher.py +424 -0
- linkml_store/utils/embedding_utils.py +299 -0
- linkml_store/utils/enrichment_analyzer.py +217 -0
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +550 -0
- linkml_store/utils/io.py +38 -0
- linkml_store/utils/llm_utils.py +122 -0
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/neo4j_utils.py +42 -0
- linkml_store/utils/object_utils.py +190 -0
- linkml_store/utils/pandas_utils.py +93 -0
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/utils/sklearn_utils.py +193 -0
- linkml_store/utils/sql_utils.py +177 -0
- linkml_store/utils/stats_utils.py +53 -0
- linkml_store/utils/vector_utils.py +158 -0
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +43 -0
- linkml_store/webapi/main.py +855 -0
- linkml_store-0.3.0.dist-info/METADATA +226 -0
- linkml_store-0.3.0.dist-info/RECORD +101 -0
- linkml_store-0.3.0.dist-info/WHEEL +4 -0
- linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
- linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# solr_collection.py
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from copy import copy
|
|
5
|
+
from typing import Any, Dict, List, Optional, Union, Tuple
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
from linkml_store.api import Collection
|
|
10
|
+
from linkml_store.api.collection import DEFAULT_FACET_LIMIT
|
|
11
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SolrCollection(Collection):
    """Collection backed by an Apache Solr core or collection.

    Queries are translated into Solr ``/select`` requests: filter
    conditions become ``fq`` clauses, facet requests map onto Solr
    field facets and pivot facets.
    """

    @property
    def _collection_base(self) -> str:
        """Base URL for requests against this collection.

        When the parent database is configured with ``use_cores``, the
        collection alias is appended as the Solr core name; otherwise all
        collections share the database-level base URL.
        """
        if self.parent.use_cores:
            base_url = f"{self.parent.base_url}/{self.alias}"
        else:
            base_url = self.parent.base_url
        return base_url

    def search(
        self,
        query: str,
        where: Optional[Any] = None,
        index_name: Optional[str] = None,
        limit: Optional[int] = None,
        **kwargs,
    ) -> QueryResult:
        """Full-text search via a Solr dismax-style query parser.

        :param query: free-text search term (Solr ``q``)
        :param where: optional filter conditions, mapped to ``fq``
        :param index_name: Solr query parser (``defType``); defaults to ``edismax``
        :param limit: maximum number of rows to return (Solr ``rows``)
        :return: QueryResult; ranked_rows carry a uniform score of 1.0
        """
        if index_name is None:
            index_name = "edismax"
        qfs = self.parent.metadata.searchable_slots
        if not qfs:
            # No searchable slots configured: fall back to all class attributes.
            qfs = list(self.class_definition().attributes.keys())
        # Solr expects a single space-separated qf value; a Python list would be
        # encoded by requests as repeated qf parameters, of which Solr honors
        # only one.
        extra = {"defType": index_name, "qf": " ".join(qfs)}
        if limit is not None:
            # BUG FIX: limit was previously accepted but never applied.
            extra["rows"] = limit
        solr_query = self._build_solr_query(where, search_term=query, extra=extra)
        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")

        response = requests.get(f"{self._collection_base}/select", params=solr_query)
        response.raise_for_status()

        data = response.json()
        num_rows = data["response"]["numFound"]
        rows = data["response"]["docs"]
        # Scores are not requested from Solr here; assign a uniform rank.
        ranked_rows = [(1.0, row) for row in rows]
        return QueryResult(query=where, search_term=query, num_rows=num_rows, rows=rows, ranked_rows=ranked_rows)

    def query(self, query: Query, **kwargs) -> QueryResult:
        """Execute a structured Query against this Solr collection.

        :param query: Query object; where clause, select columns, limit and
            offset are translated into Solr parameters
        :return: QueryResult with total count and matching documents
        """
        solr_query = self._build_solr_query(query)
        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")

        response = requests.get(f"{self._collection_base}/select", params=solr_query)
        response.raise_for_status()

        data = response.json()
        logger.debug(f"Response: {data}")
        num_rows = data["response"]["numFound"]
        rows = data["response"]["docs"]

        return QueryResult(query=query, num_rows=num_rows, rows=rows)

    def query_facets(
        self,
        where: Optional[Dict] = None,
        facet_columns: List[Union[str, Tuple[str, ...]]] = None,
        facet_limit=DEFAULT_FACET_LIMIT,
        facet_min_count: int = 1,
        **kwargs,
    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
        """
        Query facet counts for fields or field combinations.

        :param where: Filter conditions
        :param facet_columns: List of fields to facet on. Elements can be:
            - Simple strings for single field facets
            - Tuples of strings for field combinations (pivot facets)
        :param facet_limit: Maximum number of facet values to return
        :param facet_min_count: Minimum count for facet values to be included
        :return: Dictionary mapping fields or field tuples to lists of (value, count) tuples
        """
        solr_query = self._build_solr_query(where)

        # Separate single fields (plain facets) from tuple fields (pivot facets)
        single_fields = []
        tuple_fields = []

        if facet_columns:
            for field in facet_columns:
                if isinstance(field, str):
                    single_fields.append(field)
                elif isinstance(field, tuple):
                    tuple_fields.append(field)

        # Process regular (single-field) facets in one request
        results = {}
        if single_fields:
            solr_query["facet"] = "true"
            solr_query["facet.field"] = single_fields
            solr_query["facet.limit"] = facet_limit
            solr_query["facet.mincount"] = facet_min_count

            logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")
            response = requests.get(f"{self._collection_base}/select", params=solr_query)
            response.raise_for_status()

            data = response.json()
            facet_counts = data["facet_counts"]["facet_fields"]

            # Solr returns facets as a flat [value, count, value, count, ...] list
            for facet_field, counts in facet_counts.items():
                results[facet_field] = list(zip(counts[::2], counts[1::2]))

        # Process pivot facets for tuple fields, one request per tuple
        if tuple_fields:
            # TODO: Add a warning if Solr < 4.0, when this was introduced
            for field_tuple in tuple_fields:
                # Create a query for this specific field tuple
                pivot_query = self._build_solr_query(where)
                pivot_query["facet"] = "true"

                # Create pivot facet (comma-separated field list)
                field_str = ','.join(field_tuple)
                pivot_query["facet.pivot"] = field_str
                pivot_query["facet.pivot.mincount"] = facet_min_count
                pivot_query["facet.limit"] = facet_limit

                logger.info(f"Querying Solr collection {self.alias} for pivot facets with query: {pivot_query}")
                response = requests.get(f"{self._collection_base}/select", params=pivot_query)
                response.raise_for_status()

                data = response.json()
                pivot_facets = data.get("facet_counts", {}).get("facet_pivot", {})

                # Process pivot facets into the same format as MongoDB results
                pivot_data = pivot_facets.get(field_str, [])

                # Build a list of tuples (field values, count)
                pivot_results = []
                self._process_pivot_facets(pivot_data, [], pivot_results, field_tuple)

                results[field_tuple] = pivot_results

        return results

    def _process_pivot_facets(self, pivot_data, current_values, results, field_tuple):
        """
        Recursively process pivot facet results to extract combinations of field values.

        :param pivot_data: The pivot facet data from Solr
        :param current_values: The current path of values in the recursion
        :param results: The result list to populate
        :param field_tuple: The original field tuple for reference
        """
        for item in pivot_data:
            value = item.get("value")
            count = item.get("count", 0)

            # Extend the current path with this value
            values = current_values + [value]

            # A complete path across all fields of the tuple yields one result
            if len(values) == len(field_tuple):
                results.append((tuple(values), count))

            # Descend into nested pivot levels while fields remain
            pivot = item.get("pivot", [])
            if pivot and len(values) < len(field_tuple):
                self._process_pivot_facets(pivot, values, results, field_tuple)

    def _build_solr_query(
        self, query: Union[Query, Dict], search_term="*:*", extra: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Translate a Query object or a where-dict into Solr request parameters.

        :param query: a Query (where/select/limit/offset honored) or a plain
            where-clause dict; None is treated as an empty filter
        :param search_term: value for Solr ``q`` when not already set
        :param extra: extra parameters merged in last (may override defaults)
        :return: dict of Solr request parameters
        """
        solr_query = {}
        if query is None:
            query = {}

        if isinstance(query, Query):
            where = query.where_clause
            solr_query["fq"] = self._build_solr_where_clause(where)

            if query.select_cols:
                solr_query["fl"] = ",".join(query.select_cols)

            if query.limit:
                solr_query["rows"] = query.limit

            if query.offset:
                solr_query["start"] = query.offset

        elif isinstance(query, dict):
            solr_query["fq"] = self._build_solr_where_clause(query)

        solr_query["wt"] = "json"
        if "q" not in solr_query:
            solr_query["q"] = search_term
        if extra:
            solr_query.update(extra)
        logger.info(f"Built Solr query: {solr_query}")
        return solr_query

    def _build_solr_where_clause(self, where_clause: Dict) -> str:
        """Render a where-clause dict as a Solr filter-query string.

        Each key/value pair becomes ``field:"value"``; list values become an
        OR-style group ``field:(v1 v2)``; pairs are AND-joined. When the
        parent database defines ``collection_type_slot``, a filter on that
        slot (restricting to this collection's alias) is added.

        NOTE(review): string values are quoted but not escaped — embedded
        double quotes in a value would produce invalid Solr syntax.

        :param where_clause: mapping of field name to value(s); may be None
        :return: Solr fq expression (empty string for no conditions)
        """
        if where_clause is None:
            where_clause = {}
        conditions = []
        if self.parent.metadata.collection_type_slot:
            # Copy before mutating so the caller's dict is untouched
            where_clause = copy(where_clause)
            where_clause[self.parent.metadata.collection_type_slot] = self.alias
        for field, value in where_clause.items():
            if not isinstance(value, (list, tuple)):
                value = [value]
            value = [f'"{v}"' if isinstance(v, str) else str(v) for v in value]
            if len(value) > 1:
                conditions.append(f"{field}:({' '.join(value)})")
            else:
                conditions.append(f"{field}:{value[0]}")

        return " AND ".join(conditions)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
from linkml_store.api import Collection, Database
|
|
7
|
+
from linkml_store.api.config import CollectionConfig
|
|
8
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
9
|
+
from linkml_store.api.stores.solr.solr_collection import SolrCollection
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SolrDatabase(Database):
    """Database adapter for Apache Solr.

    The handle is a Solr base URL, optionally prefixed with ``solr:``.
    Collections either map to individual Solr cores (``use_cores``) or are
    partitioned within a single index by the value of
    ``metadata.collection_type_slot``.
    """

    # Base URL of the Solr instance (handle with any leading "solr:" stripped)
    base_url: str
    collection_class = SolrCollection
    # If True, each collection corresponds to a separate Solr core
    use_cores: bool = False

    def __init__(self, handle: Optional[str] = None, **kwargs):
        """
        :param handle: Solr base URL, optionally prefixed with ``solr:``
        """
        # BUG FIX: handle is declared Optional, so guard before calling
        # startswith; also strip only the leading "solr:" prefix rather than
        # replacing every occurrence anywhere in the URL.
        if handle and handle.startswith("solr:"):
            self.base_url = handle[len("solr:"):]
        else:
            self.base_url = handle
        super().__init__(handle=handle, **kwargs)

    def get_collection(self, name: str, create_if_not_exists=True, **kwargs) -> "Collection":
        """Look up a collection by name, discovering collections on first use.

        :param name: collection name
        :param create_if_not_exists: create the collection if unknown;
            otherwise raise KeyError
        """
        if not self._collections:
            self.init_collections()

        if name not in self._collections:
            if create_if_not_exists:
                self._collections[name] = self.create_collection(name)
            else:
                raise KeyError(f"Collection {name} does not exist")

        return self._collections[name]

    def create_collection(
        self, name: str, alias: Optional[str] = None, metadata: Optional[CollectionConfig] = None, **kwargs
    ) -> Collection:
        """Create and register a SolrCollection.

        :param name: collection name (required)
        :param alias: registry key; defaults to the name
        :param metadata: optional collection configuration
        :raises ValueError: if no name is provided
        """
        if not name:
            raise ValueError(f"Collection name must be provided: alias: {alias} metadata: {metadata}")

        collection_cls = self.collection_class
        collection = collection_cls(name=name, alias=alias, parent=self, metadata=metadata)

        if not self._collections:
            self._collections = {}

        if not alias:
            alias = name

        self._collections[alias] = collection
        return collection

    def init_collections(self):
        """Discover collections from the Solr index.

        When ``collection_type_slot`` is configured, the distinct values of
        that field (obtained via a facet query) each become a collection;
        otherwise a single "default" collection is created.
        """
        if self._collections is None:
            self._collections = {}
        if self.metadata.collection_type_slot:
            response = requests.get(
                f"{self.base_url}/select",
                params={
                    "q": "*:*",
                    "wt": "json",
                    "rows": 0,
                    "facet": "true",
                    "facet.field": self.metadata.collection_type_slot,
                    "facet.limit": -1,  # -1 = no limit on facet values
                },
            )
            response.raise_for_status()
            data = response.json()
            coll_names = data["facet_counts"]["facet_fields"][self.metadata.collection_type_slot]
            # Solr returns [value, count, value, count, ...]; keep values only
            coll_names = coll_names[::2]
            for coll_name in coll_names:
                self.create_collection(coll_name)
        else:
            self.create_collection("default")

    def query(self, query: Query, **kwargs) -> QueryResult:
        """Route a query to the collection named by ``query.from_table``."""
        collection_name = query.from_table
        collection = self.get_collection(collection_name)
        return collection.query(query, **kwargs)
|
|
File without changes
|