linkml-store 0.2.9__tar.gz → 0.2.10rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of linkml-store might be problematic.
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/PKG-INFO +1 -1
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/pyproject.toml +5 -1
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/collection.py +2 -2
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/database.py +1 -12
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +25 -23
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/duckdb_database.py +2 -2
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +39 -25
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/neo4j_database.py +1 -1
- linkml_store-0.2.10rc1/src/linkml_store/api/stores/solr/solr_collection.py +222 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/cli.py +1 -2
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/llm_indexer.py +0 -1
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/indexer.py +2 -1
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/llm_inference_engine.py +2 -4
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_config.py +1 -1
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_engine.py +1 -1
- linkml_store-0.2.10rc1/src/linkml_store/plotting/__init__.py +5 -0
- linkml_store-0.2.10rc1/src/linkml_store/plotting/cli.py +172 -0
- linkml_store-0.2.10rc1/src/linkml_store/plotting/heatmap.py +356 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/dat_parser.py +1 -1
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/enrichment_analyzer.py +7 -7
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/llm_utils.py +1 -1
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/object_utils.py +9 -3
- linkml_store-0.2.9/src/linkml_store/api/stores/solr/solr_collection.py +0 -139
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/LICENSE +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/README.md +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/client.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/config.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/queries.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/filesystem_database.py +1 -1
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/types.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/constants.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/graphs/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/graphs/graph_map.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/evaluation.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/rag_inference_engine.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_engine_registry.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/change_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/file_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/format_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/io.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/mongodb_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/neo4j_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/pandas_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/patch_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/query_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/schema_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/sklearn_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/sql_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/stats_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/vector_utils.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/__init__.py +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/base.html.j2 +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
- {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/main.py +0 -0
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "linkml-store"
-version = "0.2.9"
+version = "0.2.10rc1"
 description = "linkml-store"
 authors = ["Author 1 <author@org.org>"]
 license = "MIT"
@@ -67,6 +67,10 @@ jupyter = "*"
 jupysql = "*"
 papermill = "*"
 nbdime = "*"
+codespell = {version = ">=2.3.0"}
+tomli = {version = ">=2.0.1"}
+black = {version = ">=24.0.0"}
+ruff = {version = ">=0.6.2"}
 
 [tool.poetry.group.tests.dependencies]
 pytest = "^7.4.0"
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/collection.py
@@ -641,11 +641,11 @@ class Collection(Generic[DatabaseType]):
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
         df = self.find(where=where, limit=-1).rows_dataframe
-
+
         # Handle the case where agg_map is None
         if agg_map is None:
             agg_map = {}
-
+
         pk_fields = agg_map.get("first", []) + group_by_fields
         list_fields = agg_map.get("list", [])
         if not list_fields:
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/database.py
@@ -606,24 +606,13 @@ class Database(ABC, Generic[CollectionType]):
         """
         return list(self.iter_validate_database(**kwargs))
 
-    def validate_database(self, **kwargs) -> List["ValidationResult"]:
-        """
-        Validate the contents of the database.
-
-        As `iter_validate_database`, but returns a list of validation results.
-
-        :param kwargs:
-        :return:
-        """
-        return list(self.iter_validate_database(**kwargs))
-
     def iter_validate_database(
         self, ensure_referential_integrity: bool = None, **kwargs
     ) -> Iterator["ValidationResult"]:
         """
         Validate the contents of the database.
 
-        An
+        An example, let's create a database with a predefined schema
         from the countries.linkml.yaml file:
 
         >>> from linkml_store.api.client import Client
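Note: the duplicated validate_database definition is removed above; the surviving method still returns list(self.iter_validate_database(**kwargs)). A minimal, hedged usage sketch (the attach_database handle and alias below are illustrative, not taken from this diff):

>>> from linkml_store.api.client import Client
>>> client = Client()
>>> db = client.attach_database("duckdb", alias="test")  # illustrative handle/alias
>>> results = db.validate_database()                      # eager form of iter_validate_database()
>>> for r in results:
...     print(r)                                          # each item is a ValidationResult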
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/duckdb_collection.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, List, Optional,
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import sqlalchemy as sqla
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
@@ -155,9 +155,9 @@ class DuckDBCollection(Collection):
     ) -> QueryResult:
         """
         Group objects in the collection by specified fields using SQLAlchemy.
-
+
         This implementation leverages DuckDB's SQL capabilities for more efficient grouping.
-
+
         :param group_by_fields: List of fields to group by
         :param inlined_field: Field name to store aggregated objects
         :param agg_map: Dictionary mapping aggregation types to fields
@@ -167,31 +167,32 @@ class DuckDBCollection(Collection):
         """
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
-
+
         cd = self.class_definition()
         if not cd:
             logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         # Check if the table exists
         if not self.parent._table_exists(self.alias):
             logger.debug(f"Table {self.alias} doesn't exist, falling back to parent implementation")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         # Get table definition
         table = self._sqla_table(cd)
         engine = self.parent.engine
-
+
         # Create a SQLAlchemy select statement for groups
-        from sqlalchemy import select
+        from sqlalchemy import select
+
         group_cols = [table.c[field] for field in group_by_fields if field in table.columns.keys()]
-
+
         if not group_cols:
             logger.warning(f"None of the group_by fields {group_by_fields} found in table columns")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         stmt = select(*group_cols).distinct()
-
+
         # Add where conditions if specified
         if where:
             conditions = []
@@ -219,24 +220,24 @@ class DuckDBCollection(Collection):
                 else:
                     # Direct equality comparison
                     conditions.append(table.c[k] == v)
-
+
         if conditions:
             for condition in conditions:
                 stmt = stmt.where(condition)
-
+
         results = []
         try:
             with engine.connect() as conn:
                 # Get all distinct groups
                 group_result = conn.execute(stmt)
                 group_rows = list(group_result)
-
+
                 # For each group, get all objects
                 for group_row in group_rows:
                     # Build conditions for this group
                     group_conditions = []
                     group_dict = {}
-
+
                     for i, field in enumerate(group_by_fields):
                         if field in table.columns.keys():
                             value = group_row[i]
@@ -245,12 +246,12 @@ class DuckDBCollection(Collection):
                                 group_conditions.append(table.c[field].is_(None))
                             else:
                                 group_conditions.append(table.c[field] == value)
-
+
                     # Get all rows for this group
                     row_stmt = select(*table.columns)
                     for condition in group_conditions:
                         row_stmt = row_stmt.where(condition)
-
+
                     # Add original where conditions
                     if where:
                         for k, v in where.items():
@@ -276,10 +277,10 @@ class DuckDBCollection(Collection):
                             else:
                                 # Direct equality comparison
                                 row_stmt = row_stmt.where(table.c[k] == v)
-
+
                     row_result = conn.execute(row_stmt)
                     rows = list(row_result)
-
+
                     # Convert rows to dictionaries
                     objects = []
                     for row in rows:
@@ -287,18 +288,18 @@ class DuckDBCollection(Collection):
                         for i, col in enumerate(row._fields):
                             obj[col] = row[i]
                         objects.append(obj)
-
+
                     # Apply agg_map to filter fields if specified
                     if agg_map and "list" in agg_map:
                         list_fields = agg_map["list"]
                         if list_fields:
                             objects = [{k: obj.get(k) for k in list_fields if k in obj} for obj in objects]
-
+
                     # Create the result object
                     result_obj = group_dict.copy()
                     result_obj[inlined_field] = objects
                     results.append(result_obj)
-
+
             return QueryResult(num_rows=len(results), rows=results)
         except Exception as e:
             logger.warning(f"Error in DuckDB group_by: {e}")
@@ -316,7 +317,8 @@ class DuckDBCollection(Collection):
             self.metadata.is_prepopulated = True
             return
         # query = Query(
-        #    from_table="information_schema.tables",
+        #    from_table="information_schema.tables",
+        #    where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
         # )
         # qr = self.parent.query(query)
         # if qr.num_rows > 0:
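For orientation, a hedged sketch of calling the reworked DuckDB group_by shown above (the collection and field names are hypothetical; the keyword arguments mirror the signature in the hunk):

>>> qr = collection.group_by(
...     group_by_fields=["country"],          # hypothetical column to group on
...     inlined_field="objects",              # grouped members are inlined under this key
...     agg_map={"list": ["name", "age"]},    # hypothetical fields to keep per member
... )
>>> qr.rows[0]["country"], len(qr.rows[0]["objects"])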
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/duckdb_database.py
RENAMED
@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import List, Optional, Union
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/mongodb_collection.py
@@ -7,6 +7,7 @@ from pymongo.collection import Collection as MongoCollection
 from linkml_store.api import Collection
 from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
 from linkml_store.api.queries import Query, QueryResult
+from linkml_store.utils.object_utils import object_path_get
 
 logger = logging.getLogger(__name__)
 
@@ -130,7 +131,15 @@ class MongoDBCollection(Collection):
     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
-
+
+        # Build projection if select_cols are provided
+        projection = None
+        if query.select_cols:
+            projection = {"_id": 0}
+            for col in query.select_cols:
+                projection[col] = 1
+
+        cursor = self.mongo_collection.find(mongo_filter, projection)
         if limit and limit >= 0:
             cursor = cursor.limit(limit)
         offset = offset or query.offset
@@ -141,9 +150,19 @@ class MongoDBCollection(Collection):
 
         def _as_row(row: dict):
             row = copy(row)
-
+            if "_id" in row:
+                del row["_id"]
+
             if select_cols:
-
+                # For nested fields, ensure we handle them properly
+                result = {}
+                for col in select_cols:
+                    # If it's a nested field (contains dots)
+                    if "." in col or "[" in col:
+                        result[col] = object_path_get(row, col)
+                    elif col in row:
+                        result[col] = row[col]
+                return result
             return row
 
         rows = [_as_row(row) for row in cursor]
@@ -265,7 +284,7 @@ class MongoDBCollection(Collection):
         if deleted_rows_count == 0 and not missing_ok:
             raise ValueError(f"No rows found for {where}")
         return deleted_rows_count
-
+
     def group_by(
         self,
         group_by_fields: List[str],
@@ -276,9 +295,9 @@ class MongoDBCollection(Collection):
     ) -> QueryResult:
         """
         Group objects in the collection by specified fields using MongoDB's aggregation pipeline.
-
+
         This implementation leverages MongoDB's native aggregation capabilities for efficient grouping.
-
+
         :param group_by_fields: List of fields to group by
         :param inlined_field: Field name to store aggregated objects
         :param agg_map: Dictionary mapping aggregation types to fields
@@ -288,7 +307,7 @@ class MongoDBCollection(Collection):
         """
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
-
+
         # Build the group key for MongoDB
         if len(group_by_fields) == 1:
             # Single field grouping
@@ -296,34 +315,29 @@ class MongoDBCollection(Collection):
         else:
             # Multi-field grouping
             group_id = {field: f"${field}" for field in group_by_fields}
-
+
         # Start building the pipeline
         pipeline = []
-
+
         # Add match stage if where clause is provided
         if where:
             pipeline.append({"$match": where})
-
+
         # Add the group stage
-        group_stage = {
-            "$group": {
-                "_id": group_id,
-                "objects": {"$push": "$$ROOT"}
-            }
-        }
+        group_stage = {"$group": {"_id": group_id, "objects": {"$push": "$$ROOT"}}}
         pipeline.append(group_stage)
-
+
         # Execute the aggregation
         logger.debug(f"MongoDB group_by pipeline: {pipeline}")
        aggregation_results = list(self.mongo_collection.aggregate(pipeline))
-
+
         # Transform the results to match the expected format
         results = []
         for result in aggregation_results:
             # Skip null groups if needed
             if result["_id"] is None and kwargs.get("skip_nulls", False):
                 continue
-
+
             # Create the group object
             if isinstance(result["_id"], dict):
                 # Multi-field grouping
@@ -331,15 +345,15 @@ class MongoDBCollection(Collection):
             else:
                 # Single field grouping
                 group_obj = {group_by_fields[0]: result["_id"]}
-
+
             # Add the grouped objects
             objects = result["objects"]
-
+
             # Remove MongoDB _id field from each object
             for obj in objects:
                 if "_id" in obj:
                     del obj["_id"]
-
+
             # Apply any field selection or transformations based on agg_map
             if agg_map:
                 # Get first fields (fields to keep as single values)
@@ -347,7 +361,7 @@ class MongoDBCollection(Collection):
                 if first_fields:
                     # These are already in the group_obj from the _id
                     pass
-
+
                 # Get list fields (fields to aggregate as lists)
                 list_fields = agg_map.get("list", [])
                 if list_fields:
@@ -357,9 +371,9 @@ class MongoDBCollection(Collection):
                     # If list_fields is empty but first_fields is specified,
                     # filter out first_fields from objects to avoid duplication
                     objects = [{k: v for k, v in obj.items() if k not in first_fields} for obj in objects]
-
+
             # Add the objects to the group
             group_obj[inlined_field] = objects
             results.append(group_obj)
-
+
         return QueryResult(num_rows=len(results), rows=results)
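The projection logic added in the query() hunk above maps select_cols directly onto a MongoDB projection document (suppressing _id); a small illustration with hypothetical column names:

>>> select_cols = ["name", "address.city"]   # hypothetical columns, including a nested path
>>> projection = {"_id": 0}
>>> for col in select_cols:
...     projection[col] = 1
>>> projection
{'_id': 0, 'name': 1, 'address.city': 1}

Nested or bracketed paths in select_cols are then resolved per row via object_path_get, as shown in _as_row.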
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/neo4j_database.py
RENAMED
@@ -27,7 +27,7 @@ class Neo4jDatabase(Database):
         if handle is None:
             handle = "bolt://localhost:7687/neo4j"
         if handle.startswith("neo4j:"):
-            handle = handle.replace("neo4j:", "bolt:")
+            handle = handle.replace("neo4j:", "bolt:", 1)
         super().__init__(handle=handle, **kwargs)
 
     @property
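The added count argument limits the scheme rewrite to the first occurrence of "neo4j:", which matters when that substring also appears later in the handle, for example in embedded credentials (illustrative URI):

>>> handle = "neo4j://neo4j:secret@localhost:7687"
>>> handle.replace("neo4j:", "bolt:")        # old behaviour also rewrites the username part
'bolt://bolt:secret@localhost:7687'
>>> handle.replace("neo4j:", "bolt:", 1)     # new behaviour rewrites only the scheme
'bolt://neo4j:secret@localhost:7687'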
linkml_store-0.2.10rc1/src/linkml_store/api/stores/solr/solr_collection.py
@@ -0,0 +1,222 @@
+# solr_collection.py
+
+import logging
+from copy import copy
+from typing import Any, Dict, List, Optional, Union, Tuple
+
+import requests
+
+from linkml_store.api import Collection
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT
+from linkml_store.api.queries import Query, QueryResult
+
+logger = logging.getLogger(__name__)
+
+
+class SolrCollection(Collection):
+
+    @property
+    def _collection_base(self) -> str:
+        if self.parent.use_cores:
+            base_url = f"{self.parent.base_url}/{self.alias}"
+        else:
+            base_url = self.parent.base_url
+        return base_url
+
+    def search(
+        self,
+        query: str,
+        where: Optional[Any] = None,
+        index_name: Optional[str] = None,
+        limit: Optional[int] = None,
+        **kwargs,
+    ) -> QueryResult:
+        if index_name is None:
+            index_name = "edismax"
+        qfs = self.parent.metadata.searchable_slots
+        if not qfs:
+            raise ValueError("No searchable slots configured for Solr collection")
+        solr_query = self._build_solr_query(where, search_term=query, extra={"defType": index_name, "qf": qfs})
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")
+
+        response = requests.get(f"{self._collection_base}/select", params=solr_query)
+        response.raise_for_status()
+
+        data = response.json()
+        num_rows = data["response"]["numFound"]
+        rows = data["response"]["docs"]
+        ranked_rows = [(1.0, row) for row in rows]
+        return QueryResult(query=where, search_term=query, num_rows=num_rows, rows=rows, ranked_rows=ranked_rows)
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        solr_query = self._build_solr_query(query)
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")
+
+        response = requests.get(f"{self._collection_base}/select", params=solr_query)
+        response.raise_for_status()
+
+        data = response.json()
+        logger.debug(f"Response: {data}")
+        num_rows = data["response"]["numFound"]
+        rows = data["response"]["docs"]
+
+        return QueryResult(query=query, num_rows=num_rows, rows=rows)
+
+    def query_facets(
+        self,
+        where: Optional[Dict] = None,
+        facet_columns: List[Union[str, Tuple[str, ...]]] = None,
+        facet_limit=DEFAULT_FACET_LIMIT,
+        facet_min_count: int = 1,
+        **kwargs,
+    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
+        """
+        Query facet counts for fields or field combinations.
+
+        :param where: Filter conditions
+        :param facet_columns: List of fields to facet on. Elements can be:
+            - Simple strings for single field facets
+            - Tuples of strings for field combinations (pivot facets)
+        :param facet_limit: Maximum number of facet values to return
+        :param facet_min_count: Minimum count for facet values to be included
+        :return: Dictionary mapping fields or field tuples to lists of (value, count) tuples
+        """
+        solr_query = self._build_solr_query(where)
+
+        # Separate single fields and tuple fields
+        single_fields = []
+        tuple_fields = []
+
+        if facet_columns:
+            for field in facet_columns:
+                if isinstance(field, str):
+                    single_fields.append(field)
+                elif isinstance(field, tuple):
+                    tuple_fields.append(field)
+
+        # Process regular facets
+        results = {}
+        if single_fields:
+            solr_query["facet"] = "true"
+            solr_query["facet.field"] = single_fields
+            solr_query["facet.limit"] = facet_limit
+            solr_query["facet.mincount"] = facet_min_count
+
+            logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")
+            response = requests.get(f"{self._collection_base}/select", params=solr_query)
+            response.raise_for_status()
+
+            data = response.json()
+            facet_counts = data["facet_counts"]["facet_fields"]
+
+            for facet_field, counts in facet_counts.items():
+                results[facet_field] = list(zip(counts[::2], counts[1::2]))
+
+        # Process pivot facets for tuple fields
+        if tuple_fields:
+            # TODO: Add a warning if Solr < 4.0, when this was introduced
+            for field_tuple in tuple_fields:
+                # Create a query for this specific field tuple
+                pivot_query = self._build_solr_query(where)
+                pivot_query["facet"] = "true"
+
+                # Create pivot facet
+                field_str = ','.join(field_tuple)
+                pivot_query["facet.pivot"] = field_str
+                pivot_query["facet.pivot.mincount"] = facet_min_count
+                pivot_query["facet.limit"] = facet_limit
+
+                logger.info(f"Querying Solr collection {self.alias} for pivot facets with query: {pivot_query}")
+                response = requests.get(f"{self._collection_base}/select", params=pivot_query)
+                response.raise_for_status()
+
+                data = response.json()
+                pivot_facets = data.get("facet_counts", {}).get("facet_pivot", {})
+
+                # Process pivot facets into the same format as MongoDB results
+                field_str = ','.join(field_tuple)
+                pivot_data = pivot_facets.get(field_str, [])
+
+                # Build a list of tuples (field values, count)
+                pivot_results = []
+                self._process_pivot_facets(pivot_data, [], pivot_results, field_tuple)
+
+                results[field_tuple] = pivot_results
+
+        return results
+
+    def _process_pivot_facets(self, pivot_data, current_values, results, field_tuple):
+        """
+        Recursively process pivot facet results to extract combinations of field values.
+
+        :param pivot_data: The pivot facet data from Solr
+        :param current_values: The current path of values in the recursion
+        :param results: The result list to populate
+        :param field_tuple: The original field tuple for reference
+        """
+        for item in pivot_data:
+            # Add the current field value
+            value = item.get("value")
+            count = item.get("count", 0)
+
+            # Update the current path with this value
+            values = current_values + [value]
+
+            # If we have all the fields from the tuple, add a result
+            if len(values) == len(field_tuple):
+                # Create a tuple of values corresponding to the field tuple
+                results.append((tuple(values), count))
+
+            # Process child pivot fields recursively
+            pivot = item.get("pivot", [])
+            if pivot and len(values) < len(field_tuple):
+                self._process_pivot_facets(pivot, values, results, field_tuple)
+
+    def _build_solr_query(
+        self, query: Union[Query, Dict], search_term="*:*", extra: Optional[Dict] = None
+    ) -> Dict[str, Any]:
+        solr_query = {}
+        if query is None:
+            query = {}
+
+        if isinstance(query, Query):
+            where = query.where_clause
+            solr_query["fq"] = self._build_solr_where_clause(where)
+
+            if query.select_cols:
+                solr_query["fl"] = ",".join(query.select_cols)
+
+            if query.limit:
+                solr_query["rows"] = query.limit
+
+            if query.offset:
+                solr_query["start"] = query.offset
+
+        elif isinstance(query, dict):
+            solr_query["fq"] = self._build_solr_where_clause(query)
+
+        solr_query["wt"] = "json"
+        if "q" not in solr_query:
+            solr_query["q"] = search_term
+        if extra:
+            solr_query.update(extra)
+        logger.info(f"Built Solr query: {solr_query}")
+        return solr_query
+
+    def _build_solr_where_clause(self, where_clause: Dict) -> str:
+        if where_clause is None:
+            where_clause = {}
+        conditions = []
+        if self.parent.metadata.collection_type_slot:
+            where_clause = copy(where_clause)
+            where_clause[self.parent.metadata.collection_type_slot] = self.alias
+        for field, value in where_clause.items():
+            if not isinstance(value, (list, tuple)):
+                value = [value]
+            value = [f'"{v}"' if isinstance(v, str) else str(v) for v in value]
+            if len(value) > 1:
+                conditions.append(f"{field}:({' '.join(value)})")
+            else:
+                conditions.append(f"{field}:{value[0]}")
+
+        return " AND ".join(conditions)
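A hedged usage sketch of the faceting API introduced in the new SolrCollection above (field names are hypothetical; return shapes follow the query_facets docstring):

>>> # single-field facets: {field: [(value, count), ...]}
>>> collection.query_facets(facet_columns=["category"])
>>> # tuple entries become Solr pivot facets: {(f1, f2): [((v1, v2), count), ...]}
>>> collection.query_facets(facet_columns=[("category", "status")])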
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/indexer.py
@@ -3,9 +3,10 @@ from enum import Enum
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
 import numpy as np
-from linkml_store.utils.vector_utils import pairwise_cosine_similarity, mmr_diversified_search
 from pydantic import BaseModel
 
+from linkml_store.utils.vector_utils import mmr_diversified_search, pairwise_cosine_similarity
+
 INDEX_ITEM = np.ndarray
 
 logger = logging.getLogger(__name__)
{linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/llm_inference_engine.py
@@ -1,18 +1,16 @@
-import json
 import logging
 from dataclasses import dataclass
 from pathlib import Path
 from typing import ClassVar, List, Optional, TextIO, Union
 
 import yaml
-from linkml_store.utils.llm_utils import parse_yaml_payload
 from llm import get_key
 from pydantic import BaseModel
 
-from linkml_store.api.collection import OBJECT
+from linkml_store.api.collection import OBJECT
 from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
 from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
-from linkml_store.utils.
+from linkml_store.utils.llm_utils import parse_yaml_payload
 
 logger = logging.getLogger(__name__)
 