linkml-store 0.2.9__tar.gz → 0.2.10rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of linkml-store has been flagged as a potentially problematic release.

Files changed (86)
  1. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/PKG-INFO +1 -1
  2. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/pyproject.toml +5 -1
  3. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/collection.py +2 -2
  4. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/database.py +1 -12
  5. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/duckdb_collection.py +25 -23
  6. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/duckdb_database.py +2 -2
  7. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/mongodb_collection.py +39 -25
  8. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/neo4j_database.py +1 -1
  9. linkml_store-0.2.10rc1/src/linkml_store/api/stores/solr/solr_collection.py +222 -0
  10. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/cli.py +1 -2
  11. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/llm_indexer.py +0 -1
  12. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/indexer.py +2 -1
  13. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/llm_inference_engine.py +2 -4
  14. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_config.py +1 -1
  15. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_engine.py +1 -1
  16. linkml_store-0.2.10rc1/src/linkml_store/plotting/__init__.py +5 -0
  17. linkml_store-0.2.10rc1/src/linkml_store/plotting/cli.py +172 -0
  18. linkml_store-0.2.10rc1/src/linkml_store/plotting/heatmap.py +356 -0
  19. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/dat_parser.py +1 -1
  20. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/enrichment_analyzer.py +7 -7
  21. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/llm_utils.py +1 -1
  22. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/object_utils.py +9 -3
  23. linkml_store-0.2.9/src/linkml_store/api/stores/solr/solr_collection.py +0 -139
  24. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/LICENSE +0 -0
  25. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/README.md +0 -0
  26. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/__init__.py +0 -0
  27. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/__init__.py +0 -0
  28. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/client.py +0 -0
  29. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/config.py +0 -0
  30. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/queries.py +0 -0
  31. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/__init__.py +0 -0
  32. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/__init__.py +0 -0
  33. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/chromadb_collection.py +0 -0
  34. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/chromadb/chromadb_database.py +0 -0
  35. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/__init__.py +0 -0
  36. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/duckdb/mappings.py +0 -0
  37. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/__init__.py +0 -0
  38. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/filesystem_collection.py +0 -0
  39. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/filesystem/filesystem_database.py +1 -1
  40. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/__init__.py +0 -0
  41. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/hdf5_collection.py +0 -0
  42. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/hdf5/hdf5_database.py +0 -0
  43. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/__init__.py +0 -0
  44. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/mongodb/mongodb_database.py +0 -0
  45. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/__init__.py +0 -0
  46. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/neo4j/neo4j_collection.py +0 -0
  47. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/__init__.py +0 -0
  48. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/solr_database.py +0 -0
  49. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/stores/solr/solr_utils.py +0 -0
  50. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/api/types.py +0 -0
  51. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/constants.py +0 -0
  52. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/graphs/__init__.py +0 -0
  53. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/graphs/graph_map.py +0 -0
  54. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/__init__.py +0 -0
  55. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/__init__.py +0 -0
  56. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/index/implementations/simple_indexer.py +0 -0
  57. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/__init__.py +0 -0
  58. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/evaluation.py +0 -0
  59. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/__init__.py +0 -0
  60. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/rag_inference_engine.py +0 -0
  61. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/rule_based_inference_engine.py +0 -0
  62. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/implementations/sklearn_inference_engine.py +0 -0
  63. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/inference/inference_engine_registry.py +0 -0
  64. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/__init__.py +0 -0
  65. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/change_utils.py +0 -0
  66. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/file_utils.py +0 -0
  67. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/format_utils.py +0 -0
  68. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/io.py +0 -0
  69. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/mongodb_utils.py +0 -0
  70. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/neo4j_utils.py +0 -0
  71. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/pandas_utils.py +0 -0
  72. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/patch_utils.py +0 -0
  73. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/query_utils.py +0 -0
  74. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/schema_utils.py +0 -0
  75. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/sklearn_utils.py +0 -0
  76. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/sql_utils.py +0 -0
  77. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/stats_utils.py +0 -0
  78. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/utils/vector_utils.py +0 -0
  79. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/__init__.py +0 -0
  80. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/__init__.py +0 -0
  81. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/base.html.j2 +0 -0
  82. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/collection_details.html.j2 +0 -0
  83. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/database_details.html.j2 +0 -0
  84. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/databases.html.j2 +0 -0
  85. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/html/generic.html.j2 +0 -0
  86. {linkml_store-0.2.9 → linkml_store-0.2.10rc1}/src/linkml_store/webapi/main.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: linkml-store
-Version: 0.2.9
+Version: 0.2.10rc1
 Summary: linkml-store
 License: MIT
 Author: Author 1
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "linkml-store"
-version = "0.2.9"
+version = "0.2.10rc1"
 description = "linkml-store"
 authors = ["Author 1 <author@org.org>"]
 license = "MIT"
@@ -67,6 +67,10 @@ jupyter = "*"
 jupysql = "*"
 papermill = "*"
 nbdime = "*"
+codespell = {version = ">=2.3.0"}
+tomli = {version = ">=2.0.1"}
+black = {version = ">=24.0.0"}
+ruff = {version = ">=0.6.2"}
 
 [tool.poetry.group.tests.dependencies]
 pytest = "^7.4.0"
src/linkml_store/api/collection.py
@@ -641,11 +641,11 @@ class Collection(Generic[DatabaseType]):
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
         df = self.find(where=where, limit=-1).rows_dataframe
-
+
         # Handle the case where agg_map is None
         if agg_map is None:
             agg_map = {}
-
+
         pk_fields = agg_map.get("first", []) + group_by_fields
         list_fields = agg_map.get("list", [])
         if not list_fields:
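For orientation, the `agg_map` argument handled in this hunk maps aggregation modes to field lists. A minimal usage sketch follows; the collection and field names are illustrative, not taken from the diff:

    # Hypothetical call: group rows by "country", nesting the remaining fields per group.
    # The "first" and "list" keys are the ones read via agg_map.get(...) in the hunk above.
    result = collection.group_by(
        group_by_fields=["country"],
        inlined_field="objects",
        agg_map={"first": ["country"], "list": ["name", "age"]},
        where={"status": "active"},
    )
    for row in result.rows:
        print(row["country"], len(row["objects"]))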
src/linkml_store/api/database.py
@@ -606,24 +606,13 @@ class Database(ABC, Generic[CollectionType]):
         """
         return list(self.iter_validate_database(**kwargs))
 
-    def validate_database(self, **kwargs) -> List["ValidationResult"]:
-        """
-        Validate the contents of the database.
-
-        As `iter_validate_database`, but returns a list of validation results.
-
-        :param kwargs:
-        :return:
-        """
-        return list(self.iter_validate_database(**kwargs))
-
     def iter_validate_database(
         self, ensure_referential_integrity: bool = None, **kwargs
     ) -> Iterator["ValidationResult"]:
         """
         Validate the contents of the database.
 
-        An an example, let's create a database with a predefined schema
+        An example, let's create a database with a predefined schema
         from the countries.linkml.yaml file:
 
         >>> from linkml_store.api.client import Client
src/linkml_store/api/stores/duckdb/duckdb_collection.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Dict, List, Optional, Union, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import sqlalchemy as sqla
 from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
@@ -155,9 +155,9 @@ class DuckDBCollection(Collection):
     ) -> QueryResult:
         """
         Group objects in the collection by specified fields using SQLAlchemy.
-
+
         This implementation leverages DuckDB's SQL capabilities for more efficient grouping.
-
+
         :param group_by_fields: List of fields to group by
         :param inlined_field: Field name to store aggregated objects
        :param agg_map: Dictionary mapping aggregation types to fields
@@ -167,31 +167,32 @@ class DuckDBCollection(Collection):
         """
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
-
+
         cd = self.class_definition()
         if not cd:
             logger.debug(f"No class definition defined for {self.alias} {self.target_class_name}")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         # Check if the table exists
         if not self.parent._table_exists(self.alias):
             logger.debug(f"Table {self.alias} doesn't exist, falling back to parent implementation")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         # Get table definition
         table = self._sqla_table(cd)
         engine = self.parent.engine
-
+
         # Create a SQLAlchemy select statement for groups
-        from sqlalchemy import select, func, and_, or_
+        from sqlalchemy import select
+
         group_cols = [table.c[field] for field in group_by_fields if field in table.columns.keys()]
-
+
         if not group_cols:
             logger.warning(f"None of the group_by fields {group_by_fields} found in table columns")
             return super().group_by(group_by_fields, inlined_field, agg_map, where, **kwargs)
-
+
         stmt = select(*group_cols).distinct()
-
+
         # Add where conditions if specified
         if where:
             conditions = []
@@ -219,24 +220,24 @@ class DuckDBCollection(Collection):
                 else:
                     # Direct equality comparison
                     conditions.append(table.c[k] == v)
-
+
             if conditions:
                 for condition in conditions:
                     stmt = stmt.where(condition)
-
+
         results = []
         try:
             with engine.connect() as conn:
                 # Get all distinct groups
                 group_result = conn.execute(stmt)
                 group_rows = list(group_result)
-
+
                 # For each group, get all objects
                 for group_row in group_rows:
                     # Build conditions for this group
                     group_conditions = []
                     group_dict = {}
-
+
                     for i, field in enumerate(group_by_fields):
                         if field in table.columns.keys():
                             value = group_row[i]
@@ -245,12 +246,12 @@ class DuckDBCollection(Collection):
                                 group_conditions.append(table.c[field].is_(None))
                             else:
                                 group_conditions.append(table.c[field] == value)
-
+
                     # Get all rows for this group
                     row_stmt = select(*table.columns)
                     for condition in group_conditions:
                         row_stmt = row_stmt.where(condition)
-
+
                     # Add original where conditions
                     if where:
                         for k, v in where.items():
@@ -276,10 +277,10 @@ class DuckDBCollection(Collection):
                             else:
                                 # Direct equality comparison
                                 row_stmt = row_stmt.where(table.c[k] == v)
-
+
                     row_result = conn.execute(row_stmt)
                     rows = list(row_result)
-
+
                     # Convert rows to dictionaries
                     objects = []
                     for row in rows:
@@ -287,18 +288,18 @@ class DuckDBCollection(Collection):
                         for i, col in enumerate(row._fields):
                            obj[col] = row[i]
                        objects.append(obj)
-
+
                    # Apply agg_map to filter fields if specified
                    if agg_map and "list" in agg_map:
                        list_fields = agg_map["list"]
                        if list_fields:
                            objects = [{k: obj.get(k) for k in list_fields if k in obj} for obj in objects]
-
+
                    # Create the result object
                    result_obj = group_dict.copy()
                    result_obj[inlined_field] = objects
                    results.append(result_obj)
-
+
                return QueryResult(num_rows=len(results), rows=results)
        except Exception as e:
            logger.warning(f"Error in DuckDB group_by: {e}")
@@ -316,7 +317,8 @@ class DuckDBCollection(Collection):
            self.metadata.is_prepopulated = True
            return
        # query = Query(
-       #     from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+       #     from_table="information_schema.tables",
+       #     where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
        # )
        # qr = self.parent.query(query)
        # if qr.num_rows > 0:
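The DuckDB changes above are mostly whitespace cleanup and trimming of unused SQLAlchemy imports; the grouping strategy itself is a two-pass pattern: select the distinct group-key combinations, then fetch the member rows per combination. A standalone sketch of that pattern, assuming the duckdb_engine SQLAlchemy dialect is installed; table and column names are illustrative:

    # Two-pass grouping sketch: distinct group keys first, then per-group member rows.
    from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, select

    engine = create_engine("duckdb:///:memory:")  # assumes the duckdb_engine dialect is available
    metadata = MetaData()
    persons = Table(
        "persons", metadata,
        Column("name", String), Column("country", String), Column("age", Integer),
    )
    metadata.create_all(engine)

    group_cols = [persons.c.country]
    grouped = []
    with engine.connect() as conn:
        for group_row in conn.execute(select(*group_cols).distinct()):
            stmt = select(*persons.columns)
            for col, value in zip(group_cols, group_row):
                # NULL group keys need IS NULL rather than an equality comparison
                stmt = stmt.where(col.is_(None) if value is None else col == value)
            members = [dict(r._mapping) for r in conn.execute(stmt)]
            grouped.append({"country": group_row[0], "objects": members})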
src/linkml_store/api/stores/duckdb/duckdb_database.py
@@ -1,7 +1,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Optional, Union, List
+from typing import List, Optional, Union
 
 import pandas as pd
 import sqlalchemy
@@ -14,7 +14,7 @@ from linkml_store.api import Database
 from linkml_store.api.queries import Query, QueryResult
 from linkml_store.api.stores.duckdb.duckdb_collection import DuckDBCollection
 from linkml_store.utils.format_utils import Format
-from linkml_store.utils.sql_utils import introspect_schema, query_to_sql, where_clause_to_sql
+from linkml_store.utils.sql_utils import introspect_schema, query_to_sql
 
 TYPE_MAP = {
     "VARCHAR": "string",
src/linkml_store/api/stores/mongodb/mongodb_collection.py
@@ -7,6 +7,7 @@ from pymongo.collection import Collection as MongoCollection
 from linkml_store.api import Collection
 from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
 from linkml_store.api.queries import Query, QueryResult
+from linkml_store.utils.object_utils import object_path_get
 
 logger = logging.getLogger(__name__)
 
@@ -130,7 +131,15 @@ class MongoDBCollection(Collection):
     def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
         mongo_filter = self._build_mongo_filter(query.where_clause)
         limit = limit or query.limit
-        cursor = self.mongo_collection.find(mongo_filter)
+
+        # Build projection if select_cols are provided
+        projection = None
+        if query.select_cols:
+            projection = {"_id": 0}
+            for col in query.select_cols:
+                projection[col] = 1
+
+        cursor = self.mongo_collection.find(mongo_filter, projection)
         if limit and limit >= 0:
             cursor = cursor.limit(limit)
         offset = offset or query.offset
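The new projection logic above translates `select_cols` into a standard pymongo projection document before calling `find`. Shown in isolation, with hypothetical field names:

    # The projection document built above: drop Mongo's _id, include only requested columns.
    select_cols = ["name", "address.city"]
    projection = {"_id": 0}
    for col in select_cols:
        projection[col] = 1
    # projection == {"_id": 0, "name": 1, "address.city": 1}
    # cursor = mongo_collection.find({"status": "active"}, projection)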
@@ -141,9 +150,19 @@
 
         def _as_row(row: dict):
             row = copy(row)
-            del row["_id"]
+            if "_id" in row:
+                del row["_id"]
+
             if select_cols:
-                row = {k: row[k] for k in select_cols if k in row}
+                # For nested fields, ensure we handle them properly
+                result = {}
+                for col in select_cols:
+                    # If it's a nested field (contains dots)
+                    if "." in col or "[" in col:
+                        result[col] = object_path_get(row, col)
+                    elif col in row:
+                        result[col] = row[col]
+                return result
             return row
 
         rows = [_as_row(row) for row in cursor]
@@ -265,7 +284,7 @@ class MongoDBCollection(Collection):
         if deleted_rows_count == 0 and not missing_ok:
             raise ValueError(f"No rows found for {where}")
         return deleted_rows_count
-
+
     def group_by(
         self,
         group_by_fields: List[str],
@@ -276,9 +295,9 @@ class MongoDBCollection(Collection):
     ) -> QueryResult:
         """
         Group objects in the collection by specified fields using MongoDB's aggregation pipeline.
-
+
         This implementation leverages MongoDB's native aggregation capabilities for efficient grouping.
-
+
         :param group_by_fields: List of fields to group by
         :param inlined_field: Field name to store aggregated objects
         :param agg_map: Dictionary mapping aggregation types to fields
@@ -288,7 +307,7 @@ class MongoDBCollection(Collection):
         """
         if isinstance(group_by_fields, str):
             group_by_fields = [group_by_fields]
-
+
         # Build the group key for MongoDB
         if len(group_by_fields) == 1:
             # Single field grouping
@@ -296,34 +315,29 @@
         else:
             # Multi-field grouping
             group_id = {field: f"${field}" for field in group_by_fields}
-
+
         # Start building the pipeline
         pipeline = []
-
+
         # Add match stage if where clause is provided
         if where:
             pipeline.append({"$match": where})
-
+
         # Add the group stage
-        group_stage = {
-            "$group": {
-                "_id": group_id,
-                "objects": {"$push": "$$ROOT"}
-            }
-        }
+        group_stage = {"$group": {"_id": group_id, "objects": {"$push": "$$ROOT"}}}
         pipeline.append(group_stage)
-
+
         # Execute the aggregation
         logger.debug(f"MongoDB group_by pipeline: {pipeline}")
         aggregation_results = list(self.mongo_collection.aggregate(pipeline))
-
+
         # Transform the results to match the expected format
         results = []
         for result in aggregation_results:
             # Skip null groups if needed
             if result["_id"] is None and kwargs.get("skip_nulls", False):
                 continue
-
+
             # Create the group object
             if isinstance(result["_id"], dict):
                 # Multi-field grouping
@@ -331,15 +345,15 @@
             else:
                 # Single field grouping
                 group_obj = {group_by_fields[0]: result["_id"]}
-
+
             # Add the grouped objects
             objects = result["objects"]
-
+
             # Remove MongoDB _id field from each object
             for obj in objects:
                 if "_id" in obj:
                     del obj["_id"]
-
+
             # Apply any field selection or transformations based on agg_map
             if agg_map:
                 # Get first fields (fields to keep as single values)
@@ -347,7 +361,7 @@ class MongoDBCollection(Collection):
                 if first_fields:
                     # These are already in the group_obj from the _id
                     pass
-
+
                 # Get list fields (fields to aggregate as lists)
                 list_fields = agg_map.get("list", [])
                 if list_fields:
@@ -357,9 +371,9 @@
                    # If list_fields is empty but first_fields is specified,
                    # filter out first_fields from objects to avoid duplication
                    objects = [{k: v for k, v in obj.items() if k not in first_fields} for obj in objects]
-
+
             # Add the objects to the group
             group_obj[inlined_field] = objects
             results.append(group_obj)
-
+
         return QueryResult(num_rows=len(results), rows=results)
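The MongoDB `group_by` above delegates the grouping to a two-stage aggregation pipeline and then post-processes the groups in Python. The pipeline shape, sketched with illustrative field names:

    # Optional $match, then a $group that pushes each whole document into "objects".
    # skip_nulls handling and agg_map filtering happen in Python afterwards.
    where = {"status": "active"}
    group_by_fields = ["country", "city"]
    group_id = {field: f"${field}" for field in group_by_fields}

    pipeline = []
    if where:
        pipeline.append({"$match": where})
    pipeline.append({"$group": {"_id": group_id, "objects": {"$push": "$$ROOT"}}})
    # results = list(mongo_collection.aggregate(pipeline))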
src/linkml_store/api/stores/neo4j/neo4j_database.py
@@ -27,7 +27,7 @@ class Neo4jDatabase(Database):
         if handle is None:
             handle = "bolt://localhost:7687/neo4j"
         if handle.startswith("neo4j:"):
-            handle = handle.replace("neo4j:", "bolt:")
+            handle = handle.replace("neo4j:", "bolt:", 1)
         super().__init__(handle=handle, **kwargs)
 
     @property
src/linkml_store/api/stores/solr/solr_collection.py (new file)
@@ -0,0 +1,222 @@
+# solr_collection.py
+
+import logging
+from copy import copy
+from typing import Any, Dict, List, Optional, Union, Tuple
+
+import requests
+
+from linkml_store.api import Collection
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT
+from linkml_store.api.queries import Query, QueryResult
+
+logger = logging.getLogger(__name__)
+
+
+class SolrCollection(Collection):
+
+    @property
+    def _collection_base(self) -> str:
+        if self.parent.use_cores:
+            base_url = f"{self.parent.base_url}/{self.alias}"
+        else:
+            base_url = self.parent.base_url
+        return base_url
+
+    def search(
+        self,
+        query: str,
+        where: Optional[Any] = None,
+        index_name: Optional[str] = None,
+        limit: Optional[int] = None,
+        **kwargs,
+    ) -> QueryResult:
+        if index_name is None:
+            index_name = "edismax"
+        qfs = self.parent.metadata.searchable_slots
+        if not qfs:
+            raise ValueError("No searchable slots configured for Solr collection")
+        solr_query = self._build_solr_query(where, search_term=query, extra={"defType": index_name, "qf": qfs})
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")
+
+        response = requests.get(f"{self._collection_base}/select", params=solr_query)
+        response.raise_for_status()
+
+        data = response.json()
+        num_rows = data["response"]["numFound"]
+        rows = data["response"]["docs"]
+        ranked_rows = [(1.0, row) for row in rows]
+        return QueryResult(query=where, search_term=query, num_rows=num_rows, rows=rows, ranked_rows=ranked_rows)
+
+    def query(self, query: Query, **kwargs) -> QueryResult:
+        solr_query = self._build_solr_query(query)
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")
+
+        response = requests.get(f"{self._collection_base}/select", params=solr_query)
+        response.raise_for_status()
+
+        data = response.json()
+        logger.debug(f"Response: {data}")
+        num_rows = data["response"]["numFound"]
+        rows = data["response"]["docs"]
+
+        return QueryResult(query=query, num_rows=num_rows, rows=rows)
+
+    def query_facets(
+        self,
+        where: Optional[Dict] = None,
+        facet_columns: List[Union[str, Tuple[str, ...]]] = None,
+        facet_limit=DEFAULT_FACET_LIMIT,
+        facet_min_count: int = 1,
+        **kwargs,
+    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
+        """
+        Query facet counts for fields or field combinations.
+
+        :param where: Filter conditions
+        :param facet_columns: List of fields to facet on. Elements can be:
+            - Simple strings for single field facets
+            - Tuples of strings for field combinations (pivot facets)
+        :param facet_limit: Maximum number of facet values to return
+        :param facet_min_count: Minimum count for facet values to be included
+        :return: Dictionary mapping fields or field tuples to lists of (value, count) tuples
+        """
+        solr_query = self._build_solr_query(where)
+
+        # Separate single fields and tuple fields
+        single_fields = []
+        tuple_fields = []
+
+        if facet_columns:
+            for field in facet_columns:
+                if isinstance(field, str):
+                    single_fields.append(field)
+                elif isinstance(field, tuple):
+                    tuple_fields.append(field)
+
+        # Process regular facets
+        results = {}
+        if single_fields:
+            solr_query["facet"] = "true"
+            solr_query["facet.field"] = single_fields
+            solr_query["facet.limit"] = facet_limit
+            solr_query["facet.mincount"] = facet_min_count
+
+            logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")
+            response = requests.get(f"{self._collection_base}/select", params=solr_query)
+            response.raise_for_status()
+
+            data = response.json()
+            facet_counts = data["facet_counts"]["facet_fields"]
+
+            for facet_field, counts in facet_counts.items():
+                results[facet_field] = list(zip(counts[::2], counts[1::2]))
+
+        # Process pivot facets for tuple fields
+        if tuple_fields:
+            # TODO: Add a warning if Solr < 4.0, when this was introduced
+            for field_tuple in tuple_fields:
+                # Create a query for this specific field tuple
+                pivot_query = self._build_solr_query(where)
+                pivot_query["facet"] = "true"
+
+                # Create pivot facet
+                field_str = ','.join(field_tuple)
+                pivot_query["facet.pivot"] = field_str
+                pivot_query["facet.pivot.mincount"] = facet_min_count
+                pivot_query["facet.limit"] = facet_limit
+
+                logger.info(f"Querying Solr collection {self.alias} for pivot facets with query: {pivot_query}")
+                response = requests.get(f"{self._collection_base}/select", params=pivot_query)
+                response.raise_for_status()
+
+                data = response.json()
+                pivot_facets = data.get("facet_counts", {}).get("facet_pivot", {})
+
+                # Process pivot facets into the same format as MongoDB results
+                field_str = ','.join(field_tuple)
+                pivot_data = pivot_facets.get(field_str, [])
+
+                # Build a list of tuples (field values, count)
+                pivot_results = []
+                self._process_pivot_facets(pivot_data, [], pivot_results, field_tuple)
+
+                results[field_tuple] = pivot_results
+
+        return results
+
+    def _process_pivot_facets(self, pivot_data, current_values, results, field_tuple):
+        """
+        Recursively process pivot facet results to extract combinations of field values.
+
+        :param pivot_data: The pivot facet data from Solr
+        :param current_values: The current path of values in the recursion
+        :param results: The result list to populate
+        :param field_tuple: The original field tuple for reference
+        """
+        for item in pivot_data:
+            # Add the current field value
+            value = item.get("value")
+            count = item.get("count", 0)
+
+            # Update the current path with this value
+            values = current_values + [value]
+
+            # If we have all the fields from the tuple, add a result
+            if len(values) == len(field_tuple):
+                # Create a tuple of values corresponding to the field tuple
+                results.append((tuple(values), count))
+
+            # Process child pivot fields recursively
+            pivot = item.get("pivot", [])
+            if pivot and len(values) < len(field_tuple):
+                self._process_pivot_facets(pivot, values, results, field_tuple)
+
+    def _build_solr_query(
+        self, query: Union[Query, Dict], search_term="*:*", extra: Optional[Dict] = None
+    ) -> Dict[str, Any]:
+        solr_query = {}
+        if query is None:
+            query = {}
+
+        if isinstance(query, Query):
+            where = query.where_clause
+            solr_query["fq"] = self._build_solr_where_clause(where)
+
+            if query.select_cols:
+                solr_query["fl"] = ",".join(query.select_cols)
+
+            if query.limit:
+                solr_query["rows"] = query.limit
+
+            if query.offset:
+                solr_query["start"] = query.offset
+
+        elif isinstance(query, dict):
+            solr_query["fq"] = self._build_solr_where_clause(query)
+
+        solr_query["wt"] = "json"
+        if "q" not in solr_query:
+            solr_query["q"] = search_term
+        if extra:
+            solr_query.update(extra)
+        logger.info(f"Built Solr query: {solr_query}")
+        return solr_query
+
+    def _build_solr_where_clause(self, where_clause: Dict) -> str:
+        if where_clause is None:
+            where_clause = {}
+        conditions = []
+        if self.parent.metadata.collection_type_slot:
+            where_clause = copy(where_clause)
+            where_clause[self.parent.metadata.collection_type_slot] = self.alias
+        for field, value in where_clause.items():
+            if not isinstance(value, (list, tuple)):
+                value = [value]
+            value = [f'"{v}"' if isinstance(v, str) else str(v) for v in value]
+            if len(value) > 1:
+                conditions.append(f"{field}:({' '.join(value)})")
+            else:
+                conditions.append(f"{field}:{value[0]}")
+
+        return " AND ".join(conditions)
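A hedged usage sketch for the new `SolrCollection.query_facets`, matching its docstring: plain strings request field facets, tuples request pivot facets. The collection instance and field names here are illustrative only:

    # Given an already-configured SolrCollection instance `collection`:
    facets = collection.query_facets(
        where={"category": "gene"},
        facet_columns=["taxon", ("taxon", "chromosome")],  # str -> facet.field, tuple -> facet.pivot
        facet_limit=20,
        facet_min_count=2,
    )
    # facets["taxon"]                 -> [("Homo sapiens", 123), ("Mus musculus", 87), ...]
    # facets[("taxon", "chromosome")] -> [(("Homo sapiens", "1"), 17), ...]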
src/linkml_store/cli.py
@@ -3,8 +3,7 @@ import sys
 import warnings
 from collections import defaultdict
 from pathlib import Path
-from tokenize import group
-from typing import Optional, Tuple, Any
+from typing import Any, Optional, Tuple
 
 import click
 import yaml
src/linkml_store/index/implementations/llm_indexer.py
@@ -3,7 +3,6 @@ from pathlib import Path
 from typing import TYPE_CHECKING, List, Optional
 
 import numpy as np
-import openai
 
 from linkml_store.api.config import CollectionConfig
 from linkml_store.index.indexer import INDEX_ITEM, Indexer
src/linkml_store/index/indexer.py
@@ -3,9 +3,10 @@ from enum import Enum
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
 import numpy as np
-from linkml_store.utils.vector_utils import pairwise_cosine_similarity, mmr_diversified_search
 from pydantic import BaseModel
 
+from linkml_store.utils.vector_utils import mmr_diversified_search, pairwise_cosine_similarity
+
 INDEX_ITEM = np.ndarray
 
 logger = logging.getLogger(__name__)
src/linkml_store/inference/implementations/llm_inference_engine.py
@@ -1,18 +1,16 @@
-import json
 import logging
 from dataclasses import dataclass
 from pathlib import Path
 from typing import ClassVar, List, Optional, TextIO, Union
 
 import yaml
-from linkml_store.utils.llm_utils import parse_yaml_payload
 from llm import get_key
 from pydantic import BaseModel
 
-from linkml_store.api.collection import OBJECT, Collection
+from linkml_store.api.collection import OBJECT
 from linkml_store.inference.inference_config import Inference, InferenceConfig, LLMConfig
 from linkml_store.inference.inference_engine import InferenceEngine, ModelSerialization
-from linkml_store.utils.object_utils import select_nested
+from linkml_store.utils.llm_utils import parse_yaml_payload
 
 logger = logging.getLogger(__name__)