linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
linkml_store/api/stores/dremio/mappings.py
@@ -0,0 +1,105 @@
+ """Type mappings between LinkML types and Dremio/Arrow types."""
+
+ import pyarrow as pa
+
+ # Mapping from LinkML types to PyArrow types
+ LINKML_TO_ARROW = {
+     "string": pa.string(),
+     "integer": pa.int64(),
+     "float": pa.float64(),
+     "boolean": pa.bool_(),
+     "date": pa.date32(),
+     "datetime": pa.timestamp("us"),
+     "decimal": pa.decimal128(38, 10),
+     "Any": pa.string(),  # Fallback to string for Any type
+ }
+
+ # Mapping from Arrow types to LinkML types
+ ARROW_TO_LINKML = {
+     pa.string(): "string",
+     pa.utf8(): "string",
+     pa.large_string(): "string",
+     pa.int8(): "integer",
+     pa.int16(): "integer",
+     pa.int32(): "integer",
+     pa.int64(): "integer",
+     pa.uint8(): "integer",
+     pa.uint16(): "integer",
+     pa.uint32(): "integer",
+     pa.uint64(): "integer",
+     pa.float16(): "float",
+     pa.float32(): "float",
+     pa.float64(): "float",
+     pa.bool_(): "boolean",
+     pa.date32(): "date",
+     pa.date64(): "date",
+ }
+
+ # Mapping from Dremio SQL type names to LinkML types
+ DREMIO_SQL_TO_LINKML = {
+     "VARCHAR": "string",
+     "CHAR": "string",
+     "BIGINT": "integer",
+     "INTEGER": "integer",
+     "INT": "integer",
+     "SMALLINT": "integer",
+     "TINYINT": "integer",
+     "BOOLEAN": "boolean",
+     "DOUBLE": "float",
+     "FLOAT": "float",
+     "DECIMAL": "float",
+     "DATE": "date",
+     "TIMESTAMP": "datetime",
+     "TIME": "string",
+     "BINARY": "string",
+     "VARBINARY": "string",
+     "LIST": "string",  # Complex types mapped to string
+     "STRUCT": "string",
+     "MAP": "string",
+ }
+
+
+ def get_arrow_type(linkml_type: str) -> pa.DataType:
+     """Convert a LinkML type to a PyArrow type.
+
+     Args:
+         linkml_type: The LinkML type name.
+
+     Returns:
+         The corresponding PyArrow data type.
+     """
+     return LINKML_TO_ARROW.get(linkml_type, pa.string())
+
+
+ def get_linkml_type_from_arrow(arrow_type: pa.DataType) -> str:
+     """Convert a PyArrow type to a LinkML type.
+
+     Args:
+         arrow_type: The PyArrow data type.
+
+     Returns:
+         The corresponding LinkML type name.
+     """
+     # Handle parameterized types by checking base type
+     if pa.types.is_string(arrow_type) or pa.types.is_large_string(arrow_type):
+         return "string"
+     if pa.types.is_integer(arrow_type):
+         return "integer"
+     if pa.types.is_floating(arrow_type):
+         return "float"
+     if pa.types.is_boolean(arrow_type):
+         return "boolean"
+     if pa.types.is_date(arrow_type):
+         return "date"
+     if pa.types.is_timestamp(arrow_type):
+         return "datetime"
+     if pa.types.is_decimal(arrow_type):
+         return "float"
+     if pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type):
+         return "string"  # Complex types as string
+     if pa.types.is_struct(arrow_type):
+         return "string"
+     if pa.types.is_map(arrow_type):
+         return "string"
+
+     return "string"  # Default fallback
linkml_store/api/stores/dremio_rest/__init__.py
@@ -0,0 +1,11 @@
+ """Dremio REST API adapter for linkml-store.
+
+ This module provides a Dremio adapter that uses the REST API v3 for
+ connectivity to Dremio data lakehouse instances that don't expose
+ the Arrow Flight SQL port (e.g., behind Cloudflare or firewalls).
+ """
+
+ from linkml_store.api.stores.dremio_rest.dremio_rest_collection import DremioRestCollection
+ from linkml_store.api.stores.dremio_rest.dremio_rest_database import DremioRestDatabase
+
+ __all__ = ["DremioRestDatabase", "DremioRestCollection"]
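
For orientation, a usage sketch of attaching a Dremio REST database through the standard linkml-store Client. The handle scheme and connection details below are assumptions for illustration; the scheme the adapter actually registers is defined in dremio_rest_database.py, which this excerpt does not show:

from linkml_store import Client

client = Client()
# "dremio-rest://..." is a hypothetical handle; consult dremio_rest_database.py
# for the scheme and authentication options it actually supports.
db = client.attach_database("dremio-rest://dremio.example.org:9047", alias="lakehouse")
collection = db.get_collection("my_space.my_table")
result = collection.find({"status": "active"}, limit=10)
print(result.num_rows, result.rows[:2])
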
linkml_store/api/stores/dremio_rest/dremio_rest_collection.py
@@ -0,0 +1,502 @@
+ """Dremio REST API collection implementation.
+
+ This module provides the Collection implementation for Dremio REST API,
+ supporting query operations via the REST API v3.
+ """
+
+ import json
+ import logging
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ from linkml_store.api import Collection
+ from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+ from linkml_store.api.queries import Query, QueryResult
+
+ logger = logging.getLogger(__name__)
+
+
+ class DremioRestCollection(Collection):
+     """Collection implementation for Dremio data lakehouse via REST API.
+
+     This collection connects to Dremio tables via the REST API v3
+     and provides query capabilities. Write operations may be limited
+     depending on the underlying data source configuration in Dremio.
+     """
+
+     _table_exists_checked: bool = False
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     def _get_table_path(self) -> str:
+         """Get the fully qualified table path.
+
+         Returns:
+             Full table path for SQL queries.
+         """
+         return self.parent._get_table_path(self.alias)
+
+     def _build_select_sql(
+         self,
+         select_cols: Optional[List[str]] = None,
+         where_clause: Optional[Union[str, Dict[str, Any]]] = None,
+         sort_by: Optional[List[str]] = None,
+         limit: Optional[int] = None,
+         offset: Optional[int] = None,
+     ) -> str:
+         """Build a SELECT SQL statement.
+
+         Args:
+             select_cols: Columns to select (None for all).
+             where_clause: WHERE conditions.
+             sort_by: ORDER BY columns.
+             limit: Maximum rows to return.
+             offset: Number of rows to skip.
+
+         Returns:
+             SQL SELECT statement.
+         """
+         table_path = self._get_table_path()
+
+         # Build SELECT clause
+         if select_cols:
+             cols = ", ".join(f'"{c}"' for c in select_cols)
+         else:
+             cols = "*"
+
+         sql = f"SELECT {cols} FROM {table_path}"
+
+         # Build WHERE clause
+         if where_clause:
+             conditions = self._build_where_conditions(where_clause)
+             if conditions:
+                 sql += f" WHERE {conditions}"
+
+         # Build ORDER BY clause
+         if sort_by:
+             order_cols = ", ".join(f'"{c}"' for c in sort_by)
+             sql += f" ORDER BY {order_cols}"
+
+         # Build LIMIT/OFFSET
+         if limit is not None and limit >= 0:
+             sql += f" LIMIT {limit}"
+         if offset is not None and offset > 0:
+             sql += f" OFFSET {offset}"
+
+         return sql
+
+     def _build_where_conditions(self, where_clause: Union[str, Dict[str, Any]]) -> str:
+         """Build WHERE clause conditions from a dict or string.
+
+         Args:
+             where_clause: WHERE conditions as dict or string.
+
+         Returns:
+             SQL WHERE clause (without WHERE keyword).
+         """
+         if isinstance(where_clause, str):
+             return where_clause
+
+         if not isinstance(where_clause, dict):
+             return ""
+
+         conditions = []
+         for key, value in where_clause.items():
+             condition = self._build_single_condition(key, value)
+             if condition:
+                 conditions.append(condition)
+
+         return " AND ".join(conditions)
+
+     def _build_single_condition(self, key: str, value: Any) -> str:
+         """Build a single WHERE condition.
+
+         Supports MongoDB-style operators like $gt, $gte, $lt, $lte, $in, $ne.
+
+         Args:
+             key: Column name.
+             value: Value or operator dict.
+
+         Returns:
+             SQL condition string.
+         """
+         col = f'"{key}"'
+
+         if value is None:
+             return f"{col} IS NULL"
+
+         if isinstance(value, dict):
+             # Handle operators
+             sub_conditions = []
+             for op, val in value.items():
+                 if op == "$gt":
+                     sub_conditions.append(f"{col} > {self._sql_value(val)}")
+                 elif op == "$gte":
+                     sub_conditions.append(f"{col} >= {self._sql_value(val)}")
+                 elif op == "$lt":
+                     sub_conditions.append(f"{col} < {self._sql_value(val)}")
+                 elif op == "$lte":
+                     sub_conditions.append(f"{col} <= {self._sql_value(val)}")
+                 elif op == "$ne":
+                     if val is None:
+                         sub_conditions.append(f"{col} IS NOT NULL")
+                     else:
+                         sub_conditions.append(f"{col} != {self._sql_value(val)}")
+                 elif op == "$in":
+                     if isinstance(val, (list, tuple)):
+                         vals = ", ".join(self._sql_value(v) for v in val)
+                         sub_conditions.append(f"{col} IN ({vals})")
+                 elif op == "$nin":
+                     if isinstance(val, (list, tuple)):
+                         vals = ", ".join(self._sql_value(v) for v in val)
+                         sub_conditions.append(f"{col} NOT IN ({vals})")
+                 elif op == "$like":
+                     sub_conditions.append(f"{col} LIKE {self._sql_value(val)}")
+                 elif op == "$ilike":
+                     sub_conditions.append(f"LOWER({col}) LIKE LOWER({self._sql_value(val)})")
+                 elif op == "$regex":
+                     # Dremio uses REGEXP_LIKE
+                     sub_conditions.append(f"REGEXP_LIKE({col}, {self._sql_value(val)})")
+                 else:
+                     logger.warning(f"Unknown operator: {op}")
+
+             return " AND ".join(sub_conditions) if sub_conditions else ""
+         else:
+             return f"{col} = {self._sql_value(value)}"
+
+     def _sql_value(self, value: Any) -> str:
+         """Convert a Python value to a SQL literal.
+
+         Args:
+             value: Python value.
+
+         Returns:
+             SQL literal string.
+         """
+         if value is None:
+             return "NULL"
+         elif isinstance(value, bool):
+             return "TRUE" if value else "FALSE"
+         elif isinstance(value, (int, float)):
+             return str(value)
+         elif isinstance(value, str):
+             # Escape single quotes
+             escaped = value.replace("'", "''")
+             return f"'{escaped}'"
+         elif isinstance(value, (list, dict)):
+             # Convert to JSON string
+             escaped = json.dumps(value).replace("'", "''")
+             return f"'{escaped}'"
+         else:
+             escaped = str(value).replace("'", "''")
+             return f"'{escaped}'"
+
+     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+         """Insert objects into the collection.
+
+         Note: Write operations in Dremio depend on the underlying data source.
+         Some sources (like Iceberg, Delta Lake) support writes, while others
+         (like file-based sources) may not.
+
+         Args:
+             objs: Object(s) to insert.
+             **kwargs: Additional arguments.
+         """
+         if not isinstance(objs, list):
+             objs = [objs]
+
+         if not objs:
+             return
+
+         logger.debug(f"Inserting {len(objs)} objects into {self.alias}")
+
+         cd = self.class_definition()
+         if not cd:
+             logger.debug(f"No class definition for {self.alias}; inducing from objects")
+             cd = self.induce_class_definition_from_objects(objs)
+
+         table_path = self._get_table_path()
+
+         if cd and cd.attributes:
+             columns = list(cd.attributes.keys())
+         else:
+             columns = list(objs[0].keys())
+
+         col_list = ", ".join(f'"{c}"' for c in columns)
+
+         batch_size = 100
+         for i in range(0, len(objs), batch_size):
+             batch = objs[i : i + batch_size]
+
+             values_list = []
+             for obj in batch:
+                 values = []
+                 for col in columns:
+                     val = obj.get(col)
+                     values.append(self._sql_value(val))
+                 values_list.append(f"({', '.join(values)})")
+
+             values_sql = ", ".join(values_list)
+             sql = f"INSERT INTO {table_path} ({col_list}) VALUES {values_sql}"
+
+             self.parent._execute_update(sql)
+
+         self._post_insert_hook(objs)
+
+     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
+         """Delete specific objects from the collection.
+
+         Args:
+             objs: Object(s) to delete.
+             **kwargs: Additional arguments.
+
+         Returns:
+             Number of deleted rows, or None if unknown.
+         """
+         if not isinstance(objs, list):
+             objs = [objs]
+
+         if not objs:
+             return 0
+
+         table_path = self._get_table_path()
+         total_deleted = 0
+
+         for obj in objs:
+             conditions = []
+             for key, value in obj.items():
+                 if key.startswith("_"):
+                     continue
+                 condition = self._build_single_condition(key, value)
+                 if condition:
+                     conditions.append(condition)
+
+             if not conditions:
+                 continue
+
+             sql = f"DELETE FROM {table_path} WHERE {' AND '.join(conditions)}"
+             result = self.parent._execute_update(sql)
+             if result > 0:
+                 total_deleted += result
+
+         self._post_delete_hook()
+         return total_deleted if total_deleted > 0 else None
+
+     def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
+         """Delete objects matching a condition.
+
+         Args:
+             where: WHERE conditions (empty dict means delete all).
+             missing_ok: If True, don't raise an error if no rows were deleted.
+             **kwargs: Additional arguments.
+
+         Returns:
+             Number of deleted rows, or None if unknown.
+         """
+         if where is None:
+             where = {}
+
+         table_path = self._get_table_path()
+
+         if where:
+             conditions = self._build_where_conditions(where)
+             sql = f"DELETE FROM {table_path} WHERE {conditions}"
+         else:
+             sql = f"DELETE FROM {table_path}"
+
+         result = self.parent._execute_update(sql)
+         if result == 0 and not missing_ok:
+             raise ValueError(f"No rows found for {where}")
+         self._post_delete_hook()
+         return result if result >= 0 else None
+
+     def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+         """Update objects in the collection.
+
+         Note: Requires a primary key field to identify rows.
+
+         Args:
+             objs: Object(s) to update.
+             **kwargs: Additional arguments.
+         """
+         if not isinstance(objs, list):
+             objs = [objs]
+
+         if not objs:
+             return
+
+         table_path = self._get_table_path()
+         pk = self.identifier_attribute_name
+
+         if not pk:
+             raise ValueError("Cannot update without an identifier attribute")
+
+         for obj in objs:
+             if pk not in obj:
+                 raise ValueError(f"Object missing primary key field: {pk}")
+
+             pk_value = obj[pk]
+
+             set_parts = []
+             for key, value in obj.items():
+                 if key == pk or key.startswith("_"):
+                     continue
+                 set_parts.append(f'"{key}" = {self._sql_value(value)}')
+
+             if not set_parts:
+                 continue
+
+             set_clause = ", ".join(set_parts)
+             sql = f'UPDATE {table_path} SET {set_clause} WHERE "{pk}" = {self._sql_value(pk_value)}'
+             self.parent._execute_update(sql)
+
+     def query(self, query: Query, **kwargs) -> QueryResult:
+         """Execute a query against the collection.
+
+         Args:
+             query: Query specification.
+             **kwargs: Additional arguments.
+
+         Returns:
+             QueryResult with matching rows.
+         """
+         self._pre_query_hook(query)
+
+         limit = query.limit
+         if limit == -1:
+             limit = None
+
+         sql = self._build_select_sql(
+             select_cols=query.select_cols,
+             where_clause=query.where_clause,
+             sort_by=query.sort_by,
+             limit=limit,
+             offset=query.offset,
+         )
+
+         df = self.parent._execute_query(sql)
+
+         # Convert DataFrame to list of dicts
+         row_list = df.to_dict("records") if not df.empty else []
+
+         # Get total count for pagination
+         if query.offset or (limit is not None and len(row_list) == limit):
+             count_sql = self._build_count_sql(query.where_clause)
+             try:
+                 count_df = self.parent._execute_query(count_sql)
+                 total_rows = int(count_df.iloc[0, 0]) if not count_df.empty else len(row_list)
+             except Exception:
+                 total_rows = len(row_list)
+         else:
+             total_rows = len(row_list)
+
+         qr = QueryResult(query=query, num_rows=total_rows, rows=row_list, offset=query.offset or 0)
+
+         if query.include_facet_counts and query.facet_slots:
+             qr.facet_counts = self.query_facets(where=query.where_clause, facet_columns=query.facet_slots)
+
+         return qr
+
+     def _build_count_sql(self, where_clause: Optional[Union[str, Dict[str, Any]]] = None) -> str:
+         """Build a COUNT SQL statement.
+
+         Args:
+             where_clause: WHERE conditions.
+
+         Returns:
+             SQL COUNT statement.
+         """
+         table_path = self._get_table_path()
+         sql = f"SELECT COUNT(*) FROM {table_path}"
+
+         if where_clause:
+             conditions = self._build_where_conditions(where_clause)
+             if conditions:
+                 sql += f" WHERE {conditions}"
+
+         return sql
+
+     def query_facets(
+         self,
+         where: Optional[Dict] = None,
+         facet_columns: Optional[List[str]] = None,
+         facet_limit: int = DEFAULT_FACET_LIMIT,
+         **kwargs,
+     ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
+         """Get facet counts for columns.
+
+         Args:
+             where: Filter conditions.
+             facet_columns: Columns to get facets for.
+             facet_limit: Maximum facet values per column.
+             **kwargs: Additional arguments.
+
+         Returns:
+             Dictionary mapping column names to lists of (value, count) tuples.
+         """
+         if facet_limit is None:
+             facet_limit = DEFAULT_FACET_LIMIT
+
+         results = {}
+         cd = self.class_definition()
+         table_path = self._get_table_path()
+
+         if not facet_columns:
+             if cd and cd.attributes:
+                 facet_columns = list(cd.attributes.keys())
+             else:
+                 return results
+
+         for col in facet_columns:
+             if isinstance(col, tuple):
+                 col_list = ", ".join(f'"{c}"' for c in col)
+                 col_name = col
+             else:
+                 col_list = f'"{col}"'
+                 col_name = col
+
+             sql = f"SELECT {col_list}, COUNT(*) as cnt FROM {table_path}"
+
+             if where:
+                 conditions = self._build_where_conditions(where)
+                 if conditions:
+                     sql += f" WHERE {conditions}"
+
+             sql += f" GROUP BY {col_list} ORDER BY cnt DESC"
+
+             if facet_limit > 0:
+                 sql += f" LIMIT {facet_limit}"
+
+             try:
+                 df = self.parent._execute_query(sql)
+
+                 facets = []
+                 for _, row in df.iterrows():
+                     if isinstance(col, tuple):
+                         value = tuple(row[c] for c in col)
+                     else:
+                         value = row[col]
+                     count = int(row["cnt"])
+                     facets.append((value, count))
+
+                 results[col_name] = facets
+
+             except Exception as e:
+                 logger.warning(f"Facet query failed for {col}: {e}")
+                 results[col_name] = []
+
+         return results
+
+     def _check_if_initialized(self) -> bool:
+         """Check if the collection's table exists.
+
+         Returns:
+             True if table exists.
+         """
+         if self._table_exists_checked:
+             return True
+
+         result = self.parent._table_exists(self.alias)
+         if result:
+             self._table_exists_checked = True
+         return result
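
The where-clause builder in this file accepts either a raw SQL string or a MongoDB-style operator dict, matching the query convention used by the other linkml-store adapters. A minimal sketch of the SQL it generates, using a mocked parent database so no live Dremio instance is needed (this assumes the base Collection constructor accepts name and parent keyword arguments; _build_select_sql is a private helper normally invoked via query()):

from unittest.mock import MagicMock

from linkml_store.api.stores.dremio_rest.dremio_rest_collection import DremioRestCollection

# Mock the parent database: _get_table_path is the only call the SQL
# builder makes on it.
parent = MagicMock()
parent._get_table_path.return_value = '"my_space"."my_table"'
coll = DremioRestCollection(name="my_table", parent=parent)

sql = coll._build_select_sql(
    select_cols=["name", "age"],
    where_clause={"age": {"$gte": 21}, "city": None},
    sort_by=["age"],
    limit=10,
    offset=20,
)
print(sql)
# -> SELECT "name", "age" FROM "my_space"."my_table" WHERE "age" >= 21 AND "city" IS NULL ORDER BY "age" LIMIT 10 OFFSET 20

Note that values are interpolated as escaped literals by _sql_value rather than bound as parameters, so conditions arrive at Dremio as plain SQL text.
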