linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,555 @@
1
+ """Dremio collection implementation.
2
+
3
+ This module provides the Collection implementation for Dremio,
4
+ supporting CRUD operations and queries via Arrow Flight SQL.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from typing import Any, Dict, List, Optional, Tuple, Union
10
+
11
+ from linkml_runtime.linkml_model import ClassDefinition
12
+
13
+ from linkml_store.api import Collection
14
+ from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
15
+ from linkml_store.api.queries import Query, QueryResult
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class DremioCollection(Collection):
    """Collection backed by a Dremio table, accessed over Arrow Flight SQL.

    Queries are translated to Dremio SQL and executed through the parent
    database handle. Whether write operations succeed depends on the
    underlying Dremio data source configuration (e.g. table formats such
    as Iceberg or Delta Lake accept DML; plain file sources may not).
    """

    # Cached flag: set to True once the backing table has been confirmed to exist.
    _table_exists_checked: bool = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
32
+
33
+ def _get_table_path(self) -> str:
34
+ """Get the full qualified table path.
35
+
36
+ Returns:
37
+ Full table path for SQL queries.
38
+ """
39
+ return self.parent._get_table_path(self.alias)
40
+
41
+ def _build_select_sql(
42
+ self,
43
+ select_cols: Optional[List[str]] = None,
44
+ where_clause: Optional[Union[str, Dict[str, Any]]] = None,
45
+ sort_by: Optional[List[str]] = None,
46
+ limit: Optional[int] = None,
47
+ offset: Optional[int] = None,
48
+ ) -> str:
49
+ """Build a SELECT SQL statement.
50
+
51
+ Args:
52
+ select_cols: Columns to select (None for all).
53
+ where_clause: WHERE conditions.
54
+ sort_by: ORDER BY columns.
55
+ limit: Maximum rows to return.
56
+ offset: Number of rows to skip.
57
+
58
+ Returns:
59
+ SQL SELECT statement.
60
+ """
61
+ table_path = self._get_table_path()
62
+
63
+ # Build SELECT clause
64
+ if select_cols:
65
+ cols = ", ".join(f'"{c}"' for c in select_cols)
66
+ else:
67
+ cols = "*"
68
+
69
+ sql = f"SELECT {cols} FROM {table_path}"
70
+
71
+ # Build WHERE clause
72
+ if where_clause:
73
+ conditions = self._build_where_conditions(where_clause)
74
+ if conditions:
75
+ sql += f" WHERE {conditions}"
76
+
77
+ # Build ORDER BY clause
78
+ if sort_by:
79
+ order_cols = ", ".join(f'"{c}"' for c in sort_by)
80
+ sql += f" ORDER BY {order_cols}"
81
+
82
+ # Build LIMIT/OFFSET
83
+ if limit is not None and limit >= 0:
84
+ sql += f" LIMIT {limit}"
85
+ if offset is not None and offset > 0:
86
+ sql += f" OFFSET {offset}"
87
+
88
+ return sql
89
+
90
+ def _build_where_conditions(self, where_clause: Union[str, Dict[str, Any]]) -> str:
91
+ """Build WHERE clause conditions from a dict or string.
92
+
93
+ Args:
94
+ where_clause: WHERE conditions as dict or string.
95
+
96
+ Returns:
97
+ SQL WHERE clause (without WHERE keyword).
98
+ """
99
+ if isinstance(where_clause, str):
100
+ return where_clause
101
+
102
+ if not isinstance(where_clause, dict):
103
+ return ""
104
+
105
+ conditions = []
106
+ for key, value in where_clause.items():
107
+ condition = self._build_single_condition(key, value)
108
+ if condition:
109
+ conditions.append(condition)
110
+
111
+ return " AND ".join(conditions)
112
+
113
+ def _build_single_condition(self, key: str, value: Any) -> str:
114
+ """Build a single WHERE condition.
115
+
116
+ Supports MongoDB-style operators like $gt, $gte, $lt, $lte, $in, $ne.
117
+
118
+ Args:
119
+ key: Column name.
120
+ value: Value or operator dict.
121
+
122
+ Returns:
123
+ SQL condition string.
124
+ """
125
+ col = f'"{key}"'
126
+
127
+ if value is None:
128
+ return f"{col} IS NULL"
129
+
130
+ if isinstance(value, dict):
131
+ # Handle operators
132
+ sub_conditions = []
133
+ for op, val in value.items():
134
+ if op == "$gt":
135
+ sub_conditions.append(f"{col} > {self._sql_value(val)}")
136
+ elif op == "$gte":
137
+ sub_conditions.append(f"{col} >= {self._sql_value(val)}")
138
+ elif op == "$lt":
139
+ sub_conditions.append(f"{col} < {self._sql_value(val)}")
140
+ elif op == "$lte":
141
+ sub_conditions.append(f"{col} <= {self._sql_value(val)}")
142
+ elif op == "$ne":
143
+ if val is None:
144
+ sub_conditions.append(f"{col} IS NOT NULL")
145
+ else:
146
+ sub_conditions.append(f"{col} != {self._sql_value(val)}")
147
+ elif op == "$in":
148
+ if isinstance(val, (list, tuple)):
149
+ vals = ", ".join(self._sql_value(v) for v in val)
150
+ sub_conditions.append(f"{col} IN ({vals})")
151
+ elif op == "$nin":
152
+ if isinstance(val, (list, tuple)):
153
+ vals = ", ".join(self._sql_value(v) for v in val)
154
+ sub_conditions.append(f"{col} NOT IN ({vals})")
155
+ elif op == "$like":
156
+ sub_conditions.append(f"{col} LIKE {self._sql_value(val)}")
157
+ elif op == "$regex":
158
+ # Dremio uses REGEXP_LIKE
159
+ sub_conditions.append(f"REGEXP_LIKE({col}, {self._sql_value(val)})")
160
+ else:
161
+ logger.warning(f"Unknown operator: {op}")
162
+
163
+ return " AND ".join(sub_conditions) if sub_conditions else ""
164
+ else:
165
+ return f"{col} = {self._sql_value(value)}"
166
+
167
+ def _sql_value(self, value: Any) -> str:
168
+ """Convert a Python value to SQL literal.
169
+
170
+ Args:
171
+ value: Python value.
172
+
173
+ Returns:
174
+ SQL literal string.
175
+ """
176
+ if value is None:
177
+ return "NULL"
178
+ elif isinstance(value, bool):
179
+ return "TRUE" if value else "FALSE"
180
+ elif isinstance(value, (int, float)):
181
+ return str(value)
182
+ elif isinstance(value, str):
183
+ # Escape single quotes
184
+ escaped = value.replace("'", "''")
185
+ return f"'{escaped}'"
186
+ elif isinstance(value, (list, dict)):
187
+ # Convert to JSON string
188
+ escaped = json.dumps(value).replace("'", "''")
189
+ return f"'{escaped}'"
190
+ else:
191
+ escaped = str(value).replace("'", "''")
192
+ return f"'{escaped}'"
193
+
194
+ def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
195
+ """Insert objects into the collection.
196
+
197
+ Note: Write operations in Dremio depend on the underlying data source.
198
+ Some sources (like Iceberg, Delta Lake) support writes, while others
199
+ (like file-based sources) may not.
200
+
201
+ Args:
202
+ objs: Object(s) to insert.
203
+ **kwargs: Additional arguments.
204
+ """
205
+ if not isinstance(objs, list):
206
+ objs = [objs]
207
+
208
+ if not objs:
209
+ return
210
+
211
+ logger.debug(f"Inserting {len(objs)} objects into {self.alias}")
212
+
213
+ # Get or induce class definition
214
+ cd = self.class_definition()
215
+ if not cd:
216
+ logger.debug(f"No class definition for {self.alias}; inducing from objects")
217
+ cd = self.induce_class_definition_from_objects(objs)
218
+
219
+ table_path = self._get_table_path()
220
+
221
+ # Get column names from class definition or first object
222
+ if cd and cd.attributes:
223
+ columns = list(cd.attributes.keys())
224
+ else:
225
+ columns = list(objs[0].keys())
226
+
227
+ # Build INSERT statement
228
+ col_list = ", ".join(f'"{c}"' for c in columns)
229
+
230
+ # Insert objects in batches
231
+ batch_size = 100
232
+ for i in range(0, len(objs), batch_size):
233
+ batch = objs[i : i + batch_size]
234
+
235
+ values_list = []
236
+ for obj in batch:
237
+ values = []
238
+ for col in columns:
239
+ val = obj.get(col)
240
+ values.append(self._sql_value(val))
241
+ values_list.append(f"({', '.join(values)})")
242
+
243
+ values_sql = ", ".join(values_list)
244
+ sql = f"INSERT INTO {table_path} ({col_list}) VALUES {values_sql}"
245
+
246
+ try:
247
+ self.parent._execute_update(sql)
248
+ except Exception as e:
249
+ logger.error(f"Insert failed: {e}")
250
+ raise
251
+
252
+ self._post_insert_hook(objs)
253
+
254
+ def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
255
+ """Delete specific objects from the collection.
256
+
257
+ Args:
258
+ objs: Object(s) to delete.
259
+ **kwargs: Additional arguments.
260
+
261
+ Returns:
262
+ Number of deleted rows, or None if unknown.
263
+ """
264
+ if not isinstance(objs, list):
265
+ objs = [objs]
266
+
267
+ if not objs:
268
+ return 0
269
+
270
+ table_path = self._get_table_path()
271
+ total_deleted = 0
272
+
273
+ for obj in objs:
274
+ # Build WHERE clause from object fields
275
+ conditions = []
276
+ for key, value in obj.items():
277
+ if key.startswith("_"):
278
+ continue
279
+ condition = self._build_single_condition(key, value)
280
+ if condition:
281
+ conditions.append(condition)
282
+
283
+ if not conditions:
284
+ continue
285
+
286
+ sql = f"DELETE FROM {table_path} WHERE {' AND '.join(conditions)}"
287
+
288
+ try:
289
+ result = self.parent._execute_update(sql)
290
+ if result > 0:
291
+ total_deleted += result
292
+ except Exception as e:
293
+ logger.error(f"Delete failed: {e}")
294
+ raise
295
+
296
+ self._post_delete_hook()
297
+ return total_deleted if total_deleted > 0 else None
298
+
299
+ def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
300
+ """Delete objects matching a condition.
301
+
302
+ Args:
303
+ where: WHERE conditions (empty dict means delete all).
304
+ missing_ok: If True, don't raise error if no rows deleted.
305
+ **kwargs: Additional arguments.
306
+
307
+ Returns:
308
+ Number of deleted rows, or None if unknown.
309
+ """
310
+ if where is None:
311
+ where = {}
312
+
313
+ table_path = self._get_table_path()
314
+
315
+ if where:
316
+ conditions = self._build_where_conditions(where)
317
+ sql = f"DELETE FROM {table_path} WHERE {conditions}"
318
+ else:
319
+ # Delete all
320
+ sql = f"DELETE FROM {table_path}"
321
+
322
+ try:
323
+ result = self.parent._execute_update(sql)
324
+ if result == 0 and not missing_ok:
325
+ raise ValueError(f"No rows found for {where}")
326
+ self._post_delete_hook()
327
+ return result if result >= 0 else None
328
+ except Exception as e:
329
+ if "does not exist" in str(e).lower():
330
+ if missing_ok:
331
+ return 0
332
+ raise
333
+
334
+ def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
335
+ """Update objects in the collection.
336
+
337
+ Note: Requires a primary key field to identify rows.
338
+
339
+ Args:
340
+ objs: Object(s) to update.
341
+ **kwargs: Additional arguments.
342
+ """
343
+ if not isinstance(objs, list):
344
+ objs = [objs]
345
+
346
+ if not objs:
347
+ return
348
+
349
+ table_path = self._get_table_path()
350
+ pk = self.identifier_attribute_name
351
+
352
+ if not pk:
353
+ raise ValueError("Cannot update without an identifier attribute")
354
+
355
+ for obj in objs:
356
+ if pk not in obj:
357
+ raise ValueError(f"Object missing primary key field: {pk}")
358
+
359
+ pk_value = obj[pk]
360
+
361
+ # Build SET clause (exclude primary key)
362
+ set_parts = []
363
+ for key, value in obj.items():
364
+ if key == pk or key.startswith("_"):
365
+ continue
366
+ set_parts.append(f'"{key}" = {self._sql_value(value)}')
367
+
368
+ if not set_parts:
369
+ continue
370
+
371
+ set_clause = ", ".join(set_parts)
372
+ sql = f'UPDATE {table_path} SET {set_clause} WHERE "{pk}" = {self._sql_value(pk_value)}'
373
+
374
+ try:
375
+ self.parent._execute_update(sql)
376
+ except Exception as e:
377
+ logger.error(f"Update failed: {e}")
378
+ raise
379
+
380
+ def query(self, query: Query, **kwargs) -> QueryResult:
381
+ """Execute a query against the collection.
382
+
383
+ Args:
384
+ query: Query specification.
385
+ **kwargs: Additional arguments.
386
+
387
+ Returns:
388
+ QueryResult with matching rows.
389
+ """
390
+ self._pre_query_hook(query)
391
+
392
+ # Handle limit=-1 as "no limit"
393
+ limit = query.limit
394
+ if limit == -1:
395
+ limit = None
396
+
397
+ # Build and execute SQL
398
+ sql = self._build_select_sql(
399
+ select_cols=query.select_cols,
400
+ where_clause=query.where_clause,
401
+ sort_by=query.sort_by,
402
+ limit=limit,
403
+ offset=query.offset,
404
+ )
405
+
406
+ try:
407
+ result_table = self.parent._execute_query(sql)
408
+
409
+ # Convert Arrow table to list of dicts
410
+ rows = result_table.to_pydict()
411
+ num_result_rows = result_table.num_rows
412
+
413
+ # Restructure from column-oriented to row-oriented
414
+ if rows and num_result_rows > 0:
415
+ row_list = []
416
+ columns = list(rows.keys())
417
+ for i in range(num_result_rows):
418
+ row = {col: rows[col][i] for col in columns}
419
+ row_list.append(row)
420
+ else:
421
+ row_list = []
422
+
423
+ # Get total count (for pagination)
424
+ if query.offset or (limit is not None and len(row_list) == limit):
425
+ # Need to get actual count
426
+ count_sql = self._build_count_sql(query.where_clause)
427
+ try:
428
+ count_result = self.parent._execute_query(count_sql)
429
+ total_rows = count_result.column(0)[0].as_py()
430
+ except Exception:
431
+ total_rows = len(row_list)
432
+ else:
433
+ total_rows = len(row_list)
434
+
435
+ qr = QueryResult(query=query, num_rows=total_rows, rows=row_list, offset=query.offset or 0)
436
+
437
+ # Handle facets if requested
438
+ if query.include_facet_counts and query.facet_slots:
439
+ qr.facet_counts = self.query_facets(where=query.where_clause, facet_columns=query.facet_slots)
440
+
441
+ return qr
442
+
443
+ except Exception as e:
444
+ logger.error(f"Query failed: {e}")
445
+ # Return empty result on error
446
+ return QueryResult(query=query, num_rows=0, rows=[])
447
+
448
+ def _build_count_sql(self, where_clause: Optional[Union[str, Dict[str, Any]]] = None) -> str:
449
+ """Build a COUNT SQL statement.
450
+
451
+ Args:
452
+ where_clause: WHERE conditions.
453
+
454
+ Returns:
455
+ SQL COUNT statement.
456
+ """
457
+ table_path = self._get_table_path()
458
+ sql = f"SELECT COUNT(*) FROM {table_path}"
459
+
460
+ if where_clause:
461
+ conditions = self._build_where_conditions(where_clause)
462
+ if conditions:
463
+ sql += f" WHERE {conditions}"
464
+
465
+ return sql
466
+
467
+ def query_facets(
468
+ self,
469
+ where: Optional[Dict] = None,
470
+ facet_columns: Optional[List[str]] = None,
471
+ facet_limit: int = DEFAULT_FACET_LIMIT,
472
+ **kwargs,
473
+ ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
474
+ """Get facet counts for columns.
475
+
476
+ Args:
477
+ where: Filter conditions.
478
+ facet_columns: Columns to get facets for.
479
+ facet_limit: Maximum facet values per column.
480
+ **kwargs: Additional arguments.
481
+
482
+ Returns:
483
+ Dictionary mapping column names to list of (value, count) tuples.
484
+ """
485
+ if facet_limit is None:
486
+ facet_limit = DEFAULT_FACET_LIMIT
487
+
488
+ results = {}
489
+ cd = self.class_definition()
490
+ table_path = self._get_table_path()
491
+
492
+ if not facet_columns:
493
+ if cd and cd.attributes:
494
+ facet_columns = list(cd.attributes.keys())
495
+ else:
496
+ return results
497
+
498
+ for col in facet_columns:
499
+ if isinstance(col, tuple):
500
+ # Multi-column facet
501
+ col_list = ", ".join(f'"{c}"' for c in col)
502
+ col_name = col
503
+ else:
504
+ col_list = f'"{col}"'
505
+ col_name = col
506
+
507
+ # Build facet query
508
+ sql = f"SELECT {col_list}, COUNT(*) as cnt FROM {table_path}"
509
+
510
+ if where:
511
+ conditions = self._build_where_conditions(where)
512
+ if conditions:
513
+ sql += f" WHERE {conditions}"
514
+
515
+ sql += f" GROUP BY {col_list} ORDER BY cnt DESC"
516
+
517
+ if facet_limit > 0:
518
+ sql += f" LIMIT {facet_limit}"
519
+
520
+ try:
521
+ result = self.parent._execute_query(sql)
522
+
523
+ facets = []
524
+ for i in range(result.num_rows):
525
+ if isinstance(col, tuple):
526
+ value = tuple(result.column(c)[i].as_py() for c in col)
527
+ else:
528
+ value = result.column(col)[i].as_py()
529
+ count = result.column("cnt")[i].as_py()
530
+ facets.append((value, count))
531
+
532
+ results[col_name] = facets
533
+
534
+ except Exception as e:
535
+ logger.warning(f"Facet query failed for {col}: {e}")
536
+ results[col_name] = []
537
+
538
+ return results
539
+
540
+ def _check_if_initialized(self) -> bool:
541
+ """Check if the collection's table exists.
542
+
543
+ Returns:
544
+ True if table exists.
545
+ """
546
+ if self._table_exists_checked:
547
+ return True
548
+
549
+ try:
550
+ result = self.parent._table_exists(self.alias)
551
+ if result:
552
+ self._table_exists_checked = True
553
+ return result
554
+ except Exception:
555
+ return False