linkml-store 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/__init__.py +7 -0
- linkml_store/api/__init__.py +8 -0
- linkml_store/api/client.py +414 -0
- linkml_store/api/collection.py +1280 -0
- linkml_store/api/config.py +187 -0
- linkml_store/api/database.py +862 -0
- linkml_store/api/queries.py +69 -0
- linkml_store/api/stores/__init__.py +0 -0
- linkml_store/api/stores/chromadb/__init__.py +7 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
- linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
- linkml_store/api/stores/dremio/__init__.py +10 -0
- linkml_store/api/stores/dremio/dremio_collection.py +555 -0
- linkml_store/api/stores/dremio/dremio_database.py +1052 -0
- linkml_store/api/stores/dremio/mappings.py +105 -0
- linkml_store/api/stores/dremio_rest/__init__.py +11 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
- linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
- linkml_store/api/stores/duckdb/mappings.py +8 -0
- linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
- linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
- linkml_store/api/stores/ibis/__init__.py +5 -0
- linkml_store/api/stores/ibis/ibis_collection.py +488 -0
- linkml_store/api/stores/ibis/ibis_database.py +328 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
- linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
- linkml_store/api/stores/neo4j/__init__.py +0 -0
- linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
- linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
- linkml_store/api/stores/solr/__init__.py +3 -0
- linkml_store/api/stores/solr/solr_collection.py +224 -0
- linkml_store/api/stores/solr/solr_database.py +83 -0
- linkml_store/api/stores/solr/solr_utils.py +0 -0
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +1147 -0
- linkml_store/constants.py +7 -0
- linkml_store/graphs/__init__.py +0 -0
- linkml_store/graphs/graph_map.py +24 -0
- linkml_store/index/__init__.py +53 -0
- linkml_store/index/implementations/__init__.py +0 -0
- linkml_store/index/implementations/llm_indexer.py +174 -0
- linkml_store/index/implementations/simple_indexer.py +43 -0
- linkml_store/index/indexer.py +211 -0
- linkml_store/inference/__init__.py +13 -0
- linkml_store/inference/evaluation.py +195 -0
- linkml_store/inference/implementations/__init__.py +0 -0
- linkml_store/inference/implementations/llm_inference_engine.py +154 -0
- linkml_store/inference/implementations/rag_inference_engine.py +276 -0
- linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
- linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
- linkml_store/inference/inference_config.py +66 -0
- linkml_store/inference/inference_engine.py +209 -0
- linkml_store/inference/inference_engine_registry.py +74 -0
- linkml_store/plotting/__init__.py +5 -0
- linkml_store/plotting/cli.py +826 -0
- linkml_store/plotting/dimensionality_reduction.py +453 -0
- linkml_store/plotting/embedding_plot.py +489 -0
- linkml_store/plotting/facet_chart.py +73 -0
- linkml_store/plotting/heatmap.py +383 -0
- linkml_store/utils/__init__.py +0 -0
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/dat_parser.py +95 -0
- linkml_store/utils/embedding_matcher.py +424 -0
- linkml_store/utils/embedding_utils.py +299 -0
- linkml_store/utils/enrichment_analyzer.py +217 -0
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +550 -0
- linkml_store/utils/io.py +38 -0
- linkml_store/utils/llm_utils.py +122 -0
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/neo4j_utils.py +42 -0
- linkml_store/utils/object_utils.py +190 -0
- linkml_store/utils/pandas_utils.py +93 -0
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/utils/sklearn_utils.py +193 -0
- linkml_store/utils/sql_utils.py +177 -0
- linkml_store/utils/stats_utils.py +53 -0
- linkml_store/utils/vector_utils.py +158 -0
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +43 -0
- linkml_store/webapi/main.py +855 -0
- linkml_store-0.3.0.dist-info/METADATA +226 -0
- linkml_store-0.3.0.dist-info/RECORD +101 -0
- linkml_store-0.3.0.dist-info/WHEEL +4 -0
- linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
- linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,555 @@
|
|
|
1
|
+
"""Dremio collection implementation.
|
|
2
|
+
|
|
3
|
+
This module provides the Collection implementation for Dremio,
|
|
4
|
+
supporting CRUD operations and queries via Arrow Flight SQL.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
10
|
+
|
|
11
|
+
from linkml_runtime.linkml_model import ClassDefinition
|
|
12
|
+
|
|
13
|
+
from linkml_store.api import Collection
|
|
14
|
+
from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
|
|
15
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DremioCollection(Collection):
    """Collection implementation for Dremio data lakehouse.

    This collection connects to Dremio tables via Arrow Flight SQL
    and provides query capabilities. Write operations may be limited
    depending on the underlying data source configuration in Dremio.
    """

    # Class-level default; set to True on the instance once
    # _check_if_initialized has confirmed the backing table exists,
    # so the existence check is issued at most once per collection.
    _table_exists_checked: bool = False

    def __init__(self, *args, **kwargs):
        """Initialize the collection; all arguments are forwarded to Collection."""
        super().__init__(*args, **kwargs)
|
|
32
|
+
|
|
33
|
+
    def _get_table_path(self) -> str:
        """Get the full qualified table path.

        Delegates to the parent database, which knows the Dremio
        source/space prefix for this collection's alias.

        Returns:
            Full table path for SQL queries.
        """
        return self.parent._get_table_path(self.alias)
|
|
40
|
+
|
|
41
|
+
def _build_select_sql(
|
|
42
|
+
self,
|
|
43
|
+
select_cols: Optional[List[str]] = None,
|
|
44
|
+
where_clause: Optional[Union[str, Dict[str, Any]]] = None,
|
|
45
|
+
sort_by: Optional[List[str]] = None,
|
|
46
|
+
limit: Optional[int] = None,
|
|
47
|
+
offset: Optional[int] = None,
|
|
48
|
+
) -> str:
|
|
49
|
+
"""Build a SELECT SQL statement.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
select_cols: Columns to select (None for all).
|
|
53
|
+
where_clause: WHERE conditions.
|
|
54
|
+
sort_by: ORDER BY columns.
|
|
55
|
+
limit: Maximum rows to return.
|
|
56
|
+
offset: Number of rows to skip.
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
SQL SELECT statement.
|
|
60
|
+
"""
|
|
61
|
+
table_path = self._get_table_path()
|
|
62
|
+
|
|
63
|
+
# Build SELECT clause
|
|
64
|
+
if select_cols:
|
|
65
|
+
cols = ", ".join(f'"{c}"' for c in select_cols)
|
|
66
|
+
else:
|
|
67
|
+
cols = "*"
|
|
68
|
+
|
|
69
|
+
sql = f"SELECT {cols} FROM {table_path}"
|
|
70
|
+
|
|
71
|
+
# Build WHERE clause
|
|
72
|
+
if where_clause:
|
|
73
|
+
conditions = self._build_where_conditions(where_clause)
|
|
74
|
+
if conditions:
|
|
75
|
+
sql += f" WHERE {conditions}"
|
|
76
|
+
|
|
77
|
+
# Build ORDER BY clause
|
|
78
|
+
if sort_by:
|
|
79
|
+
order_cols = ", ".join(f'"{c}"' for c in sort_by)
|
|
80
|
+
sql += f" ORDER BY {order_cols}"
|
|
81
|
+
|
|
82
|
+
# Build LIMIT/OFFSET
|
|
83
|
+
if limit is not None and limit >= 0:
|
|
84
|
+
sql += f" LIMIT {limit}"
|
|
85
|
+
if offset is not None and offset > 0:
|
|
86
|
+
sql += f" OFFSET {offset}"
|
|
87
|
+
|
|
88
|
+
return sql
|
|
89
|
+
|
|
90
|
+
def _build_where_conditions(self, where_clause: Union[str, Dict[str, Any]]) -> str:
|
|
91
|
+
"""Build WHERE clause conditions from a dict or string.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
where_clause: WHERE conditions as dict or string.
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
SQL WHERE clause (without WHERE keyword).
|
|
98
|
+
"""
|
|
99
|
+
if isinstance(where_clause, str):
|
|
100
|
+
return where_clause
|
|
101
|
+
|
|
102
|
+
if not isinstance(where_clause, dict):
|
|
103
|
+
return ""
|
|
104
|
+
|
|
105
|
+
conditions = []
|
|
106
|
+
for key, value in where_clause.items():
|
|
107
|
+
condition = self._build_single_condition(key, value)
|
|
108
|
+
if condition:
|
|
109
|
+
conditions.append(condition)
|
|
110
|
+
|
|
111
|
+
return " AND ".join(conditions)
|
|
112
|
+
|
|
113
|
+
def _build_single_condition(self, key: str, value: Any) -> str:
|
|
114
|
+
"""Build a single WHERE condition.
|
|
115
|
+
|
|
116
|
+
Supports MongoDB-style operators like $gt, $gte, $lt, $lte, $in, $ne.
|
|
117
|
+
|
|
118
|
+
Args:
|
|
119
|
+
key: Column name.
|
|
120
|
+
value: Value or operator dict.
|
|
121
|
+
|
|
122
|
+
Returns:
|
|
123
|
+
SQL condition string.
|
|
124
|
+
"""
|
|
125
|
+
col = f'"{key}"'
|
|
126
|
+
|
|
127
|
+
if value is None:
|
|
128
|
+
return f"{col} IS NULL"
|
|
129
|
+
|
|
130
|
+
if isinstance(value, dict):
|
|
131
|
+
# Handle operators
|
|
132
|
+
sub_conditions = []
|
|
133
|
+
for op, val in value.items():
|
|
134
|
+
if op == "$gt":
|
|
135
|
+
sub_conditions.append(f"{col} > {self._sql_value(val)}")
|
|
136
|
+
elif op == "$gte":
|
|
137
|
+
sub_conditions.append(f"{col} >= {self._sql_value(val)}")
|
|
138
|
+
elif op == "$lt":
|
|
139
|
+
sub_conditions.append(f"{col} < {self._sql_value(val)}")
|
|
140
|
+
elif op == "$lte":
|
|
141
|
+
sub_conditions.append(f"{col} <= {self._sql_value(val)}")
|
|
142
|
+
elif op == "$ne":
|
|
143
|
+
if val is None:
|
|
144
|
+
sub_conditions.append(f"{col} IS NOT NULL")
|
|
145
|
+
else:
|
|
146
|
+
sub_conditions.append(f"{col} != {self._sql_value(val)}")
|
|
147
|
+
elif op == "$in":
|
|
148
|
+
if isinstance(val, (list, tuple)):
|
|
149
|
+
vals = ", ".join(self._sql_value(v) for v in val)
|
|
150
|
+
sub_conditions.append(f"{col} IN ({vals})")
|
|
151
|
+
elif op == "$nin":
|
|
152
|
+
if isinstance(val, (list, tuple)):
|
|
153
|
+
vals = ", ".join(self._sql_value(v) for v in val)
|
|
154
|
+
sub_conditions.append(f"{col} NOT IN ({vals})")
|
|
155
|
+
elif op == "$like":
|
|
156
|
+
sub_conditions.append(f"{col} LIKE {self._sql_value(val)}")
|
|
157
|
+
elif op == "$regex":
|
|
158
|
+
# Dremio uses REGEXP_LIKE
|
|
159
|
+
sub_conditions.append(f"REGEXP_LIKE({col}, {self._sql_value(val)})")
|
|
160
|
+
else:
|
|
161
|
+
logger.warning(f"Unknown operator: {op}")
|
|
162
|
+
|
|
163
|
+
return " AND ".join(sub_conditions) if sub_conditions else ""
|
|
164
|
+
else:
|
|
165
|
+
return f"{col} = {self._sql_value(value)}"
|
|
166
|
+
|
|
167
|
+
def _sql_value(self, value: Any) -> str:
|
|
168
|
+
"""Convert a Python value to SQL literal.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
value: Python value.
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
SQL literal string.
|
|
175
|
+
"""
|
|
176
|
+
if value is None:
|
|
177
|
+
return "NULL"
|
|
178
|
+
elif isinstance(value, bool):
|
|
179
|
+
return "TRUE" if value else "FALSE"
|
|
180
|
+
elif isinstance(value, (int, float)):
|
|
181
|
+
return str(value)
|
|
182
|
+
elif isinstance(value, str):
|
|
183
|
+
# Escape single quotes
|
|
184
|
+
escaped = value.replace("'", "''")
|
|
185
|
+
return f"'{escaped}'"
|
|
186
|
+
elif isinstance(value, (list, dict)):
|
|
187
|
+
# Convert to JSON string
|
|
188
|
+
escaped = json.dumps(value).replace("'", "''")
|
|
189
|
+
return f"'{escaped}'"
|
|
190
|
+
else:
|
|
191
|
+
escaped = str(value).replace("'", "''")
|
|
192
|
+
return f"'{escaped}'"
|
|
193
|
+
|
|
194
|
+
def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
195
|
+
"""Insert objects into the collection.
|
|
196
|
+
|
|
197
|
+
Note: Write operations in Dremio depend on the underlying data source.
|
|
198
|
+
Some sources (like Iceberg, Delta Lake) support writes, while others
|
|
199
|
+
(like file-based sources) may not.
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
objs: Object(s) to insert.
|
|
203
|
+
**kwargs: Additional arguments.
|
|
204
|
+
"""
|
|
205
|
+
if not isinstance(objs, list):
|
|
206
|
+
objs = [objs]
|
|
207
|
+
|
|
208
|
+
if not objs:
|
|
209
|
+
return
|
|
210
|
+
|
|
211
|
+
logger.debug(f"Inserting {len(objs)} objects into {self.alias}")
|
|
212
|
+
|
|
213
|
+
# Get or induce class definition
|
|
214
|
+
cd = self.class_definition()
|
|
215
|
+
if not cd:
|
|
216
|
+
logger.debug(f"No class definition for {self.alias}; inducing from objects")
|
|
217
|
+
cd = self.induce_class_definition_from_objects(objs)
|
|
218
|
+
|
|
219
|
+
table_path = self._get_table_path()
|
|
220
|
+
|
|
221
|
+
# Get column names from class definition or first object
|
|
222
|
+
if cd and cd.attributes:
|
|
223
|
+
columns = list(cd.attributes.keys())
|
|
224
|
+
else:
|
|
225
|
+
columns = list(objs[0].keys())
|
|
226
|
+
|
|
227
|
+
# Build INSERT statement
|
|
228
|
+
col_list = ", ".join(f'"{c}"' for c in columns)
|
|
229
|
+
|
|
230
|
+
# Insert objects in batches
|
|
231
|
+
batch_size = 100
|
|
232
|
+
for i in range(0, len(objs), batch_size):
|
|
233
|
+
batch = objs[i : i + batch_size]
|
|
234
|
+
|
|
235
|
+
values_list = []
|
|
236
|
+
for obj in batch:
|
|
237
|
+
values = []
|
|
238
|
+
for col in columns:
|
|
239
|
+
val = obj.get(col)
|
|
240
|
+
values.append(self._sql_value(val))
|
|
241
|
+
values_list.append(f"({', '.join(values)})")
|
|
242
|
+
|
|
243
|
+
values_sql = ", ".join(values_list)
|
|
244
|
+
sql = f"INSERT INTO {table_path} ({col_list}) VALUES {values_sql}"
|
|
245
|
+
|
|
246
|
+
try:
|
|
247
|
+
self.parent._execute_update(sql)
|
|
248
|
+
except Exception as e:
|
|
249
|
+
logger.error(f"Insert failed: {e}")
|
|
250
|
+
raise
|
|
251
|
+
|
|
252
|
+
self._post_insert_hook(objs)
|
|
253
|
+
|
|
254
|
+
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
|
|
255
|
+
"""Delete specific objects from the collection.
|
|
256
|
+
|
|
257
|
+
Args:
|
|
258
|
+
objs: Object(s) to delete.
|
|
259
|
+
**kwargs: Additional arguments.
|
|
260
|
+
|
|
261
|
+
Returns:
|
|
262
|
+
Number of deleted rows, or None if unknown.
|
|
263
|
+
"""
|
|
264
|
+
if not isinstance(objs, list):
|
|
265
|
+
objs = [objs]
|
|
266
|
+
|
|
267
|
+
if not objs:
|
|
268
|
+
return 0
|
|
269
|
+
|
|
270
|
+
table_path = self._get_table_path()
|
|
271
|
+
total_deleted = 0
|
|
272
|
+
|
|
273
|
+
for obj in objs:
|
|
274
|
+
# Build WHERE clause from object fields
|
|
275
|
+
conditions = []
|
|
276
|
+
for key, value in obj.items():
|
|
277
|
+
if key.startswith("_"):
|
|
278
|
+
continue
|
|
279
|
+
condition = self._build_single_condition(key, value)
|
|
280
|
+
if condition:
|
|
281
|
+
conditions.append(condition)
|
|
282
|
+
|
|
283
|
+
if not conditions:
|
|
284
|
+
continue
|
|
285
|
+
|
|
286
|
+
sql = f"DELETE FROM {table_path} WHERE {' AND '.join(conditions)}"
|
|
287
|
+
|
|
288
|
+
try:
|
|
289
|
+
result = self.parent._execute_update(sql)
|
|
290
|
+
if result > 0:
|
|
291
|
+
total_deleted += result
|
|
292
|
+
except Exception as e:
|
|
293
|
+
logger.error(f"Delete failed: {e}")
|
|
294
|
+
raise
|
|
295
|
+
|
|
296
|
+
self._post_delete_hook()
|
|
297
|
+
return total_deleted if total_deleted > 0 else None
|
|
298
|
+
|
|
299
|
+
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
|
|
300
|
+
"""Delete objects matching a condition.
|
|
301
|
+
|
|
302
|
+
Args:
|
|
303
|
+
where: WHERE conditions (empty dict means delete all).
|
|
304
|
+
missing_ok: If True, don't raise error if no rows deleted.
|
|
305
|
+
**kwargs: Additional arguments.
|
|
306
|
+
|
|
307
|
+
Returns:
|
|
308
|
+
Number of deleted rows, or None if unknown.
|
|
309
|
+
"""
|
|
310
|
+
if where is None:
|
|
311
|
+
where = {}
|
|
312
|
+
|
|
313
|
+
table_path = self._get_table_path()
|
|
314
|
+
|
|
315
|
+
if where:
|
|
316
|
+
conditions = self._build_where_conditions(where)
|
|
317
|
+
sql = f"DELETE FROM {table_path} WHERE {conditions}"
|
|
318
|
+
else:
|
|
319
|
+
# Delete all
|
|
320
|
+
sql = f"DELETE FROM {table_path}"
|
|
321
|
+
|
|
322
|
+
try:
|
|
323
|
+
result = self.parent._execute_update(sql)
|
|
324
|
+
if result == 0 and not missing_ok:
|
|
325
|
+
raise ValueError(f"No rows found for {where}")
|
|
326
|
+
self._post_delete_hook()
|
|
327
|
+
return result if result >= 0 else None
|
|
328
|
+
except Exception as e:
|
|
329
|
+
if "does not exist" in str(e).lower():
|
|
330
|
+
if missing_ok:
|
|
331
|
+
return 0
|
|
332
|
+
raise
|
|
333
|
+
|
|
334
|
+
def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
|
|
335
|
+
"""Update objects in the collection.
|
|
336
|
+
|
|
337
|
+
Note: Requires a primary key field to identify rows.
|
|
338
|
+
|
|
339
|
+
Args:
|
|
340
|
+
objs: Object(s) to update.
|
|
341
|
+
**kwargs: Additional arguments.
|
|
342
|
+
"""
|
|
343
|
+
if not isinstance(objs, list):
|
|
344
|
+
objs = [objs]
|
|
345
|
+
|
|
346
|
+
if not objs:
|
|
347
|
+
return
|
|
348
|
+
|
|
349
|
+
table_path = self._get_table_path()
|
|
350
|
+
pk = self.identifier_attribute_name
|
|
351
|
+
|
|
352
|
+
if not pk:
|
|
353
|
+
raise ValueError("Cannot update without an identifier attribute")
|
|
354
|
+
|
|
355
|
+
for obj in objs:
|
|
356
|
+
if pk not in obj:
|
|
357
|
+
raise ValueError(f"Object missing primary key field: {pk}")
|
|
358
|
+
|
|
359
|
+
pk_value = obj[pk]
|
|
360
|
+
|
|
361
|
+
# Build SET clause (exclude primary key)
|
|
362
|
+
set_parts = []
|
|
363
|
+
for key, value in obj.items():
|
|
364
|
+
if key == pk or key.startswith("_"):
|
|
365
|
+
continue
|
|
366
|
+
set_parts.append(f'"{key}" = {self._sql_value(value)}')
|
|
367
|
+
|
|
368
|
+
if not set_parts:
|
|
369
|
+
continue
|
|
370
|
+
|
|
371
|
+
set_clause = ", ".join(set_parts)
|
|
372
|
+
sql = f'UPDATE {table_path} SET {set_clause} WHERE "{pk}" = {self._sql_value(pk_value)}'
|
|
373
|
+
|
|
374
|
+
try:
|
|
375
|
+
self.parent._execute_update(sql)
|
|
376
|
+
except Exception as e:
|
|
377
|
+
logger.error(f"Update failed: {e}")
|
|
378
|
+
raise
|
|
379
|
+
|
|
380
|
+
def query(self, query: Query, **kwargs) -> QueryResult:
|
|
381
|
+
"""Execute a query against the collection.
|
|
382
|
+
|
|
383
|
+
Args:
|
|
384
|
+
query: Query specification.
|
|
385
|
+
**kwargs: Additional arguments.
|
|
386
|
+
|
|
387
|
+
Returns:
|
|
388
|
+
QueryResult with matching rows.
|
|
389
|
+
"""
|
|
390
|
+
self._pre_query_hook(query)
|
|
391
|
+
|
|
392
|
+
# Handle limit=-1 as "no limit"
|
|
393
|
+
limit = query.limit
|
|
394
|
+
if limit == -1:
|
|
395
|
+
limit = None
|
|
396
|
+
|
|
397
|
+
# Build and execute SQL
|
|
398
|
+
sql = self._build_select_sql(
|
|
399
|
+
select_cols=query.select_cols,
|
|
400
|
+
where_clause=query.where_clause,
|
|
401
|
+
sort_by=query.sort_by,
|
|
402
|
+
limit=limit,
|
|
403
|
+
offset=query.offset,
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
try:
|
|
407
|
+
result_table = self.parent._execute_query(sql)
|
|
408
|
+
|
|
409
|
+
# Convert Arrow table to list of dicts
|
|
410
|
+
rows = result_table.to_pydict()
|
|
411
|
+
num_result_rows = result_table.num_rows
|
|
412
|
+
|
|
413
|
+
# Restructure from column-oriented to row-oriented
|
|
414
|
+
if rows and num_result_rows > 0:
|
|
415
|
+
row_list = []
|
|
416
|
+
columns = list(rows.keys())
|
|
417
|
+
for i in range(num_result_rows):
|
|
418
|
+
row = {col: rows[col][i] for col in columns}
|
|
419
|
+
row_list.append(row)
|
|
420
|
+
else:
|
|
421
|
+
row_list = []
|
|
422
|
+
|
|
423
|
+
# Get total count (for pagination)
|
|
424
|
+
if query.offset or (limit is not None and len(row_list) == limit):
|
|
425
|
+
# Need to get actual count
|
|
426
|
+
count_sql = self._build_count_sql(query.where_clause)
|
|
427
|
+
try:
|
|
428
|
+
count_result = self.parent._execute_query(count_sql)
|
|
429
|
+
total_rows = count_result.column(0)[0].as_py()
|
|
430
|
+
except Exception:
|
|
431
|
+
total_rows = len(row_list)
|
|
432
|
+
else:
|
|
433
|
+
total_rows = len(row_list)
|
|
434
|
+
|
|
435
|
+
qr = QueryResult(query=query, num_rows=total_rows, rows=row_list, offset=query.offset or 0)
|
|
436
|
+
|
|
437
|
+
# Handle facets if requested
|
|
438
|
+
if query.include_facet_counts and query.facet_slots:
|
|
439
|
+
qr.facet_counts = self.query_facets(where=query.where_clause, facet_columns=query.facet_slots)
|
|
440
|
+
|
|
441
|
+
return qr
|
|
442
|
+
|
|
443
|
+
except Exception as e:
|
|
444
|
+
logger.error(f"Query failed: {e}")
|
|
445
|
+
# Return empty result on error
|
|
446
|
+
return QueryResult(query=query, num_rows=0, rows=[])
|
|
447
|
+
|
|
448
|
+
def _build_count_sql(self, where_clause: Optional[Union[str, Dict[str, Any]]] = None) -> str:
|
|
449
|
+
"""Build a COUNT SQL statement.
|
|
450
|
+
|
|
451
|
+
Args:
|
|
452
|
+
where_clause: WHERE conditions.
|
|
453
|
+
|
|
454
|
+
Returns:
|
|
455
|
+
SQL COUNT statement.
|
|
456
|
+
"""
|
|
457
|
+
table_path = self._get_table_path()
|
|
458
|
+
sql = f"SELECT COUNT(*) FROM {table_path}"
|
|
459
|
+
|
|
460
|
+
if where_clause:
|
|
461
|
+
conditions = self._build_where_conditions(where_clause)
|
|
462
|
+
if conditions:
|
|
463
|
+
sql += f" WHERE {conditions}"
|
|
464
|
+
|
|
465
|
+
return sql
|
|
466
|
+
|
|
467
|
+
def query_facets(
|
|
468
|
+
self,
|
|
469
|
+
where: Optional[Dict] = None,
|
|
470
|
+
facet_columns: Optional[List[str]] = None,
|
|
471
|
+
facet_limit: int = DEFAULT_FACET_LIMIT,
|
|
472
|
+
**kwargs,
|
|
473
|
+
) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
|
|
474
|
+
"""Get facet counts for columns.
|
|
475
|
+
|
|
476
|
+
Args:
|
|
477
|
+
where: Filter conditions.
|
|
478
|
+
facet_columns: Columns to get facets for.
|
|
479
|
+
facet_limit: Maximum facet values per column.
|
|
480
|
+
**kwargs: Additional arguments.
|
|
481
|
+
|
|
482
|
+
Returns:
|
|
483
|
+
Dictionary mapping column names to list of (value, count) tuples.
|
|
484
|
+
"""
|
|
485
|
+
if facet_limit is None:
|
|
486
|
+
facet_limit = DEFAULT_FACET_LIMIT
|
|
487
|
+
|
|
488
|
+
results = {}
|
|
489
|
+
cd = self.class_definition()
|
|
490
|
+
table_path = self._get_table_path()
|
|
491
|
+
|
|
492
|
+
if not facet_columns:
|
|
493
|
+
if cd and cd.attributes:
|
|
494
|
+
facet_columns = list(cd.attributes.keys())
|
|
495
|
+
else:
|
|
496
|
+
return results
|
|
497
|
+
|
|
498
|
+
for col in facet_columns:
|
|
499
|
+
if isinstance(col, tuple):
|
|
500
|
+
# Multi-column facet
|
|
501
|
+
col_list = ", ".join(f'"{c}"' for c in col)
|
|
502
|
+
col_name = col
|
|
503
|
+
else:
|
|
504
|
+
col_list = f'"{col}"'
|
|
505
|
+
col_name = col
|
|
506
|
+
|
|
507
|
+
# Build facet query
|
|
508
|
+
sql = f"SELECT {col_list}, COUNT(*) as cnt FROM {table_path}"
|
|
509
|
+
|
|
510
|
+
if where:
|
|
511
|
+
conditions = self._build_where_conditions(where)
|
|
512
|
+
if conditions:
|
|
513
|
+
sql += f" WHERE {conditions}"
|
|
514
|
+
|
|
515
|
+
sql += f" GROUP BY {col_list} ORDER BY cnt DESC"
|
|
516
|
+
|
|
517
|
+
if facet_limit > 0:
|
|
518
|
+
sql += f" LIMIT {facet_limit}"
|
|
519
|
+
|
|
520
|
+
try:
|
|
521
|
+
result = self.parent._execute_query(sql)
|
|
522
|
+
|
|
523
|
+
facets = []
|
|
524
|
+
for i in range(result.num_rows):
|
|
525
|
+
if isinstance(col, tuple):
|
|
526
|
+
value = tuple(result.column(c)[i].as_py() for c in col)
|
|
527
|
+
else:
|
|
528
|
+
value = result.column(col)[i].as_py()
|
|
529
|
+
count = result.column("cnt")[i].as_py()
|
|
530
|
+
facets.append((value, count))
|
|
531
|
+
|
|
532
|
+
results[col_name] = facets
|
|
533
|
+
|
|
534
|
+
except Exception as e:
|
|
535
|
+
logger.warning(f"Facet query failed for {col}: {e}")
|
|
536
|
+
results[col_name] = []
|
|
537
|
+
|
|
538
|
+
return results
|
|
539
|
+
|
|
540
|
+
def _check_if_initialized(self) -> bool:
|
|
541
|
+
"""Check if the collection's table exists.
|
|
542
|
+
|
|
543
|
+
Returns:
|
|
544
|
+
True if table exists.
|
|
545
|
+
"""
|
|
546
|
+
if self._table_exists_checked:
|
|
547
|
+
return True
|
|
548
|
+
|
|
549
|
+
try:
|
|
550
|
+
result = self.parent._table_exists(self.alias)
|
|
551
|
+
if result:
|
|
552
|
+
self._table_exists_checked = True
|
|
553
|
+
return result
|
|
554
|
+
except Exception:
|
|
555
|
+
return False
|