linkml-store 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/__init__.py +7 -0
- linkml_store/api/__init__.py +8 -0
- linkml_store/api/client.py +414 -0
- linkml_store/api/collection.py +1280 -0
- linkml_store/api/config.py +187 -0
- linkml_store/api/database.py +862 -0
- linkml_store/api/queries.py +69 -0
- linkml_store/api/stores/__init__.py +0 -0
- linkml_store/api/stores/chromadb/__init__.py +7 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
- linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
- linkml_store/api/stores/dremio/__init__.py +10 -0
- linkml_store/api/stores/dremio/dremio_collection.py +555 -0
- linkml_store/api/stores/dremio/dremio_database.py +1052 -0
- linkml_store/api/stores/dremio/mappings.py +105 -0
- linkml_store/api/stores/dremio_rest/__init__.py +11 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
- linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
- linkml_store/api/stores/duckdb/mappings.py +8 -0
- linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
- linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
- linkml_store/api/stores/ibis/__init__.py +5 -0
- linkml_store/api/stores/ibis/ibis_collection.py +488 -0
- linkml_store/api/stores/ibis/ibis_database.py +328 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
- linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
- linkml_store/api/stores/neo4j/__init__.py +0 -0
- linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
- linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
- linkml_store/api/stores/solr/__init__.py +3 -0
- linkml_store/api/stores/solr/solr_collection.py +224 -0
- linkml_store/api/stores/solr/solr_database.py +83 -0
- linkml_store/api/stores/solr/solr_utils.py +0 -0
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +1147 -0
- linkml_store/constants.py +7 -0
- linkml_store/graphs/__init__.py +0 -0
- linkml_store/graphs/graph_map.py +24 -0
- linkml_store/index/__init__.py +53 -0
- linkml_store/index/implementations/__init__.py +0 -0
- linkml_store/index/implementations/llm_indexer.py +174 -0
- linkml_store/index/implementations/simple_indexer.py +43 -0
- linkml_store/index/indexer.py +211 -0
- linkml_store/inference/__init__.py +13 -0
- linkml_store/inference/evaluation.py +195 -0
- linkml_store/inference/implementations/__init__.py +0 -0
- linkml_store/inference/implementations/llm_inference_engine.py +154 -0
- linkml_store/inference/implementations/rag_inference_engine.py +276 -0
- linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
- linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
- linkml_store/inference/inference_config.py +66 -0
- linkml_store/inference/inference_engine.py +209 -0
- linkml_store/inference/inference_engine_registry.py +74 -0
- linkml_store/plotting/__init__.py +5 -0
- linkml_store/plotting/cli.py +826 -0
- linkml_store/plotting/dimensionality_reduction.py +453 -0
- linkml_store/plotting/embedding_plot.py +489 -0
- linkml_store/plotting/facet_chart.py +73 -0
- linkml_store/plotting/heatmap.py +383 -0
- linkml_store/utils/__init__.py +0 -0
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/dat_parser.py +95 -0
- linkml_store/utils/embedding_matcher.py +424 -0
- linkml_store/utils/embedding_utils.py +299 -0
- linkml_store/utils/enrichment_analyzer.py +217 -0
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +550 -0
- linkml_store/utils/io.py +38 -0
- linkml_store/utils/llm_utils.py +122 -0
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/neo4j_utils.py +42 -0
- linkml_store/utils/object_utils.py +190 -0
- linkml_store/utils/pandas_utils.py +93 -0
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/utils/sklearn_utils.py +193 -0
- linkml_store/utils/sql_utils.py +177 -0
- linkml_store/utils/stats_utils.py +53 -0
- linkml_store/utils/vector_utils.py +158 -0
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +43 -0
- linkml_store/webapi/main.py +855 -0
- linkml_store-0.3.0.dist-info/METADATA +226 -0
- linkml_store-0.3.0.dist-info/RECORD +101 -0
- linkml_store-0.3.0.dist-info/WHEEL +4 -0
- linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
- linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,488 @@
|
|
|
1
|
+
"""Ibis collection adapter for linkml-store."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
|
|
9
|
+
|
|
10
|
+
from linkml_store.api import Collection
|
|
11
|
+
from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
|
|
12
|
+
from linkml_store.api.queries import Query, QueryResult
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _is_complex_value(v):
|
|
18
|
+
"""Check if a value is complex (list/dict) and needs JSON serialization."""
|
|
19
|
+
if isinstance(v, (list, dict)):
|
|
20
|
+
return True
|
|
21
|
+
return False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _serialize_complex_values(obj: dict) -> dict:
|
|
25
|
+
"""Serialize complex values (lists, dicts) to JSON strings."""
|
|
26
|
+
result = {}
|
|
27
|
+
for k, v in obj.items():
|
|
28
|
+
if _is_complex_value(v):
|
|
29
|
+
result[k] = json.dumps(v)
|
|
30
|
+
else:
|
|
31
|
+
result[k] = v
|
|
32
|
+
return result
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _deserialize_complex_values(obj: dict, json_columns: set = None) -> dict:
|
|
36
|
+
"""Deserialize JSON strings back to Python objects.
|
|
37
|
+
|
|
38
|
+
If json_columns is None, attempt to auto-detect by trying to parse
|
|
39
|
+
string values that look like JSON arrays or objects.
|
|
40
|
+
"""
|
|
41
|
+
result = {}
|
|
42
|
+
for k, v in obj.items():
|
|
43
|
+
if isinstance(v, str):
|
|
44
|
+
# Check if this is a known JSON column or looks like JSON
|
|
45
|
+
if json_columns and k in json_columns:
|
|
46
|
+
try:
|
|
47
|
+
result[k] = json.loads(v)
|
|
48
|
+
except (json.JSONDecodeError, TypeError):
|
|
49
|
+
result[k] = v
|
|
50
|
+
elif v.startswith('[') or v.startswith('{'):
|
|
51
|
+
# Auto-detect JSON arrays and objects
|
|
52
|
+
try:
|
|
53
|
+
result[k] = json.loads(v)
|
|
54
|
+
except (json.JSONDecodeError, TypeError):
|
|
55
|
+
result[k] = v
|
|
56
|
+
else:
|
|
57
|
+
result[k] = v
|
|
58
|
+
else:
|
|
59
|
+
result[k] = v
|
|
60
|
+
return result
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class IbisCollection(Collection):
    """
    Collection implementation using Ibis tables.

    This adapter maps LinkML collections to Ibis tables, providing a unified
    interface across multiple database backends through Ibis.
    """

    # Set to True once the backing table is known to exist; None until checked.
    _table_created: Optional[bool] = None
    # Columns that contain JSON-serialized data (populated during insert)
    _json_columns: Optional[set] = None

    def __init__(self, *args, **kwargs):
        """Forward construction to Collection and start an empty JSON-column tracker."""
        super().__init__(*args, **kwargs)
        self._json_columns = set()
|
|
77
|
+
|
|
78
|
+
def _check_if_initialized(self) -> bool:
    """Check if the table exists in the database."""
    # The collection's alias takes precedence as the physical table name.
    name = self.alias or self.target_class_name
    existing_tables = self.parent.connection.list_tables()
    return name in existing_tables
|
|
83
|
+
|
|
84
|
+
def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
    """Insert objects into the collection.

    List/dict values inside an object are JSON-serialized before storage
    and the affected column names are recorded in ``self._json_columns``
    so that query() can decode them again on the way out.

    :param objs: a single object (dict) or a list of objects
    :param kwargs: unused; accepted for interface compatibility
    :raises Exception: re-raises any backend error after logging it
    """
    logger.debug(f"Inserting {len(objs) if isinstance(objs, list) else 1} objects")
    if not isinstance(objs, list):
        objs = [objs]
    if not objs:
        # Nothing to do for an empty batch
        return

    cd = self.class_definition()
    if not cd:
        logger.debug(f"No class definition for {self.alias}; inducing from objects")
        cd = self.induce_class_definition_from_objects(objs)

    self._create_table(cd)

    # Identify and serialize complex values (lists, dicts)
    serialized_objs = []
    for obj in objs:
        serialized_obj = {}
        for k, v in obj.items():
            if _is_complex_value(v):
                serialized_obj[k] = json.dumps(v)
                # Remember the column so query() knows to json.loads it back
                self._json_columns.add(k)
            else:
                serialized_obj[k] = v
        serialized_objs.append(serialized_obj)

    # Convert objects to DataFrame for efficient insertion
    df = pd.DataFrame(serialized_objs)

    # Get the Ibis connection and table
    conn = self.parent.connection
    table_name = self.alias or self.target_class_name

    try:
        # Insert using Ibis
        # For most backends, we can use insert or create_table with data
        if table_name in conn.list_tables():
            # Table exists, insert into it
            table = conn.table(table_name)  # NOTE(review): fetched but never used below — confirm intent
            # Convert DataFrame to records and insert
            # Note: Ibis insert semantics vary by backend
            try:
                # Try using insert (if supported)
                conn.insert(table_name, df)
            except (AttributeError, NotImplementedError):
                # Fallback: use backend-specific methods
                # For DuckDB and similar, we can use raw SQL
                try:
                    # Create a temp table and insert from it
                    temp_name = f"_temp_{table_name}"
                    conn.create_table(temp_name, df, overwrite=True)
                    sql = f"INSERT INTO {table_name} SELECT * FROM {temp_name}"
                    conn.raw_sql(sql)
                    conn.drop_table(temp_name)
                except Exception as e:
                    logger.error(f"Error inserting data: {e}")
                    # Last resort: use pandas to_sql if available
                    if hasattr(conn, "con"):
                        # Some Ibis backends expose the underlying connection
                        df.to_sql(table_name, conn.con, if_exists="append", index=False)
                    else:
                        raise
        else:
            # Table doesn't exist, create it with data
            conn.create_table(table_name, df)

        logger.info(f"Inserted {len(objs)} objects into {table_name}")
    except Exception as e:
        logger.error(f"Error inserting into {table_name}: {e}")
        raise

    self._post_insert_hook(objs)
|
|
157
|
+
|
|
158
|
+
def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
    """Delete specific objects from the collection.

    Each object is matched by equality on every attribute present in the
    class definition, and matching rows are removed via generated SQL.

    :param objs: a single object (dict) or a list of objects to delete
    :param kwargs: unused; accepted for interface compatibility
    :return: the number of objects for which a DELETE statement succeeded
        (statements issued, not rows removed — rowcount is not portable)
    """
    if not isinstance(objs, list):
        objs = [objs]

    cd = self.class_definition()
    if not cd or not cd.attributes:
        cd = self.induce_class_definition_from_objects(objs)

    conn = self.parent.connection
    table_name = self.alias or self.target_class_name

    if table_name not in conn.list_tables():
        logger.warning(f"Table {table_name} does not exist")
        return 0

    # For Ibis, deletion is backend-specific
    # We'll use raw SQL for broader compatibility
    # NOTE(review): values are interpolated into SQL text (single quotes are
    # escaped for strings); acceptable for trusted data, not safe for
    # untrusted input — consider parameterized statements if exposed.
    deleted_count = 0
    for obj in objs:
        conditions = []
        for k, v in obj.items():
            if k in cd.attributes:
                if v is None:
                    conditions.append(f"{k} IS NULL")
                elif _is_complex_value(v):
                    # Complex values are stored as JSON strings
                    json_str = json.dumps(v).replace("'", "''")  # Escape quotes
                    conditions.append(f"{k} = '{json_str}'")
                elif isinstance(v, str):
                    escaped_v = v.replace("'", "''")  # Escape quotes
                    conditions.append(f"{k} = '{escaped_v}'")
                elif isinstance(v, bool):
                    # Lowercase literal (true/false) for SQL compatibility
                    conditions.append(f"{k} = {str(v).lower()}")
                else:
                    conditions.append(f"{k} = {v}")

        if conditions:
            where_clause = " AND ".join(conditions)
            sql = f"DELETE FROM {table_name} WHERE {where_clause}"
            try:
                conn.raw_sql(sql)
                deleted_count += 1
            except Exception as e:
                logger.error(f"Error deleting object: {e}")

    self._post_delete_hook()
    return deleted_count
|
|
206
|
+
|
|
207
|
+
def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
    """Delete objects matching a where clause.

    :param where: mapping of column name to required value; None or an
        empty dict deletes every row in the table
    :param missing_ok: if True, swallow backend errors (e.g. missing table)
        and return 0 instead of raising
    :param kwargs: unused; accepted for interface compatibility
    :return: None on success (portable rowcounts are unavailable), or 0
        when nothing could be deleted
    """
    logger.info(f"Deleting from {self.target_class_name} where: {where}")
    if where is None:
        where = {}

    cd = self.class_definition()
    if not cd:
        logger.info(f"No class definition found for {self.target_class_name}")
        return 0

    conn = self.parent.connection
    table_name = self.alias or self.target_class_name

    if table_name not in conn.list_tables():
        logger.info(f"Table {table_name} does not exist")
        return 0

    # Build where clause. Values are rendered the same way delete() renders
    # them: IS NULL for None, lowercase literals for booleans, and single
    # quotes escaped in strings so the generated SQL cannot be broken by an
    # embedded quote. (Previously strings were unescaped, None rendered as
    # the invalid literal `None`, and bools as `True`/`False`.)
    conditions = []
    for k, v in where.items():
        if v is None:
            conditions.append(f"{k} IS NULL")
        elif isinstance(v, bool):
            conditions.append(f"{k} = {str(v).lower()}")
        elif isinstance(v, str):
            escaped_v = v.replace("'", "''")  # Escape quotes
            conditions.append(f"{k} = '{escaped_v}'")
        else:
            conditions.append(f"{k} = {v}")

    if conditions:
        where_clause = " AND ".join(conditions)
        sql = f"DELETE FROM {table_name} WHERE {where_clause}"
    else:
        sql = f"DELETE FROM {table_name}"

    try:
        conn.raw_sql(sql)
        # Note: Getting rowcount from raw SQL varies by backend
        # For now, return None to indicate success without count
        self._post_delete_hook()
        return None
    except Exception as e:
        if not missing_ok:
            raise
        logger.warning(f"Error deleting: {e}")
        return 0
|
|
250
|
+
|
|
251
|
+
def query(self, query: Query = None, **kwargs) -> QueryResult:
    """Execute a query against the collection.

    Builds up an Ibis table expression (filters, column selection, sorting,
    limit/offset), materializes it via pandas, and decodes JSON-encoded
    columns back into lists/dicts.

    :param query: the query to run; a fresh (select-all) Query if None
    :param kwargs: unused; accepted for interface compatibility
    :return: a QueryResult carrying the total (pre-pagination) row count,
        the decoded rows, and the raw DataFrame
    :raises Exception: re-raises any backend error after logging it
    """
    if query is None:
        query = Query()

    conn = self.parent.connection
    table_name = self.alias or self.target_class_name

    if table_name not in conn.list_tables():
        logger.warning(f"Table {table_name} does not exist")
        return QueryResult(num_rows=0, rows=[])

    # Get the Ibis table
    table = conn.table(table_name)

    # Apply filters
    if query.where_clause:
        table = self._apply_where(table, query.where_clause)

    # Apply column selection
    if query.select_cols:
        table = table.select(query.select_cols)

    # Apply sorting
    if query.sort_by:
        # Convert sort specs to Ibis sort expressions
        sort_exprs = []
        for sort_spec in query.sort_by:
            if sort_spec.startswith("-"):
                # Descending ("-col" convention)
                col_name = sort_spec[1:]
                sort_exprs.append(table[col_name].desc())
            else:
                # Ascending
                sort_exprs.append(table[sort_spec].asc())
        table = table.order_by(sort_exprs)

    # Get total count BEFORE applying limit/offset (for pagination)
    total_count = table.count().execute()

    # Apply limit and offset
    # Note: limit=-1 is used as a magic value for "no limit" in linkml-store
    if query.offset and query.offset > 0:
        # NOTE(review): limit(None, offset=...) is assumed to mean
        # "all rows past the offset" — confirm across Ibis backends
        table = table.limit(None, offset=query.offset)
    if query.limit and query.limit > 0:
        table = table.limit(query.limit)

    # Execute query and convert to pandas
    try:
        df = table.to_pandas()
        rows = df.to_dict("records")

        # Deserialize JSON columns (auto-detect if not explicitly tracked)
        rows = [_deserialize_complex_values(row, self._json_columns) for row in rows]

        result = QueryResult(
            query=query,
            num_rows=total_count,
            offset=query.offset,
            rows=rows,
            rows_dataframe=df,
        )

        # Handle facets if requested
        if query.include_facet_counts and query.facet_slots:
            result.facet_counts = self._compute_facets(table_name, query.where_clause, query.facet_slots)

        return result
    except Exception as e:
        logger.error(f"Error executing query: {e}")
        raise
|
|
322
|
+
|
|
323
|
+
def _apply_where(self, table, where_clause):
    """Apply where clause filters to an Ibis table.

    Supports MongoDB-style operators: $in, $gt, $gte, $lt, $lte, $ne
    """
    # Comparison operators share one shape: build the predicate from the
    # column and the operand, then filter.
    comparators = {
        "$gt": lambda col, val: col > val,
        "$gte": lambda col, val: col >= val,
        "$lt": lambda col, val: col < val,
        "$lte": lambda col, val: col <= val,
        "$ne": lambda col, val: col != val,
    }

    if isinstance(where_clause, dict):
        for field, spec in where_clause.items():
            if not isinstance(spec, dict):
                # Plain value means simple equality
                table = table.filter(table[field] == spec)
                continue
            # Mongo-style operator dict: apply each operator in turn
            for op, operand in spec.items():
                if op == "$in":
                    # Membership test; a scalar operand degrades to equality
                    if isinstance(operand, (list, tuple)):
                        table = table.filter(table[field].isin(list(operand)))
                    else:
                        table = table.filter(table[field] == operand)
                elif op in comparators:
                    table = table.filter(comparators[op](table[field], operand))
                else:
                    logger.warning(f"Unsupported operator {op}")
    elif isinstance(where_clause, list):
        # A list means AND-ed conditions; fold each one in recursively
        for condition in where_clause:
            if isinstance(condition, dict):
                table = self._apply_where(table, condition)
            else:
                logger.warning(f"String where clauses not fully supported in Ibis: {condition}")
    elif isinstance(where_clause, str):
        # Raw SQL strings are not translated to Ibis expressions
        logger.warning(f"String where clauses require SQL mode: {where_clause}")

    return table
|
|
367
|
+
|
|
368
|
+
def _compute_facets(
    self, table_name: str, where_clause, facet_columns: List[str], facet_limit: int = DEFAULT_FACET_LIMIT
) -> Dict[str, List[Tuple[Any, int]]]:
    """Compute facet counts for specified columns.

    :param table_name: physical table to facet over
    :param where_clause: optional filter applied before counting (any
        format accepted by _apply_where)
    :param facet_columns: columns to group and count by
    :param facet_limit: maximum number of facet values kept per column
    :return: mapping of column name to (value, count) tuples ordered most
        frequent first; a column that fails to facet maps to an empty list
    """
    conn = self.parent.connection
    table = conn.table(table_name)

    if where_clause:
        table = self._apply_where(table, where_clause)

    facets = {}
    for col in facet_columns:
        try:
            # Group by and count
            # NOTE(review): assumes Ibis treats table.count() inside
            # aggregate() as a per-group row count — confirm across backends
            grouped = table.group_by(col).aggregate(count=table.count())
            # Order by count descending and limit
            grouped = grouped.order_by(grouped["count"].desc())
            if facet_limit:
                grouped = grouped.limit(facet_limit)
            df = grouped.to_pandas()
            # Convert to list of tuples
            facets[col] = list(zip(df[col], df["count"]))
        except Exception as e:
            # Faceting is best-effort per column; a failure is logged, not fatal
            logger.warning(f"Error computing facets for {col}: {e}")
            facets[col] = []

    return facets
|
|
395
|
+
|
|
396
|
+
def _create_table(self, cd: ClassDefinition):
    """Create the table if it doesn't exist."""
    if self._table_created:
        return

    conn = self.parent.connection
    table_name = self.alias or self.target_class_name

    # Already present in the backend: just remember that and stop.
    if table_name in conn.list_tables():
        self._table_created = True
        return

    # Map LinkML slot ranges onto pandas dtypes; anything unrecognized
    # (including the default) is stored as a string column.
    dtype_for_range = {
        "integer": "Int64",
        "float": "float64",
        "boolean": "boolean",
        "date": "object",
        "datetime": "datetime64[ns]",
    }

    columns = {}
    if cd.attributes:
        for attr_name, slot in cd.attributes.items():
            slot_range = slot.range or "string"
            dtype = dtype_for_range.get(slot_range, "string")
            columns[attr_name] = pd.Series([], dtype=dtype)

    if not columns:
        # No columns defined - table will be created on first insert with actual data
        logger.debug(f"No columns defined for {table_name}, will create on first insert")
        self._table_created = False
        return

    # An empty, correctly-typed DataFrame carries the schema to the backend.
    schema_df = pd.DataFrame(columns)

    try:
        conn.create_table(table_name, schema_df)
        self._table_created = True
        logger.info(f"Created table {table_name}")
    except Exception as e:
        logger.error(f"Error creating table {table_name}: {e}")
        raise
|
|
445
|
+
|
|
446
|
+
def find(self, where: Optional[Dict[str, Any]] = None, **kwargs) -> QueryResult:
    """Find objects matching the where clause."""
    q = Query(
        where_clause=where,
        limit=kwargs.get("limit"),
        offset=kwargs.get("offset"),
    )
    # Give subclass hooks a chance to adjust the query before execution.
    self._pre_query_hook(q)
    return self.query(q)
|
|
451
|
+
|
|
452
|
+
def peek(self, limit=5) -> QueryResult:
    """Fetch a small sample of objects from the collection."""
    # A bare limited query with no filters returns the first few rows.
    return self.query(Query(limit=limit))
|
|
456
|
+
|
|
457
|
+
def query_facets(
    self,
    where: Optional[Dict[str, Any]] = None,
    facet_columns: List[str] = None,
    facet_limit: int = DEFAULT_FACET_LIMIT,
    **kwargs,
) -> Dict[str, List[Tuple[Any, int]]]:
    """Get facet counts for specified columns.

    :param where: Optional filter conditions
    :param facet_columns: Columns to facet on (defaults to all columns)
    :param facet_limit: Maximum number of facet values per column
    :return: Dictionary mapping column names to lists of (value, count) tuples
    """
    conn = self.parent.connection
    table_name = self.alias or self.target_class_name

    if table_name not in conn.list_tables():
        logger.warning(f"Table {table_name} does not exist")
        return {}

    if not facet_columns:
        # Default to the modeled attributes; fall back to the physical
        # schema when no class definition is available.
        cd = self.class_definition()
        if cd and cd.attributes:
            facet_columns = list(cd.attributes.keys())
        else:
            facet_columns = list(conn.table(table_name).columns)

    return self._compute_facets(table_name, where, facet_columns, facet_limit)
|