linkml-store 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/__init__.py +7 -0
- linkml_store/api/__init__.py +8 -0
- linkml_store/api/client.py +414 -0
- linkml_store/api/collection.py +1280 -0
- linkml_store/api/config.py +187 -0
- linkml_store/api/database.py +862 -0
- linkml_store/api/queries.py +69 -0
- linkml_store/api/stores/__init__.py +0 -0
- linkml_store/api/stores/chromadb/__init__.py +7 -0
- linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
- linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
- linkml_store/api/stores/dremio/__init__.py +10 -0
- linkml_store/api/stores/dremio/dremio_collection.py +555 -0
- linkml_store/api/stores/dremio/dremio_database.py +1052 -0
- linkml_store/api/stores/dremio/mappings.py +105 -0
- linkml_store/api/stores/dremio_rest/__init__.py +11 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
- linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
- linkml_store/api/stores/duckdb/__init__.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
- linkml_store/api/stores/duckdb/mappings.py +8 -0
- linkml_store/api/stores/filesystem/__init__.py +15 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
- linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
- linkml_store/api/stores/ibis/__init__.py +5 -0
- linkml_store/api/stores/ibis/ibis_collection.py +488 -0
- linkml_store/api/stores/ibis/ibis_database.py +328 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
- linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
- linkml_store/api/stores/neo4j/__init__.py +0 -0
- linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
- linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
- linkml_store/api/stores/solr/__init__.py +3 -0
- linkml_store/api/stores/solr/solr_collection.py +224 -0
- linkml_store/api/stores/solr/solr_database.py +83 -0
- linkml_store/api/stores/solr/solr_utils.py +0 -0
- linkml_store/api/types.py +4 -0
- linkml_store/cli.py +1147 -0
- linkml_store/constants.py +7 -0
- linkml_store/graphs/__init__.py +0 -0
- linkml_store/graphs/graph_map.py +24 -0
- linkml_store/index/__init__.py +53 -0
- linkml_store/index/implementations/__init__.py +0 -0
- linkml_store/index/implementations/llm_indexer.py +174 -0
- linkml_store/index/implementations/simple_indexer.py +43 -0
- linkml_store/index/indexer.py +211 -0
- linkml_store/inference/__init__.py +13 -0
- linkml_store/inference/evaluation.py +195 -0
- linkml_store/inference/implementations/__init__.py +0 -0
- linkml_store/inference/implementations/llm_inference_engine.py +154 -0
- linkml_store/inference/implementations/rag_inference_engine.py +276 -0
- linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
- linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
- linkml_store/inference/inference_config.py +66 -0
- linkml_store/inference/inference_engine.py +209 -0
- linkml_store/inference/inference_engine_registry.py +74 -0
- linkml_store/plotting/__init__.py +5 -0
- linkml_store/plotting/cli.py +826 -0
- linkml_store/plotting/dimensionality_reduction.py +453 -0
- linkml_store/plotting/embedding_plot.py +489 -0
- linkml_store/plotting/facet_chart.py +73 -0
- linkml_store/plotting/heatmap.py +383 -0
- linkml_store/utils/__init__.py +0 -0
- linkml_store/utils/change_utils.py +17 -0
- linkml_store/utils/dat_parser.py +95 -0
- linkml_store/utils/embedding_matcher.py +424 -0
- linkml_store/utils/embedding_utils.py +299 -0
- linkml_store/utils/enrichment_analyzer.py +217 -0
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +550 -0
- linkml_store/utils/io.py +38 -0
- linkml_store/utils/llm_utils.py +122 -0
- linkml_store/utils/mongodb_utils.py +145 -0
- linkml_store/utils/neo4j_utils.py +42 -0
- linkml_store/utils/object_utils.py +190 -0
- linkml_store/utils/pandas_utils.py +93 -0
- linkml_store/utils/patch_utils.py +126 -0
- linkml_store/utils/query_utils.py +89 -0
- linkml_store/utils/schema_utils.py +23 -0
- linkml_store/utils/sklearn_utils.py +193 -0
- linkml_store/utils/sql_utils.py +177 -0
- linkml_store/utils/stats_utils.py +53 -0
- linkml_store/utils/vector_utils.py +158 -0
- linkml_store/webapi/__init__.py +0 -0
- linkml_store/webapi/html/__init__.py +3 -0
- linkml_store/webapi/html/base.html.j2 +24 -0
- linkml_store/webapi/html/collection_details.html.j2 +15 -0
- linkml_store/webapi/html/database_details.html.j2 +16 -0
- linkml_store/webapi/html/databases.html.j2 +14 -0
- linkml_store/webapi/html/generic.html.j2 +43 -0
- linkml_store/webapi/main.py +855 -0
- linkml_store-0.3.0.dist-info/METADATA +226 -0
- linkml_store-0.3.0.dist-info/RECORD +101 -0
- linkml_store-0.3.0.dist-info/WHEEL +4 -0
- linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
- linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0

linkml_store/api/stores/dremio/mappings.py
@@ -0,0 +1,105 @@
"""Type mappings between LinkML types and Dremio/Arrow types."""

import pyarrow as pa

# Mapping from LinkML types to PyArrow types
LINKML_TO_ARROW = {
    "string": pa.string(),
    "integer": pa.int64(),
    "float": pa.float64(),
    "boolean": pa.bool_(),
    "date": pa.date32(),
    "datetime": pa.timestamp("us"),
    "decimal": pa.decimal128(38, 10),
    "Any": pa.string(),  # Fallback to string for Any type
}

# Mapping from Arrow types to LinkML types
ARROW_TO_LINKML = {
    pa.string(): "string",
    pa.utf8(): "string",
    pa.large_string(): "string",
    pa.int8(): "integer",
    pa.int16(): "integer",
    pa.int32(): "integer",
    pa.int64(): "integer",
    pa.uint8(): "integer",
    pa.uint16(): "integer",
    pa.uint32(): "integer",
    pa.uint64(): "integer",
    pa.float16(): "float",
    pa.float32(): "float",
    pa.float64(): "float",
    pa.bool_(): "boolean",
    pa.date32(): "date",
    pa.date64(): "date",
}

# Mapping from Dremio SQL type names to LinkML types
DREMIO_SQL_TO_LINKML = {
    "VARCHAR": "string",
    "CHAR": "string",
    "BIGINT": "integer",
    "INTEGER": "integer",
    "INT": "integer",
    "SMALLINT": "integer",
    "TINYINT": "integer",
    "BOOLEAN": "boolean",
    "DOUBLE": "float",
    "FLOAT": "float",
    "DECIMAL": "float",
    "DATE": "date",
    "TIMESTAMP": "datetime",
    "TIME": "string",
    "BINARY": "string",
    "VARBINARY": "string",
    "LIST": "string",  # Complex types mapped to string
    "STRUCT": "string",
    "MAP": "string",
}


def get_arrow_type(linkml_type: str) -> pa.DataType:
    """Convert a LinkML type to a PyArrow type.

    Args:
        linkml_type: The LinkML type name.

    Returns:
        The corresponding PyArrow data type.
    """
    return LINKML_TO_ARROW.get(linkml_type, pa.string())


def get_linkml_type_from_arrow(arrow_type: pa.DataType) -> str:
    """Convert a PyArrow type to a LinkML type.

    Args:
        arrow_type: The PyArrow data type.

    Returns:
        The corresponding LinkML type name.
    """
    # Handle parameterized types by checking base type
    if pa.types.is_string(arrow_type) or pa.types.is_large_string(arrow_type):
        return "string"
    if pa.types.is_integer(arrow_type):
        return "integer"
    if pa.types.is_floating(arrow_type):
        return "float"
    if pa.types.is_boolean(arrow_type):
        return "boolean"
    if pa.types.is_date(arrow_type):
        return "date"
    if pa.types.is_timestamp(arrow_type):
        return "datetime"
    if pa.types.is_decimal(arrow_type):
        return "float"
    if pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type):
        return "string"  # Complex types as string
    if pa.types.is_struct(arrow_type):
        return "string"
    if pa.types.is_map(arrow_type):
        return "string"

    return "string"  # Default fallback
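
For orientation, here is a minimal sketch (not part of the diff) of how the two helpers above behave; it depends only on `pyarrow` and the module itself:

```python
import pyarrow as pa

from linkml_store.api.stores.dremio.mappings import (
    get_arrow_type,
    get_linkml_type_from_arrow,
)

assert get_arrow_type("integer") == pa.int64()
assert get_arrow_type("no_such_type") == pa.string()  # unknown LinkML types fall back to string

# get_linkml_type_from_arrow dispatches on pa.types.is_* predicates rather than
# dict lookup, so parameterized types (any timestamp unit, any decimal precision)
# still resolve correctly:
assert get_linkml_type_from_arrow(pa.timestamp("ns")) == "datetime"
assert get_linkml_type_from_arrow(pa.decimal128(10, 2)) == "float"
assert get_linkml_type_from_arrow(pa.list_(pa.int32())) == "string"  # complex types collapse to string
```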

linkml_store/api/stores/dremio_rest/__init__.py
@@ -0,0 +1,11 @@
"""Dremio REST API adapter for linkml-store.

This module provides a Dremio adapter that uses the REST API v3 for
connectivity to Dremio data lakehouse instances that don't expose
the Arrow Flight SQL port (e.g., behind Cloudflare or firewalls).
"""

from linkml_store.api.stores.dremio_rest.dremio_rest_collection import DremioRestCollection
from linkml_store.api.stores.dremio_rest.dremio_rest_database import DremioRestDatabase

__all__ = ["DremioRestDatabase", "DremioRestCollection"]
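
A hedged sketch (not part of the diff) of wiring this adapter in through the top-level `Client`; the handle scheme, host, and port shown are illustrative assumptions, not a documented API:

```python
from linkml_store import Client

client = Client()
# Hypothetical handle; check the adapter documentation for the exact scheme and parameters.
db = client.attach_database("dremio-rest://dremio.example.org:9047", alias="lakehouse")
collection = db.get_collection("my_space.users")
```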

linkml_store/api/stores/dremio_rest/dremio_rest_collection.py
@@ -0,0 +1,502 @@
"""Dremio REST API collection implementation.

This module provides the Collection implementation for the Dremio REST API,
supporting query operations via the REST API v3.
"""

import json
import logging
from typing import Any, Dict, List, Optional, Tuple, Union

from linkml_store.api import Collection
from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
from linkml_store.api.queries import Query, QueryResult

logger = logging.getLogger(__name__)


class DremioRestCollection(Collection):
    """Collection implementation for Dremio data lakehouse via REST API.

    This collection connects to Dremio tables via the REST API v3
    and provides query capabilities. Write operations may be limited
    depending on the underlying data source configuration in Dremio.
    """

    _table_exists_checked: bool = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _get_table_path(self) -> str:
        """Get the fully qualified table path.

        Returns:
            Full table path for SQL queries.
        """
        return self.parent._get_table_path(self.alias)

    def _build_select_sql(
        self,
        select_cols: Optional[List[str]] = None,
        where_clause: Optional[Union[str, Dict[str, Any]]] = None,
        sort_by: Optional[List[str]] = None,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
    ) -> str:
        """Build a SELECT SQL statement.

        Args:
            select_cols: Columns to select (None for all).
            where_clause: WHERE conditions.
            sort_by: ORDER BY columns.
            limit: Maximum rows to return.
            offset: Number of rows to skip.

        Returns:
            SQL SELECT statement.
        """
        table_path = self._get_table_path()

        # Build SELECT clause
        if select_cols:
            cols = ", ".join(f'"{c}"' for c in select_cols)
        else:
            cols = "*"

        sql = f"SELECT {cols} FROM {table_path}"

        # Build WHERE clause
        if where_clause:
            conditions = self._build_where_conditions(where_clause)
            if conditions:
                sql += f" WHERE {conditions}"

        # Build ORDER BY clause
        if sort_by:
            order_cols = ", ".join(f'"{c}"' for c in sort_by)
            sql += f" ORDER BY {order_cols}"

        # Build LIMIT/OFFSET
        if limit is not None and limit >= 0:
            sql += f" LIMIT {limit}"
        if offset is not None and offset > 0:
            sql += f" OFFSET {offset}"

        return sql
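
To make the generated SQL concrete, a sketch (not part of the diff) of what `_build_select_sql` emits, assuming `collection` is a `DremioRestCollection` whose table path resolves to a hypothetical `my_space."users"`:

```python
sql = collection._build_select_sql(
    select_cols=["name", "age"],
    where_clause={"status": "active"},
    sort_by=["name"],
    limit=10,
    offset=20,
)
# SELECT "name", "age" FROM my_space."users" WHERE "status" = 'active'
#   ORDER BY "name" LIMIT 10 OFFSET 20
```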

    def _build_where_conditions(self, where_clause: Union[str, Dict[str, Any]]) -> str:
        """Build WHERE clause conditions from a dict or string.

        Args:
            where_clause: WHERE conditions as dict or string.

        Returns:
            SQL WHERE clause (without WHERE keyword).
        """
        if isinstance(where_clause, str):
            return where_clause

        if not isinstance(where_clause, dict):
            return ""

        conditions = []
        for key, value in where_clause.items():
            condition = self._build_single_condition(key, value)
            if condition:
                conditions.append(condition)

        return " AND ".join(conditions)

    def _build_single_condition(self, key: str, value: Any) -> str:
        """Build a single WHERE condition.

        Supports MongoDB-style operators like $gt, $gte, $lt, $lte, $in, $ne.

        Args:
            key: Column name.
            value: Value or operator dict.

        Returns:
            SQL condition string.
        """
        col = f'"{key}"'

        if value is None:
            return f"{col} IS NULL"

        if isinstance(value, dict):
            # Handle operators
            sub_conditions = []
            for op, val in value.items():
                if op == "$gt":
                    sub_conditions.append(f"{col} > {self._sql_value(val)}")
                elif op == "$gte":
                    sub_conditions.append(f"{col} >= {self._sql_value(val)}")
                elif op == "$lt":
                    sub_conditions.append(f"{col} < {self._sql_value(val)}")
                elif op == "$lte":
                    sub_conditions.append(f"{col} <= {self._sql_value(val)}")
                elif op == "$ne":
                    if val is None:
                        sub_conditions.append(f"{col} IS NOT NULL")
                    else:
                        sub_conditions.append(f"{col} != {self._sql_value(val)}")
                elif op == "$in":
                    if isinstance(val, (list, tuple)):
                        vals = ", ".join(self._sql_value(v) for v in val)
                        sub_conditions.append(f"{col} IN ({vals})")
                elif op == "$nin":
                    if isinstance(val, (list, tuple)):
                        vals = ", ".join(self._sql_value(v) for v in val)
                        sub_conditions.append(f"{col} NOT IN ({vals})")
                elif op == "$like":
                    sub_conditions.append(f"{col} LIKE {self._sql_value(val)}")
                elif op == "$ilike":
                    sub_conditions.append(f"LOWER({col}) LIKE LOWER({self._sql_value(val)})")
                elif op == "$regex":
                    # Dremio uses REGEXP_LIKE
                    sub_conditions.append(f"REGEXP_LIKE({col}, {self._sql_value(val)})")
                else:
                    logger.warning(f"Unknown operator: {op}")

            return " AND ".join(sub_conditions) if sub_conditions else ""
        else:
            return f"{col} = {self._sql_value(value)}"
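
A sketch (not part of the diff) of the MongoDB-style operators in action: each top-level key yields one condition, an operator dict can combine several comparisons, and everything is AND-ed together. Continuing the hypothetical `collection` from above:

```python
where = {
    "age": {"$gte": 18, "$lt": 65},
    "country": {"$in": ["US", "CA"]},
    "nickname": None,
}
print(collection._build_where_conditions(where))
# "age" >= 18 AND "age" < 65 AND "country" IN ('US', 'CA') AND "nickname" IS NULL
```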

    def _sql_value(self, value: Any) -> str:
        """Convert a Python value to a SQL literal.

        Args:
            value: Python value.

        Returns:
            SQL literal string.
        """
        if value is None:
            return "NULL"
        elif isinstance(value, bool):
            return "TRUE" if value else "FALSE"
        elif isinstance(value, (int, float)):
            return str(value)
        elif isinstance(value, str):
            # Escape single quotes
            escaped = value.replace("'", "''")
            return f"'{escaped}'"
        elif isinstance(value, (list, dict)):
            # Convert to JSON string
            escaped = json.dumps(value).replace("'", "''")
            return f"'{escaped}'"
        else:
            escaped = str(value).replace("'", "''")
            return f"'{escaped}'"
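
Note that `_sql_value` doubles single quotes and serializes lists/dicts as JSON text, and that values are interpolated directly into the SQL string rather than bound as parameters, so column names and operator keys should come from trusted code, not end-user input. A few illustrative conversions (not part of the diff):

```python
collection._sql_value("O'Brien")   # -> 'O''Brien'
collection._sql_value(True)        # -> TRUE
collection._sql_value(None)        # -> NULL
collection._sql_value(["a", "b"])  # -> '["a", "b"]'  (JSON-encoded, then quoted)
```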

    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
        """Insert objects into the collection.

        Note: Write operations in Dremio depend on the underlying data source.
        Some sources (like Iceberg, Delta Lake) support writes, while others
        (like file-based sources) may not.

        Args:
            objs: Object(s) to insert.
            **kwargs: Additional arguments.
        """
        if not isinstance(objs, list):
            objs = [objs]

        if not objs:
            return

        logger.debug(f"Inserting {len(objs)} objects into {self.alias}")

        cd = self.class_definition()
        if not cd:
            logger.debug(f"No class definition for {self.alias}; inducing from objects")
            cd = self.induce_class_definition_from_objects(objs)

        table_path = self._get_table_path()

        if cd and cd.attributes:
            columns = list(cd.attributes.keys())
        else:
            columns = list(objs[0].keys())

        col_list = ", ".join(f'"{c}"' for c in columns)

        batch_size = 100
        for i in range(0, len(objs), batch_size):
            batch = objs[i : i + batch_size]

            values_list = []
            for obj in batch:
                values = []
                for col in columns:
                    val = obj.get(col)
                    values.append(self._sql_value(val))
                values_list.append(f"({', '.join(values)})")

            values_sql = ", ".join(values_list)
            sql = f"INSERT INTO {table_path} ({col_list}) VALUES {values_sql}"

            self.parent._execute_update(sql)

        self._post_insert_hook(objs)
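
Inserts are chunked into multi-row INSERT statements of at most 100 rows each; a sketch (not part of the diff), again using the hypothetical `collection`:

```python
rows = [{"id": i, "name": f"item-{i}"} for i in range(250)]
collection.insert(rows)
# Issues three statements of 100, 100, and 50 rows:
#   INSERT INTO my_space."users" ("id", "name") VALUES (0, 'item-0'), (1, 'item-1'), ...
```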

    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
        """Delete specific objects from the collection.

        Args:
            objs: Object(s) to delete.
            **kwargs: Additional arguments.

        Returns:
            Number of deleted rows, or None if unknown.
        """
        if not isinstance(objs, list):
            objs = [objs]

        if not objs:
            return 0

        table_path = self._get_table_path()
        total_deleted = 0

        for obj in objs:
            conditions = []
            for key, value in obj.items():
                if key.startswith("_"):
                    continue
                condition = self._build_single_condition(key, value)
                if condition:
                    conditions.append(condition)

            if not conditions:
                continue

            sql = f"DELETE FROM {table_path} WHERE {' AND '.join(conditions)}"
            result = self.parent._execute_update(sql)
            if result > 0:
                total_deleted += result

        self._post_delete_hook()
        return total_deleted if total_deleted > 0 else None
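
`delete` matches each object on all of its non-underscore fields, so passing just the identifying field deletes by key; a sketch (not part of the diff):

```python
collection.delete({"id": 42})
# DELETE FROM my_space."users" WHERE "id" = 42
```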

    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
        """Delete objects matching a condition.

        Args:
            where: WHERE conditions (an empty dict means delete all).
            missing_ok: If True, do not raise an error when no rows are deleted.
            **kwargs: Additional arguments.

        Returns:
            Number of deleted rows, or None if unknown.
        """
        if where is None:
            where = {}

        table_path = self._get_table_path()

        if where:
            conditions = self._build_where_conditions(where)
            sql = f"DELETE FROM {table_path} WHERE {conditions}"
        else:
            sql = f"DELETE FROM {table_path}"

        result = self.parent._execute_update(sql)
        if result == 0 and not missing_ok:
            raise ValueError(f"No rows found for {where}")
        self._post_delete_hook()
        return result if result >= 0 else None
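
`delete_where` translates the filter straight into a single DELETE; note that an empty or omitted filter deletes every row. A sketch (not part of the diff):

```python
collection.delete_where({"status": "inactive"})
# DELETE FROM my_space."users" WHERE "status" = 'inactive'

collection.delete_where()
# DELETE FROM my_space."users"  -- removes all rows
```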

    def update(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
        """Update objects in the collection.

        Note: Requires a primary key field to identify rows.

        Args:
            objs: Object(s) to update.
            **kwargs: Additional arguments.
        """
        if not isinstance(objs, list):
            objs = [objs]

        if not objs:
            return

        table_path = self._get_table_path()
        pk = self.identifier_attribute_name

        if not pk:
            raise ValueError("Cannot update without an identifier attribute")

        for obj in objs:
            if pk not in obj:
                raise ValueError(f"Object missing primary key field: {pk}")

            pk_value = obj[pk]

            set_parts = []
            for key, value in obj.items():
                if key == pk or key.startswith("_"):
                    continue
                set_parts.append(f'"{key}" = {self._sql_value(value)}')

            if not set_parts:
                continue

            set_clause = ", ".join(set_parts)
            sql = f'UPDATE {table_path} SET {set_clause} WHERE "{pk}" = {self._sql_value(pk_value)}'
            self.parent._execute_update(sql)
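
`update` issues one UPDATE per object, keyed on the collection's identifier attribute; a sketch (not part of the diff) assuming that attribute is `id`:

```python
collection.update({"id": 7, "name": "renamed", "age": 31})
# UPDATE my_space."users" SET "name" = 'renamed', "age" = 31 WHERE "id" = 7
```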

    def query(self, query: Query, **kwargs) -> QueryResult:
        """Execute a query against the collection.

        Args:
            query: Query specification.
            **kwargs: Additional arguments.

        Returns:
            QueryResult with matching rows.
        """
        self._pre_query_hook(query)

        limit = query.limit
        if limit == -1:
            limit = None

        sql = self._build_select_sql(
            select_cols=query.select_cols,
            where_clause=query.where_clause,
            sort_by=query.sort_by,
            limit=limit,
            offset=query.offset,
        )

        df = self.parent._execute_query(sql)

        # Convert DataFrame to list of dicts
        row_list = df.to_dict("records") if not df.empty else []

        # Get total count for pagination
        if query.offset or (limit is not None and len(row_list) == limit):
            count_sql = self._build_count_sql(query.where_clause)
            try:
                count_df = self.parent._execute_query(count_sql)
                total_rows = int(count_df.iloc[0, 0]) if not count_df.empty else len(row_list)
            except Exception:
                total_rows = len(row_list)
        else:
            total_rows = len(row_list)

        qr = QueryResult(query=query, num_rows=total_rows, rows=row_list, offset=query.offset or 0)

        if query.include_facet_counts and query.facet_slots:
            qr.facet_counts = self.query_facets(where=query.where_clause, facet_columns=query.facet_slots)

        return qr
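
A sketch (not part of the diff) of a paged query. The `Query` keyword arguments are assumed to mirror the fields the method reads (`select_cols`, `where_clause`, `sort_by`, `limit`, `offset`); when a page comes back full, a follow-up COUNT(*) query establishes the true total for pagination:

```python
from linkml_store.api.queries import Query

q = Query(
    from_table="users",  # assumed field name; adjust to the actual Query model
    where_clause={"age": {"$gte": 18}},
    sort_by=["name"],
    limit=20,
    offset=40,
)
result = collection.query(q)
print(result.num_rows, len(result.rows))  # total matching rows vs. rows on this page
```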

    def _build_count_sql(self, where_clause: Optional[Union[str, Dict[str, Any]]] = None) -> str:
        """Build a COUNT SQL statement.

        Args:
            where_clause: WHERE conditions.

        Returns:
            SQL COUNT statement.
        """
        table_path = self._get_table_path()
        sql = f"SELECT COUNT(*) FROM {table_path}"

        if where_clause:
            conditions = self._build_where_conditions(where_clause)
            if conditions:
                sql += f" WHERE {conditions}"

        return sql

    def query_facets(
        self,
        where: Optional[Dict] = None,
        facet_columns: Optional[List[str]] = None,
        facet_limit: int = DEFAULT_FACET_LIMIT,
        **kwargs,
    ) -> Dict[Union[str, Tuple[str, ...]], List[Tuple[Any, int]]]:
        """Get facet counts for columns.

        Args:
            where: Filter conditions.
            facet_columns: Columns to get facets for.
            facet_limit: Maximum facet values per column.
            **kwargs: Additional arguments.

        Returns:
            Dictionary mapping column names to list of (value, count) tuples.
        """
        if facet_limit is None:
            facet_limit = DEFAULT_FACET_LIMIT

        results = {}
        cd = self.class_definition()
        table_path = self._get_table_path()

        if not facet_columns:
            if cd and cd.attributes:
                facet_columns = list(cd.attributes.keys())
            else:
                return results

        for col in facet_columns:
            if isinstance(col, tuple):
                col_list = ", ".join(f'"{c}"' for c in col)
                col_name = col
            else:
                col_list = f'"{col}"'
                col_name = col

            sql = f"SELECT {col_list}, COUNT(*) as cnt FROM {table_path}"

            if where:
                conditions = self._build_where_conditions(where)
                if conditions:
                    sql += f" WHERE {conditions}"

            sql += f" GROUP BY {col_list} ORDER BY cnt DESC"

            if facet_limit > 0:
                sql += f" LIMIT {facet_limit}"

            try:
                df = self.parent._execute_query(sql)

                facets = []
                for _, row in df.iterrows():
                    if isinstance(col, tuple):
                        value = tuple(row[c] for c in col)
                    else:
                        value = row[col]
                    count = int(row["cnt"])
                    facets.append((value, count))

                results[col_name] = facets

            except Exception as e:
                logger.warning(f"Facet query failed for {col}: {e}")
                results[col_name] = []

        return results
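
Facet results come back as (value, count) pairs per requested column, highest counts first; tuples of column names group by the combination. A sketch (not part of the diff) with illustrative output:

```python
facets = collection.query_facets(facet_columns=["status", ("country", "status")], facet_limit=5)
# {"status": [("active", 1204), ("inactive", 87)],
#  ("country", "status"): [(("US", "active"), 950), (("CA", "active"), 254)]}
```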

    def _check_if_initialized(self) -> bool:
        """Check if the collection's table exists.

        Returns:
            True if the table exists.
        """
        if self._table_exists_checked:
            return True

        result = self.parent._table_exists(self.alias)
        if result:
            self._table_exists_checked = True
        return result