pyseekdb 0.1.0.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyseekdb/__init__.py +90 -0
- pyseekdb/client/__init__.py +324 -0
- pyseekdb/client/admin_client.py +202 -0
- pyseekdb/client/base_connection.py +82 -0
- pyseekdb/client/client_base.py +1921 -0
- pyseekdb/client/client_oceanbase_server.py +258 -0
- pyseekdb/client/client_seekdb_embedded.py +324 -0
- pyseekdb/client/client_seekdb_server.py +226 -0
- pyseekdb/client/collection.py +485 -0
- pyseekdb/client/database.py +55 -0
- pyseekdb/client/filters.py +357 -0
- pyseekdb/client/meta_info.py +15 -0
- pyseekdb/client/query_result.py +122 -0
- pyseekdb/client/sql_utils.py +48 -0
- pyseekdb/examples/comprehensive_example.py +412 -0
- pyseekdb/examples/simple_example.py +113 -0
- pyseekdb/tests/__init__.py +0 -0
- pyseekdb/tests/test_admin_database_management.py +307 -0
- pyseekdb/tests/test_client_creation.py +425 -0
- pyseekdb/tests/test_collection_dml.py +652 -0
- pyseekdb/tests/test_collection_get.py +550 -0
- pyseekdb/tests/test_collection_hybrid_search.py +1126 -0
- pyseekdb/tests/test_collection_query.py +428 -0
- pyseekdb-0.1.0.dev3.dist-info/LICENSE +202 -0
- pyseekdb-0.1.0.dev3.dist-info/METADATA +856 -0
- pyseekdb-0.1.0.dev3.dist-info/RECORD +27 -0
- pyseekdb-0.1.0.dev3.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Filter builder utilities for metadata and document filtering
|
|
3
|
+
|
|
4
|
+
Supports:
|
|
5
|
+
- Metadata filters: $eq, $lt, $gt, $lte, $gte, $ne, $in, $nin
|
|
6
|
+
- Logical operators: $or, $and, $not
|
|
7
|
+
- Document filters: $contains, $regex
|
|
8
|
+
"""
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FilterBuilder:
    """Build SQL WHERE clauses and hybrid-search filters from filter dictionaries.

    All methods are static. The SQL builders return ``(where_clause, params)``
    tuples whose clause uses ``%s`` placeholders; the search builder returns
    nested ``bool`` / ``term`` / ``range`` dictionaries.
    """

    # Comparison operators mapping (filter operator -> SQL operator)
    COMPARISON_OPS = {
        "$eq": "=",
        "$lt": "<",
        "$gt": ">",
        "$lte": "<=",
        "$gte": ">=",
        "$ne": "!="
    }

    # Logical operators
    LOGICAL_OPS = ["$and", "$or", "$not"]

    # Document operators
    DOCUMENT_OPS = ["$contains", "$regex"]

    @staticmethod
    def build_metadata_filter(
        where: Dict[str, Any],
        metadata_column: str = "metadata"
    ) -> Tuple[str, List[Any]]:
        """
        Build WHERE clause for metadata filtering

        Args:
            where: Filter dictionary with operators like $eq, $lt, $gt, $lte, $gte, $ne, $in, $nin, $and, $or, $not
            metadata_column: Name of metadata column (default: "metadata")

        Returns:
            Tuple of (where_clause, params) for parameterized query.
            An empty/None ``where`` yields ("", []).

        Examples:
            where = {"age": {"$gte": 18}}
            -> ("JSON_EXTRACT(metadata, '$.age') >= %s", [18])

            where = {"$and": [{"age": {"$gte": 18}}, {"city": "Beijing"}]}
            -> ("(JSON_EXTRACT(metadata, '$.age') >= %s AND JSON_EXTRACT(metadata, '$.city') = %s)", [18, "Beijing"])

            where = {"tag": {"$in": []}}
            -> ("1=0", [])
        """
        if not where:
            return "", []

        return FilterBuilder._build_condition(where, metadata_column)

    @staticmethod
    def build_document_filter(
        where_document: Dict[str, Any],
        document_column: str = "document"
    ) -> Tuple[str, List[Any]]:
        """
        Build WHERE clause for document filtering

        Args:
            where_document: Filter dictionary with $contains, $regex, $and, $or operators
            document_column: Name of document column (default: "document")

        Returns:
            Tuple of (where_clause, params) for parameterized query.
            An empty/None ``where_document`` yields ("", []).

        Examples:
            where_document = {"$contains": "python"}
            -> ("MATCH(document) AGAINST (%s IN NATURAL LANGUAGE MODE)", ["python"])

            where_document = {"$regex": "^hello.*world$"}
            -> ("document REGEXP %s", ["^hello.*world$"])
        """
        if not where_document:
            return "", []

        return FilterBuilder._build_document_condition(where_document, document_column)

    @staticmethod
    def _json_extract(metadata_column: str, key: Any) -> str:
        """Return the JSON_EXTRACT expression addressing ``key`` inside ``metadata_column``."""
        # The key is embedded in a single-quoted SQL literal, so escape
        # backslashes and single quotes to keep it from terminating the literal.
        safe_key = str(key).replace("\\", "\\\\").replace("'", "\\'")
        return f"JSON_EXTRACT({metadata_column}, '$.{safe_key}')"

    @staticmethod
    def _join_sub_conditions(
        sub_conditions: Any,
        builder: Any,
        column: str,
        params: List[Any],
        joiner: str,
        empty_clause: str
    ) -> Tuple[str, List[Any]]:
        """Build every sub-condition with ``builder`` and join them inside parentheses.

        ``empty_clause`` is returned when there are no sub-conditions, so an empty
        ``$and`` / ``$or`` list never produces the invalid SQL fragment "()".
        """
        sub_clauses = []
        for sub_condition in sub_conditions:
            sub_clause, params = builder(sub_condition, column, params)
            sub_clauses.append(sub_clause)

        if not sub_clauses:
            return empty_clause, params
        return f"({joiner.join(sub_clauses)})", params

    @staticmethod
    def _build_condition(
        condition: Dict[str, Any],
        metadata_column: str,
        params: Optional[List[Any]] = None
    ) -> Tuple[str, List[Any]]:
        """Recursively build a metadata condition from a nested dictionary.

        Args:
            condition: Filter dictionary (field conditions and/or logical operators)
            metadata_column: Name of the metadata column
            params: Parameter list to append to (a new list is created when None)

        Returns:
            Tuple of (where_clause, params). Multiple keys at the same level are
            joined with AND; a dictionary producing no clause yields "1=1".
        """
        if params is None:
            params = []

        clauses = []

        for key, value in condition.items():
            if key == "$and":
                # An empty conjunction is vacuously true
                clause, params = FilterBuilder._join_sub_conditions(
                    value, FilterBuilder._build_condition, metadata_column, params, " AND ", "1=1"
                )
                clauses.append(clause)

            elif key == "$or":
                # An empty disjunction matches nothing
                clause, params = FilterBuilder._join_sub_conditions(
                    value, FilterBuilder._build_condition, metadata_column, params, " OR ", "1=0"
                )
                clauses.append(clause)

            elif key == "$not":
                sub_clause, params = FilterBuilder._build_condition(value, metadata_column, params)
                clauses.append(f"NOT ({sub_clause})")

            elif isinstance(value, dict):
                # Handle comparison operators; unrecognised operators are ignored
                column_expr = FilterBuilder._json_extract(metadata_column, key)
                for op, op_value in value.items():
                    if op in FilterBuilder.COMPARISON_OPS:
                        sql_op = FilterBuilder.COMPARISON_OPS[op]
                        clauses.append(f"{column_expr} {sql_op} %s")
                        params.append(op_value)

                    elif op in ("$in", "$nin"):
                        values = list(op_value)
                        if not values:
                            # "IN ()" is not valid SQL: an empty $in matches
                            # nothing and an empty $nin excludes nothing.
                            clauses.append("1=0" if op == "$in" else "1=1")
                            continue
                        placeholders = ", ".join(["%s"] * len(values))
                        keyword = "IN" if op == "$in" else "NOT IN"
                        clauses.append(f"{column_expr} {keyword} ({placeholders})")
                        params.extend(values)

            else:
                # Direct equality comparison
                clauses.append(f"{FilterBuilder._json_extract(metadata_column, key)} = %s")
                params.append(value)

        where_clause = " AND ".join(clauses) if clauses else "1=1"
        return where_clause, params

    @staticmethod
    def _build_document_condition(
        condition: Dict[str, Any],
        document_column: str,
        params: Optional[List[Any]] = None
    ) -> Tuple[str, List[Any]]:
        """Recursively build a document filter condition.

        Args:
            condition: Filter dictionary with $contains, $regex, $and, $or
            document_column: Name of the document column
            params: Parameter list to append to (a new list is created when None)

        Returns:
            Tuple of (where_clause, params). Keys other than the four supported
            operators are ignored; a dictionary producing no clause yields "1=1".
        """
        if params is None:
            params = []

        clauses = []

        for key, value in condition.items():
            if key == "$contains":
                # Full-text search using MATCH AGAINST
                clauses.append(f"MATCH({document_column}) AGAINST (%s IN NATURAL LANGUAGE MODE)")
                params.append(value)

            elif key == "$regex":
                # Regular expression matching
                clauses.append(f"{document_column} REGEXP %s")
                params.append(value)

            elif key == "$and":
                clause, params = FilterBuilder._join_sub_conditions(
                    value, FilterBuilder._build_document_condition, document_column, params, " AND ", "1=1"
                )
                clauses.append(clause)

            elif key == "$or":
                clause, params = FilterBuilder._join_sub_conditions(
                    value, FilterBuilder._build_document_condition, document_column, params, " OR ", "1=0"
                )
                clauses.append(clause)

        where_clause = " AND ".join(clauses) if clauses else "1=1"
        return where_clause, params

    @staticmethod
    def combine_filters(
        metadata_filter: Tuple[str, List[Any]],
        document_filter: Tuple[str, List[Any]]
    ) -> Tuple[str, List[Any]]:
        """
        Combine metadata and document filters

        Args:
            metadata_filter: Tuple of (where_clause, params) for metadata
            document_filter: Tuple of (where_clause, params) for document

        Returns:
            Combined (where_clause, params). Non-empty clauses are joined with
            AND (metadata first); when both clauses are empty, ("", []).
        """
        clauses = []
        all_params = []

        # Filters with an empty clause contribute neither SQL nor parameters
        for clause, clause_params in (metadata_filter, document_filter):
            if clause:
                clauses.append(clause)
                all_params.extend(clause_params)

        if not clauses:
            return "", []
        return " AND ".join(clauses), all_params

    @staticmethod
    def build_search_filter(where: Optional[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
        """
        Build search_params filter format from where condition for hybrid search

        Args:
            where: Filter dictionary with operators like $eq, $lt, $gt, $lte, $gte, $ne, $in, $nin, $and, $or, $not

        Returns:
            List of filter conditions in search_params format, or None if where
            is empty or produces no condition

        Examples:
            where = {"category": {"$eq": "science"}}
            -> [{"term": {"metadata.category": {"value": "science"}}}]

            where = {"$and": [{"page": {"$gte": 5}}, {"page": {"$lte": 10}}]}
            -> [{"bool": {"must": [{"range": {"metadata.page": {"gte": 5}}}, {"range": {"metadata.page": {"lte": 10}}}]}}]

            where = {"status": {"$ne": "draft"}}
            -> [{"bool": {"must_not": [{"term": {"metadata.status": {"value": "draft"}}}]}}]
        """
        if not where:
            return None

        filter_condition = FilterBuilder._build_search_filter_condition(where)
        if filter_condition:
            return [filter_condition]
        return None

    @staticmethod
    def _build_search_filter_condition(condition: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Recursively build a search_params filter condition from a nested dictionary.

        Every key of ``condition`` (logical operator or field) contributes to one
        conjunction: positive conditions are collected under "must" and negated
        ones under "must_not".

        Returns:
            The filter dictionary, or None when ``condition`` produces no condition.
            A result consisting of exactly one positive condition is returned
            unwrapped; negated conditions always stay inside a "must_not" so the
            negation is never lost.
        """
        if not condition:
            return None

        must: List[Dict[str, Any]] = []
        must_not: List[Dict[str, Any]] = []

        for key, value in condition.items():
            # Handle logical operators
            if key in ("$and", "$or"):
                sub_filters = []
                for sub_condition in value:
                    sub_filter = FilterBuilder._build_search_filter_condition(sub_condition)
                    if sub_filter:
                        sub_filters.append(sub_filter)
                if sub_filters:
                    occur = "must" if key == "$and" else "should"
                    must.append({"bool": {occur: sub_filters}})
                continue

            if key == "$not":
                not_filter = FilterBuilder._build_search_filter_condition(value)
                if not_filter:
                    must_not.append(not_filter)
                continue

            # Handle field conditions
            field_name = f"metadata.{key}"

            if not isinstance(value, dict):
                # Direct equality
                must.append({"term": {field_name: {"value": value}}})
                continue

            range_conditions = {}
            term_conditions = []
            in_conditions = []
            nin_conditions = []

            for op, op_value in value.items():
                if op == "$eq":
                    term_conditions.append({"term": {field_name: {"value": op_value}}})
                elif op == "$ne":
                    must_not.append({"term": {field_name: {"value": op_value}}})
                elif op in ("$lt", "$lte", "$gt", "$gte"):
                    # "$gte" -> "gte", etc.
                    range_conditions[op[1:]] = op_value
                elif op == "$in":
                    for val in op_value:
                        in_conditions.append({"term": {field_name: {"value": val}}})
                elif op == "$nin":
                    for val in op_value:
                        nin_conditions.append({"term": {field_name: {"value": val}}})

            if range_conditions:
                must.append({"range": {field_name: range_conditions}})
            must.extend(term_conditions)
            # Each $in forms its own "any of these values" group, so it is ANDed
            # with the other conditions instead of being pooled with the $in
            # values of unrelated fields. An empty value list adds no condition.
            if len(in_conditions) == 1:
                must.append(in_conditions[0])
            elif in_conditions:
                must.append({"bool": {"should": in_conditions}})
            must_not.extend(nin_conditions)

        if not must and not must_not:
            return None

        # A single positive condition needs no bool wrapper
        if len(must) == 1 and not must_not:
            return must[0]

        bool_body: Dict[str, Any] = {}
        if must:
            bool_body["must"] = must
        if must_not:
            bool_body["must_not"] = must_not
        return {"bool": bool_body}
|
|
357
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Metadata information for collection fields.
|
|
3
|
+
"""
|
|
4
|
+
class CollectionFieldNames:
    """Names of the fields that make up a collection record."""

    # Individual field names
    ID, DOCUMENT, EMBEDDING, METADATA = "_id", "document", "embedding", "metadata"

    # Every field name, in the order ID, DOCUMENT, EMBEDDING, METADATA
    ALL_FIELDS = [ID, DOCUMENT, EMBEDDING, METADATA]
|
|
11
|
+
|
|
12
|
+
class CollectionNames:
    """Naming helpers for collections."""

    @staticmethod
    def table_name(collection_name: str) -> str:
        """Return the table name for ``collection_name``: the name prefixed with "c$v1$"."""
        return "c$v1${}".format(collection_name)
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Query result wrapper class with JSON serialization support
|
|
3
|
+
"""
|
|
4
|
+
import json
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class QueryResultItem:
    """A single record of a query result."""

    def __init__(
        self,
        id: Any,
        document: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        distance: Optional[float] = None
    ):
        """
        Create a result item.

        Args:
            id: Record ID (stored as ``_id``)
            document: Document text (optional)
            embedding: Vector embedding (optional)
            metadata: Metadata dictionary (optional; None becomes an empty dict)
            distance: Distance/similarity score (optional)
        """
        self._id = id
        self.document = document
        self.embedding = embedding
        self.metadata = {} if metadata is None else metadata
        self.distance = distance

    def to_dict(self) -> Dict[str, Any]:
        """Return the item as a dictionary.

        "_id" is always present. "document", "embedding" and "distance" are
        included when they are not None; "metadata" only when it is non-empty.
        """
        optional_fields = (
            ("document", self.document, self.document is not None),
            ("embedding", self.embedding, self.embedding is not None),
            ("metadata", self.metadata, bool(self.metadata)),
            ("distance", self.distance, self.distance is not None),
        )

        payload = {"_id": self._id}
        for field, field_value, keep in optional_fields:
            if keep:
                payload[field] = field_value
        return payload

    def to_json(self) -> str:
        """Serialize ``to_dict()`` as indented JSON, keeping non-ASCII characters as-is."""
        payload = self.to_dict()
        return json.dumps(payload, ensure_ascii=False, indent=2)

    def __repr__(self) -> str:
        return "QueryResultItem(id={}, distance={})".format(self._id, self.distance)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class QueryResult:
    """Container holding the items of a query result."""

    def __init__(self, items: Optional[List[QueryResultItem]] = None):
        """
        Create a query result.

        Args:
            items: List of QueryResultItem objects (optional; None starts an empty list)
        """
        self.items = [] if items is None else items

    def add_item(
        self,
        id: Any,
        document: Optional[str] = None,
        embedding: Optional[List[float]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        distance: Optional[float] = None
    ) -> None:
        """
        Build a QueryResultItem from the arguments and append it to ``items``.

        Args:
            id: Record ID
            document: Document text (optional)
            embedding: Vector embedding (optional)
            metadata: Metadata dictionary (optional)
            distance: Distance/similarity score (optional)
        """
        self.items.append(
            QueryResultItem(
                id=id,
                document=document,
                embedding=embedding,
                metadata=metadata,
                distance=distance,
            )
        )

    def to_list(self) -> List[Dict[str, Any]]:
        """Return ``to_dict()`` of every item, in order."""
        rows = []
        for entry in self.items:
            rows.append(entry.to_dict())
        return rows

    def to_json(self) -> str:
        """Serialize ``to_list()`` as indented JSON, keeping non-ASCII characters as-is."""
        rows = self.to_list()
        return json.dumps(rows, ensure_ascii=False, indent=2)

    def __len__(self) -> int:
        """Number of items held."""
        return len(self.items)

    def __getitem__(self, index: int) -> QueryResultItem:
        """Return the item stored at ``index``."""
        return self.items[index]

    def __iter__(self):
        """Iterate over the items in order."""
        return iter(self.items)

    def __repr__(self) -> str:
        return "QueryResult(items={})".format(len(self.items))
|
|
122
|
+
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions and classes for SQL string generation and escaping in SeekDB client.
|
|
3
|
+
|
|
4
|
+
Provides helpers to safely stringify values and SQL identifiers for insertion into SQL expressions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Optional, Union
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _quote_string(value, quote: str):
|
|
11
|
+
return quote + str(value) + quote
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SqlStringifier:
    """
    Translate values into strings in SQL.

    Args (keyword-only):
        quote: Quote character for string literals (default: single quote)
        identifier: Quote character for identifiers (default: backtick)
    """

    def __init__(self, *, quote: str = "'", identifier: str = "`"):
        self._quote = quote
        self._identifier = identifier

    def _quote_literal(self, text: str) -> str:
        """Escape backslashes and the quote character in ``text`` and wrap it in quotes."""
        escaped = text.replace('\\', '\\\\').replace(self._quote, f"\\{self._quote}")
        return self._quote + escaped + self._quote

    def stringify_value(self, value: Optional[Union[str, int, float, bytes]]):
        """Render ``value`` as a SQL expression.

        Returns:
            - "NULL" for None
            - "UNHEX('<hex>')" for bytes
            - "UNHEX('<value>')" for a non-empty, even-length string made only of
              hex digits (treated as a hex-encoded varbinary ID)
            - an escaped, quoted literal for any other string
            - ``str(value)`` for int/float
            - an escaped, quoted literal of ``str(value)`` for any other type
        """
        if value is None:
            return "NULL"
        if isinstance(value, bytes):
            # For varbinary type, convert bytes to hex string and use UNHEX function
            return f"UNHEX('{value.hex()}')"
        if isinstance(value, str):
            # NOTE(review): this heuristic also captures ordinary text that happens
            # to look like hex (e.g. "cafe", "1234") — confirm callers rely on it.
            looks_like_hex = (
                len(value) > 0
                and len(value) % 2 == 0
                and all(c in '0123456789abcdefABCDEF' for c in value)
            )
            if looks_like_hex:
                return f"UNHEX('{value}')"
            return self._quote_literal(value)
        if isinstance(value, (int, float)):
            return str(value)
        # Other types get the same escaping as strings so that a quote character
        # in their text form cannot terminate the literal early.
        return self._quote_literal(str(value))

    def stringify_id(self, id_name: str):
        """Render ``id_name`` as a quoted SQL identifier.

        An identifier-quote character inside the name is doubled so that the
        name cannot close the quoted identifier.

        Raises:
            ValueError: if ``id_name`` is None or not a string
        """
        if id_name is None:
            raise ValueError("Identifier shouldn't be null")
        if not isinstance(id_name, str):
            raise ValueError(f"Identifier should be string type, but got {type(id_name).__name__}")
        escaped = id_name
        if self._identifier:
            escaped = id_name.replace(self._identifier, self._identifier * 2)
        return self._identifier + escaped + self._identifier
|