pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
import threading
|
|
5
|
+
import uuid
|
|
6
|
+
from datetime import timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import TYPE_CHECKING, Iterator
|
|
9
|
+
from urllib.parse import quote
|
|
10
|
+
|
|
11
|
+
from azure.core.exceptions import AzureError
|
|
12
|
+
from azure.storage.blob import BlobSasPermissions, generate_blob_sas
|
|
13
|
+
|
|
14
|
+
from pixeltable import env, exceptions as excs
|
|
15
|
+
from pixeltable.config import Config
|
|
16
|
+
from pixeltable.utils.object_stores import ObjectPath, ObjectStoreBase, StorageObjectAddress
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from azure.storage.blob import BlobProperties, BlobServiceClient
|
|
20
|
+
|
|
21
|
+
from pixeltable.catalog import Column
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
_logger = logging.getLogger('pixeltable')


# Serializes access to the per-environment client cache returned by the
# 'azure_blob' factory below; see AzureBlobStore.client().
client_lock = threading.Lock()


@env.register_client('azure_blob')
def _() -> dict[str, 'BlobServiceClient']:
    # Registered factory for the 'azure_blob' client slot: an initially empty
    # cache mapping container-free URIs to BlobServiceClient instances,
    # populated lazily by AzureBlobStore.client().
    return {}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class AzureBlobStore(ObjectStoreBase):
    """Class to handle Azure Blob Storage operations."""

    # TODO: This needs to be redesigned to use asyncio.

    # URI of the Azure Blob Storage container
    # Always ends with a slash
    __base_uri: str

    # Storage account name
    __account_name: str

    # Container name extracted from the URI
    __container_name: str

    # Prefix path within the container, either empty or ending with a slash
    __prefix_name: str

    # URI scheme (wasb, wasbs, abfs, abfss, https)
    __scheme: str

    # Parsed address this store was constructed from
    soa: StorageObjectAddress

    def __init__(self, soa: StorageObjectAddress):
        self.soa = soa
        self.__scheme = soa.scheme
        self.__account_name = soa.account
        self.__container_name = soa.container
        self.__prefix_name = soa.prefix

        # Reconstruct base URI in normalized format
        self.__base_uri = self.soa.prefix_free_uri + self.__prefix_name
        _logger.info(
            f'Initialized AzureBlobStore with base URI: {self.__base_uri}, '
            f'account: {self.__account_name}, container: {self.__container_name}, prefix: {self.__prefix_name}'
        )

    def client(self) -> 'BlobServiceClient':
        """Return the Azure Blob Storage client.

        Clients are cached per container-free URI in the env-registered
        'azure_blob' dict; creation is guarded by the module-level client_lock.
        """
        client_dict: dict[str, 'BlobServiceClient'] = env.Env.get().get_client('azure_blob')
        with client_lock:
            uri = self.soa.container_free_uri
            if uri not in client_dict:
                storage_account_name = Config.get().get_string_value('storage_account_name', section='azure')
                storage_account_key = Config.get().get_string_value('storage_account_key', section='azure')
                # Both credentials must be present or absent together
                if (storage_account_name is None) != (storage_account_key is None):
                    raise excs.Error(
                        "Azure 'storage_account_name' and 'storage_account_key' must be specified together."
                    )
                if storage_account_name is None or storage_account_name != self.__account_name:
                    # Attempt a connection to a public resource, with no account key
                    client_dict[uri] = self.create_client(endpoint_url=uri)
                else:
                    client_dict[uri] = self.create_client(
                        endpoint_url=uri, account_name=self.__account_name, account_key=storage_account_key
                    )
            return client_dict[uri]

    @property
    def account_name(self) -> str:
        """Return the storage account name."""
        return self.__account_name

    @property
    def container_name(self) -> str:
        """Return the container name from the base URI."""
        return self.__container_name

    @property
    def prefix(self) -> str:
        """Return the prefix from the base URI."""
        return self.__prefix_name

    def validate(self, error_col_name: str) -> str | None:
        """
        Checks if the URI exists and is accessible.

        Returns:
            str: The base URI if the container exists and is accessible, None otherwise.
        """
        try:
            container_client = self.client().get_container_client(self.container_name)
            # Check if container exists by trying to get its properties
            container_client.get_container_properties()
            return self.__base_uri
        except AzureError as e:
            # handle_azure_error raises excs.Error for all non-ignored cases
            self.handle_azure_error(e, self.container_name, f'validate container {error_col_name}')
            return None

    def copy_object_to_local_file(self, src_path: str, dest_path: Path) -> None:
        """Copies a blob to a local file. Thread safe."""
        try:
            blob_client = self.client().get_blob_client(container=self.container_name, blob=self.prefix + src_path)
            with open(dest_path, 'wb') as download_file:
                download_stream = blob_client.download_blob()
                download_file.write(download_stream.readall())
        except AzureError as e:
            self.handle_azure_error(e, self.container_name, f'download file {src_path}')
            raise

    # TODO: utils package should not include back-references to `Column`
    def copy_local_file(self, col: 'Column', src_path: Path) -> str:
        """Copy a local file to Azure Blob Storage, and return its new URL"""
        prefix, filename = ObjectPath.create_prefix_raw(
            col.get_tbl().id, col.id, col.get_tbl().version, ext=src_path.suffix
        )
        # Fix: use the generated object filename; previously the f-strings contained
        # a literal placeholder instead of {filename}, so every upload for a given
        # column version would target the same (bogus) blob name.
        blob_name = f'{self.prefix}{prefix}/{filename}'
        new_file_uri = f'{self.__base_uri}{prefix}/{filename}'

        try:
            blob_client = self.client().get_blob_client(container=self.container_name, blob=blob_name)
            with open(src_path, 'rb') as data:
                blob_client.upload_blob(data, overwrite=True)
            _logger.debug(f'Media Storage: copied {src_path} to {new_file_uri}')
            return new_file_uri
        except AzureError as e:
            self.handle_azure_error(e, self.container_name, f'upload file {src_path}')
            raise

    def _get_filtered_blobs(
        self, tbl_id: uuid.UUID | None, tbl_version: int | None = None
    ) -> Iterator['BlobProperties']:
        """Private method to get filtered blobs for a table, optionally filtered by version.

        Args:
            tbl_id: Table UUID to filter by
            tbl_version: Optional table version to filter by

        Returns:
            Iterator over blob objects matching the criteria
        """
        # Use ObjectPath to construct the prefix for this table
        if tbl_id is None:
            prefix = self.prefix
            assert tbl_version is None, 'tbl_version must be None if tbl_id is None'
        else:
            table_prefix = ObjectPath.table_prefix(tbl_id)
            prefix = f'{self.prefix}{table_prefix}/'

        try:
            container_client = self.client().get_container_client(self.container_name)

            blob_iterator: Iterator['BlobProperties']
            if tbl_version is None:
                # Return all blobs with the table prefix
                blob_iterator = container_client.list_blobs(name_starts_with=prefix)
            else:
                # Filter by both table_id and table_version using the ObjectPath pattern
                # Pattern: tbl_id_col_id_version_uuid
                version_pattern = re.compile(
                    rf'{re.escape(table_prefix)}_\d+_{re.escape(str(tbl_version))}_[0-9a-fA-F]+.*'
                )
                # Get all blobs with the prefix and filter by version pattern
                all_blobs = container_client.list_blobs(name_starts_with=prefix)
                blob_iterator = (blob for blob in all_blobs if version_pattern.match(blob.name.split('/')[-1]))

            return blob_iterator

        except AzureError as e:
            self.handle_azure_error(e, self.container_name, f'setup iterator {self.prefix}')
            raise

    def count(self, tbl_id: uuid.UUID | None, tbl_version: int | None = None) -> int:
        """Count the number of files belonging to tbl_id. If tbl_version is not None,
        count only those files belonging to the specified tbl_version.

        Args:
            tbl_id: Table UUID to count blobs for
            tbl_version: Optional table version to filter by

        Returns:
            Number of blobs matching the criteria
        """
        blob_iterator = self._get_filtered_blobs(tbl_id, tbl_version)
        return sum(1 for _ in blob_iterator)

    def delete(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
        """Delete all files belonging to tbl_id. If tbl_version is not None, delete
        only those files belonging to the specified tbl_version.

        Args:
            tbl_id: Table UUID to delete blobs for
            tbl_version: Optional table version to filter by

        Returns:
            Number of blobs deleted
        """
        assert tbl_id is not None
        blob_iterator = self._get_filtered_blobs(tbl_id, tbl_version)
        total_deleted = 0

        try:
            container_client = self.client().get_container_client(self.container_name)

            for blob in blob_iterator:
                # TODO: Figure out now to properly use batch method delete_blobs(), it doesn't seem to work properly
                container_client.delete_blob(blob.name)
                total_deleted += 1

            return total_deleted

        except AzureError as e:
            self.handle_azure_error(e, self.container_name, f'deleting with {self.prefix}')
            raise

    def list_objects(self, return_uri: bool, n_max: int = 10) -> list[str]:
        """Return a list of objects found in the specified destination bucket.
        Each returned object includes the full set of prefixes.
        if return_uri is True, full URI's are returned; otherwise, just the object keys.
        """
        p = self.soa.prefix_free_uri if return_uri else ''
        r: list[str] = []
        try:
            blob_iterator = self._get_filtered_blobs(tbl_id=None, tbl_version=None)
            for blob in blob_iterator:
                r.append(f'{p}{blob.name}')
                if len(r) >= n_max:
                    return r

        except AzureError as e:
            # NOTE(review): handle_azure_error re-raises as excs.Error here (no
            # ignore_404), so the trailing return is only reached on a clean scan
            self.handle_azure_error(e, self.__container_name, f'list objects from {self.__base_uri}')
        return r

    @classmethod
    def handle_azure_error(
        cls, e: 'AzureError', container_name: str, operation: str = '', *, ignore_404: bool = False
    ) -> None:
        """Translate an AzureError into an excs.Error (or return silently for an
        ignored 404). Raises for every case except ignore_404 + ResourceNotFoundError."""
        from azure.core.exceptions import ClientAuthenticationError, HttpResponseError, ResourceNotFoundError

        if ignore_404 and isinstance(e, ResourceNotFoundError):
            return

        if isinstance(e, ResourceNotFoundError):
            raise excs.Error(f'Container {container_name} or blob not found during {operation}: {str(e)!r}')
        elif isinstance(e, ClientAuthenticationError):
            raise excs.Error(f'Authentication failed for container {container_name} during {operation}: {str(e)!r}')
        elif isinstance(e, HttpResponseError):
            if e.status_code == 403:
                raise excs.Error(f'Access denied to container {container_name} during {operation}: {str(e)!r}')
            elif e.status_code == 412:
                raise excs.Error(f'Precondition failed for container {container_name} during {operation}: {str(e)!r}')
            else:
                raise excs.Error(
                    f'HTTP error during {operation} in container {container_name}: {e.status_code} - {str(e)!r}'
                )
        else:
            raise excs.Error(f'Error during {operation} in container {container_name}: {str(e)!r}')

    def create_presigned_url(self, soa: StorageObjectAddress, expiration_seconds: int) -> str:
        """Create a presigned URL for downloading an object from Azure Blob Storage."""
        if not soa.has_object:
            raise excs.Error(f'StorageObjectAddress does not contain an object name: {soa}')

        azure_client = self.client()
        account_name = azure_client.account_name if azure_client.account_name else self.__account_name

        # Account key cannot be extracted from client for security reasons, get from config
        storage_account_key = Config.get().get_string_value('storage_account_key', section='azure')

        if not account_name or not storage_account_key:
            raise excs.Error(
                'Azure storage_account_name and storage_account_key must be configured '
                'to generate presigned URLs. Set them in the config under the [azure] section, '
                'or include the account name in the Azure URL.'
            )

        # Use datetime.now(timezone.utc) + timedelta like in pixeltable cloud
        expiry_time = datetime.datetime.now(timezone.utc) + datetime.timedelta(seconds=expiration_seconds)

        sas_token = generate_blob_sas(
            account_name=account_name,
            container_name=soa.container,
            blob_name=soa.key,
            account_key=storage_account_key,
            permission=BlobSasPermissions(read=True),
            expiry=expiry_time,
            version='2022-11-02',  # Specify API version to avoid version mismatch issues
        )

        # Build URL directly - URL encode the blob key to handle special characters
        # Use safe='/' to preserve path separators in the blob key
        encoded_key = quote(soa.key, safe='/')
        blob_url = f'https://{account_name}.blob.core.windows.net/{soa.container}/{encoded_key}?{sas_token}'
        return blob_url

    @classmethod
    def create_client(
        cls, endpoint_url: str, account_name: str | None = None, account_key: str | None = None
    ) -> 'BlobServiceClient':
        """Create a BlobServiceClient for endpoint_url; anonymous when no
        account_name/account_key pair is supplied."""
        from azure.core.credentials import AzureNamedKeyCredential
        from azure.storage.blob import BlobServiceClient  # TODO: Use azure.storage.blob.aio instead

        assert (account_name is None) == (account_key is None)
        try:
            # e.g. endpoint_url: str = f'https://{account_name}.blob.core.windows.net'
            assert endpoint_url is not None, 'No Azure Storage account information provided'

            # Use empty SAS token for anonymous authentication
            credential = None
            if account_name is not None:
                credential = AzureNamedKeyCredential(name=account_name, key=account_key)
            return BlobServiceClient(
                account_url=endpoint_url,
                credential=credential,
                max_single_get_size=(32 * 2**20),
                max_chunk_get_size=(4 * 2**20),
                connection_timeout=15,
                read_timeout=30,
            )
        except Exception as e:
            raise excs.Error(f'Failed to create Azure Blob Storage client: {str(e)!r}') from e
|
pixeltable/utils/coco.py
CHANGED
|
@@ -22,6 +22,7 @@ Required format:
|
|
|
22
22
|
}
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
+
|
|
25
26
|
def _verify_input_dict(input_dict: dict[str, Any]) -> None:
|
|
26
27
|
"""Verify that input_dict is a valid input dict for write_coco_dataset()"""
|
|
27
28
|
if not isinstance(input_dict, dict):
|
|
@@ -30,7 +31,7 @@ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
|
|
|
30
31
|
raise excs.Error(f'Missing key "image" in input dict: {input_dict}{format_msg}')
|
|
31
32
|
if not isinstance(input_dict['image'], PIL.Image.Image):
|
|
32
33
|
raise excs.Error(f'Value for "image" is not a PIL.Image.Image: {input_dict}{format_msg}')
|
|
33
|
-
if
|
|
34
|
+
if 'annotations' not in input_dict:
|
|
34
35
|
raise excs.Error(f'Missing key "annotations" in input dict: {input_dict}{format_msg}')
|
|
35
36
|
if not isinstance(input_dict['annotations'], list):
|
|
36
37
|
raise excs.Error(f'Value for "annotations" is not a list: {input_dict}{format_msg}')
|
|
@@ -48,11 +49,12 @@ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
|
|
|
48
49
|
if not isinstance(annotation['category'], (str, int)):
|
|
49
50
|
raise excs.Error(f'Value for "category" is not a str or int: {annotation}{format_msg}')
|
|
50
51
|
|
|
51
|
-
|
|
52
|
-
|
|
52
|
+
|
|
53
|
+
def write_coco_dataset(query: pxt.Query, dest_path: Path) -> Path:
|
|
54
|
+
"""Export a ResultSet as a COCO dataset in dest_path and return the path of the data.json file."""
|
|
53
55
|
# TODO: validate schema
|
|
54
|
-
if len(
|
|
55
|
-
raise excs.Error(f'Expected exactly one json-typed column in select list: {
|
|
56
|
+
if len(query._select_list_exprs) != 1 or not query._select_list_exprs[0].col_type.is_json_type():
|
|
57
|
+
raise excs.Error(f'Expected exactly one json-typed column in select list: {query._select_list_exprs}')
|
|
56
58
|
input_dict_slot_idx = -1 # df._select_list_exprs[0].slot_idx isn't valid until _exec()
|
|
57
59
|
|
|
58
60
|
# create output dir
|
|
@@ -66,9 +68,9 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
|
|
|
66
68
|
annotations: list[dict[str, Any]] = []
|
|
67
69
|
ann_id = -1
|
|
68
70
|
categories: set[Any] = set()
|
|
69
|
-
for input_row in
|
|
71
|
+
for input_row in query._exec():
|
|
70
72
|
if input_dict_slot_idx == -1:
|
|
71
|
-
input_dict_expr =
|
|
73
|
+
input_dict_expr = query._select_list_exprs[0]
|
|
72
74
|
input_dict_slot_idx = input_dict_expr.slot_idx
|
|
73
75
|
input_dict = input_row[input_dict_slot_idx]
|
|
74
76
|
_verify_input_dict(input_dict)
|
|
@@ -96,31 +98,28 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
|
|
|
96
98
|
img_path = images_dir / f'{img_id}.jpg'
|
|
97
99
|
img.save(img_path)
|
|
98
100
|
|
|
99
|
-
images.append({
|
|
100
|
-
'id': img_id,
|
|
101
|
-
'file_name': str(img_path),
|
|
102
|
-
'width': img.width,
|
|
103
|
-
'height': img.height,
|
|
104
|
-
})
|
|
101
|
+
images.append({'id': img_id, 'file_name': str(img_path), 'width': img.width, 'height': img.height})
|
|
105
102
|
|
|
106
103
|
# create annotation records for this image
|
|
107
104
|
for annotation in input_dict['annotations']:
|
|
108
105
|
ann_id += 1
|
|
109
|
-
|
|
106
|
+
_, _, w, h = annotation['bbox']
|
|
110
107
|
category = annotation['category']
|
|
111
108
|
categories.add(category)
|
|
112
|
-
annotations.append(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
109
|
+
annotations.append(
|
|
110
|
+
{
|
|
111
|
+
'id': ann_id,
|
|
112
|
+
'image_id': img_id,
|
|
113
|
+
# we use the category name here and fix it up at the end, when we have assigned category ids
|
|
114
|
+
'category_id': category,
|
|
115
|
+
'bbox': annotation['bbox'],
|
|
116
|
+
'area': w * h,
|
|
117
|
+
'iscrowd': 0,
|
|
118
|
+
}
|
|
119
|
+
)
|
|
121
120
|
|
|
122
121
|
# replace category names with ids
|
|
123
|
-
category_ids = {category: id for id, category in enumerate(sorted(
|
|
122
|
+
category_ids = {category: id for id, category in enumerate(sorted(categories))}
|
|
124
123
|
for annotation in annotations:
|
|
125
124
|
annotation['category_id'] = category_ids[annotation['category_id']]
|
|
126
125
|
|
|
@@ -130,8 +129,8 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
|
|
|
130
129
|
'categories': [{'id': id, 'name': category} for category, id in category_ids.items()],
|
|
131
130
|
}
|
|
132
131
|
output_path = dest_path / 'data.json'
|
|
133
|
-
with open(output_path, 'w') as
|
|
134
|
-
json.dump(result,
|
|
132
|
+
with open(output_path, 'w', encoding='utf-8') as fp:
|
|
133
|
+
json.dump(result, fp)
|
|
135
134
|
return output_path
|
|
136
135
|
|
|
137
136
|
|
|
@@ -226,5 +225,5 @@ COCO_2017_CATEGORIES = {
|
|
|
226
225
|
87: 'scissors',
|
|
227
226
|
88: 'teddy bear',
|
|
228
227
|
89: 'hair drier',
|
|
229
|
-
90: 'toothbrush'
|
|
228
|
+
90: 'toothbrush',
|
|
230
229
|
}
|
pixeltable/utils/code.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import types
|
|
2
|
-
from typing import Optional
|
|
3
2
|
|
|
4
3
|
from pixeltable.func import Function
|
|
5
4
|
|
|
6
|
-
|
|
7
5
|
# Utilities related to the organization of the Pixeltable codebase.
|
|
8
6
|
|
|
9
|
-
|
|
7
|
+
|
|
8
|
+
def local_public_names(mod_name: str, exclude: list[str] | None = None) -> list[str]:
|
|
10
9
|
"""
|
|
11
10
|
Returns a list of all functions and submodules that are local to the specified module and are
|
|
12
11
|
publicly accessible. Intended to facilitate implementation of module __dir__() methods for
|
|
@@ -21,7 +20,8 @@ def local_public_names(mod_name: str, exclude: Optional[list[str]] = None) -> li
|
|
|
21
20
|
for obj in mod.__dict__.values():
|
|
22
21
|
if isinstance(obj, Function):
|
|
23
22
|
# Pixeltable function
|
|
24
|
-
|
|
23
|
+
if not obj.name.startswith('_'):
|
|
24
|
+
names.append(obj.name)
|
|
25
25
|
elif isinstance(obj, types.FunctionType):
|
|
26
26
|
# Python function
|
|
27
27
|
if obj.__module__ == mod.__name__ and not obj.__name__.startswith('_'):
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import TextIO
|
|
3
|
+
|
|
4
|
+
from pixeltable import exceptions as excs
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def map_level(verbosity: int) -> int:
|
|
8
|
+
"""
|
|
9
|
+
Map verbosity level to logging level.
|
|
10
|
+
0 - minimum logging - warn and above
|
|
11
|
+
1 - default logging - info and above
|
|
12
|
+
2 - more logging - debug and above
|
|
13
|
+
Args:
|
|
14
|
+
|
|
15
|
+
Returns:
|
|
16
|
+
Logging level as integer
|
|
17
|
+
"""
|
|
18
|
+
if verbosity == 0:
|
|
19
|
+
return logging.WARN
|
|
20
|
+
if verbosity == 1:
|
|
21
|
+
return logging.INFO
|
|
22
|
+
if verbosity == 2:
|
|
23
|
+
return logging.DEBUG
|
|
24
|
+
|
|
25
|
+
raise excs.Error(f'Invalid verbosity level: {verbosity}')
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ConsoleOutputHandler(logging.StreamHandler):
|
|
29
|
+
def __init__(self, stream: TextIO):
|
|
30
|
+
super().__init__(stream)
|
|
31
|
+
|
|
32
|
+
def emit(self, record: logging.LogRecord) -> None:
|
|
33
|
+
if record.msg.endswith('\n'):
|
|
34
|
+
self.stream.write(record.msg)
|
|
35
|
+
else:
|
|
36
|
+
self.stream.write(record.msg + '\n')
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ConsoleMessageFilter(logging.Filter):
|
|
40
|
+
def filter(self, record: logging.LogRecord) -> bool:
|
|
41
|
+
return getattr(record, 'user_visible', False)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ConsoleLogger(logging.LoggerAdapter):
|
|
45
|
+
def __init__(self, logger: logging.Logger):
|
|
46
|
+
super().__init__(logger, extra={'user_visible': True})
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import threading
|
|
3
|
+
from typing import Any, Coroutine, TypeVar
|
|
4
|
+
|
|
5
|
+
from pixeltable.env import Env
|
|
6
|
+
|
|
7
|
+
T = TypeVar('T')
|
|
8
|
+
|
|
9
|
+
# TODO This is a temporary hack to be able to run async UDFs in contexts that are not properly handled by the existing
|
|
10
|
+
# scheduler logic (e.g., as an embedding function as part of a similarity lookup). Once the scheduler is fully
|
|
11
|
+
# general, it can be removed.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def run_coroutine_synchronously(coroutine: Coroutine[Any, Any, T], timeout: float = 30) -> T:
|
|
15
|
+
"""
|
|
16
|
+
Runs the given coroutine synchronously, even if called in the context of a running event loop.
|
|
17
|
+
"""
|
|
18
|
+
loop = Env.get().event_loop
|
|
19
|
+
|
|
20
|
+
if threading.current_thread() is threading.main_thread():
|
|
21
|
+
return loop.run_until_complete(coroutine)
|
|
22
|
+
else:
|
|
23
|
+
# Not in main thread, use run_coroutine_threadsafe
|
|
24
|
+
return asyncio.run_coroutine_threadsafe(coroutine, loop).result(timeout)
|
pixeltable/utils/dbms.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Dbms(abc.ABC):
|
|
7
|
+
"""
|
|
8
|
+
Provides abstractions for utilities to interact with a database system.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
name: str
|
|
12
|
+
transaction_isolation_level: str
|
|
13
|
+
version_index_type: str
|
|
14
|
+
db_url: sql.URL
|
|
15
|
+
|
|
16
|
+
def __init__(self, name: str, transaction_isolation_level: str, version_index_type: str, db_url: sql.URL) -> None:
|
|
17
|
+
self.name = name
|
|
18
|
+
self.transaction_isolation_level = transaction_isolation_level
|
|
19
|
+
self.version_index_type = version_index_type
|
|
20
|
+
self.db_url = db_url
|
|
21
|
+
|
|
22
|
+
@abc.abstractmethod
|
|
23
|
+
def drop_db_stmt(self, database: str) -> str: ...
|
|
24
|
+
|
|
25
|
+
@abc.abstractmethod
|
|
26
|
+
def create_db_stmt(self, database: str) -> str: ...
|
|
27
|
+
|
|
28
|
+
@abc.abstractmethod
|
|
29
|
+
def default_system_db_url(self) -> str: ...
|
|
30
|
+
|
|
31
|
+
@abc.abstractmethod
|
|
32
|
+
def create_vector_index_stmt(
|
|
33
|
+
self, store_index_name: str, sa_value_col: sql.Column, metric: str
|
|
34
|
+
) -> sql.Compiled: ...
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class PostgresqlDbms(Dbms):
|
|
38
|
+
"""
|
|
39
|
+
Implements utilities to interact with Postgres database.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def __init__(self, db_url: sql.URL):
|
|
43
|
+
super().__init__('postgresql', 'SERIALIZABLE', 'brin', db_url)
|
|
44
|
+
|
|
45
|
+
def drop_db_stmt(self, database: str) -> str:
|
|
46
|
+
return f'DROP DATABASE {database}'
|
|
47
|
+
|
|
48
|
+
def create_db_stmt(self, database: str) -> str:
|
|
49
|
+
return f"CREATE DATABASE {database} ENCODING 'utf-8' LC_COLLATE 'C' LC_CTYPE 'C' TEMPLATE template0"
|
|
50
|
+
|
|
51
|
+
def default_system_db_url(self) -> str:
|
|
52
|
+
a = self.db_url.set(database='postgres').render_as_string(hide_password=False)
|
|
53
|
+
return a
|
|
54
|
+
|
|
55
|
+
def create_vector_index_stmt(self, store_index_name: str, sa_value_col: sql.Column, metric: str) -> sql.Compiled:
|
|
56
|
+
from sqlalchemy.dialects import postgresql
|
|
57
|
+
|
|
58
|
+
sa_idx = sql.Index(
|
|
59
|
+
store_index_name,
|
|
60
|
+
sa_value_col,
|
|
61
|
+
postgresql_using='hnsw',
|
|
62
|
+
postgresql_with={'m': 16, 'ef_construction': 64},
|
|
63
|
+
postgresql_ops={sa_value_col.name: metric},
|
|
64
|
+
)
|
|
65
|
+
return sql.schema.CreateIndex(sa_idx, if_not_exists=True).compile(dialect=postgresql.dialect())
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class CockroachDbms(Dbms):
|
|
69
|
+
"""
|
|
70
|
+
Implements utilities to interact with CockroachDb database.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
def __init__(self, db_url: sql.URL):
|
|
74
|
+
super().__init__('cockroachdb', 'SERIALIZABLE', 'btree', db_url)
|
|
75
|
+
|
|
76
|
+
def drop_db_stmt(self, database: str) -> str:
|
|
77
|
+
return f'DROP DATABASE {database} CASCADE'
|
|
78
|
+
|
|
79
|
+
def create_db_stmt(self, database: str) -> str:
|
|
80
|
+
return f"CREATE DATABASE {database} TEMPLATE template0 ENCODING 'utf-8' LC_COLLATE 'C' LC_CTYPE 'C'"
|
|
81
|
+
|
|
82
|
+
def default_system_db_url(self) -> str:
|
|
83
|
+
return self.db_url.set(database='defaultdb').render_as_string(hide_password=False)
|
|
84
|
+
|
|
85
|
+
def sa_vector_index(self, store_index_name: str, sa_value_col: sql.schema.Column, metric: str) -> sql.Index | None:
|
|
86
|
+
return None
|
|
87
|
+
|
|
88
|
+
def create_vector_index_stmt(self, store_index_name: str, sa_value_col: sql.Column, metric: str) -> sql.Compiled:
|
|
89
|
+
return sql.text(
|
|
90
|
+
f'CREATE VECTOR INDEX IF NOT EXISTS {store_index_name} ON {sa_value_col.table.name}'
|
|
91
|
+
f'({sa_value_col.name} {metric})'
|
|
92
|
+
).compile()
|