pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import urllib.parse
|
|
7
|
+
import urllib.request
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
import requests
|
|
12
|
+
from requests.adapters import HTTPAdapter
|
|
13
|
+
from tqdm import tqdm
|
|
14
|
+
from urllib3.util.retry import Retry
|
|
15
|
+
|
|
16
|
+
import pixeltable as pxt
|
|
17
|
+
from pixeltable import exceptions as excs
|
|
18
|
+
from pixeltable.catalog import Catalog
|
|
19
|
+
from pixeltable.catalog.table_version import TableVersionMd
|
|
20
|
+
from pixeltable.env import Env
|
|
21
|
+
from pixeltable.utils import sha256sum
|
|
22
|
+
from pixeltable.utils.local_store import TempStore
|
|
23
|
+
|
|
24
|
+
from .packager import TablePackager, TableRestorer
|
|
25
|
+
from .protocol import PxtUri
|
|
26
|
+
from .protocol.replica import (
|
|
27
|
+
DeleteRequest,
|
|
28
|
+
DeleteResponse,
|
|
29
|
+
FinalizeRequest,
|
|
30
|
+
FinalizeResponse,
|
|
31
|
+
PublishRequest,
|
|
32
|
+
PublishResponse,
|
|
33
|
+
ReplicateRequest,
|
|
34
|
+
ReplicateResponse,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Module-level logger; obtained under the logger name 'pixeltable'.
_logger = logging.getLogger('pixeltable')
|
|
38
|
+
|
|
39
|
+
# These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
|
|
40
|
+
# pixeltable.com URLs are available.
|
|
41
|
+
|
|
42
|
+
# Endpoint that every request in this module is POSTed to. Read once at import time from the
# PIXELTABLE_API_URL environment variable, falling back to the default below when it is unset.
PIXELTABLE_API_URL = os.environ.get('PIXELTABLE_API_URL', 'https://internal-api.pixeltable.com')
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def push_replica(
|
|
46
|
+
dest_tbl_uri: str, src_tbl: pxt.Table, bucket: str | None = None, access: Literal['public', 'private'] = 'private'
|
|
47
|
+
) -> str:
|
|
48
|
+
_logger.info(f'Publishing replica for {src_tbl._name!r} to: {dest_tbl_uri}')
|
|
49
|
+
|
|
50
|
+
packager = TablePackager(src_tbl)
|
|
51
|
+
# Create the publish request using packager's bundle_md
|
|
52
|
+
publish_request = PublishRequest(
|
|
53
|
+
table_uri=PxtUri(uri=dest_tbl_uri),
|
|
54
|
+
pxt_version=packager.bundle_md['pxt_version'],
|
|
55
|
+
pxt_md_version=packager.bundle_md['pxt_md_version'],
|
|
56
|
+
md=[TableVersionMd.from_dict(md_dict) for md_dict in packager.bundle_md['md']],
|
|
57
|
+
bucket_name=bucket,
|
|
58
|
+
is_public=access == 'public',
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
_logger.debug(f'Sending PublishRequest: {publish_request}')
|
|
62
|
+
|
|
63
|
+
response = requests.post(PIXELTABLE_API_URL, data=publish_request.model_dump_json(), headers=_api_headers())
|
|
64
|
+
if response.status_code == 201:
|
|
65
|
+
publish_response = PublishResponse.model_validate(response.json())
|
|
66
|
+
existing_table_uri = str(publish_response.table_uri)
|
|
67
|
+
Env.get().console_logger.info(
|
|
68
|
+
f'Replica for version {publish_request.md[0].version_md.version} already exists at {existing_table_uri}.'
|
|
69
|
+
)
|
|
70
|
+
with Catalog.get().begin_xact(tbl_id=src_tbl._id, for_write=True):
|
|
71
|
+
Catalog.get().update_additional_md(src_tbl._id, {'pxt_uri': existing_table_uri})
|
|
72
|
+
return existing_table_uri
|
|
73
|
+
if response.status_code != 200:
|
|
74
|
+
raise excs.Error(f'Error publishing {src_tbl._display_name()}: {response.text}')
|
|
75
|
+
publish_response = PublishResponse.model_validate(response.json())
|
|
76
|
+
|
|
77
|
+
_logger.debug(f'Received PublishResponse: {publish_response}')
|
|
78
|
+
|
|
79
|
+
upload_id = publish_response.upload_id
|
|
80
|
+
destination_uri = publish_response.destination_uri
|
|
81
|
+
|
|
82
|
+
Env.get().console_logger.info(f"Creating a replica of '{src_tbl._path()}' at: {dest_tbl_uri}")
|
|
83
|
+
|
|
84
|
+
bundle = packager.package()
|
|
85
|
+
|
|
86
|
+
parsed_location = urllib.parse.urlparse(str(destination_uri))
|
|
87
|
+
if parsed_location.scheme == 's3':
|
|
88
|
+
_upload_bundle_to_s3(bundle, parsed_location)
|
|
89
|
+
elif parsed_location.scheme == 'https':
|
|
90
|
+
_upload_to_presigned_url(file_path=bundle, url=parsed_location.geturl())
|
|
91
|
+
else:
|
|
92
|
+
raise excs.Error(f'Unsupported destination: {destination_uri}')
|
|
93
|
+
|
|
94
|
+
Env.get().console_logger.info('Finalizing replica ...')
|
|
95
|
+
# Use preview data from packager's bundle_md (set during package())
|
|
96
|
+
finalize_request = FinalizeRequest(
|
|
97
|
+
table_uri=PxtUri(uri=dest_tbl_uri),
|
|
98
|
+
upload_id=upload_id,
|
|
99
|
+
datafile=bundle.name,
|
|
100
|
+
size=bundle.stat().st_size,
|
|
101
|
+
sha256=sha256sum(bundle), # Generate our own SHA for independent verification
|
|
102
|
+
row_count=packager.bundle_md['row_count'],
|
|
103
|
+
preview_header=packager.bundle_md['preview_header'],
|
|
104
|
+
preview_data=packager.bundle_md['preview_data'],
|
|
105
|
+
)
|
|
106
|
+
finalize_response_json = requests.post(
|
|
107
|
+
PIXELTABLE_API_URL, data=finalize_request.model_dump_json(), headers=_api_headers()
|
|
108
|
+
)
|
|
109
|
+
if finalize_response_json.status_code != 200:
|
|
110
|
+
raise excs.Error(f'Error finalizing {src_tbl._display_name()}: {finalize_response_json.text}')
|
|
111
|
+
|
|
112
|
+
finalize_response = FinalizeResponse.model_validate(finalize_response_json.json())
|
|
113
|
+
confirmed_tbl_uri = finalize_response.confirmed_table_uri
|
|
114
|
+
Env.get().console_logger.info(f'The published table is now available at: {confirmed_tbl_uri}')
|
|
115
|
+
|
|
116
|
+
with Catalog.get().begin_xact(tbl_id=src_tbl._id, for_write=True):
|
|
117
|
+
Catalog.get().update_additional_md(src_tbl._id, {'pxt_uri': str(confirmed_tbl_uri)})
|
|
118
|
+
|
|
119
|
+
return str(confirmed_tbl_uri)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
    """Upload the bundle file to the S3 location described by `parsed_location`.

    The bucket is the URL's netloc; the object key is the URL's (unquoted) path joined with the
    bundle's file name, without the leading slash. Upload progress is shown on stdout.

    Args:
        bundle: local path of the bundle file to upload.
        parsed_location: parsed s3:// URL naming the destination bucket and directory.
    """
    bucket = parsed_location.netloc
    # NOTE(review): the key is assembled with url2pathname/pathlib.Path, which are platform-dependent;
    # on Windows this presumably yields backslash separators in the key -- confirm if Windows matters.
    remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
    remote_path = str(remote_dir / bundle.name)[1:]  # Remove initial /

    Env.get().console_logger.info(f'Uploading replica to: {bucket}:{remote_path}')

    s3_client = Env.get().get_client('s3')

    upload_args = {'ChecksumAlgorithm': 'SHA256'}

    # Manage the progress bar with `with` so it is closed even if the upload raises.
    with tqdm(
        desc='Uploading',
        total=bundle.stat().st_size,
        unit='B',
        unit_scale=True,
        unit_divisor=1024,
        miniters=1,  # Update every iteration (should be fine for an upload)
        ncols=100,
        file=sys.stdout,
    ) as progress_bar:
        s3_client.upload_file(
            Filename=str(bundle), Bucket=bucket, Key=remote_path, ExtraArgs=upload_args, Callback=progress_bar.update
        )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
|
|
149
|
+
parsed_uri = PxtUri(src_tbl_uri)
|
|
150
|
+
clone_request = ReplicateRequest(table_uri=parsed_uri)
|
|
151
|
+
response = requests.post(PIXELTABLE_API_URL, data=clone_request.model_dump_json(), headers=_api_headers())
|
|
152
|
+
if response.status_code != 200:
|
|
153
|
+
raise excs.Error(f'Error cloning replica: {response.text}')
|
|
154
|
+
clone_response = ReplicateResponse.model_validate(response.json())
|
|
155
|
+
|
|
156
|
+
# Prevalidate destination path for replication. We do this before downloading the bundle so that we avoid
|
|
157
|
+
# having to download it if there is a collision or if this is a duplicate replica. This is done outside the
|
|
158
|
+
# transaction scope of the table restore operation (we don't want to hold a transaction open during the
|
|
159
|
+
# download); that's fine, since it will be validated again during TableRestorer's catalog operations.
|
|
160
|
+
|
|
161
|
+
t = pxt.get_table(dest_path, if_not_exists='ignore')
|
|
162
|
+
if t is not None:
|
|
163
|
+
if str(t._id) != clone_response.md[0].tbl_md.tbl_id:
|
|
164
|
+
raise excs.Error(
|
|
165
|
+
f'An attempt was made to create a replica table at {dest_path!r}, '
|
|
166
|
+
'but a different table already exists at that location.'
|
|
167
|
+
)
|
|
168
|
+
known_versions = tuple(v['version'] for v in t.get_versions())
|
|
169
|
+
if clone_response.md[0].version_md.version in known_versions:
|
|
170
|
+
Env.get().console_logger.info(f'Replica {dest_path!r} is already up to date with source: {src_tbl_uri}')
|
|
171
|
+
return t
|
|
172
|
+
|
|
173
|
+
primary_version_additional_md = clone_response.md[0].version_md.additional_md
|
|
174
|
+
bundle_uri = str(clone_response.destination_uri)
|
|
175
|
+
bundle_filename = primary_version_additional_md['cloud']['datafile']
|
|
176
|
+
parsed_location = urllib.parse.urlparse(bundle_uri)
|
|
177
|
+
if parsed_location.scheme == 's3':
|
|
178
|
+
bundle_path = _download_bundle_from_s3(parsed_location, bundle_filename)
|
|
179
|
+
elif parsed_location.scheme == 'https':
|
|
180
|
+
bundle_path = TempStore.create_path()
|
|
181
|
+
_download_from_presigned_url(url=parsed_location.geturl(), output_path=bundle_path)
|
|
182
|
+
else:
|
|
183
|
+
raise excs.Error(f'Unexpected response from server: unsupported bundle uri: {bundle_uri}')
|
|
184
|
+
|
|
185
|
+
pxt_uri = str(clone_response.table_uri)
|
|
186
|
+
md_list = [dataclasses.asdict(md) for md in clone_response.md]
|
|
187
|
+
restorer = TableRestorer(
|
|
188
|
+
dest_path, {'pxt_version': pxt.__version__, 'pxt_md_version': clone_response.pxt_md_version, 'md': md_list}
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
tbl = restorer.restore(bundle_path, pxt_uri, explicit_version=parsed_uri.version)
|
|
192
|
+
Env.get().console_logger.info(f'Created local replica {tbl._path()!r} from URI: {src_tbl_uri}')
|
|
193
|
+
return tbl
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_filename: str) -> Path:
    """Download a bundle from S3 into a fresh temporary path and return that path.

    The bucket is the URL's netloc; the object key is the URL's (unquoted) path joined with
    `bundle_filename`, without the leading slash. Download progress is shown on stdout.

    Args:
        parsed_location: parsed s3:// URL naming the bucket and directory holding the bundle.
        bundle_filename: name of the bundle object inside that directory.

    Returns:
        Path of the downloaded file (created via TempStore.create_path()).
    """
    bucket = parsed_location.netloc
    # NOTE(review): the key is assembled with url2pathname/pathlib.Path, which are platform-dependent;
    # on Windows this presumably yields backslash separators in the key -- confirm if Windows matters.
    remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
    remote_path = str(remote_dir / bundle_filename)[1:]  # Remove initial /

    Env.get().console_logger.info(f'Downloading replica from: {bucket}:{remote_path}')

    s3_client = Env.get().get_client('s3')

    # head_object both checks that the object exists and supplies its size for the progress bar.
    obj = s3_client.head_object(Bucket=bucket, Key=remote_path)
    bundle_size = obj['ContentLength']

    bundle_path = TempStore.create_path()
    # Manage the progress bar with `with` so it is closed even if the download raises.
    with tqdm(
        desc='Downloading',
        total=bundle_size,
        unit='B',
        unit_scale=True,
        unit_divisor=1024,
        miniters=1,
        ncols=100,
        file=sys.stdout,
    ) as progress_bar:
        s3_client.download_file(
            Bucket=bucket, Key=remote_path, Filename=str(bundle_path), Callback=progress_bar.update
        )
    return bundle_path
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _create_retry_session(
    max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
) -> requests.Session:
    """Build a requests session whose https:// requests are retried.

    Args:
        max_retries: used as the total, read and connect retry limit.
        backoff_factor: passed through to the retry policy.
        status_forcelist: HTTP status codes that trigger a retry; when None, the default set
            below is used.

    Returns:
        A new session with the retry adapter mounted for the 'https://' prefix only.
    """
    retry_statuses = status_forcelist
    if retry_statuses is None:
        retry_statuses = [
            408,  # Request Timeout
            429,  # Too Many Requests (rate limiting)
            500,  # Internal Server Error
            502,  # Bad Gateway
            503,  # Service Unavailable
            504,  # Gateway Timeout
        ]

    policy = Retry(
        total=max_retries,
        read=max_retries,
        connect=max_retries,
        backoff_factor=backoff_factor,
        status_forcelist=retry_statuses,
        allowed_methods=['GET', 'PUT', 'POST', 'DELETE'],
    )

    retrying_session = requests.Session()
    retrying_session.mount('https://', HTTPAdapter(max_retries=policy))
    return retrying_session
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) -> requests.Response:
    """PUT the file at `file_path` to `url`, showing a progress bar on stdout.

    Args:
        file_path: local file to upload.
        url: destination URL; if it contains 'blob.core.windows.net', the 'x-ms-blob-type'
            header is added.
        max_retries: retry limit handed to the retrying session.

    Returns:
        The response to the PUT request.

    Raises:
        requests.HTTPError: via raise_for_status() when the server answers with an error status.
    """
    n_bytes = file_path.stat().st_size

    put_headers = {'Content-Length': str(n_bytes), 'Content-Type': 'application/octet-stream'}
    # The URL pattern is the only signal used to recognize an Azure destination.
    if 'blob.core.windows.net' in url:
        put_headers['x-ms-blob-type'] = 'BlockBlob'

    progress_opts = {
        'total': n_bytes,
        'desc': 'Uploading',
        'unit': 'B',
        'unit_scale': True,
        'unit_divisor': 1024,
        'miniters': 1,  # Update every iteration (should be fine for an upload)
        'ncols': 100,
        'file': sys.stdout,
    }

    # Leaving the session's `with` block closes it, on success and on error alike.
    with _create_retry_session(max_retries=max_retries) as session:
        with open(file_path, 'rb') as raw:
            with tqdm.wrapattr(raw, method='read', **progress_opts) as tracked:
                response = session.put(
                    url,
                    data=tracked,
                    headers=put_headers,
                    timeout=(60, 1800),  # 60 seconds to connect and 1800 seconds for server response
                )
                response.raise_for_status()
                return response
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _download_from_presigned_url(
    url: str, output_path: Path, headers: dict[str, str] | None = None, max_retries: int = 3
) -> None:
    """Stream the content at `url` into `output_path`, showing a progress bar on stdout.

    Args:
        url: URL to download from.
        output_path: local file to write; opened in 'wb' mode.
        headers: optional request headers.
        max_retries: retry limit handed to the retrying session.

    Raises:
        requests.HTTPError: via raise_for_status() when the server answers with an error status.
    """
    session = _create_retry_session(max_retries=max_retries)

    try:
        # The streamed response and the progress bar are both context-managed so that the
        # connection is released and the bar is closed even if writing or iteration fails.
        with session.get(
            url, headers=headers, stream=True, timeout=(60, 300)
        ) as response:  # 60 seconds to connect and 300 seconds for server response
            response.raise_for_status()

            # Falls back to 0 when the server sends no content-length header.
            total_size = int(response.headers.get('content-length', 0))
            with (
                tqdm(
                    desc='Downloading',
                    total=total_size,
                    unit='B',
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    ncols=100,
                    file=sys.stdout,
                ) as progress_bar,
                open(output_path, 'wb') as f,
            ):
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
                        progress_bar.update(len(chunk))
    finally:
        session.close()
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def delete_replica(dest_path: str, version: int | None = None) -> None:
    """Ask the server to delete the cloud replica identified by `dest_path`.

    Args:
        dest_path: URI of the replica; it is wrapped in a PxtUri for the request.
        version: forwarded unchanged as the request's `version` field.

    Raises:
        excs.Error: if the server answers with a status other than 200.
    """
    target_uri = PxtUri(uri=dest_path)
    request_body = DeleteRequest(table_uri=target_uri, version=version).model_dump_json()
    response = requests.post(PIXELTABLE_API_URL, data=request_body, headers=_api_headers())
    if response.status_code != 200:
        raise excs.Error(f'Error deleting replica: {response.text}')

    # The parsed response is not used; validation only checks that the payload is well-formed.
    DeleteResponse.model_validate(response.json())
    Env.get().console_logger.info(f'Deleted replica at: {dest_path}')
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def list_table_versions(table_uri: str) -> list[dict[str, Any]]:
    """Return the version entries the server reports for the remote table at `table_uri`.

    Args:
        table_uri: URI of the remote table.

    Returns:
        The 'versions' entry of the JSON response, or an empty list if that key is absent.

    Raises:
        excs.Error: if the server answers with a status other than 200.
    """
    payload = json.dumps({'operation_type': 'list_table_versions', 'table_uri': {'uri': table_uri}})
    response = requests.post(PIXELTABLE_API_URL, data=payload, headers=_api_headers())
    if response.status_code == 200:
        return response.json().get('versions', [])
    raise excs.Error(f'Error listing table versions: {response.text}')
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _api_headers() -> dict[str, str]:
    """Return the HTTP headers for API requests: JSON content type, plus the API key if one is set."""
    api_key = Env.get().pxt_api_key
    if api_key is None:
        return {'Content-Type': 'application/json'}
    return {'Content-Type': 'application/json', 'X-api-key': api_key}
|