pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/share/publish.py
CHANGED
|
@@ -1,83 +1,114 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
1
4
|
import sys
|
|
2
5
|
import urllib.parse
|
|
3
6
|
import urllib.request
|
|
4
7
|
from pathlib import Path
|
|
8
|
+
from typing import Any, Literal
|
|
5
9
|
|
|
6
10
|
import requests
|
|
11
|
+
from requests.adapters import HTTPAdapter
|
|
7
12
|
from tqdm import tqdm
|
|
13
|
+
from urllib3.util.retry import Retry
|
|
8
14
|
|
|
9
15
|
import pixeltable as pxt
|
|
10
16
|
from pixeltable import exceptions as excs
|
|
17
|
+
from pixeltable.catalog import Catalog
|
|
11
18
|
from pixeltable.env import Env
|
|
12
19
|
from pixeltable.utils import sha256sum
|
|
20
|
+
from pixeltable.utils.local_store import TempStore
|
|
13
21
|
|
|
14
22
|
from .packager import TablePackager, TableRestorer
|
|
23
|
+
from .protocol import PxtUri
|
|
24
|
+
from .protocol.replica import (
|
|
25
|
+
DeleteRequest,
|
|
26
|
+
DeleteResponse,
|
|
27
|
+
FinalizeRequest,
|
|
28
|
+
FinalizeResponse,
|
|
29
|
+
PublishRequest,
|
|
30
|
+
PublishResponse,
|
|
31
|
+
ReplicateRequest,
|
|
32
|
+
ReplicateResponse,
|
|
33
|
+
)
|
|
15
34
|
|
|
16
35
|
# These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
|
|
17
36
|
# pixeltable.com URLs are available.
|
|
18
37
|
|
|
19
|
-
PIXELTABLE_API_URL = 'https://internal-api.pixeltable.com'
|
|
38
|
+
PIXELTABLE_API_URL = os.environ.get('PIXELTABLE_API_URL', 'https://internal-api.pixeltable.com')
|
|
20
39
|
|
|
21
40
|
|
|
22
|
-
def push_replica(
|
|
23
|
-
|
|
24
|
-
|
|
41
|
+
def push_replica(
|
|
42
|
+
dest_tbl_uri: str, src_tbl: pxt.Table, bucket: str | None = None, access: Literal['public', 'private'] = 'private'
|
|
43
|
+
) -> str:
|
|
44
|
+
packager = TablePackager(src_tbl)
|
|
25
45
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
46
|
+
# Create the publish request using packager's bundle_md
|
|
47
|
+
publish_request = PublishRequest(
|
|
48
|
+
table_uri=PxtUri(uri=dest_tbl_uri),
|
|
49
|
+
pxt_version=packager.bundle_md['pxt_version'],
|
|
50
|
+
pxt_md_version=packager.bundle_md['pxt_md_version'],
|
|
51
|
+
md=packager.bundle_md['md'],
|
|
52
|
+
bucket_name=bucket,
|
|
53
|
+
is_public=access == 'public',
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
response = requests.post(PIXELTABLE_API_URL, data=publish_request.model_dump_json(), headers=_api_headers())
|
|
30
57
|
if response.status_code != 200:
|
|
31
|
-
raise excs.Error(f'Error publishing
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
destination_uri = response_json['destination_uri']
|
|
58
|
+
raise excs.Error(f'Error publishing {src_tbl._display_name()}: {response.text}')
|
|
59
|
+
publish_response = PublishResponse.model_validate(response.json())
|
|
60
|
+
|
|
61
|
+
upload_id = publish_response.upload_id
|
|
62
|
+
destination_uri = publish_response.destination_uri
|
|
37
63
|
|
|
38
|
-
Env.get().console_logger.info(f"Creating a
|
|
64
|
+
Env.get().console_logger.info(f"Creating a replica of '{src_tbl._path()}' at: {dest_tbl_uri}")
|
|
39
65
|
|
|
40
66
|
bundle = packager.package()
|
|
41
67
|
|
|
42
|
-
parsed_location = urllib.parse.urlparse(destination_uri)
|
|
68
|
+
parsed_location = urllib.parse.urlparse(str(destination_uri))
|
|
43
69
|
if parsed_location.scheme == 's3':
|
|
44
70
|
_upload_bundle_to_s3(bundle, parsed_location)
|
|
71
|
+
elif parsed_location.scheme == 'https':
|
|
72
|
+
_upload_to_presigned_url(file_path=bundle, url=parsed_location.geturl())
|
|
45
73
|
else:
|
|
46
74
|
raise excs.Error(f'Unsupported destination: {destination_uri}')
|
|
47
75
|
|
|
48
|
-
Env.get().console_logger.info('Finalizing
|
|
76
|
+
Env.get().console_logger.info('Finalizing replica ...')
|
|
77
|
+
# Use preview data from packager's bundle_md (set during package())
|
|
78
|
+
finalize_request = FinalizeRequest(
|
|
79
|
+
table_uri=PxtUri(uri=dest_tbl_uri),
|
|
80
|
+
upload_id=upload_id,
|
|
81
|
+
datafile=bundle.name,
|
|
82
|
+
size=bundle.stat().st_size,
|
|
83
|
+
sha256=sha256sum(bundle), # Generate our own SHA for independent verification
|
|
84
|
+
row_count=packager.bundle_md['row_count'],
|
|
85
|
+
preview_header=packager.bundle_md['preview_header'],
|
|
86
|
+
preview_data=packager.bundle_md['preview_data'],
|
|
87
|
+
)
|
|
88
|
+
finalize_response_json = requests.post(
|
|
89
|
+
PIXELTABLE_API_URL, data=finalize_request.model_dump_json(), headers=_api_headers()
|
|
90
|
+
)
|
|
91
|
+
if finalize_response_json.status_code != 200:
|
|
92
|
+
raise excs.Error(f'Error finalizing {src_tbl._display_name()}: {finalize_response_json.text}')
|
|
93
|
+
|
|
94
|
+
finalize_response = FinalizeResponse.model_validate(finalize_response_json.json())
|
|
95
|
+
confirmed_tbl_uri = finalize_response.confirmed_table_uri
|
|
96
|
+
Env.get().console_logger.info(f'The published table is now available at: {confirmed_tbl_uri}')
|
|
49
97
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
'upload_id': upload_id,
|
|
53
|
-
'datafile': bundle.name,
|
|
54
|
-
'size': bundle.stat().st_size,
|
|
55
|
-
'sha256': sha256sum(bundle), # Generate our own SHA for independent verification
|
|
56
|
-
}
|
|
57
|
-
# TODO: Use Pydantic for validation
|
|
58
|
-
finalize_response = requests.post(PIXELTABLE_API_URL, json=finalize_request_json, headers=headers_json)
|
|
59
|
-
if finalize_response.status_code != 200:
|
|
60
|
-
raise excs.Error(f'Error finalizing snapshot: {finalize_response.text}')
|
|
61
|
-
finalize_response_json = finalize_response.json()
|
|
62
|
-
if not isinstance(finalize_response_json, dict) or 'confirmed_table_uri' not in finalize_response_json:
|
|
63
|
-
raise excs.Error(f'Error finalizing snapshot: unexpected response from server.\n{finalize_response_json}')
|
|
98
|
+
with Catalog.get().begin_xact(tbl_id=src_tbl._tbl_version_path.tbl_id, for_write=True):
|
|
99
|
+
src_tbl._tbl_version_path.tbl_version.get().update_pxt_uri(str(confirmed_tbl_uri))
|
|
64
100
|
|
|
65
|
-
confirmed_tbl_uri
|
|
66
|
-
Env.get().console_logger.info(f'The published snapshot is now available at: {confirmed_tbl_uri}')
|
|
67
|
-
return confirmed_tbl_uri
|
|
101
|
+
return str(confirmed_tbl_uri)
|
|
68
102
|
|
|
69
103
|
|
|
70
104
|
def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
|
|
71
|
-
from pixeltable.utils.s3 import get_client
|
|
72
|
-
|
|
73
105
|
bucket = parsed_location.netloc
|
|
74
106
|
remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
|
|
75
107
|
remote_path = str(remote_dir / bundle.name)[1:] # Remove initial /
|
|
76
108
|
|
|
77
|
-
Env.get().console_logger.info(f'Uploading
|
|
109
|
+
Env.get().console_logger.info(f'Uploading replica to: {bucket}:{remote_path}')
|
|
78
110
|
|
|
79
|
-
|
|
80
|
-
s3_client = get_client(**boto_config)
|
|
111
|
+
s3_client = Env.get().get_client('s3')
|
|
81
112
|
|
|
82
113
|
upload_args = {'ChecksumAlgorithm': 'SHA256'}
|
|
83
114
|
|
|
@@ -97,46 +128,47 @@ def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult
|
|
|
97
128
|
|
|
98
129
|
|
|
99
130
|
def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
response = requests.post(PIXELTABLE_API_URL, json=clone_request_json, headers=headers_json)
|
|
131
|
+
clone_request = ReplicateRequest(table_uri=PxtUri(src_tbl_uri))
|
|
132
|
+
response = requests.post(PIXELTABLE_API_URL, data=clone_request.model_dump_json(), headers=_api_headers())
|
|
103
133
|
if response.status_code != 200:
|
|
104
|
-
raise excs.Error(f'Error cloning
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
primary_tbl_additional_md = response_json['md']['tables'][0]['table_md']['additional_md']
|
|
110
|
-
bundle_uri = primary_tbl_additional_md['destination_uri']
|
|
111
|
-
bundle_filename = primary_tbl_additional_md['datafile']
|
|
134
|
+
raise excs.Error(f'Error cloning replica: {response.text}')
|
|
135
|
+
clone_response = ReplicateResponse.model_validate(response.json())
|
|
136
|
+
primary_version_additional_md = clone_response.md[0].version_md.additional_md
|
|
137
|
+
bundle_uri = str(clone_response.destination_uri)
|
|
138
|
+
bundle_filename = primary_version_additional_md['cloud']['datafile']
|
|
112
139
|
parsed_location = urllib.parse.urlparse(bundle_uri)
|
|
113
140
|
if parsed_location.scheme == 's3':
|
|
114
141
|
bundle_path = _download_bundle_from_s3(parsed_location, bundle_filename)
|
|
142
|
+
elif parsed_location.scheme == 'https':
|
|
143
|
+
bundle_path = TempStore.create_path()
|
|
144
|
+
_download_from_presigned_url(url=parsed_location.geturl(), output_path=bundle_path)
|
|
115
145
|
else:
|
|
116
146
|
raise excs.Error(f'Unexpected response from server: unsupported bundle uri: {bundle_uri}')
|
|
147
|
+
# Set pxt_uri in the table metadata; use table_uri from ReplicateResponse
|
|
148
|
+
clone_response.md[0].tbl_md.additional_md['pxt_uri'] = str(clone_response.table_uri)
|
|
149
|
+
md_list = [dataclasses.asdict(md) for md in clone_response.md]
|
|
150
|
+
restorer = TableRestorer(
|
|
151
|
+
dest_path, {'pxt_version': pxt.__version__, 'pxt_md_version': clone_response.pxt_md_version, 'md': md_list}
|
|
152
|
+
)
|
|
117
153
|
|
|
118
|
-
restorer = TableRestorer(dest_path, response_json)
|
|
119
154
|
tbl = restorer.restore(bundle_path)
|
|
120
155
|
Env.get().console_logger.info(f'Created local replica {tbl._path()!r} from URI: {src_tbl_uri}')
|
|
121
156
|
return tbl
|
|
122
157
|
|
|
123
158
|
|
|
124
159
|
def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_filename: str) -> Path:
|
|
125
|
-
from pixeltable.utils.s3 import get_client
|
|
126
|
-
|
|
127
160
|
bucket = parsed_location.netloc
|
|
128
161
|
remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
|
|
129
162
|
remote_path = str(remote_dir / bundle_filename)[1:] # Remove initial /
|
|
130
163
|
|
|
131
|
-
Env.get().console_logger.info(f'Downloading
|
|
164
|
+
Env.get().console_logger.info(f'Downloading replica from: {bucket}:{remote_path}')
|
|
132
165
|
|
|
133
|
-
|
|
134
|
-
s3_client = get_client(**boto_config)
|
|
166
|
+
s3_client = Env.get().get_client('s3')
|
|
135
167
|
|
|
136
168
|
obj = s3_client.head_object(Bucket=bucket, Key=remote_path) # Check if the object exists
|
|
137
169
|
bundle_size = obj['ContentLength']
|
|
138
170
|
|
|
139
|
-
bundle_path =
|
|
171
|
+
bundle_path = TempStore.create_path()
|
|
140
172
|
progress_bar = tqdm(
|
|
141
173
|
desc='Downloading',
|
|
142
174
|
total=bundle_size,
|
|
@@ -149,3 +181,127 @@ def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_f
|
|
|
149
181
|
)
|
|
150
182
|
s3_client.download_file(Bucket=bucket, Key=remote_path, Filename=str(bundle_path), Callback=progress_bar.update)
|
|
151
183
|
return bundle_path
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _create_retry_session(
|
|
187
|
+
max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
|
|
188
|
+
) -> requests.Session:
|
|
189
|
+
"""Create a requests session with retry configuration"""
|
|
190
|
+
if status_forcelist is None:
|
|
191
|
+
status_forcelist = [
|
|
192
|
+
408, # Request Timeout
|
|
193
|
+
429, # Too Many Requests (rate limiting)
|
|
194
|
+
500, # Internal Server Error (server-side error)
|
|
195
|
+
502, # Bad Gateway (proxy/gateway got invalid response)
|
|
196
|
+
503, # Service Unavailable (server overloaded or down)
|
|
197
|
+
504, # Gateway Timeout (proxy/gateway timeout)
|
|
198
|
+
]
|
|
199
|
+
retry_strategy = Retry(
|
|
200
|
+
total=max_retries,
|
|
201
|
+
read=max_retries,
|
|
202
|
+
connect=max_retries,
|
|
203
|
+
backoff_factor=backoff_factor,
|
|
204
|
+
status_forcelist=status_forcelist,
|
|
205
|
+
allowed_methods=['GET', 'PUT', 'POST', 'DELETE'],
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
session = requests.Session()
|
|
209
|
+
adapter = HTTPAdapter(max_retries=retry_strategy)
|
|
210
|
+
session.mount('https://', adapter)
|
|
211
|
+
return session
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) -> requests.Response:
|
|
215
|
+
"""Upload file with progress bar and retries"""
|
|
216
|
+
file_size = file_path.stat().st_size
|
|
217
|
+
|
|
218
|
+
headers = {'Content-Length': str(file_size), 'Content-Type': 'application/octet-stream'}
|
|
219
|
+
|
|
220
|
+
session = _create_retry_session(max_retries=max_retries)
|
|
221
|
+
try:
|
|
222
|
+
with (
|
|
223
|
+
open(file_path, 'rb') as f,
|
|
224
|
+
tqdm.wrapattr(
|
|
225
|
+
f,
|
|
226
|
+
method='read',
|
|
227
|
+
total=file_size,
|
|
228
|
+
desc='Uploading',
|
|
229
|
+
unit='B',
|
|
230
|
+
unit_scale=True,
|
|
231
|
+
unit_divisor=1024,
|
|
232
|
+
miniters=1, # Update every iteration (should be fine for an upload)
|
|
233
|
+
ncols=100,
|
|
234
|
+
file=sys.stdout,
|
|
235
|
+
) as file_with_progress,
|
|
236
|
+
):
|
|
237
|
+
response = session.put(
|
|
238
|
+
url,
|
|
239
|
+
data=file_with_progress,
|
|
240
|
+
headers=headers,
|
|
241
|
+
timeout=(60, 1800), # 60 seconds to connect and 1800 seconds (30 minutes) for server response
|
|
242
|
+
)
|
|
243
|
+
response.raise_for_status()
|
|
244
|
+
return response
|
|
245
|
+
finally:
|
|
246
|
+
session.close()
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _download_from_presigned_url(
|
|
250
|
+
url: str, output_path: Path, headers: dict[str, str] | None = None, max_retries: int = 3
|
|
251
|
+
) -> None:
|
|
252
|
+
"""Download file with progress bar and retries"""
|
|
253
|
+
session = _create_retry_session(max_retries=max_retries)
|
|
254
|
+
|
|
255
|
+
try:
|
|
256
|
+
# Stream download with progress
|
|
257
|
+
response = session.get(
|
|
258
|
+
url, headers=headers, stream=True, timeout=(60, 300)
|
|
259
|
+
) # 60 seconds to connect and 300 seconds for server response
|
|
260
|
+
response.raise_for_status()
|
|
261
|
+
|
|
262
|
+
total_size = int(response.headers.get('content-length', 0))
|
|
263
|
+
progress_bar = tqdm(
|
|
264
|
+
desc='Downloading',
|
|
265
|
+
total=total_size,
|
|
266
|
+
unit='B',
|
|
267
|
+
unit_scale=True,
|
|
268
|
+
unit_divisor=1024,
|
|
269
|
+
miniters=1,
|
|
270
|
+
ncols=100,
|
|
271
|
+
file=sys.stdout,
|
|
272
|
+
)
|
|
273
|
+
with open(output_path, 'wb') as f:
|
|
274
|
+
for chunk in response.iter_content(chunk_size=8192):
|
|
275
|
+
if chunk:
|
|
276
|
+
f.write(chunk)
|
|
277
|
+
progress_bar.update(len(chunk))
|
|
278
|
+
finally:
|
|
279
|
+
session.close()
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def delete_replica(dest_path: str, version: int | None = None) -> None:
|
|
283
|
+
"""Delete cloud replica"""
|
|
284
|
+
delete_request = DeleteRequest(table_uri=PxtUri(uri=dest_path), version=version)
|
|
285
|
+
response = requests.post(PIXELTABLE_API_URL, data=delete_request.model_dump_json(), headers=_api_headers())
|
|
286
|
+
if response.status_code != 200:
|
|
287
|
+
raise excs.Error(f'Error deleting replica: {response.text}')
|
|
288
|
+
DeleteResponse.model_validate(response.json())
|
|
289
|
+
Env.get().console_logger.info(f'Deleted replica at: {dest_path}')
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def list_table_versions(table_uri: str) -> list[dict[str, Any]]:
|
|
293
|
+
"""List versions for a remote table."""
|
|
294
|
+
request_json = {'operation_type': 'list_table_versions', 'table_uri': {'uri': table_uri}}
|
|
295
|
+
response = requests.post(PIXELTABLE_API_URL, data=json.dumps(request_json), headers=_api_headers())
|
|
296
|
+
if response.status_code != 200:
|
|
297
|
+
raise excs.Error(f'Error listing table versions: {response.text}')
|
|
298
|
+
response_data = response.json()
|
|
299
|
+
return response_data.get('versions', [])
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _api_headers() -> dict[str, str]:
|
|
303
|
+
headers = {'Content-Type': 'application/json'}
|
|
304
|
+
api_key = Env.get().pxt_api_key
|
|
305
|
+
if api_key is not None:
|
|
306
|
+
headers['X-api-key'] = api_key
|
|
307
|
+
return headers
|