pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/share/publish.py
CHANGED
|
@@ -1,83 +1,132 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
1
5
|
import sys
|
|
2
6
|
import urllib.parse
|
|
3
7
|
import urllib.request
|
|
4
8
|
from pathlib import Path
|
|
9
|
+
from typing import Any, Literal
|
|
5
10
|
|
|
6
11
|
import requests
|
|
12
|
+
from requests.adapters import HTTPAdapter
|
|
7
13
|
from tqdm import tqdm
|
|
14
|
+
from urllib3.util.retry import Retry
|
|
8
15
|
|
|
9
16
|
import pixeltable as pxt
|
|
10
17
|
from pixeltable import exceptions as excs
|
|
18
|
+
from pixeltable.catalog import Catalog
|
|
19
|
+
from pixeltable.catalog.table_version import TableVersionMd
|
|
11
20
|
from pixeltable.env import Env
|
|
12
21
|
from pixeltable.utils import sha256sum
|
|
22
|
+
from pixeltable.utils.local_store import TempStore
|
|
13
23
|
|
|
14
24
|
from .packager import TablePackager, TableRestorer
|
|
25
|
+
from .protocol import PxtUri
|
|
26
|
+
from .protocol.replica import (
|
|
27
|
+
DeleteRequest,
|
|
28
|
+
DeleteResponse,
|
|
29
|
+
FinalizeRequest,
|
|
30
|
+
FinalizeResponse,
|
|
31
|
+
PublishRequest,
|
|
32
|
+
PublishResponse,
|
|
33
|
+
ReplicateRequest,
|
|
34
|
+
ReplicateResponse,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
_logger = logging.getLogger('pixeltable')
|
|
15
38
|
|
|
16
39
|
# These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
|
|
17
40
|
# pixeltable.com URLs are available.
|
|
18
41
|
|
|
19
|
-
PIXELTABLE_API_URL = 'https://internal-api.pixeltable.com'
|
|
42
|
+
# Endpoint that every request in this module is POSTed to. It can be overridden through the
# PIXELTABLE_API_URL environment variable; otherwise the internal API URL below is used.
PIXELTABLE_API_URL = os.environ.get('PIXELTABLE_API_URL', 'https://internal-api.pixeltable.com')
|
|
20
43
|
|
|
21
44
|
|
|
22
|
-
def push_replica(
|
|
23
|
-
|
|
24
|
-
|
|
45
|
+
def push_replica(
    dest_tbl_uri: str, src_tbl: pxt.Table, bucket: str | None = None, access: Literal['public', 'private'] = 'private'
) -> str:
    """Publish `src_tbl` as a replica at `dest_tbl_uri` and return the resulting table URI.

    The function sends a `PublishRequest` built from the packager's bundle metadata. A 201 reply is
    treated as "this version is already published": the URI from the reply is recorded and returned
    without uploading anything. On a 200 reply the table is packaged, the bundle is uploaded to the
    destination named in the reply (`s3` or `https`), and a `FinalizeRequest` is sent. In both
    successful cases the URI is stored under `'pxt_uri'` in the source table's additional metadata.

    Args:
        dest_tbl_uri: URI the replica should be published to.
        src_tbl: The local table to publish.
        bucket: Passed to the service as `bucket_name`.
        access: `'public'` sets `is_public=True` on the request; anything else sets it to False.

    Returns:
        The table URI reported by the service, as a string.

    Raises:
        excs.Error: If the publish or finalize request does not succeed, or if the upload
            destination uses a scheme other than `s3` or `https`.
    """

    def _record_pxt_uri(uri: str) -> None:
        # Persist the remote URI in the source table's additional metadata.
        with Catalog.get().begin_xact(tbl_id=src_tbl._id, for_write=True):
            Catalog.get().update_additional_md(src_tbl._id, {'pxt_uri': uri})

    _logger.info(f'Publishing replica for {src_tbl._name!r} to: {dest_tbl_uri}')

    packager = TablePackager(src_tbl)
    # The publish request is assembled from the packager's bundle metadata.
    publish_request = PublishRequest(
        table_uri=PxtUri(uri=dest_tbl_uri),
        pxt_version=packager.bundle_md['pxt_version'],
        pxt_md_version=packager.bundle_md['pxt_md_version'],
        md=[TableVersionMd.from_dict(md_dict) for md_dict in packager.bundle_md['md']],
        bucket_name=bucket,
        is_public=access == 'public',
    )
    _logger.debug(f'Sending PublishRequest: {publish_request}')

    publish_reply = requests.post(PIXELTABLE_API_URL, data=publish_request.model_dump_json(), headers=_api_headers())
    status = publish_reply.status_code
    if status not in (200, 201):
        raise excs.Error(f'Error publishing {src_tbl._display_name()}: {publish_reply.text}')
    publish_response = PublishResponse.model_validate(publish_reply.json())

    if status == 201:
        # The service already has this version: record its URI and stop here.
        existing_table_uri = str(publish_response.table_uri)
        Env.get().console_logger.info(
            f'Replica for version {publish_request.md[0].version_md.version} already exists at {existing_table_uri}.'
        )
        _record_pxt_uri(existing_table_uri)
        return existing_table_uri

    _logger.debug(f'Received PublishResponse: {publish_response}')

    upload_id = publish_response.upload_id
    destination_uri = publish_response.destination_uri

    Env.get().console_logger.info(f"Creating a replica of '{src_tbl._path()}' at: {dest_tbl_uri}")

    bundle = packager.package()

    # Choose the upload mechanism from the scheme of the destination handed back by the service.
    parsed_location = urllib.parse.urlparse(str(destination_uri))
    scheme = parsed_location.scheme
    if scheme == 's3':
        _upload_bundle_to_s3(bundle, parsed_location)
    elif scheme == 'https':
        _upload_to_presigned_url(file_path=bundle, url=parsed_location.geturl())
    else:
        raise excs.Error(f'Unsupported destination: {destination_uri}')

    Env.get().console_logger.info('Finalizing replica ...')
    # Row count and preview fields are read from bundle_md after package() has run.
    finalize_request = FinalizeRequest(
        table_uri=PxtUri(uri=dest_tbl_uri),
        upload_id=upload_id,
        datafile=bundle.name,
        size=bundle.stat().st_size,
        sha256=sha256sum(bundle),  # our own digest, so the upload can be verified independently
        row_count=packager.bundle_md['row_count'],
        preview_header=packager.bundle_md['preview_header'],
        preview_data=packager.bundle_md['preview_data'],
    )
    finalize_reply = requests.post(
        PIXELTABLE_API_URL, data=finalize_request.model_dump_json(), headers=_api_headers()
    )
    if finalize_reply.status_code != 200:
        raise excs.Error(f'Error finalizing {src_tbl._display_name()}: {finalize_reply.text}')

    confirmed_tbl_uri = FinalizeResponse.model_validate(finalize_reply.json()).confirmed_table_uri
    Env.get().console_logger.info(f'The published table is now available at: {confirmed_tbl_uri}')

    _record_pxt_uri(str(confirmed_tbl_uri))
    return str(confirmed_tbl_uri)
|
|
69
120
|
|
|
70
|
-
def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
|
|
71
|
-
from pixeltable.utils.s3 import get_client
|
|
72
121
|
|
|
122
|
+
def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
|
|
73
123
|
bucket = parsed_location.netloc
|
|
74
124
|
remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
|
|
75
125
|
remote_path = str(remote_dir / bundle.name)[1:] # Remove initial /
|
|
76
126
|
|
|
77
|
-
Env.get().console_logger.info(f'Uploading
|
|
127
|
+
Env.get().console_logger.info(f'Uploading replica to: {bucket}:{remote_path}')
|
|
78
128
|
|
|
79
|
-
|
|
80
|
-
s3_client = get_client(**boto_config)
|
|
129
|
+
s3_client = Env.get().get_client('s3')
|
|
81
130
|
|
|
82
131
|
upload_args = {'ChecksumAlgorithm': 'SHA256'}
|
|
83
132
|
|
|
@@ -97,46 +146,66 @@ def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult
|
|
|
97
146
|
|
|
98
147
|
|
|
99
148
|
def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
    """Create or refresh a local replica at `dest_path` from the remote table `src_tbl_uri`.

    A `ReplicateRequest` is sent first. If a table already exists at `dest_path`, it must have the
    same table id as the remote table; if it already contains the remote version, it is returned
    without downloading anything. Otherwise the bundle is downloaded (from `s3` or `https`) and
    restored through `TableRestorer`.

    Args:
        dest_path: Local path of the replica table.
        src_tbl_uri: URI of the remote table to replicate.

    Returns:
        The local replica table.

    Raises:
        excs.Error: If the replicate request does not return 200, if a different table already
            exists at `dest_path`, or if the bundle URI uses a scheme other than `s3` or `https`.
    """
    parsed_uri = PxtUri(src_tbl_uri)
    clone_request = ReplicateRequest(table_uri=parsed_uri)
    reply = requests.post(PIXELTABLE_API_URL, data=clone_request.model_dump_json(), headers=_api_headers())
    if reply.status_code != 200:
        raise excs.Error(f'Error cloning replica: {reply.text}')
    clone_response = ReplicateResponse.model_validate(reply.json())

    # Check the destination before downloading, so that a path collision or an already up-to-date
    # replica costs no download. This deliberately happens outside the restore transaction (no
    # transaction is held open during the download); TableRestorer's catalog operations validate
    # the destination again.
    existing = pxt.get_table(dest_path, if_not_exists='ignore')
    primary_md = clone_response.md[0]
    if existing is not None:
        if str(existing._id) != primary_md.tbl_md.tbl_id:
            raise excs.Error(
                f'An attempt was made to create a replica table at {dest_path!r}, '
                'but a different table already exists at that location.'
            )
        local_versions = [entry['version'] for entry in existing.get_versions()]
        if primary_md.version_md.version in local_versions:
            Env.get().console_logger.info(f'Replica {dest_path!r} is already up to date with source: {src_tbl_uri}')
            return existing

    additional_md = primary_md.version_md.additional_md
    bundle_uri = str(clone_response.destination_uri)
    bundle_filename = additional_md['cloud']['datafile']

    # Fetch the bundle with the mechanism that matches the scheme of the bundle URI.
    parsed_location = urllib.parse.urlparse(bundle_uri)
    scheme = parsed_location.scheme
    if scheme == 's3':
        bundle_path = _download_bundle_from_s3(parsed_location, bundle_filename)
    elif scheme == 'https':
        bundle_path = TempStore.create_path()
        _download_from_presigned_url(url=parsed_location.geturl(), output_path=bundle_path)
    else:
        raise excs.Error(f'Unexpected response from server: unsupported bundle uri: {bundle_uri}')

    pxt_uri = str(clone_response.table_uri)
    bundle_md = {
        'pxt_version': pxt.__version__,
        'pxt_md_version': clone_response.pxt_md_version,
        'md': [dataclasses.asdict(md) for md in clone_response.md],
    }
    restorer = TableRestorer(dest_path, bundle_md)

    tbl = restorer.restore(bundle_path, pxt_uri, explicit_version=parsed_uri.version)
    Env.get().console_logger.info(f'Created local replica {tbl._path()!r} from URI: {src_tbl_uri}')
    return tbl
|
|
122
194
|
|
|
123
195
|
|
|
124
196
|
def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_filename: str) -> Path:
|
|
125
|
-
from pixeltable.utils.s3 import get_client
|
|
126
|
-
|
|
127
197
|
bucket = parsed_location.netloc
|
|
128
198
|
remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
|
|
129
199
|
remote_path = str(remote_dir / bundle_filename)[1:] # Remove initial /
|
|
130
200
|
|
|
131
|
-
Env.get().console_logger.info(f'Downloading
|
|
201
|
+
Env.get().console_logger.info(f'Downloading replica from: {bucket}:{remote_path}')
|
|
132
202
|
|
|
133
|
-
|
|
134
|
-
s3_client = get_client(**boto_config)
|
|
203
|
+
s3_client = Env.get().get_client('s3')
|
|
135
204
|
|
|
136
205
|
obj = s3_client.head_object(Bucket=bucket, Key=remote_path) # Check if the object exists
|
|
137
206
|
bundle_size = obj['ContentLength']
|
|
138
207
|
|
|
139
|
-
bundle_path =
|
|
208
|
+
bundle_path = TempStore.create_path()
|
|
140
209
|
progress_bar = tqdm(
|
|
141
210
|
desc='Downloading',
|
|
142
211
|
total=bundle_size,
|
|
@@ -149,3 +218,132 @@ def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_f
|
|
|
149
218
|
)
|
|
150
219
|
s3_client.download_file(Bucket=bucket, Key=remote_path, Filename=str(bundle_path), Callback=progress_bar.update)
|
|
151
220
|
return bundle_path
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _create_retry_session(
    max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
) -> requests.Session:
    """Build a `requests.Session` whose `https://` requests are retried.

    Args:
        max_retries: Used for the total, read and connect retry limits alike.
        backoff_factor: Backoff factor handed to the retry policy.
        status_forcelist: HTTP status codes that trigger a retry. When None, a default list of
            timeout, rate-limit and server-error codes is used.

    Returns:
        A new session with the retrying adapter mounted for `https://`. The caller closes it.
    """
    default_retry_statuses = [
        408,  # Request Timeout
        429,  # Too Many Requests (rate limiting)
        500,  # Internal Server Error
        502,  # Bad Gateway
        503,  # Service Unavailable
        504,  # Gateway Timeout
    ]
    retry_statuses = default_retry_statuses if status_forcelist is None else status_forcelist

    retry_policy = Retry(
        total=max_retries,
        read=max_retries,
        connect=max_retries,
        backoff_factor=backoff_factor,
        status_forcelist=retry_statuses,
        allowed_methods=['GET', 'PUT', 'POST', 'DELETE'],
    )

    session = requests.Session()
    # Only https:// is mounted with the retrying adapter.
    session.mount('https://', HTTPAdapter(max_retries=retry_policy))
    return session
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) -> requests.Response:
    """PUT the file at `file_path` to `url`, showing a progress bar while the file is read.

    Args:
        file_path: Local file to upload.
        url: Destination URL for the PUT request.
        max_retries: Retry limit passed to `_create_retry_session()`.

    Returns:
        The response of the PUT request.

    Raises:
        requests.HTTPError: Raised by `raise_for_status()` if the final response has an error status.
    """
    size_in_bytes = file_path.stat().st_size
    put_headers = {'Content-Length': str(size_in_bytes), 'Content-Type': 'application/octet-stream'}

    # URLs containing the Azure blob host get the extra blob-type header.
    if 'blob.core.windows.net' in url:
        put_headers['x-ms-blob-type'] = 'BlockBlob'

    progress_options = {
        'method': 'read',
        'total': size_in_bytes,
        'desc': 'Uploading',
        'unit': 'B',
        'unit_scale': True,
        'unit_divisor': 1024,
        'miniters': 1,  # refresh on every read
        'ncols': 100,
        'file': sys.stdout,
    }

    session = _create_retry_session(max_retries=max_retries)
    try:
        with open(file_path, 'rb') as raw_file, tqdm.wrapattr(raw_file, **progress_options) as tracked_file:
            # timeout = (seconds to connect, seconds to wait for the server's response)
            put_response = session.put(url, data=tracked_file, headers=put_headers, timeout=(60, 1800))
            put_response.raise_for_status()
            return put_response
    finally:
        session.close()
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _download_from_presigned_url(
    url: str, output_path: Path, headers: dict[str, str] | None = None, max_retries: int = 3
) -> None:
    """Stream the content at `url` into `output_path`, showing a progress bar.

    The streamed response, the progress bar and the output file are all managed by `with`
    blocks, so they are closed even when the download fails part-way.

    Args:
        url: URL to download from.
        output_path: Local file that receives the downloaded bytes.
        headers: Optional extra request headers.
        max_retries: Retry limit passed to `_create_retry_session()`.

    Raises:
        requests.HTTPError: Raised by `raise_for_status()` if the response has an error status.
    """
    session = _create_retry_session(max_retries=max_retries)

    try:
        # Stream the body instead of loading it into memory.
        # timeout = (60 seconds to connect, 300 seconds for server response)
        with session.get(url, headers=headers, stream=True, timeout=(60, 300)) as response:
            response.raise_for_status()

            # Falls back to 0 when the server sends no Content-Length header.
            total_size = int(response.headers.get('content-length', 0))
            with (
                tqdm(
                    desc='Downloading',
                    total=total_size,
                    unit='B',
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    ncols=100,
                    file=sys.stdout,
                ) as progress_bar,
                open(output_path, 'wb') as f,
            ):
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
                        progress_bar.update(len(chunk))
    finally:
        session.close()
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def delete_replica(dest_path: str, version: int | None = None) -> None:
    """Ask the service to delete the cloud replica at `dest_path`.

    Args:
        dest_path: URI of the replica; it is wrapped in a `PxtUri` for the request.
        version: Forwarded unchanged as the `version` field of the `DeleteRequest`.

    Raises:
        excs.Error: If the service does not answer with status 200.
    """
    payload = DeleteRequest(table_uri=PxtUri(uri=dest_path), version=version).model_dump_json()
    reply = requests.post(PIXELTABLE_API_URL, data=payload, headers=_api_headers())
    if reply.status_code != 200:
        raise excs.Error(f'Error deleting replica: {reply.text}')

    # The parsed response is not used; it is validated only to check its shape.
    DeleteResponse.model_validate(reply.json())
    Env.get().console_logger.info(f'Deleted replica at: {dest_path}')
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def list_table_versions(table_uri: str) -> list[dict[str, Any]]:
    """Return the version entries the service reports for the remote table `table_uri`.

    Args:
        table_uri: URI of the remote table.

    Returns:
        The value of the `'versions'` key in the JSON reply, or an empty list if that key is absent.

    Raises:
        excs.Error: If the service does not answer with status 200.
    """
    body = json.dumps({'operation_type': 'list_table_versions', 'table_uri': {'uri': table_uri}})
    reply = requests.post(PIXELTABLE_API_URL, data=body, headers=_api_headers())
    if reply.status_code != 200:
        raise excs.Error(f'Error listing table versions: {reply.text}')
    return reply.json().get('versions', [])
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _api_headers() -> dict[str, str]:
    """Return the HTTP headers for an API request.

    The JSON content type is always present. `X-api-key` is added only when
    `Env.get().pxt_api_key` is not None.
    """
    api_key = Env.get().pxt_api_key
    if api_key is None:
        return {'Content-Type': 'application/json'}
    return {'Content-Type': 'application/json', 'X-api-key': api_key}
|