pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,349 @@
1
+ import dataclasses
2
+ import json
3
+ import logging
4
+ import os
5
+ import sys
6
+ import urllib.parse
7
+ import urllib.request
8
+ from pathlib import Path
9
+ from typing import Any, Literal
10
+
11
+ import requests
12
+ from requests.adapters import HTTPAdapter
13
+ from tqdm import tqdm
14
+ from urllib3.util.retry import Retry
15
+
16
+ import pixeltable as pxt
17
+ from pixeltable import exceptions as excs
18
+ from pixeltable.catalog import Catalog
19
+ from pixeltable.catalog.table_version import TableVersionMd
20
+ from pixeltable.env import Env
21
+ from pixeltable.utils import sha256sum
22
+ from pixeltable.utils.local_store import TempStore
23
+
24
+ from .packager import TablePackager, TableRestorer
25
+ from .protocol import PxtUri
26
+ from .protocol.replica import (
27
+ DeleteRequest,
28
+ DeleteResponse,
29
+ FinalizeRequest,
30
+ FinalizeResponse,
31
+ PublishRequest,
32
+ PublishResponse,
33
+ ReplicateRequest,
34
+ ReplicateResponse,
35
+ )
36
+
37
# Module logger; diagnostic messages emitted by the functions below go through it.
_logger = logging.getLogger('pixeltable')

# Endpoint that receives every API request issued by this module. It can be overridden through the
# PIXELTABLE_API_URL environment variable. The URL is abstracted out for now and is meant to be replaced with an
# actual (hard-coded) URL once the pixeltable.com URLs are available.
PIXELTABLE_API_URL = os.getenv('PIXELTABLE_API_URL', 'https://internal-api.pixeltable.com')
43
+
44
+
45
def push_replica(
    dest_tbl_uri: str, src_tbl: pxt.Table, bucket: str | None = None, access: Literal['public', 'private'] = 'private'
) -> str:
    """Publish `src_tbl` as a replica at `dest_tbl_uri` and return the resulting table URI.

    The flow is: send a PublishRequest to the API, package the table into a bundle, upload the bundle to the
    destination returned by the server (an `s3` or `https` URI), then send a FinalizeRequest. The URI reported by
    the server is also recorded in the source table's additional metadata under the key 'pxt_uri'.

    Args:
        dest_tbl_uri: URI at which the replica should be published.
        src_tbl: local table to publish.
        bucket: forwarded to the server as `bucket_name`.
        access: 'public' sets `is_public` to True on the request; 'private' sets it to False.

    Returns:
        The table URI taken from the server's response, as a string. When the publish request is answered with
        status 201, the table URI of that response is returned and nothing is packaged or uploaded.

    Raises:
        excs.Error: if the publish or finalize request comes back with an unexpected status code, or if the upload
            destination uses a scheme other than `s3` or `https`.
    """
    _logger.info(f'Publishing replica for {src_tbl._name!r} to: {dest_tbl_uri}')

    packager = TablePackager(src_tbl)

    # Assemble the publish request from the packager's bundle_md
    target_uri = PxtUri(uri=dest_tbl_uri)
    pxt_version = packager.bundle_md['pxt_version']
    pxt_md_version = packager.bundle_md['pxt_md_version']
    version_mds = [TableVersionMd.from_dict(md_dict) for md_dict in packager.bundle_md['md']]
    publish_request = PublishRequest(
        table_uri=target_uri,
        pxt_version=pxt_version,
        pxt_md_version=pxt_md_version,
        md=version_mds,
        bucket_name=bucket,
        is_public=(access == 'public'),
    )

    _logger.debug(f'Sending PublishRequest: {publish_request}')

    publish_reply = requests.post(PIXELTABLE_API_URL, data=publish_request.model_dump_json(), headers=_api_headers())
    status = publish_reply.status_code
    if status not in (200, 201):
        raise excs.Error(f'Error publishing {src_tbl._display_name()}: {publish_reply.text}')
    publish_response = PublishResponse.model_validate(publish_reply.json())

    if status == 201:
        # The server reports that this version is already published: record its URI locally and stop here.
        existing_table_uri = str(publish_response.table_uri)
        Env.get().console_logger.info(
            f'Replica for version {publish_request.md[0].version_md.version} already exists at {existing_table_uri}.'
        )
        with Catalog.get().begin_xact(tbl_id=src_tbl._id, for_write=True):
            Catalog.get().update_additional_md(src_tbl._id, {'pxt_uri': existing_table_uri})
        return existing_table_uri

    _logger.debug(f'Received PublishResponse: {publish_response}')

    upload_id = publish_response.upload_id
    destination_uri = publish_response.destination_uri

    Env.get().console_logger.info(f"Creating a replica of '{src_tbl._path()}' at: {dest_tbl_uri}")

    bundle = packager.package()

    # Dispatch the upload on the scheme of the destination handed back by the server.
    parsed_location = urllib.parse.urlparse(str(destination_uri))
    scheme = parsed_location.scheme
    if scheme not in ('s3', 'https'):
        raise excs.Error(f'Unsupported destination: {destination_uri}')
    if scheme == 's3':
        _upload_bundle_to_s3(bundle, parsed_location)
    else:
        _upload_to_presigned_url(file_path=bundle, url=parsed_location.geturl())

    Env.get().console_logger.info('Finalizing replica ...')

    bundle_name = bundle.name
    bundle_size = bundle.stat().st_size
    bundle_sha256 = sha256sum(bundle)  # Generate our own SHA for independent verification
    # Row count and preview data come from the packager's bundle_md (set during package())
    finalize_request = FinalizeRequest(
        table_uri=PxtUri(uri=dest_tbl_uri),
        upload_id=upload_id,
        datafile=bundle_name,
        size=bundle_size,
        sha256=bundle_sha256,
        row_count=packager.bundle_md['row_count'],
        preview_header=packager.bundle_md['preview_header'],
        preview_data=packager.bundle_md['preview_data'],
    )
    finalize_reply = requests.post(PIXELTABLE_API_URL, data=finalize_request.model_dump_json(), headers=_api_headers())
    if finalize_reply.status_code != 200:
        raise excs.Error(f'Error finalizing {src_tbl._display_name()}: {finalize_reply.text}')

    finalize_response = FinalizeResponse.model_validate(finalize_reply.json())
    confirmed_tbl_uri = finalize_response.confirmed_table_uri
    Env.get().console_logger.info(f'The published table is now available at: {confirmed_tbl_uri}')

    with Catalog.get().begin_xact(tbl_id=src_tbl._id, for_write=True):
        Catalog.get().update_additional_md(src_tbl._id, {'pxt_uri': str(confirmed_tbl_uri)})

    return str(confirmed_tbl_uri)
120
+
121
+
122
def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
    """Upload the bundle file to the S3 location described by `parsed_location`, displaying a progress bar.

    Args:
        bundle: local path of the bundle file; its file name becomes the last component of the object key.
        parsed_location: parsed `s3` URI; its netloc is used as the bucket and its path as the key prefix.
    """
    bucket = parsed_location.netloc
    remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
    remote_path = str(remote_dir / bundle.name)[1:]  # Remove initial /

    Env.get().console_logger.info(f'Uploading replica to: {bucket}:{remote_path}')

    s3_client = Env.get().get_client('s3')

    upload_args = {'ChecksumAlgorithm': 'SHA256'}

    # Manage the progress bar with `with` so that it is always closed, including when the upload raises;
    # previously the bar was created but never closed.
    with tqdm(
        desc='Uploading',
        total=bundle.stat().st_size,
        unit='B',
        unit_scale=True,
        unit_divisor=1024,
        miniters=1,  # Update every iteration (should be fine for an upload)
        ncols=100,
        file=sys.stdout,
    ) as progress_bar:
        s3_client.upload_file(
            Filename=str(bundle), Bucket=bucket, Key=remote_path, ExtraArgs=upload_args, Callback=progress_bar.update
        )
146
+
147
+
148
def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
    """Create (or refresh) a local replica at `dest_path` from the published table at `src_tbl_uri`.

    A ReplicateRequest is sent to the API; the bundle it points to is then downloaded (from an `s3` or `https`
    URI) and restored through TableRestorer.

    Args:
        dest_path: local path at which the replica should live.
        src_tbl_uri: URI of the published table.

    Returns:
        The restored table, or the already existing table at `dest_path` if it has the same table id and already
        contains the version described by the server's response.

    Raises:
        excs.Error: if the API call does not return status 200, if a different table already exists at
            `dest_path`, or if the bundle URI uses a scheme other than `s3` or `https`.
    """
    parsed_uri = PxtUri(src_tbl_uri)
    clone_request = ReplicateRequest(table_uri=parsed_uri)
    response = requests.post(PIXELTABLE_API_URL, data=clone_request.model_dump_json(), headers=_api_headers())
    if response.status_code != 200:
        raise excs.Error(f'Error cloning replica: {response.text}')
    clone_response = ReplicateResponse.model_validate(response.json())
    primary_md = clone_response.md[0]

    # Prevalidate the destination path before downloading anything, so that a collision or a duplicate replica
    # does not cost a download. This happens outside the transaction scope of the table restore operation (we
    # don't want to hold a transaction open during the download); that's fine, since it will be validated again
    # during TableRestorer's catalog operations.
    existing_tbl = pxt.get_table(dest_path, if_not_exists='ignore')
    if existing_tbl is not None:
        if str(existing_tbl._id) != primary_md.tbl_md.tbl_id:
            raise excs.Error(
                f'An attempt was made to create a replica table at {dest_path!r}, '
                'but a different table already exists at that location.'
            )
        local_versions = [entry['version'] for entry in existing_tbl.get_versions()]
        if primary_md.version_md.version in local_versions:
            Env.get().console_logger.info(f'Replica {dest_path!r} is already up to date with source: {src_tbl_uri}')
            return existing_tbl

    bundle_uri = str(clone_response.destination_uri)
    bundle_filename = primary_md.version_md.additional_md['cloud']['datafile']
    parsed_location = urllib.parse.urlparse(bundle_uri)
    scheme = parsed_location.scheme
    if scheme not in ('s3', 'https'):
        raise excs.Error(f'Unexpected response from server: unsupported bundle uri: {bundle_uri}')
    if scheme == 's3':
        bundle_path = _download_bundle_from_s3(parsed_location, bundle_filename)
    else:
        bundle_path = TempStore.create_path()
        _download_from_presigned_url(url=parsed_location.geturl(), output_path=bundle_path)

    pxt_uri = str(clone_response.table_uri)
    bundle_md = {
        'pxt_version': pxt.__version__,
        'pxt_md_version': clone_response.pxt_md_version,
        'md': [dataclasses.asdict(md) for md in clone_response.md],
    }
    restorer = TableRestorer(dest_path, bundle_md)

    tbl = restorer.restore(bundle_path, pxt_uri, explicit_version=parsed_uri.version)
    Env.get().console_logger.info(f'Created local replica {tbl._path()!r} from URI: {src_tbl_uri}')
    return tbl
194
+
195
+
196
def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_filename: str) -> Path:
    """Download a bundle from S3 into a new temporary path, displaying a progress bar.

    Args:
        parsed_location: parsed `s3` URI; its netloc is used as the bucket and its path as the key prefix.
        bundle_filename: file name appended to the key prefix to form the object key.

    Returns:
        The local path (obtained from TempStore) that the bundle was written to.
    """
    bucket = parsed_location.netloc
    remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
    remote_path = str(remote_dir / bundle_filename)[1:]  # Remove initial /

    Env.get().console_logger.info(f'Downloading replica from: {bucket}:{remote_path}')

    s3_client = Env.get().get_client('s3')

    obj = s3_client.head_object(Bucket=bucket, Key=remote_path)  # Check if the object exists
    bundle_size = obj['ContentLength']

    bundle_path = TempStore.create_path()
    # Manage the progress bar with `with` so that it is always closed, including when the download raises;
    # previously the bar was created but never closed.
    with tqdm(
        desc='Downloading',
        total=bundle_size,
        unit='B',
        unit_scale=True,
        unit_divisor=1024,
        miniters=1,
        ncols=100,
        file=sys.stdout,
    ) as progress_bar:
        s3_client.download_file(
            Bucket=bucket, Key=remote_path, Filename=str(bundle_path), Callback=progress_bar.update
        )
    return bundle_path
221
+
222
+
223
def _create_retry_session(
    max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
) -> requests.Session:
    """Return a `requests.Session` whose 'https://' requests are retried according to a `Retry` policy.

    Args:
        max_retries: used as the total, read and connect retry limits.
        backoff_factor: passed through to `Retry`.
        status_forcelist: HTTP status codes that trigger a retry; when None, a default list of transient
            error codes (408, 429, 500, 502, 503, 504) is used.
    """
    retry_statuses = status_forcelist
    if retry_statuses is None:
        retry_statuses = [
            408,  # Request Timeout
            429,  # Too Many Requests (rate limiting)
            500,  # Internal Server Error (server-side error)
            502,  # Bad Gateway (proxy/gateway got invalid response)
            503,  # Service Unavailable (server overloaded or down)
            504,  # Gateway Timeout (proxy/gateway timeout)
        ]

    session = requests.Session()
    # Only the 'https://' prefix gets the retrying adapter.
    session.mount(
        'https://',
        HTTPAdapter(
            max_retries=Retry(
                total=max_retries,
                read=max_retries,
                connect=max_retries,
                backoff_factor=backoff_factor,
                status_forcelist=retry_statuses,
                allowed_methods=['GET', 'PUT', 'POST', 'DELETE'],
            )
        ),
    )
    return session
249
+
250
+
251
def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) -> requests.Response:
    """PUT the file at `file_path` to `url`, showing a progress bar and retrying via a retry session.

    Args:
        file_path: local file to upload.
        url: presigned URL that receives the PUT request.
        max_retries: retry limit handed to `_create_retry_session`.

    Returns:
        The response of the PUT request.

    Raises:
        requests.HTTPError: raised by `raise_for_status()` when the response carries an error status.
    """
    file_size = file_path.stat().st_size

    headers = {'Content-Length': str(file_size), 'Content-Type': 'application/octet-stream'}
    # Azure is detected by URL pattern; in that case the blob type header is added
    if 'blob.core.windows.net' in url:
        headers['x-ms-blob-type'] = 'BlockBlob'

    # Leaving the session's `with` block closes the session, on success and on error alike.
    with _create_retry_session(max_retries=max_retries) as session:
        with open(file_path, 'rb') as f:
            # Wrap the file's `read` so that every read advances the progress bar.
            with tqdm.wrapattr(
                f,
                method='read',
                total=file_size,
                desc='Uploading',
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
                miniters=1,  # Update every iteration (should be fine for an upload)
                ncols=100,
                file=sys.stdout,
            ) as file_with_progress:
                response = session.put(
                    url,
                    data=file_with_progress,
                    headers=headers,
                    timeout=(60, 1800),  # 60 seconds to connect and 1800 seconds for server response
                )
                response.raise_for_status()
                return response
289
+
290
+
291
def _download_from_presigned_url(
    url: str, output_path: Path, headers: dict[str, str] | None = None, max_retries: int = 3
) -> None:
    """Stream the content at `url` into `output_path`, showing a progress bar and retrying via a retry session.

    Args:
        url: presigned URL to download from.
        output_path: local file that receives the downloaded bytes.
        headers: optional request headers.
        max_retries: retry limit handed to `_create_retry_session`.

    Raises:
        requests.HTTPError: raised by `raise_for_status()` when the response carries an error status.
    """
    session = _create_retry_session(max_retries=max_retries)

    try:
        # Stream download with progress
        response = session.get(
            url, headers=headers, stream=True, timeout=(60, 300)
        )  # 60 seconds to connect and 300 seconds for server response
        response.raise_for_status()

        # Falls back to 0 when the server does not send a content-length header.
        total_size = int(response.headers.get('content-length', 0))
        # Manage the progress bar with `with` so that it is always closed, including when the download fails
        # midway; previously the bar was created but never closed.
        with (
            tqdm(
                desc='Downloading',
                total=total_size,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
                miniters=1,
                ncols=100,
                file=sys.stdout,
            ) as progress_bar,
            open(output_path, 'wb') as f,
        ):
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip empty chunks
                    f.write(chunk)
                    progress_bar.update(len(chunk))
    finally:
        session.close()
322
+
323
+
324
def delete_replica(dest_path: str, version: int | None = None) -> None:
    """Delete the cloud replica identified by `dest_path`.

    Args:
        dest_path: URI of the replica; it is wrapped in a PxtUri for the request.
        version: forwarded to the server as the request's `version` field.

    Raises:
        excs.Error: if the API call does not return status 200.
    """
    request_body = DeleteRequest(table_uri=PxtUri(uri=dest_path), version=version).model_dump_json()
    response = requests.post(PIXELTABLE_API_URL, data=request_body, headers=_api_headers())
    if response.status_code != 200:
        raise excs.Error(f'Error deleting replica: {response.text}')
    # The parsed response is discarded; the call is kept for its validation of the payload.
    DeleteResponse.model_validate(response.json())
    Env.get().console_logger.info(f'Deleted replica at: {dest_path}')
332
+
333
+
334
def list_table_versions(table_uri: str) -> list[dict[str, Any]]:
    """Return the version entries the API reports for the remote table at `table_uri`.

    Returns:
        The value of the 'versions' key of the JSON response, or an empty list if that key is absent.

    Raises:
        excs.Error: if the API call does not return status 200.
    """
    payload = json.dumps({'operation_type': 'list_table_versions', 'table_uri': {'uri': table_uri}})
    response = requests.post(PIXELTABLE_API_URL, data=payload, headers=_api_headers())
    if response.status_code == 200:
        return response.json().get('versions', [])
    raise excs.Error(f'Error listing table versions: {response.text}')
342
+
343
+
344
def _api_headers() -> dict[str, str]:
    """Return the HTTP headers for API requests: a JSON content type, plus 'X-api-key' when a key is configured."""
    api_key = Env.get().pxt_api_key
    if api_key is None:
        return {'Content-Type': 'application/json'}
    return {'Content-Type': 'application/json', 'X-api-key': api_key}