pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
@@ -1,83 +1,114 @@
1
+ import dataclasses
2
+ import json
3
+ import os
1
4
  import sys
2
5
  import urllib.parse
3
6
  import urllib.request
4
7
  from pathlib import Path
8
+ from typing import Any, Literal
5
9
 
6
10
  import requests
11
+ from requests.adapters import HTTPAdapter
7
12
  from tqdm import tqdm
13
+ from urllib3.util.retry import Retry
8
14
 
9
15
  import pixeltable as pxt
10
16
  from pixeltable import exceptions as excs
17
+ from pixeltable.catalog import Catalog
11
18
  from pixeltable.env import Env
12
19
  from pixeltable.utils import sha256sum
20
+ from pixeltable.utils.local_store import TempStore
13
21
 
14
22
  from .packager import TablePackager, TableRestorer
23
+ from .protocol import PxtUri
24
+ from .protocol.replica import (
25
+ DeleteRequest,
26
+ DeleteResponse,
27
+ FinalizeRequest,
28
+ FinalizeResponse,
29
+ PublishRequest,
30
+ PublishResponse,
31
+ ReplicateRequest,
32
+ ReplicateResponse,
33
+ )
15
34
 
16
35
  # These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
17
36
  # pixeltable.com URLs are available.
18
37
 
19
- PIXELTABLE_API_URL = 'https://internal-api.pixeltable.com'
38
+ PIXELTABLE_API_URL = os.environ.get('PIXELTABLE_API_URL', 'https://internal-api.pixeltable.com')
20
39
 
21
40
 
22
- def push_replica(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
23
- if not src_tbl._tbl_version.get().is_snapshot:
24
- raise excs.Error('Only snapshots may be published.')
41
+ def push_replica(
42
+ dest_tbl_uri: str, src_tbl: pxt.Table, bucket: str | None = None, access: Literal['public', 'private'] = 'private'
43
+ ) -> str:
44
+ packager = TablePackager(src_tbl)
25
45
 
26
- packager = TablePackager(src_tbl, additional_md={'table_uri': dest_tbl_uri})
27
- request_json = packager.md | {'operation_type': 'publish_snapshot'}
28
- headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
29
- response = requests.post(PIXELTABLE_API_URL, json=request_json, headers=headers_json)
46
+ # Create the publish request using packager's bundle_md
47
+ publish_request = PublishRequest(
48
+ table_uri=PxtUri(uri=dest_tbl_uri),
49
+ pxt_version=packager.bundle_md['pxt_version'],
50
+ pxt_md_version=packager.bundle_md['pxt_md_version'],
51
+ md=packager.bundle_md['md'],
52
+ bucket_name=bucket,
53
+ is_public=access == 'public',
54
+ )
55
+
56
+ response = requests.post(PIXELTABLE_API_URL, data=publish_request.model_dump_json(), headers=_api_headers())
30
57
  if response.status_code != 200:
31
- raise excs.Error(f'Error publishing snapshot: {response.text}')
32
- response_json = response.json()
33
- if not isinstance(response_json, dict) or response_json.get('destination') != 's3':
34
- raise excs.Error(f'Error publishing snapshot: unexpected response from server.\n{response_json}')
35
- upload_id = response_json['upload_id']
36
- destination_uri = response_json['destination_uri']
58
+ raise excs.Error(f'Error publishing {src_tbl._display_name()}: {response.text}')
59
+ publish_response = PublishResponse.model_validate(response.json())
60
+
61
+ upload_id = publish_response.upload_id
62
+ destination_uri = publish_response.destination_uri
37
63
 
38
- Env.get().console_logger.info(f"Creating a snapshot of '{src_tbl._path()}' at: {dest_tbl_uri}")
64
+ Env.get().console_logger.info(f"Creating a replica of '{src_tbl._path()}' at: {dest_tbl_uri}")
39
65
 
40
66
  bundle = packager.package()
41
67
 
42
- parsed_location = urllib.parse.urlparse(destination_uri)
68
+ parsed_location = urllib.parse.urlparse(str(destination_uri))
43
69
  if parsed_location.scheme == 's3':
44
70
  _upload_bundle_to_s3(bundle, parsed_location)
71
+ elif parsed_location.scheme == 'https':
72
+ _upload_to_presigned_url(file_path=bundle, url=parsed_location.geturl())
45
73
  else:
46
74
  raise excs.Error(f'Unsupported destination: {destination_uri}')
47
75
 
48
- Env.get().console_logger.info('Finalizing snapshot ...')
76
+ Env.get().console_logger.info('Finalizing replica ...')
77
+ # Use preview data from packager's bundle_md (set during package())
78
+ finalize_request = FinalizeRequest(
79
+ table_uri=PxtUri(uri=dest_tbl_uri),
80
+ upload_id=upload_id,
81
+ datafile=bundle.name,
82
+ size=bundle.stat().st_size,
83
+ sha256=sha256sum(bundle), # Generate our own SHA for independent verification
84
+ row_count=packager.bundle_md['row_count'],
85
+ preview_header=packager.bundle_md['preview_header'],
86
+ preview_data=packager.bundle_md['preview_data'],
87
+ )
88
+ finalize_response_json = requests.post(
89
+ PIXELTABLE_API_URL, data=finalize_request.model_dump_json(), headers=_api_headers()
90
+ )
91
+ if finalize_response_json.status_code != 200:
92
+ raise excs.Error(f'Error finalizing {src_tbl._display_name()}: {finalize_response_json.text}')
93
+
94
+ finalize_response = FinalizeResponse.model_validate(finalize_response_json.json())
95
+ confirmed_tbl_uri = finalize_response.confirmed_table_uri
96
+ Env.get().console_logger.info(f'The published table is now available at: {confirmed_tbl_uri}')
49
97
 
50
- finalize_request_json = {
51
- 'operation_type': 'finalize_snapshot',
52
- 'upload_id': upload_id,
53
- 'datafile': bundle.name,
54
- 'size': bundle.stat().st_size,
55
- 'sha256': sha256sum(bundle), # Generate our own SHA for independent verification
56
- }
57
- # TODO: Use Pydantic for validation
58
- finalize_response = requests.post(PIXELTABLE_API_URL, json=finalize_request_json, headers=headers_json)
59
- if finalize_response.status_code != 200:
60
- raise excs.Error(f'Error finalizing snapshot: {finalize_response.text}')
61
- finalize_response_json = finalize_response.json()
62
- if not isinstance(finalize_response_json, dict) or 'confirmed_table_uri' not in finalize_response_json:
63
- raise excs.Error(f'Error finalizing snapshot: unexpected response from server.\n{finalize_response_json}')
98
+ with Catalog.get().begin_xact(tbl_id=src_tbl._tbl_version_path.tbl_id, for_write=True):
99
+ src_tbl._tbl_version_path.tbl_version.get().update_pxt_uri(str(confirmed_tbl_uri))
64
100
 
65
- confirmed_tbl_uri = finalize_response_json['confirmed_table_uri']
66
- Env.get().console_logger.info(f'The published snapshot is now available at: {confirmed_tbl_uri}')
67
- return confirmed_tbl_uri
101
+ return str(confirmed_tbl_uri)
68
102
 
69
103
 
70
104
  def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
71
- from pixeltable.utils.s3 import get_client
72
-
73
105
  bucket = parsed_location.netloc
74
106
  remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
75
107
  remote_path = str(remote_dir / bundle.name)[1:] # Remove initial /
76
108
 
77
- Env.get().console_logger.info(f'Uploading snapshot to: {bucket}:{remote_path}')
109
+ Env.get().console_logger.info(f'Uploading replica to: {bucket}:{remote_path}')
78
110
 
79
- boto_config = {'max_pool_connections': 5, 'connect_timeout': 15, 'retries': {'max_attempts': 3, 'mode': 'adaptive'}}
80
- s3_client = get_client(**boto_config)
111
+ s3_client = Env.get().get_client('s3')
81
112
 
82
113
  upload_args = {'ChecksumAlgorithm': 'SHA256'}
83
114
 
@@ -97,46 +128,47 @@ def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult
97
128
 
98
129
 
99
130
  def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
100
- headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
101
- clone_request_json = {'operation_type': 'clone_snapshot', 'table_uri': src_tbl_uri}
102
- response = requests.post(PIXELTABLE_API_URL, json=clone_request_json, headers=headers_json)
131
+ clone_request = ReplicateRequest(table_uri=PxtUri(src_tbl_uri))
132
+ response = requests.post(PIXELTABLE_API_URL, data=clone_request.model_dump_json(), headers=_api_headers())
103
133
  if response.status_code != 200:
104
- raise excs.Error(f'Error cloning snapshot: {response.text}')
105
- response_json = response.json()
106
- if not isinstance(response_json, dict) or 'table_uri' not in response_json:
107
- raise excs.Error(f'Error cloning shapshot: unexpected response from server.\n{response_json}')
108
-
109
- primary_tbl_additional_md = response_json['md']['tables'][0]['table_md']['additional_md']
110
- bundle_uri = primary_tbl_additional_md['destination_uri']
111
- bundle_filename = primary_tbl_additional_md['datafile']
134
+ raise excs.Error(f'Error cloning replica: {response.text}')
135
+ clone_response = ReplicateResponse.model_validate(response.json())
136
+ primary_version_additional_md = clone_response.md[0].version_md.additional_md
137
+ bundle_uri = str(clone_response.destination_uri)
138
+ bundle_filename = primary_version_additional_md['cloud']['datafile']
112
139
  parsed_location = urllib.parse.urlparse(bundle_uri)
113
140
  if parsed_location.scheme == 's3':
114
141
  bundle_path = _download_bundle_from_s3(parsed_location, bundle_filename)
142
+ elif parsed_location.scheme == 'https':
143
+ bundle_path = TempStore.create_path()
144
+ _download_from_presigned_url(url=parsed_location.geturl(), output_path=bundle_path)
115
145
  else:
116
146
  raise excs.Error(f'Unexpected response from server: unsupported bundle uri: {bundle_uri}')
147
+ # Set pxt_uri in the table metadata; use table_uri from ReplicateResponse
148
+ clone_response.md[0].tbl_md.additional_md['pxt_uri'] = str(clone_response.table_uri)
149
+ md_list = [dataclasses.asdict(md) for md in clone_response.md]
150
+ restorer = TableRestorer(
151
+ dest_path, {'pxt_version': pxt.__version__, 'pxt_md_version': clone_response.pxt_md_version, 'md': md_list}
152
+ )
117
153
 
118
- restorer = TableRestorer(dest_path, response_json)
119
154
  tbl = restorer.restore(bundle_path)
120
155
  Env.get().console_logger.info(f'Created local replica {tbl._path()!r} from URI: {src_tbl_uri}')
121
156
  return tbl
122
157
 
123
158
 
124
159
  def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_filename: str) -> Path:
125
- from pixeltable.utils.s3 import get_client
126
-
127
160
  bucket = parsed_location.netloc
128
161
  remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
129
162
  remote_path = str(remote_dir / bundle_filename)[1:] # Remove initial /
130
163
 
131
- Env.get().console_logger.info(f'Downloading snapshot from: {bucket}:{remote_path}')
164
+ Env.get().console_logger.info(f'Downloading replica from: {bucket}:{remote_path}')
132
165
 
133
- boto_config = {'max_pool_connections': 5, 'connect_timeout': 15, 'retries': {'max_attempts': 3, 'mode': 'adaptive'}}
134
- s3_client = get_client(**boto_config)
166
+ s3_client = Env.get().get_client('s3')
135
167
 
136
168
  obj = s3_client.head_object(Bucket=bucket, Key=remote_path) # Check if the object exists
137
169
  bundle_size = obj['ContentLength']
138
170
 
139
- bundle_path = Path(Env.get().create_tmp_path())
171
+ bundle_path = TempStore.create_path()
140
172
  progress_bar = tqdm(
141
173
  desc='Downloading',
142
174
  total=bundle_size,
@@ -149,3 +181,127 @@ def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_f
149
181
  )
150
182
  s3_client.download_file(Bucket=bucket, Key=remote_path, Filename=str(bundle_path), Callback=progress_bar.update)
151
183
  return bundle_path
184
+
185
+
186
+ def _create_retry_session(
187
+ max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
188
+ ) -> requests.Session:
189
+ """Create a requests session with retry configuration"""
190
+ if status_forcelist is None:
191
+ status_forcelist = [
192
+ 408, # Request Timeout
193
+ 429, # Too Many Requests (rate limiting)
194
+ 500, # Internal Server Error (server-side error)
195
+ 502, # Bad Gateway (proxy/gateway got invalid response)
196
+ 503, # Service Unavailable (server overloaded or down)
197
+ 504, # Gateway Timeout (proxy/gateway timeout)
198
+ ]
199
+ retry_strategy = Retry(
200
+ total=max_retries,
201
+ read=max_retries,
202
+ connect=max_retries,
203
+ backoff_factor=backoff_factor,
204
+ status_forcelist=status_forcelist,
205
+ allowed_methods=['GET', 'PUT', 'POST', 'DELETE'],
206
+ )
207
+
208
+ session = requests.Session()
209
+ adapter = HTTPAdapter(max_retries=retry_strategy)
210
+ session.mount('https://', adapter)
211
+ return session
212
+
213
+
214
+ def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) -> requests.Response:
215
+ """Upload file with progress bar and retries"""
216
+ file_size = file_path.stat().st_size
217
+
218
+ headers = {'Content-Length': str(file_size), 'Content-Type': 'application/octet-stream'}
219
+
220
+ session = _create_retry_session(max_retries=max_retries)
221
+ try:
222
+ with (
223
+ open(file_path, 'rb') as f,
224
+ tqdm.wrapattr(
225
+ f,
226
+ method='read',
227
+ total=file_size,
228
+ desc='Uploading',
229
+ unit='B',
230
+ unit_scale=True,
231
+ unit_divisor=1024,
232
+ miniters=1, # Update every iteration (should be fine for an upload)
233
+ ncols=100,
234
+ file=sys.stdout,
235
+ ) as file_with_progress,
236
+ ):
237
+ response = session.put(
238
+ url,
239
+ data=file_with_progress,
240
+ headers=headers,
241
+ timeout=(60, 1800), # 60 seconds to connect and 1800 seconds (30 minutes) for server response
242
+ )
243
+ response.raise_for_status()
244
+ return response
245
+ finally:
246
+ session.close()
247
+
248
+
249
+ def _download_from_presigned_url(
250
+ url: str, output_path: Path, headers: dict[str, str] | None = None, max_retries: int = 3
251
+ ) -> None:
252
+ """Download file with progress bar and retries"""
253
+ session = _create_retry_session(max_retries=max_retries)
254
+
255
+ try:
256
+ # Stream download with progress
257
+ response = session.get(
258
+ url, headers=headers, stream=True, timeout=(60, 300)
259
+ ) # 60 seconds to connect and 300 seconds for server response
260
+ response.raise_for_status()
261
+
262
+ total_size = int(response.headers.get('content-length', 0))
263
+ progress_bar = tqdm(
264
+ desc='Downloading',
265
+ total=total_size,
266
+ unit='B',
267
+ unit_scale=True,
268
+ unit_divisor=1024,
269
+ miniters=1,
270
+ ncols=100,
271
+ file=sys.stdout,
272
+ )
273
+ with open(output_path, 'wb') as f:
274
+ for chunk in response.iter_content(chunk_size=8192):
275
+ if chunk:
276
+ f.write(chunk)
277
+ progress_bar.update(len(chunk))
278
+ finally:
279
+ session.close()
280
+
281
+
282
+ def delete_replica(dest_path: str, version: int | None = None) -> None:
283
+ """Delete cloud replica"""
284
+ delete_request = DeleteRequest(table_uri=PxtUri(uri=dest_path), version=version)
285
+ response = requests.post(PIXELTABLE_API_URL, data=delete_request.model_dump_json(), headers=_api_headers())
286
+ if response.status_code != 200:
287
+ raise excs.Error(f'Error deleting replica: {response.text}')
288
+ DeleteResponse.model_validate(response.json())
289
+ Env.get().console_logger.info(f'Deleted replica at: {dest_path}')
290
+
291
+
292
+ def list_table_versions(table_uri: str) -> list[dict[str, Any]]:
293
+ """List versions for a remote table."""
294
+ request_json = {'operation_type': 'list_table_versions', 'table_uri': {'uri': table_uri}}
295
+ response = requests.post(PIXELTABLE_API_URL, data=json.dumps(request_json), headers=_api_headers())
296
+ if response.status_code != 200:
297
+ raise excs.Error(f'Error listing table versions: {response.text}')
298
+ response_data = response.json()
299
+ return response_data.get('versions', [])
300
+
301
+
302
+ def _api_headers() -> dict[str, str]:
303
+ headers = {'Content-Type': 'application/json'}
304
+ api_key = Env.get().pxt_api_key
305
+ if api_key is not None:
306
+ headers['X-api-key'] = api_key
307
+ return headers