pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,83 +1,132 @@
1
+ import dataclasses
2
+ import json
3
+ import logging
4
+ import os
1
5
  import sys
2
6
  import urllib.parse
3
7
  import urllib.request
4
8
  from pathlib import Path
9
+ from typing import Any, Literal
5
10
 
6
11
  import requests
12
+ from requests.adapters import HTTPAdapter
7
13
  from tqdm import tqdm
14
+ from urllib3.util.retry import Retry
8
15
 
9
16
  import pixeltable as pxt
10
17
  from pixeltable import exceptions as excs
18
+ from pixeltable.catalog import Catalog
19
+ from pixeltable.catalog.table_version import TableVersionMd
11
20
  from pixeltable.env import Env
12
21
  from pixeltable.utils import sha256sum
22
+ from pixeltable.utils.local_store import TempStore
13
23
 
14
24
  from .packager import TablePackager, TableRestorer
25
+ from .protocol import PxtUri
26
+ from .protocol.replica import (
27
+ DeleteRequest,
28
+ DeleteResponse,
29
+ FinalizeRequest,
30
+ FinalizeResponse,
31
+ PublishRequest,
32
+ PublishResponse,
33
+ ReplicateRequest,
34
+ ReplicateResponse,
35
+ )
36
+
37
+ _logger = logging.getLogger('pixeltable')
15
38
 
16
39
  # These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
17
40
  # pixeltable.com URLs are available.
18
41
 
19
- PIXELTABLE_API_URL = 'https://internal-api.pixeltable.com'
42
+ PIXELTABLE_API_URL = os.environ.get('PIXELTABLE_API_URL', 'https://internal-api.pixeltable.com')
20
43
 
21
44
 
22
- def push_replica(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
23
- if not src_tbl._tbl_version.get().is_snapshot:
24
- raise excs.Error('Only snapshots may be published.')
45
+ def push_replica(
46
+ dest_tbl_uri: str, src_tbl: pxt.Table, bucket: str | None = None, access: Literal['public', 'private'] = 'private'
47
+ ) -> str:
48
+ _logger.info(f'Publishing replica for {src_tbl._name!r} to: {dest_tbl_uri}')
25
49
 
26
- packager = TablePackager(src_tbl, additional_md={'table_uri': dest_tbl_uri})
27
- request_json = packager.md | {'operation_type': 'publish_snapshot'}
28
- headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
29
- response = requests.post(PIXELTABLE_API_URL, json=request_json, headers=headers_json)
50
+ packager = TablePackager(src_tbl)
51
+ # Create the publish request using packager's bundle_md
52
+ publish_request = PublishRequest(
53
+ table_uri=PxtUri(uri=dest_tbl_uri),
54
+ pxt_version=packager.bundle_md['pxt_version'],
55
+ pxt_md_version=packager.bundle_md['pxt_md_version'],
56
+ md=[TableVersionMd.from_dict(md_dict) for md_dict in packager.bundle_md['md']],
57
+ bucket_name=bucket,
58
+ is_public=access == 'public',
59
+ )
60
+
61
+ _logger.debug(f'Sending PublishRequest: {publish_request}')
62
+
63
+ response = requests.post(PIXELTABLE_API_URL, data=publish_request.model_dump_json(), headers=_api_headers())
64
+ if response.status_code == 201:
65
+ publish_response = PublishResponse.model_validate(response.json())
66
+ existing_table_uri = str(publish_response.table_uri)
67
+ Env.get().console_logger.info(
68
+ f'Replica for version {publish_request.md[0].version_md.version} already exists at {existing_table_uri}.'
69
+ )
70
+ with Catalog.get().begin_xact(tbl_id=src_tbl._id, for_write=True):
71
+ Catalog.get().update_additional_md(src_tbl._id, {'pxt_uri': existing_table_uri})
72
+ return existing_table_uri
30
73
  if response.status_code != 200:
31
- raise excs.Error(f'Error publishing snapshot: {response.text}')
32
- response_json = response.json()
33
- if not isinstance(response_json, dict) or response_json.get('destination') != 's3':
34
- raise excs.Error(f'Error publishing snapshot: unexpected response from server.\n{response_json}')
35
- upload_id = response_json['upload_id']
36
- destination_uri = response_json['destination_uri']
74
+ raise excs.Error(f'Error publishing {src_tbl._display_name()}: {response.text}')
75
+ publish_response = PublishResponse.model_validate(response.json())
76
+
77
+ _logger.debug(f'Received PublishResponse: {publish_response}')
37
78
 
38
- Env.get().console_logger.info(f"Creating a snapshot of '{src_tbl._path}' at: {dest_tbl_uri}")
79
+ upload_id = publish_response.upload_id
80
+ destination_uri = publish_response.destination_uri
81
+
82
+ Env.get().console_logger.info(f"Creating a replica of '{src_tbl._path()}' at: {dest_tbl_uri}")
39
83
 
40
84
  bundle = packager.package()
41
85
 
42
- parsed_location = urllib.parse.urlparse(destination_uri)
86
+ parsed_location = urllib.parse.urlparse(str(destination_uri))
43
87
  if parsed_location.scheme == 's3':
44
88
  _upload_bundle_to_s3(bundle, parsed_location)
89
+ elif parsed_location.scheme == 'https':
90
+ _upload_to_presigned_url(file_path=bundle, url=parsed_location.geturl())
45
91
  else:
46
92
  raise excs.Error(f'Unsupported destination: {destination_uri}')
47
93
 
48
- Env.get().console_logger.info('Finalizing snapshot ...')
94
+ Env.get().console_logger.info('Finalizing replica ...')
95
+ # Use preview data from packager's bundle_md (set during package())
96
+ finalize_request = FinalizeRequest(
97
+ table_uri=PxtUri(uri=dest_tbl_uri),
98
+ upload_id=upload_id,
99
+ datafile=bundle.name,
100
+ size=bundle.stat().st_size,
101
+ sha256=sha256sum(bundle), # Generate our own SHA for independent verification
102
+ row_count=packager.bundle_md['row_count'],
103
+ preview_header=packager.bundle_md['preview_header'],
104
+ preview_data=packager.bundle_md['preview_data'],
105
+ )
106
+ finalize_response_json = requests.post(
107
+ PIXELTABLE_API_URL, data=finalize_request.model_dump_json(), headers=_api_headers()
108
+ )
109
+ if finalize_response_json.status_code != 200:
110
+ raise excs.Error(f'Error finalizing {src_tbl._display_name()}: {finalize_response_json.text}')
49
111
 
50
- finalize_request_json = {
51
- 'operation_type': 'finalize_snapshot',
52
- 'upload_id': upload_id,
53
- 'datafile': bundle.name,
54
- 'size': bundle.stat().st_size,
55
- 'sha256': sha256sum(bundle), # Generate our own SHA for independent verification
56
- }
57
- # TODO: Use Pydantic for validation
58
- finalize_response = requests.post(PIXELTABLE_API_URL, json=finalize_request_json, headers=headers_json)
59
- if finalize_response.status_code != 200:
60
- raise excs.Error(f'Error finalizing snapshot: {finalize_response.text}')
61
- finalize_response_json = finalize_response.json()
62
- if not isinstance(finalize_response_json, dict) or 'confirmed_table_uri' not in finalize_response_json:
63
- raise excs.Error(f'Error finalizing snapshot: unexpected response from server.\n{finalize_response_json}')
112
+ finalize_response = FinalizeResponse.model_validate(finalize_response_json.json())
113
+ confirmed_tbl_uri = finalize_response.confirmed_table_uri
114
+ Env.get().console_logger.info(f'The published table is now available at: {confirmed_tbl_uri}')
64
115
 
65
- confirmed_tbl_uri = finalize_response_json['confirmed_table_uri']
66
- Env.get().console_logger.info(f'The published snapshot is now available at: {confirmed_tbl_uri}')
67
- return confirmed_tbl_uri
116
+ with Catalog.get().begin_xact(tbl_id=src_tbl._id, for_write=True):
117
+ Catalog.get().update_additional_md(src_tbl._id, {'pxt_uri': str(confirmed_tbl_uri)})
68
118
 
119
+ return str(confirmed_tbl_uri)
69
120
 
70
- def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
71
- from pixeltable.utils.s3 import get_client
72
121
 
122
+ def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
73
123
  bucket = parsed_location.netloc
74
124
  remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
75
125
  remote_path = str(remote_dir / bundle.name)[1:] # Remove initial /
76
126
 
77
- Env.get().console_logger.info(f'Uploading snapshot to: {bucket}:{remote_path}')
127
+ Env.get().console_logger.info(f'Uploading replica to: {bucket}:{remote_path}')
78
128
 
79
- boto_config = {'max_pool_connections': 5, 'connect_timeout': 15, 'retries': {'max_attempts': 3, 'mode': 'adaptive'}}
80
- s3_client = get_client(**boto_config)
129
+ s3_client = Env.get().get_client('s3')
81
130
 
82
131
  upload_args = {'ChecksumAlgorithm': 'SHA256'}
83
132
 
@@ -97,46 +146,66 @@ def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult
97
146
 
98
147
 
99
148
  def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
100
- headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
101
- clone_request_json = {'operation_type': 'clone_snapshot', 'table_uri': src_tbl_uri}
102
- response = requests.post(PIXELTABLE_API_URL, json=clone_request_json, headers=headers_json)
149
+ parsed_uri = PxtUri(src_tbl_uri)
150
+ clone_request = ReplicateRequest(table_uri=parsed_uri)
151
+ response = requests.post(PIXELTABLE_API_URL, data=clone_request.model_dump_json(), headers=_api_headers())
103
152
  if response.status_code != 200:
104
- raise excs.Error(f'Error cloning snapshot: {response.text}')
105
- response_json = response.json()
106
- if not isinstance(response_json, dict) or 'table_uri' not in response_json:
107
- raise excs.Error(f'Error cloning shapshot: unexpected response from server.\n{response_json}')
108
-
109
- primary_tbl_additional_md = response_json['md']['tables'][0]['table_md']['additional_md']
110
- bundle_uri = primary_tbl_additional_md['destination_uri']
111
- bundle_filename = primary_tbl_additional_md['datafile']
153
+ raise excs.Error(f'Error cloning replica: {response.text}')
154
+ clone_response = ReplicateResponse.model_validate(response.json())
155
+
156
+ # Prevalidate destination path for replication. We do this before downloading the bundle so that we avoid
157
+ # having to download it if there is a collision or if this is a duplicate replica. This is done outside the
158
+ # transaction scope of the table restore operation (we don't want to hold a transaction open during the
159
+ # download); that's fine, since it will be validated again during TableRestorer's catalog operations.
160
+
161
+ t = pxt.get_table(dest_path, if_not_exists='ignore')
162
+ if t is not None:
163
+ if str(t._id) != clone_response.md[0].tbl_md.tbl_id:
164
+ raise excs.Error(
165
+ f'An attempt was made to create a replica table at {dest_path!r}, '
166
+ 'but a different table already exists at that location.'
167
+ )
168
+ known_versions = tuple(v['version'] for v in t.get_versions())
169
+ if clone_response.md[0].version_md.version in known_versions:
170
+ Env.get().console_logger.info(f'Replica {dest_path!r} is already up to date with source: {src_tbl_uri}')
171
+ return t
172
+
173
+ primary_version_additional_md = clone_response.md[0].version_md.additional_md
174
+ bundle_uri = str(clone_response.destination_uri)
175
+ bundle_filename = primary_version_additional_md['cloud']['datafile']
112
176
  parsed_location = urllib.parse.urlparse(bundle_uri)
113
177
  if parsed_location.scheme == 's3':
114
178
  bundle_path = _download_bundle_from_s3(parsed_location, bundle_filename)
179
+ elif parsed_location.scheme == 'https':
180
+ bundle_path = TempStore.create_path()
181
+ _download_from_presigned_url(url=parsed_location.geturl(), output_path=bundle_path)
115
182
  else:
116
183
  raise excs.Error(f'Unexpected response from server: unsupported bundle uri: {bundle_uri}')
117
184
 
118
- restorer = TableRestorer(dest_path, response_json)
119
- tbl = restorer.restore(bundle_path)
120
- Env.get().console_logger.info(f'Created local replica {tbl._path!r} from URI: {src_tbl_uri}')
185
+ pxt_uri = str(clone_response.table_uri)
186
+ md_list = [dataclasses.asdict(md) for md in clone_response.md]
187
+ restorer = TableRestorer(
188
+ dest_path, {'pxt_version': pxt.__version__, 'pxt_md_version': clone_response.pxt_md_version, 'md': md_list}
189
+ )
190
+
191
+ tbl = restorer.restore(bundle_path, pxt_uri, explicit_version=parsed_uri.version)
192
+ Env.get().console_logger.info(f'Created local replica {tbl._path()!r} from URI: {src_tbl_uri}')
121
193
  return tbl
122
194
 
123
195
 
124
196
  def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_filename: str) -> Path:
125
- from pixeltable.utils.s3 import get_client
126
-
127
197
  bucket = parsed_location.netloc
128
198
  remote_dir = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_location.path)))
129
199
  remote_path = str(remote_dir / bundle_filename)[1:] # Remove initial /
130
200
 
131
- Env.get().console_logger.info(f'Downloading snapshot from: {bucket}:{remote_path}')
201
+ Env.get().console_logger.info(f'Downloading replica from: {bucket}:{remote_path}')
132
202
 
133
- boto_config = {'max_pool_connections': 5, 'connect_timeout': 15, 'retries': {'max_attempts': 3, 'mode': 'adaptive'}}
134
- s3_client = get_client(**boto_config)
203
+ s3_client = Env.get().get_client('s3')
135
204
 
136
205
  obj = s3_client.head_object(Bucket=bucket, Key=remote_path) # Check if the object exists
137
206
  bundle_size = obj['ContentLength']
138
207
 
139
- bundle_path = Path(Env.get().create_tmp_path())
208
+ bundle_path = TempStore.create_path()
140
209
  progress_bar = tqdm(
141
210
  desc='Downloading',
142
211
  total=bundle_size,
@@ -149,3 +218,132 @@ def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_f
149
218
  )
150
219
  s3_client.download_file(Bucket=bucket, Key=remote_path, Filename=str(bundle_path), Callback=progress_bar.update)
151
220
  return bundle_path
221
+
222
+
223
+ def _create_retry_session(
224
+ max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
225
+ ) -> requests.Session:
226
+ """Create a requests session with retry configuration"""
227
+ if status_forcelist is None:
228
+ status_forcelist = [
229
+ 408, # Request Timeout
230
+ 429, # Too Many Requests (rate limiting)
231
+ 500, # Internal Server Error (server-side error)
232
+ 502, # Bad Gateway (proxy/gateway got invalid response)
233
+ 503, # Service Unavailable (server overloaded or down)
234
+ 504, # Gateway Timeout (proxy/gateway timeout)
235
+ ]
236
+ retry_strategy = Retry(
237
+ total=max_retries,
238
+ read=max_retries,
239
+ connect=max_retries,
240
+ backoff_factor=backoff_factor,
241
+ status_forcelist=status_forcelist,
242
+ allowed_methods=['GET', 'PUT', 'POST', 'DELETE'],
243
+ )
244
+
245
+ session = requests.Session()
246
+ adapter = HTTPAdapter(max_retries=retry_strategy)
247
+ session.mount('https://', adapter)
248
+ return session
249
+
250
+
251
+ def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) -> requests.Response:
252
+ """Upload file with progress bar and retries"""
253
+ file_size = file_path.stat().st_size
254
+
255
+ headers = {'Content-Length': str(file_size), 'Content-Type': 'application/octet-stream'}
256
+
257
+ # Detect if it's Azure by URL pattern
258
+ is_azure = 'blob.core.windows.net' in url
259
+ if is_azure:
260
+ headers['x-ms-blob-type'] = 'BlockBlob'
261
+
262
+ session = _create_retry_session(max_retries=max_retries)
263
+ try:
264
+ with (
265
+ open(file_path, 'rb') as f,
266
+ tqdm.wrapattr(
267
+ f,
268
+ method='read',
269
+ total=file_size,
270
+ desc='Uploading',
271
+ unit='B',
272
+ unit_scale=True,
273
+ unit_divisor=1024,
274
+ miniters=1, # Update every iteration (should be fine for an upload)
275
+ ncols=100,
276
+ file=sys.stdout,
277
+ ) as file_with_progress,
278
+ ):
279
+ response = session.put(
280
+ url,
281
+ data=file_with_progress,
282
+ headers=headers,
283
+ timeout=(60, 1800), # 60 seconds to connect and 1800 seconds for server response
284
+ )
285
+ response.raise_for_status()
286
+ return response
287
+ finally:
288
+ session.close()
289
+
290
+
291
+ def _download_from_presigned_url(
292
+ url: str, output_path: Path, headers: dict[str, str] | None = None, max_retries: int = 3
293
+ ) -> None:
294
+ """Download file with progress bar and retries"""
295
+ session = _create_retry_session(max_retries=max_retries)
296
+
297
+ try:
298
+ # Stream download with progress
299
+ response = session.get(
300
+ url, headers=headers, stream=True, timeout=(60, 300)
301
+ ) # 60 seconds to connect and 300 seconds for server response
302
+ response.raise_for_status()
303
+
304
+ total_size = int(response.headers.get('content-length', 0))
305
+ progress_bar = tqdm(
306
+ desc='Downloading',
307
+ total=total_size,
308
+ unit='B',
309
+ unit_scale=True,
310
+ unit_divisor=1024,
311
+ miniters=1,
312
+ ncols=100,
313
+ file=sys.stdout,
314
+ )
315
+ with open(output_path, 'wb') as f:
316
+ for chunk in response.iter_content(chunk_size=8192):
317
+ if chunk:
318
+ f.write(chunk)
319
+ progress_bar.update(len(chunk))
320
+ finally:
321
+ session.close()
322
+
323
+
324
+ def delete_replica(dest_path: str, version: int | None = None) -> None:
325
+ """Delete cloud replica"""
326
+ delete_request = DeleteRequest(table_uri=PxtUri(uri=dest_path), version=version)
327
+ response = requests.post(PIXELTABLE_API_URL, data=delete_request.model_dump_json(), headers=_api_headers())
328
+ if response.status_code != 200:
329
+ raise excs.Error(f'Error deleting replica: {response.text}')
330
+ DeleteResponse.model_validate(response.json())
331
+ Env.get().console_logger.info(f'Deleted replica at: {dest_path}')
332
+
333
+
334
+ def list_table_versions(table_uri: str) -> list[dict[str, Any]]:
335
+ """List versions for a remote table."""
336
+ request_json = {'operation_type': 'list_table_versions', 'table_uri': {'uri': table_uri}}
337
+ response = requests.post(PIXELTABLE_API_URL, data=json.dumps(request_json), headers=_api_headers())
338
+ if response.status_code != 200:
339
+ raise excs.Error(f'Error listing table versions: {response.text}')
340
+ response_data = response.json()
341
+ return response_data.get('versions', [])
342
+
343
+
344
+ def _api_headers() -> dict[str, str]:
345
+ headers = {'Content-Type': 'application/json'}
346
+ api_key = Env.get().pxt_api_key
347
+ if api_key is not None:
348
+ headers['X-api-key'] = api_key
349
+ return headers