pixeltable 0.4.13__py3-none-any.whl → 0.4.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (46) hide show
  1. pixeltable/catalog/catalog.py +179 -63
  2. pixeltable/catalog/column.py +24 -20
  3. pixeltable/catalog/table.py +24 -8
  4. pixeltable/catalog/table_version.py +15 -6
  5. pixeltable/catalog/view.py +22 -22
  6. pixeltable/config.py +2 -0
  7. pixeltable/dataframe.py +3 -2
  8. pixeltable/env.py +42 -21
  9. pixeltable/exec/__init__.py +1 -0
  10. pixeltable/exec/aggregation_node.py +0 -1
  11. pixeltable/exec/cache_prefetch_node.py +74 -98
  12. pixeltable/exec/data_row_batch.py +2 -18
  13. pixeltable/exec/in_memory_data_node.py +1 -1
  14. pixeltable/exec/object_store_save_node.py +299 -0
  15. pixeltable/exec/sql_node.py +28 -33
  16. pixeltable/exprs/data_row.py +31 -25
  17. pixeltable/exprs/json_path.py +6 -5
  18. pixeltable/exprs/row_builder.py +6 -12
  19. pixeltable/functions/gemini.py +1 -1
  20. pixeltable/functions/openai.py +1 -1
  21. pixeltable/functions/video.py +5 -6
  22. pixeltable/globals.py +3 -3
  23. pixeltable/index/embedding_index.py +5 -8
  24. pixeltable/io/fiftyone.py +1 -1
  25. pixeltable/io/label_studio.py +4 -5
  26. pixeltable/iterators/audio.py +1 -1
  27. pixeltable/iterators/document.py +10 -12
  28. pixeltable/iterators/video.py +1 -1
  29. pixeltable/metadata/schema.py +7 -0
  30. pixeltable/plan.py +26 -1
  31. pixeltable/share/packager.py +8 -2
  32. pixeltable/share/publish.py +3 -9
  33. pixeltable/type_system.py +1 -3
  34. pixeltable/utils/dbms.py +31 -5
  35. pixeltable/utils/gcs_store.py +283 -0
  36. pixeltable/utils/local_store.py +316 -0
  37. pixeltable/utils/object_stores.py +497 -0
  38. pixeltable/utils/pytorch.py +5 -6
  39. pixeltable/utils/s3_store.py +354 -0
  40. {pixeltable-0.4.13.dist-info → pixeltable-0.4.14.dist-info}/METADATA +1 -1
  41. {pixeltable-0.4.13.dist-info → pixeltable-0.4.14.dist-info}/RECORD +44 -41
  42. pixeltable/utils/media_store.py +0 -248
  43. pixeltable/utils/s3.py +0 -17
  44. {pixeltable-0.4.13.dist-info → pixeltable-0.4.14.dist-info}/WHEEL +0 -0
  45. {pixeltable-0.4.13.dist-info → pixeltable-0.4.14.dist-info}/entry_points.txt +0 -0
  46. {pixeltable-0.4.13.dist-info → pixeltable-0.4.14.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,497 @@
1
+ from __future__ import annotations
2
+
3
+ import enum
4
+ import os
5
+ import re
6
+ import urllib.parse
7
+ import urllib.request
8
+ import uuid
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any, NamedTuple, Optional
11
+ from uuid import UUID
12
+
13
+ from pixeltable import env, exceptions as excs
14
+
15
+ if TYPE_CHECKING:
16
+ from pixeltable.catalog import Column
17
+
18
+
19
class StorageTarget(enum.Enum):
    """The kinds of storage backend an object address can refer to.

    Each member's value is a short tag; ``str(member)`` yields that tag.
    """

    LOCAL_STORE = 'os'  # local file system
    S3_STORE = 's3'  # Amazon S3
    R2_STORE = 'r2'  # Cloudflare R2
    GCS_STORE = 'gs'  # Google Cloud Storage
    AZURE_STORE = 'az'  # Azure Blob Storage
    HTTP_STORE = 'http'  # HTTP/HTTPS

    def __str__(self) -> str:
        """Return the member's short tag rather than the default ``Class.MEMBER`` form."""
        tag: str = self.value
        return tag
31
+
32
+
33
class StorageObjectAddress(NamedTuple):
    """The parsed components of an object address.

    Components that do not apply to a given address are left as empty strings
    (``None`` for ``path``).
    """

    storage_target: StorageTarget  # kind of storage referenced; this is NOT the same as the scheme
    scheme: str  # scheme parsed from the source
    account: str = ''  # account number parsed from the source, when applicable
    account_extension: str = ''  # account extension parsed from the source, when applicable
    container: str = ''  # container / bucket name parsed from the source
    key: str = ''  # key parsed from the source (prefix + object_name)
    prefix: str = ''  # prefix (within the bucket) parsed from the source
    object_name: str = ''  # object name parsed from the source (if requested and applicable)
    path: Optional[Path] = None  # local filesystem path; read through `to_path`

    @property
    def has_object(self) -> bool:
        """True when the address carries a non-empty object name."""
        return bool(self.object_name)

    @property
    def is_http_readable(self) -> bool:
        """True when the scheme starts with 'http' and an object name is present."""
        if not self.scheme.startswith('http'):
            return False
        return self.has_object

    @property
    def is_azure_scheme(self) -> bool:
        """True for the Azure-specific schemes (wasb, wasbs, abfs, abfss)."""
        return self.scheme in ('wasb', 'wasbs', 'abfs', 'abfss')

    @property
    def has_valid_storage_target(self) -> bool:
        """True when `storage_target` is one of the known StorageTarget members."""
        known_targets = (
            StorageTarget.LOCAL_STORE,
            StorageTarget.S3_STORE,
            StorageTarget.R2_STORE,
            StorageTarget.GCS_STORE,
            StorageTarget.AZURE_STORE,
            StorageTarget.HTTP_STORE,
        )
        return self.storage_target in known_targets

    @property
    def prefix_free_uri(self) -> str:
        """Return the URI up to and including the container, without prefix or object name."""
        scheme, container, extension = self.scheme, self.container, self.account_extension
        if self.is_azure_scheme:
            # Azure schemes put the container in front of the account host
            return f'{scheme}://{container}@{self.account}.{extension}/'
        if not extension:
            return f'{scheme}://{container}/'
        host = f'{self.account}.{extension}' if self.account else extension
        return f'{scheme}://{host}/{container}/'

    @property
    def container_free_uri(self) -> str:
        """Return the URI up to the host only, without container, prefix or object name.

        Must not be used with the Azure-specific schemes (enforced by an assertion).
        """
        assert not self.is_azure_scheme, 'Azure storage requires a container name'
        extension = self.account_extension
        if not extension:
            return f'{self.scheme}://'
        host = f'{self.account}.{extension}' if self.account else extension
        return f'{self.scheme}://{host}/'

    @property
    def to_path(self) -> Path:
        """Return the local path; only valid for LOCAL_STORE addresses that carry a path."""
        assert self.storage_target == StorageTarget.LOCAL_STORE
        assert self.path is not None
        return self.path

    def __str__(self) -> str:
        """A debug aid to override default str representation. Not to be used for any purpose."""
        location = f'{self.scheme}://{self.account}.{self.account_extension}/{self.container}/'
        return f'{self.storage_target}..{location}{self.prefix}{self.object_name}'

    def __repr__(self) -> str:
        """A debug aid to override default repr representation. Not to be used for any purpose."""
        head = f'SObjectAddress(client: {self.storage_target!r}, s: {self.scheme!r}, a: {self.account!r}, '
        middle = f'ae: {self.account_extension!r}, c: {self.container!r}, '
        tail = f'p: {self.prefix!r}, o: {self.object_name!r})'
        return head + middle + tail
109
+
110
+
111
class ObjectPath:
    """Helpers for building object keys and for parsing object storage addresses."""

    PATTERN = re.compile(r'([0-9a-fA-F]+)_(\d+)_(\d+)_([0-9a-fA-F]+)')  # tbl_id, col_id, version, uuid

    @classmethod
    def table_prefix(cls, tbl_id: UUID) -> str:
        """Construct a unique unix-style prefix for objects in a table (without leading/trailing slashes)."""
        assert isinstance(tbl_id, uuid.UUID)
        return tbl_id.hex

    @classmethod
    def create_prefix_raw(
        cls, tbl_id: UUID, col_id: int, tbl_version: int, ext: Optional[str] = None
    ) -> tuple[str, str]:
        """Construct a unique unix-style prefix and filename for a persisted file.

        The results are derived from table, col, and version specs plus a fresh random UUID.

        Args:
            tbl_id: id of the table the file belongs to
            col_id: id of the column the file belongs to
            tbl_version: table version the file belongs to
            ext: optional filename extension, appended verbatim (include the leading dot)

        Returns:
            prefix: a unix-style prefix for the file without leading/trailing slashes
            filename: a unique filename for the file without leading slashes
        """
        table_prefix = cls.table_prefix(tbl_id)
        id_hex = uuid.uuid4().hex
        prefix = f'{table_prefix}/{id_hex[:2]}/{id_hex[:4]}'
        filename = f'{table_prefix}_{col_id}_{tbl_version}_{id_hex}{ext or ""}'
        return prefix, filename

    @classmethod
    def separate_prefix_object(cls, path_and_object: str, may_contain_object_name: bool) -> tuple[str, str]:
        """Split a key into a prefix and an object name.

        Args:
            path_and_object: the key to split
            may_contain_object_name: if False, the whole key is treated as a prefix

        Returns:
            prefix: empty, or ending in exactly one '/'
            object_name: the last path component, or '' when the key is prefix-only
                (may_contain_object_name is False, or the key ends with '/')
        """
        path = path_and_object
        object_name = ''
        if not may_contain_object_name or path.endswith('/'):
            prefix = path.rstrip('/')
        elif '/' in path:
            # If there are slashes in the path, separate into prefix and object
            prefix, object_name = path.rsplit('/', 1)
            prefix = prefix.rstrip('/')
        else:
            # If no slashes, the entire path is the object name
            prefix = ''
            object_name = path
        # prefix has no trailing slashes at this point; a non-empty prefix gets exactly one
        if prefix:
            prefix += '/'
        return prefix, object_name

    @classmethod
    def parse_object_storage_addr1(cls, src_addr: str) -> StorageObjectAddress:
        """
        Parses a storage address into its scheme, account, container, and key.

        The returned address has empty `prefix` and `object_name`; use
        `parse_object_storage_addr` to have the key split as well.

        Args:
            src_addr (str): The address to parse: a URI such as "gs://my-bucket/path/to/object.txt",
                or a local file path (no scheme, or a single-letter Windows drive prefix).

        Returns:
            StorageObjectAddress: A NamedTuple containing components of the address.

        Raises:
            ValueError: if the scheme is not supported, or an Azure-scheme URI is not of the form
                container@account.<extension>.

        Formats:
            s3://container/<optional prefix>/<optional object>
            gs://container/<optional prefix>/<optional object>
            wasb[s]://container@account.blob.core.windows.net/<optional prefix>/<optional object>
            abfs[s]://container@account.dfs.core.windows.net/<optional prefix>/<optional object>
            https://account.blob.core.windows.net/container/<optional prefix>/<optional object>
            https://account.r2.cloudflarestorage.com/container/<optional prefix>/<optional object>
            https://raw.github.com/pixeltable/pixeltable/main/docs/resources/images/000000000030.jpg
        """
        parsed = urllib.parse.urlparse(src_addr)
        scheme = parsed.scheme.lower()
        account_name = ''
        account_extension = ''
        container = ''
        key = ''
        path = None

        # len(parsed.scheme) == 1 occurs for Windows drive letters like C:\
        if not parsed.scheme or len(parsed.scheme) == 1:
            # If no scheme, treat as local file path; this will be further validated before use
            storage_target = StorageTarget.LOCAL_STORE
            scheme = 'file'
            path = Path(src_addr)

        elif scheme == 'file':
            storage_target = StorageTarget.LOCAL_STORE
            pth = parsed.path
            if parsed.netloc:
                # This is a UNC path, ie, file://host/share/path/to/file
                pth = f'\\\\{parsed.netloc}{pth}'
            path = Path(urllib.parse.unquote(urllib.request.url2pathname(pth)))
            key = str(parsed.path).lstrip('/')

        elif scheme in ('s3', 'gs'):
            storage_target = StorageTarget.S3_STORE if scheme == 's3' else StorageTarget.GCS_STORE
            container = parsed.netloc
            key = parsed.path.lstrip('/')

        elif scheme in ('wasb', 'wasbs', 'abfs', 'abfss'):
            # Azure-specific URI schemes
            # wasb[s]://container@account.blob.core.windows.net/<optional prefix>/<optional object>
            # abfs[s]://container@account.dfs.core.windows.net/<optional prefix>/<optional object>
            container, at_sign, account_host = parsed.netloc.partition('@')
            account_name, dot, account_extension = account_host.partition('.')
            # Both the '@' and a dotted account host are required; a host without a dot used to
            # escape as an IndexError instead of the documented ValueError.
            if not at_sign or not dot:
                raise ValueError(f'Invalid Azure URI format: {src_addr}')
            storage_target = StorageTarget.AZURE_STORE
            key = parsed.path.lstrip('/')

        elif scheme in ('http', 'https'):
            # Standard HTTP(S) URL format
            # https://account.blob.core.windows.net/container/<optional path>/<optional object>
            # https://account.r2.cloudflarestorage.com/container/<optional path>/<optional object>
            # and possibly others
            host = parsed.netloc
            host_account, dot, host_extension = host.partition('.')
            # An account-qualified endpoint needs an 'account.extension' host. A dot-less host that
            # merely contains one of the keywords is treated as plain HTTP (it used to raise IndexError).
            if dot and 'cloudflare' in host:
                storage_target = StorageTarget.R2_STORE
            elif dot and 'windows' in host:
                storage_target = StorageTarget.AZURE_STORE
            else:
                storage_target = StorageTarget.HTTP_STORE

            stripped_path = parsed.path.lstrip('/')
            if storage_target == StorageTarget.HTTP_STORE:
                # Plain HTTP: the whole host is kept in account_extension, the whole path is the key
                account_extension = host
                key = stripped_path
            else:
                # First path component is the container, the remainder (if any) is the key
                account_name = host_account
                account_extension = host_extension
                container, _, key = stripped_path.partition('/')
        else:
            raise ValueError(f'Unsupported URI scheme: {parsed.scheme}')

        r = StorageObjectAddress(storage_target, scheme, account_name, account_extension, container, key, '', '', path)
        assert r.has_valid_storage_target
        return r

    @classmethod
    def parse_object_storage_addr(cls, src_addr: str, may_contain_object_name: bool) -> StorageObjectAddress:
        """
        Parses a storage address into its scheme, bucket, prefix, and object name.

        Args:
            src_addr (str): The address to parse (e.g., "gs://my-bucket/path/to/object.txt").
            may_contain_object_name (bool): If False, the whole key is treated as a prefix and
                `object_name` is left empty; if True, the last path component of a key that does not
                end with '/' becomes the object name.

        Returns:
            StorageObjectAddress: A NamedTuple containing components of the address.

        Raises:
            ValueError: propagated from `parse_object_storage_addr1` for unsupported or malformed addresses.

        Formats:
            s3://container/<optional prefix>/<optional object>
            gs://container/<optional prefix>/<optional object>
            wasb[s]://container@account.blob.core.windows.net/<optional prefix>/<optional object>
            abfs[s]://container@account.dfs.core.windows.net/<optional prefix>/<optional object>
            https://account.blob.core.windows.net/container/<optional prefix>/<optional object>
            https://account.r2.cloudflarestorage.com/container/<optional prefix>/<optional object>
            https://raw.github.com/pixeltable/pixeltable/main/docs/resources/images/000000000030.jpg
        """
        soa = cls.parse_object_storage_addr1(src_addr)
        prefix, object_name = cls.separate_prefix_object(soa.key, may_contain_object_name)
        assert not object_name.endswith('/')
        return soa._replace(prefix=prefix, object_name=object_name)
270
+
271
+
272
class ObjectStoreBase:
    """Interface shared by the object store implementations.

    Every operation except `move_local_file` raises AssertionError here; `move_local_file`
    returns None, which signals that the object could not be moved.
    """

    def validate(self, error_col_name: str) -> Optional[str]:
        """Check the store configuration and report its base URI.

        Args:
            error_col_name: a string of the form 'Column {name}: ' used when raising errors

        Returns:
            Base URI for the store if it is accessible. This value is stored in any Column
            attached to the store.
        """
        raise AssertionError()

    def copy_local_file(self, col: Column, src_path: Path) -> str:
        """Copy a local file that belongs to a Column into the store.

        Args:
            col: The Column to which the file belongs, used to generate the URI of the stored object.
            src_path: The Path to the local file

        Returns:
            The URI of the object in the store
        """
        raise AssertionError()

    def move_local_file(self, col: Column, src_path: Path) -> Optional[str]:
        """Move a local file that belongs to a Column into the store.

        Args:
            col: The Column to which the file belongs, used to generate the URI of the stored object.
            src_path: The Path to the local file

        Returns:
            The URI of the object in the store, None if the object cannot be moved to the store
        """
        # Base behaviour: moving is not possible
        return None

    def copy_object_to_local_file(self, src_path: str, dest_path: Path) -> None:
        """Copy an object from the store into a local media file.

        Args:
            src_path: The URI of the object in the store
            dest_path: The desired Path to the local file
        """
        raise AssertionError()

    def count(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> int:
        """Count the objects in the store that belong to a table.

        Args:
            tbl_id: Only count objects associated with a given table
            tbl_version: Only count objects associated with a specific table version

        Returns:
            Number of objects found with the specified criteria
        """
        raise AssertionError()

    def delete(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> Optional[int]:
        """Delete the objects in the store that belong to a table.

        Args:
            tbl_id: Only delete objects associated with a given table
            tbl_version: Only delete objects associated with a specific table version

        Returns:
            Number of objects deleted or None if the store does not count deletions.
        """
        raise AssertionError()

    def list_objects(self, return_uri: bool, n_max: int = 10) -> list[str]:
        """List objects held in the store.

        Args:
            return_uri: If True, returns a full URI for each object, otherwise just the path to the object.
            n_max: Maximum number of objects to list
        """
        raise AssertionError()
349
+
350
+
351
class ObjectOps:
    """Class-level entry points that resolve a destination to a store and delegate to it."""

    @classmethod
    def get_store(cls, dest: Optional[str], may_contain_object_name: bool, col_name: Optional[str] = None) -> Any:
        """Return the store object that serves `dest`.

        Args:
            dest: destination address; None selects the address held in `Env.get().object_soa`
            may_contain_object_name: passed to the address parser; whether `dest` may end in an object name
            col_name: optional column name, used only to prefix the error message

        Returns:
            A LocalStore, S3Store (also used for R2), GCSStore or HTTPStore

        Raises:
            excs.Error: if `dest` does not resolve to a supported store
        """
        from pixeltable.env import Env
        from pixeltable.utils.local_store import LocalStore

        if dest is None:
            soa = Env.get().object_soa
        else:
            soa = ObjectPath.parse_object_storage_addr(dest, may_contain_object_name=may_contain_object_name)

        target = soa.storage_target
        if target == StorageTarget.LOCAL_STORE:
            return LocalStore(soa)

        # S3 proper and Cloudflare R2 are both served by S3Store
        if (target == StorageTarget.S3_STORE and soa.scheme == 's3') or target == StorageTarget.R2_STORE:
            env.Env.get().require_package('boto3')
            from pixeltable.utils.s3_store import S3Store

            return S3Store(soa)

        if target == StorageTarget.GCS_STORE and soa.scheme == 'gs':
            env.Env.get().require_package('google.cloud.storage')
            from pixeltable.utils.gcs_store import GCSStore

            return GCSStore(soa)

        if target == StorageTarget.HTTP_STORE and soa.is_http_readable:
            return HTTPStore(soa)

        error_col_name = '' if col_name is None else f'Column {col_name!r}: '
        raise excs.Error(
            f'{error_col_name}`destination` must be a valid reference to a supported destination, got {dest!r}'
        )

    @classmethod
    def validate_destination(cls, dest: str | Path | None, col_name: Optional[str]) -> str:
        """Convert a Column destination parameter to a URI, else raise errors.

        Args:
            dest: The requested destination
            col_name: Used to raise error messages

        Returns:
            URI of destination, or raises an error
        """
        error_col_name = '' if col_name is None else f'Column {col_name!r}: '

        # General checks on any destination
        if isinstance(dest, Path):
            dest = str(dest)
        if not (dest is None or isinstance(dest, str)):
            raise excs.Error(f'{error_col_name}`destination` must be a string or path, got {dest!r}')

        # Specific checks for storage backends
        validated = cls.get_store(dest, False, col_name).validate(error_col_name)
        if validated is None:
            raise excs.Error(f'{error_col_name}`destination` must be a supported destination, got {dest!r}')
        return validated

    @classmethod
    def copy_object_to_local_file(cls, src_uri: str, dest_path: Path) -> None:
        """Copy an object from a URL to a local Path. Thread safe.

        Raises an exception if the download fails or the scheme is not supported
        """
        address = ObjectPath.parse_object_storage_addr(src_uri, may_contain_object_name=True)
        source_store = cls.get_store(src_uri, True)
        source_store.copy_object_to_local_file(address.object_name, dest_path)

    @classmethod
    def put_file(cls, col: Column, src_path: Path, relocate_or_delete: bool) -> str:
        """Move or copy a file to the destination, returning the file's URL within the destination.

        If relocate_or_delete is True and the file is in the TempStore, the file will be deleted
        after the operation.
        """
        from pixeltable.utils.local_store import TempStore

        if relocate_or_delete:
            # File is temporary, used only once, so we can delete it after copy if it can't be moved
            assert TempStore.contains_path(src_path)

        store = cls.get_store(col.destination, False, col.name)
        if not relocate_or_delete:
            return store.copy_local_file(col, src_path)

        # Attempt to move; fall back to copy-then-delete when the store cannot move the file
        moved_url = store.move_local_file(col, src_path)
        if moved_url is not None:
            return moved_url
        copied_url = store.copy_local_file(col, src_path)
        TempStore.delete_media_file(src_path)
        return copied_url

    @classmethod
    def move_local_file(cls, col: Column, src_path: Path) -> str:
        """Move a file to the destination specified by the Column, returning the file's URL within the destination."""
        target_store = cls.get_store(col.destination, False, col.name)
        return target_store.move_local_file(col, src_path)

    @classmethod
    def copy_local_file(cls, col: Column, src_path: Path) -> str:
        """Copy a file to the destination specified by the Column, returning the file's URL within the destination."""
        target_store = cls.get_store(col.destination, False, col.name)
        return target_store.copy_local_file(col, src_path)

    @classmethod
    def delete(cls, dest: Optional[str], tbl_id: UUID, tbl_version: Optional[int] = None) -> Optional[int]:
        """Delete objects in the destination for a given table ID, table version.

        Returns:
            Number of objects deleted or None
        """
        return cls.get_store(dest, False).delete(tbl_id, tbl_version)

    @classmethod
    def count(cls, dest: Optional[str], tbl_id: UUID, tbl_version: Optional[int] = None) -> int:
        """Return the count of objects in the destination for a given table ID"""
        return cls.get_store(dest, False).count(tbl_id, tbl_version)

    @classmethod
    def list_objects(cls, dest: Optional[str], return_uri: bool, n_max: int = 10) -> list[str]:
        """Return a list of objects found in the specified destination bucket.

        The dest specification string must not contain an object name.
        Each returned object includes the full set of prefixes.
        If return_uri is True, full URIs are returned; otherwise, just the object keys.
        """
        return cls.get_store(dest, False).list_objects(return_uri, n_max)

    @classmethod
    def list_uris(cls, source_uri: str, n_max: int = 10) -> list[str]:
        """Return a list of URIs found within the specified uri"""
        return cls.list_objects(source_uri, True, n_max)
482
+
483
+
484
class HTTPStore(ObjectStoreBase):
    """Store that downloads objects over HTTP(S).

    Only `copy_object_to_local_file` is implemented here; every other operation keeps the
    ObjectStoreBase behaviour.
    """

    base_url: str  # '<scheme>://<account_extension>/<prefix>', always ending in '/'

    def __init__(self, soa: StorageObjectAddress):
        """Build the base URL from the address's scheme, account_extension and prefix."""
        self.base_url = f'{soa.scheme}://{soa.account_extension}/{soa.prefix}'
        if not self.base_url.endswith('/'):
            self.base_url += '/'

    def copy_object_to_local_file(self, src_path: str, dest_path: Path) -> None:
        """Download `base_url + src_path` into the local file `dest_path`.

        Args:
            src_path: object name, appended verbatim to the base URL
            dest_path: the desired Path to the local file; overwritten if it exists

        Raises:
            Whatever `urllib.request.urlopen` or the file operations raise on failure.
        """
        import shutil

        with urllib.request.urlopen(self.base_url + src_path) as resp, open(dest_path, 'wb') as f:
            # Stream in chunks rather than holding the whole response body in memory
            shutil.copyfileobj(resp, f)
            f.flush()  # Ensures Python buffers are written to OS
            os.fsync(f.fileno())  # Forces OS to write to physical storage
@@ -19,15 +19,14 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
19
19
  PyTorch dataset interface for pixeltable data.
20
20
  NB. This class must inherit from torch.utils.data.IterableDataset for it
21
21
  to work with torch.utils.data.DataLoader.
22
+
23
+ Args:
24
+ path: path to directory containing parquet files
25
+ image_format: 'np' or 'pt'. 'np' is RGB uint8 array,
26
+ 'pt' is result of torchvision.transforms.ToTensor()
22
27
  """
23
28
 
24
29
  def __init__(self, path: Path, image_format: str):
25
- """
26
- Args:
27
- path: path to directory containing parquet files
28
- image_format: 'np' or 'pt'. 'np' is RGB uint8 array,
29
- 'pt' is result of torchvision.transforms.ToTensor()
30
- """
31
30
  super().__init__()
32
31
 
33
32
  self.path = path