pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (69) hide show
  1. pixeltable/__init__.py +4 -2
  2. pixeltable/catalog/__init__.py +1 -1
  3. pixeltable/catalog/catalog.py +7 -9
  4. pixeltable/catalog/column.py +49 -0
  5. pixeltable/catalog/insertable_table.py +0 -7
  6. pixeltable/catalog/schema_object.py +1 -14
  7. pixeltable/catalog/table.py +180 -67
  8. pixeltable/catalog/table_version.py +42 -146
  9. pixeltable/catalog/table_version_path.py +6 -5
  10. pixeltable/catalog/view.py +2 -1
  11. pixeltable/config.py +24 -9
  12. pixeltable/dataframe.py +5 -6
  13. pixeltable/env.py +113 -21
  14. pixeltable/exec/aggregation_node.py +1 -1
  15. pixeltable/exec/cache_prefetch_node.py +4 -3
  16. pixeltable/exec/exec_node.py +0 -8
  17. pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
  18. pixeltable/exec/expr_eval/globals.py +1 -0
  19. pixeltable/exec/expr_eval/schedulers.py +52 -19
  20. pixeltable/exec/in_memory_data_node.py +2 -3
  21. pixeltable/exprs/array_slice.py +2 -2
  22. pixeltable/exprs/data_row.py +15 -2
  23. pixeltable/exprs/expr.py +9 -9
  24. pixeltable/exprs/function_call.py +61 -23
  25. pixeltable/exprs/globals.py +1 -2
  26. pixeltable/exprs/json_path.py +3 -3
  27. pixeltable/exprs/row_builder.py +25 -21
  28. pixeltable/exprs/string_op.py +3 -3
  29. pixeltable/func/expr_template_function.py +6 -3
  30. pixeltable/func/query_template_function.py +2 -2
  31. pixeltable/func/signature.py +30 -3
  32. pixeltable/func/tools.py +2 -2
  33. pixeltable/functions/anthropic.py +76 -27
  34. pixeltable/functions/deepseek.py +5 -1
  35. pixeltable/functions/gemini.py +11 -2
  36. pixeltable/functions/globals.py +2 -2
  37. pixeltable/functions/huggingface.py +6 -12
  38. pixeltable/functions/llama_cpp.py +9 -1
  39. pixeltable/functions/openai.py +76 -55
  40. pixeltable/functions/video.py +59 -6
  41. pixeltable/functions/vision.py +2 -2
  42. pixeltable/globals.py +86 -13
  43. pixeltable/io/datarows.py +3 -3
  44. pixeltable/io/fiftyone.py +7 -7
  45. pixeltable/io/globals.py +3 -3
  46. pixeltable/io/hf_datasets.py +4 -4
  47. pixeltable/io/label_studio.py +2 -1
  48. pixeltable/io/pandas.py +6 -6
  49. pixeltable/io/parquet.py +3 -3
  50. pixeltable/io/table_data_conduit.py +2 -2
  51. pixeltable/io/utils.py +2 -2
  52. pixeltable/iterators/audio.py +3 -2
  53. pixeltable/iterators/document.py +2 -8
  54. pixeltable/iterators/video.py +49 -9
  55. pixeltable/plan.py +0 -16
  56. pixeltable/share/packager.py +51 -42
  57. pixeltable/share/publish.py +134 -7
  58. pixeltable/store.py +5 -25
  59. pixeltable/type_system.py +5 -8
  60. pixeltable/utils/__init__.py +2 -2
  61. pixeltable/utils/arrow.py +5 -5
  62. pixeltable/utils/description_helper.py +3 -3
  63. pixeltable/utils/iceberg.py +1 -2
  64. pixeltable/utils/media_store.py +131 -66
  65. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
  66. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
  67. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
  68. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
  69. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
@@ -63,13 +63,10 @@ def sentence_transformer(
63
63
 
64
64
  @sentence_transformer.conditional_return_type
65
65
  def _(model_id: str) -> ts.ArrayType:
66
- try:
67
- from sentence_transformers import SentenceTransformer
66
+ from sentence_transformers import SentenceTransformer
68
67
 
69
- model = _lookup_model(model_id, SentenceTransformer)
70
- return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
71
- except ImportError:
72
- return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
68
+ model = _lookup_model(model_id, SentenceTransformer)
69
+ return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
73
70
 
74
71
 
75
72
  @pxt.udf
@@ -201,13 +198,10 @@ def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,
201
198
 
202
199
  @clip.conditional_return_type
203
200
  def _(model_id: str) -> ts.ArrayType:
204
- try:
205
- from transformers import CLIPModel
201
+ from transformers import CLIPModel
206
202
 
207
- model = _lookup_model(model_id, CLIPModel.from_pretrained)
208
- return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
209
- except ImportError:
210
- return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
203
+ model = _lookup_model(model_id, CLIPModel.from_pretrained)
204
+ return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
211
205
 
212
206
 
213
207
  @pxt.udf(batch_size=4)
@@ -93,10 +93,18 @@ def _lookup_pretrained_model(repo_id: str, filename: Optional[str], n_gpu_layers
93
93
  return _model_cache[key]
94
94
 
95
95
 
96
- _model_cache: dict[tuple[str, str, int], Any] = {}
96
+ _model_cache: dict[tuple[str, str, int], 'llama_cpp.Llama'] = {}
97
97
  _IS_GPU_AVAILABLE: Optional[bool] = None
98
98
 
99
99
 
100
+ def cleanup() -> None:
101
+ for model in _model_cache.values():
102
+ if model._sampler is not None:
103
+ model._sampler.close()
104
+ model.close()
105
+ _model_cache.clear()
106
+
107
+
100
108
  __all__ = local_public_names(__name__)
101
109
 
102
110
 
@@ -23,6 +23,7 @@ import pixeltable as pxt
23
23
  from pixeltable import env, exprs, type_system as ts
24
24
  from pixeltable.func import Batch, Tools
25
25
  from pixeltable.utils.code import local_public_names
26
+ from pixeltable.utils.media_store import TempStore
26
27
 
27
28
  if TYPE_CHECKING:
28
29
  import openai
@@ -91,6 +92,49 @@ def _rate_limits_pool(model: str) -> str:
91
92
  return f'rate-limits:openai:{model}'
92
93
 
93
94
 
95
+ # RE pattern for duration in '*-reset' headers;
96
+ # examples: 1d2h3ms, 4m5.6s; # fractional seconds can be reported as 0.5s or 500ms
97
+ _header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d+)m)?(?:([\d.]+)s)?')
98
+
99
+
100
+ def _parse_header_duration(duration_str: str) -> datetime.timedelta:
101
+ match = _header_duration_pattern.match(duration_str)
102
+ if not match:
103
+ raise ValueError(f'Invalid duration format: {duration_str}')
104
+
105
+ days = int(match.group(1) or 0)
106
+ hours = int(match.group(2) or 0)
107
+ milliseconds = int(match.group(3) or 0)
108
+ minutes = int(match.group(4) or 0)
109
+ seconds = float(match.group(5) or 0)
110
+
111
+ return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
112
+
113
+
114
+ def _get_header_info(
115
+ headers: httpx.Headers,
116
+ ) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
117
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
118
+
119
+ requests_limit_str = headers.get('x-ratelimit-limit-requests')
120
+ requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
121
+ requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
122
+ requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
123
+ requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s') # Default to 5 seconds
124
+ requests_reset_ts = now + _parse_header_duration(requests_reset_str)
125
+ requests_info = (requests_limit, requests_remaining, requests_reset_ts)
126
+
127
+ tokens_limit_str = headers.get('x-ratelimit-limit-tokens')
128
+ tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
129
+ tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
130
+ tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
131
+ tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s') # Default to 5 seconds
132
+ tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
133
+ tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
134
+
135
+ return requests_info, tokens_info
136
+
137
+
94
138
  class OpenAIRateLimitsInfo(env.RateLimitsInfo):
95
139
  retryable_errors: tuple[Type[Exception], ...]
96
140
 
@@ -111,61 +155,24 @@ class OpenAIRateLimitsInfo(env.RateLimitsInfo):
111
155
  openai.InternalServerError,
112
156
  )
113
157
 
158
+ def record_exc(self, exc: Exception) -> None:
159
+ import openai
160
+
161
+ _ = isinstance(exc, openai.APIError)
162
+ if not isinstance(exc, openai.APIError) or not hasattr(exc, 'response') or not hasattr(exc.response, 'headers'):
163
+ return
164
+ requests_info, tokens_info = _get_header_info(exc.response.headers)
165
+ _logger.debug(f'record_exc(): requests_info={requests_info} tokens_info={tokens_info}')
166
+ self.record(requests=requests_info, tokens=tokens_info)
167
+ self.has_exc = True
168
+
114
169
  def get_retry_delay(self, exc: Exception) -> Optional[float]:
115
170
  import openai
116
171
 
117
172
  if not isinstance(exc, self.retryable_errors):
118
173
  return None
119
174
  assert isinstance(exc, openai.APIError)
120
- return 1.0
121
-
122
-
123
- # RE pattern for duration in '*-reset' headers;
124
- # examples: 1d2h3ms, 4m5.6s; # fractional seconds can be reported as 0.5s or 500ms
125
- _header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d+)m)?(?:([\d.]+)s)?')
126
-
127
-
128
- def _parse_header_duration(duration_str: str) -> datetime.timedelta:
129
- match = _header_duration_pattern.match(duration_str)
130
- if not match:
131
- raise ValueError(f'Invalid duration format: {duration_str}')
132
-
133
- days = int(match.group(1) or 0)
134
- hours = int(match.group(2) or 0)
135
- milliseconds = int(match.group(3) or 0)
136
- minutes = int(match.group(4) or 0)
137
- seconds = float(match.group(5) or 0)
138
-
139
- return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
140
-
141
-
142
- def _get_header_info(
143
- headers: httpx.Headers, *, requests: bool = True, tokens: bool = True
144
- ) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
145
- assert requests or tokens
146
- now = datetime.datetime.now(tz=datetime.timezone.utc)
147
-
148
- requests_info: Optional[tuple[int, int, datetime.datetime]] = None
149
- if requests:
150
- requests_limit_str = headers.get('x-ratelimit-limit-requests')
151
- requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
152
- requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
153
- requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
154
- requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s') # Default to 5 seconds
155
- requests_reset_ts = now + _parse_header_duration(requests_reset_str)
156
- requests_info = (requests_limit, requests_remaining, requests_reset_ts)
157
-
158
- tokens_info: Optional[tuple[int, int, datetime.datetime]] = None
159
- if tokens:
160
- tokens_limit_str = headers.get('x-ratelimit-limit-tokens')
161
- tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
162
- tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
163
- tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
164
- tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s') # Default to 5 seconds
165
- tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
166
- tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
167
-
168
- return requests_info, tokens_info
175
+ return super().get_retry_delay(exc)
169
176
 
170
177
 
171
178
  #####################################
@@ -210,7 +217,7 @@ async def speech(input: str, *, model: str, voice: str, model_kwargs: Optional[d
210
217
 
211
218
  content = await _openai_client().audio.speech.create(input=input, model=model, voice=voice, **model_kwargs)
212
219
  ext = model_kwargs.get('response_format', 'mp3')
213
- output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
220
+ output_filename = str(TempStore.create_path(extension=f'.{ext}'))
214
221
  content.write_to_file(output_filename)
215
222
  return output_filename
216
223
 
@@ -355,6 +362,7 @@ async def chat_completions(
355
362
  model_kwargs: Optional[dict[str, Any]] = None,
356
363
  tools: Optional[list[dict[str, Any]]] = None,
357
364
  tool_choice: Optional[dict[str, Any]] = None,
365
+ _runtime_ctx: Optional[env.RuntimeCtx] = None,
358
366
  ) -> dict:
359
367
  """
360
368
  Creates a model response for the given chat conversation.
@@ -418,7 +426,8 @@ async def chat_completions(
418
426
  )
419
427
 
420
428
  requests_info, tokens_info = _get_header_info(result.headers)
421
- rate_limits_info.record(requests=requests_info, tokens=tokens_info)
429
+ is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
430
+ rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
422
431
 
423
432
  return json.loads(result.text)
424
433
 
@@ -461,7 +470,12 @@ def _vision_get_request_resources(
461
470
 
462
471
  @pxt.udf
463
472
  async def vision(
464
- prompt: str, image: PIL.Image.Image, *, model: str, model_kwargs: Optional[dict[str, Any]] = None
473
+ prompt: str,
474
+ image: PIL.Image.Image,
475
+ *,
476
+ model: str,
477
+ model_kwargs: Optional[dict[str, Any]] = None,
478
+ _runtime_ctx: Optional[env.RuntimeCtx] = None,
465
479
  ) -> str:
466
480
  """
467
481
  Analyzes an image with the OpenAI vision capability. This is a convenience function that takes an image and
@@ -521,8 +535,10 @@ async def vision(
521
535
  **model_kwargs,
522
536
  )
523
537
 
538
+ # _logger.debug(f'vision(): headers={result.headers}')
524
539
  requests_info, tokens_info = _get_header_info(result.headers)
525
- rate_limits_info.record(requests=requests_info, tokens=tokens_info)
540
+ is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
541
+ rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
526
542
 
527
543
  result = json.loads(result.text)
528
544
  return result['choices'][0]['message']['content']
@@ -545,7 +561,11 @@ def _embeddings_get_request_resources(input: list[str]) -> dict[str, int]:
545
561
 
546
562
  @pxt.udf(batch_size=32)
547
563
  async def embeddings(
548
- input: Batch[str], *, model: str, model_kwargs: Optional[dict[str, Any]] = None
564
+ input: Batch[str],
565
+ *,
566
+ model: str,
567
+ model_kwargs: Optional[dict[str, Any]] = None,
568
+ _runtime_ctx: Optional[env.RuntimeCtx] = None,
549
569
  ) -> Batch[pxt.Array[(None,), pxt.Float]]:
550
570
  """
551
571
  Creates an embedding vector representing the input text.
@@ -592,7 +612,8 @@ async def embeddings(
592
612
  input=input, model=model, encoding_format='float', **model_kwargs
593
613
  )
594
614
  requests_info, tokens_info = _get_header_info(result.headers)
595
- rate_limits_info.record(requests=requests_info, tokens=tokens_info)
615
+ is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
616
+ rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
596
617
  return [np.array(data['embedding'], dtype=np.float64) for data in json.loads(result.content)['data']]
597
618
 
598
619
 
@@ -9,10 +9,10 @@ import numpy as np
9
9
  import PIL.Image
10
10
 
11
11
  import pixeltable as pxt
12
- from pixeltable import env
13
12
  from pixeltable.utils.code import local_public_names
13
+ from pixeltable.utils.media_store import TempStore
14
14
 
15
- _format_defaults = { # format -> (codec, ext)
15
+ _format_defaults: dict[str, tuple[str, str]] = { # format -> (codec, ext)
16
16
  'wav': ('pcm_s16le', 'wav'),
17
17
  'mp3': ('libmp3lame', 'mp3'),
18
18
  'flac': ('flac', 'flac'),
@@ -40,6 +40,59 @@ _format_defaults = { # format -> (codec, ext)
40
40
  class make_video(pxt.Aggregator):
41
41
  """
42
42
  Aggregator that creates a video from a sequence of images.
43
+
44
+ Creates an H.264 encoded MP4 video from a sequence of PIL Image frames. This aggregator requires the input
45
+ frames to be ordered (typically by frame position) and is commonly used with `FrameIterator` views to
46
+ reconstruct videos from processed frames.
47
+
48
+ Args:
49
+ fps: Frames per second for the output video. Default is 25. This is set when the aggregator is created.
50
+
51
+ Returns:
52
+
53
+ - A `pxt.Video` containing the created video file path.
54
+
55
+ Examples:
56
+ Create a video from frames extracted using FrameIterator:
57
+
58
+ >>> import pixeltable as pxt
59
+ >>> from pixeltable.functions.video import make_video
60
+ >>> from pixeltable.iterators import FrameIterator
61
+ >>>
62
+ >>> # Create base table for videos
63
+ >>> videos_table = pxt.create_table('videos', {'video': pxt.Video})
64
+ >>>
65
+ >>> # Create view to extract frames
66
+ >>> frames_view = pxt.create_view(
67
+ ... 'video_frames',
68
+ ... videos_table,
69
+ ... iterator=FrameIterator.create(video=videos_table.video, fps=1)
70
+ ... )
71
+ >>>
72
+ >>> # Reconstruct video from frames
73
+ >>> frames_view.group_by(videos_table).select(
74
+ ... make_video(frames_view.pos, frames_view.frame)
75
+ ... ).show()
76
+
77
+ Apply transformations to frames before creating a video:
78
+
79
+ >>> # Add computed column with transformed frames
80
+ >>> frames_view.add_computed_column(
81
+ ... rotated_frame=frames_view.frame.rotate(30),
82
+ ... stored=True
83
+ ... )
84
+ >>>
85
+ >>> # Create video from transformed frames
86
+ >>> frames_view.group_by(videos_table).select(
87
+ ... make_video(frames_view.pos, frames_view.rotated_frame)
88
+ ... ).show()
89
+
90
+ Compare multiple processed versions side-by-side:
91
+
92
+ >>> frames_view.group_by(videos_table).select(
93
+ ... make_video(frames_view.pos, frames_view.frame),
94
+ ... make_video(frames_view.pos, frames_view.rotated_frame)
95
+ ... ).show()
43
96
  """
44
97
 
45
98
  container: Optional[av.container.OutputContainer]
@@ -56,7 +109,7 @@ class make_video(pxt.Aggregator):
56
109
  if frame is None:
57
110
  return
58
111
  if self.container is None:
59
- self.out_file = env.Env.get().create_tmp_path('.mp4')
112
+ self.out_file = TempStore.create_path(extension='.mp4')
60
113
  self.container = av.open(str(self.out_file), mode='w')
61
114
  self.stream = self.container.add_stream('h264', rate=self.fps)
62
115
  self.stream.pix_fmt = 'yuv420p'
@@ -105,16 +158,16 @@ def extract_audio(
105
158
  return None
106
159
  audio_stream = container.streams.audio[stream_idx]
107
160
  # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
108
- output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
161
+ output_path = str(TempStore.create_path(extension=f'.{ext}'))
109
162
 
110
- with av.open(output_filename, 'w', format=format) as output_container:
163
+ with av.open(output_path, 'w', format=format) as output_container:
111
164
  output_stream = output_container.add_stream(codec or default_codec)
112
165
  assert isinstance(output_stream, av.audio.stream.AudioStream)
113
166
  for packet in container.demux(audio_stream):
114
167
  for frame in packet.decode():
115
168
  output_container.mux(output_stream.encode(frame)) # type: ignore[arg-type]
116
169
 
117
- return output_filename
170
+ return output_path
118
171
 
119
172
 
120
173
  @pxt.udf(is_method=True)
@@ -14,7 +14,7 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
14
14
  import colorsys
15
15
  import hashlib
16
16
  from collections import defaultdict
17
- from typing import Any, Optional, Union
17
+ from typing import Any, Optional
18
18
 
19
19
  import numpy as np
20
20
  import PIL.Image
@@ -352,7 +352,7 @@ def draw_bounding_boxes(
352
352
  from PIL import ImageColor, ImageDraw, ImageFont
353
353
 
354
354
  # set default font if not provided
355
- txt_font: Union[ImageFont.ImageFont, ImageFont.FreeTypeFont] = (
355
+ txt_font: ImageFont.ImageFont | ImageFont.FreeTypeFont = (
356
356
  ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size or 10)
357
357
  )
358
358
 
pixeltable/globals.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import logging
4
4
  import os
5
5
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
6
+ from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, NamedTuple, Optional, Union
7
7
 
8
8
  import pandas as pd
9
9
  from pandas.io.formats.style import Styler
@@ -27,8 +27,8 @@ if TYPE_CHECKING:
27
27
  RowData, # list of dictionaries
28
28
  DataFrame, # Pixeltable DataFrame
29
29
  pd.DataFrame, # pandas DataFrame
30
- 'datasets.Dataset',
31
- 'datasets.DatasetDict', # Huggingface datasets
30
+ datasets.Dataset,
31
+ datasets.DatasetDict, # Huggingface datasets
32
32
  ]
33
33
 
34
34
 
@@ -51,7 +51,7 @@ def create_table(
51
51
  source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
52
52
  schema_overrides: Optional[dict[str, Any]] = None,
53
53
  on_error: Literal['abort', 'ignore'] = 'abort',
54
- primary_key: Optional[Union[str, list[str]]] = None,
54
+ primary_key: str | list[str] | None = None,
55
55
  num_retained_versions: int = 10,
56
56
  comment: str = '',
57
57
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
@@ -197,7 +197,7 @@ def create_table(
197
197
 
198
198
  def create_view(
199
199
  path: str,
200
- base: Union[catalog.Table, DataFrame],
200
+ base: catalog.Table | DataFrame,
201
201
  *,
202
202
  additional_columns: Optional[dict[str, Any]] = None,
203
203
  is_snapshot: bool = False,
@@ -317,7 +317,7 @@ def create_view(
317
317
 
318
318
  def create_snapshot(
319
319
  path_str: str,
320
- base: Union[catalog.Table, DataFrame],
320
+ base: catalog.Table | DataFrame,
321
321
  *,
322
322
  additional_columns: Optional[dict[str, Any]] = None,
323
323
  iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
@@ -396,7 +396,12 @@ def create_snapshot(
396
396
  )
397
397
 
398
398
 
399
- def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optional[catalog.Table]:
399
+ def create_replica(
400
+ destination: str,
401
+ source: str | catalog.Table,
402
+ bucket_name: str | None = None,
403
+ access: Literal['public', 'private'] = 'private',
404
+ ) -> Optional[catalog.Table]:
400
405
  """
401
406
  Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
402
407
  replica of a remote table. A given table can have at most one replica per Pixeltable instance.
@@ -405,6 +410,12 @@ def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optio
405
410
  destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
406
411
  a remote URI such as `'pxt://username/mydir.my_table'`.
407
412
  source: Path to the source table, or (if the source table is a local table) a handle to the source table.
413
+ bucket_name: The name of the pixeltable cloud-registered bucket to use to store replica's data.
414
+ If no `bucket_name` is provided, the default Pixeltable storage bucket will be used.
415
+ access: Access control for the replica.
416
+
417
+ - `'public'`: Anyone can access this replica.
418
+ - `'private'`: Only the owner can access.
408
419
  """
409
420
  remote_dest = destination.startswith('pxt://')
410
421
  remote_source = isinstance(source, str) and source.startswith('pxt://')
@@ -414,7 +425,7 @@ def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optio
414
425
  if remote_dest:
415
426
  if isinstance(source, str):
416
427
  source = get_table(source)
417
- share.push_replica(destination, source)
428
+ share.push_replica(destination, source, bucket_name, access)
418
429
  return None
419
430
  else:
420
431
  assert isinstance(source, str)
@@ -484,7 +495,7 @@ def move(path: str, new_path: str) -> None:
484
495
 
485
496
 
486
497
  def drop_table(
487
- table: Union[str, catalog.Table], force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
498
+ table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
488
499
  ) -> None:
489
500
  """Drop a table, view, or snapshot.
490
501
 
@@ -534,6 +545,57 @@ def drop_table(
534
545
  Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
535
546
 
536
547
 
548
+ def get_dir_contents(dir_path: str = '', recursive: bool = True) -> 'DirContents':
549
+ """Get the contents of a Pixeltable directory.
550
+
551
+ Args:
552
+ dir_path: Path to the directory. Defaults to the root directory.
553
+ recursive: If `False`, returns only those tables and directories that are directly contained in specified
554
+ directory; if `True`, returns all tables and directories that are descendants of the specified directory,
555
+ recursively.
556
+
557
+ Returns:
558
+ A [`DirContents`][pixeltable.DirContents] object representing the contents of the specified directory.
559
+
560
+ Raises:
561
+ Error: If the path does not exist or does not designate a directory.
562
+
563
+ Examples:
564
+ Get contents of top-level directory:
565
+
566
+ >>> pxt.get_dir_contents()
567
+
568
+ Get contents of 'dir1':
569
+
570
+ >>> pxt.get_dir_contents('dir1')
571
+ """
572
+ path_obj = catalog.Path.parse(dir_path, allow_empty_path=True)
573
+ catalog_entries = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
574
+ dirs: list[str] = []
575
+ tables: list[str] = []
576
+ _assemble_dir_contents(dir_path, catalog_entries, dirs, tables)
577
+ dirs.sort()
578
+ tables.sort()
579
+ return DirContents(dirs, tables)
580
+
581
+
582
+ def _assemble_dir_contents(
583
+ dir_path: str, catalog_entries: dict[str, Catalog.DirEntry], dirs: list[str], tables: list[str]
584
+ ) -> None:
585
+ for name, entry in catalog_entries.items():
586
+ if name.startswith('_'):
587
+ continue # Skip system paths
588
+ path = f'{dir_path}.{name}' if len(dir_path) > 0 else name
589
+ if entry.dir is not None:
590
+ dirs.append(path)
591
+ if entry.dir_entries is not None:
592
+ _assemble_dir_contents(path, entry.dir_entries, dirs, tables)
593
+ else:
594
+ assert entry.table is not None
595
+ assert not entry.dir_entries
596
+ tables.append(path)
597
+
598
+
537
599
  def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
538
600
  """List the [`Table`][pixeltable.Table]s in a directory.
539
601
 
@@ -667,8 +729,8 @@ def ls(path: str = '') -> pd.DataFrame:
667
729
  This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
668
730
  including various attributes such as version and base table, as appropriate.
669
731
 
670
- To get a programmatic list of tables and/or directories, use [list_tables()][pixeltable.list_tables] and/or
671
- [list_dirs()][pixeltable.list_dirs] instead.
732
+ To get a programmatic list of the directory's contents, use [get_dir_contents()][pixeltable.get_dir_contents]
733
+ instead.
672
734
  """
673
735
  from pixeltable.catalog import retry_loop
674
736
  from pixeltable.metadata import schema
@@ -701,7 +763,7 @@ def ls(path: str = '') -> pd.DataFrame:
701
763
  kind = 'view'
702
764
  else:
703
765
  kind = 'table'
704
- version = '' if kind == 'snapshot' else md['version']
766
+ version = '' if kind == 'snapshot' else str(md['version'])
705
767
  if md['is_replica']:
706
768
  kind = f'{kind}-replica'
707
769
  rows.append([name, kind, version, base])
@@ -798,7 +860,7 @@ def list_functions() -> Styler:
798
860
  return pd_df.hide(axis='index')
799
861
 
800
862
 
801
- def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
863
+ def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
802
864
  """
803
865
  Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
804
866
  LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
@@ -875,3 +937,14 @@ def configure_logging(
875
937
 
876
938
  def array(elements: Iterable) -> exprs.Expr:
877
939
  return exprs.Expr.from_array(elements)
940
+
941
+
942
+ class DirContents(NamedTuple):
943
+ """
944
+ Represents the contents of a Pixeltable directory.
945
+ """
946
+
947
+ dirs: list[str]
948
+ """List of directory paths contained in this directory."""
949
+ tables: list[str]
950
+ """List of table paths contained in this directory."""
pixeltable/io/datarows.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Iterable, Optional, Union
3
+ from typing import Any, Iterable, Optional
4
4
 
5
5
  import pixeltable as pxt
6
6
  import pixeltable.type_system as ts
@@ -61,7 +61,7 @@ def import_rows(
61
61
  rows: list[dict[str, Any]],
62
62
  *,
63
63
  schema_overrides: Optional[dict[str, Any]] = None,
64
- primary_key: Optional[Union[str, list[str]]] = None,
64
+ primary_key: str | list[str] | None = None,
65
65
  num_retained_versions: int = 10,
66
66
  comment: str = '',
67
67
  ) -> pxt.Table:
@@ -105,7 +105,7 @@ def import_json(
105
105
  filepath_or_url: str,
106
106
  *,
107
107
  schema_overrides: Optional[dict[str, Any]] = None,
108
- primary_key: Optional[Union[str, list[str]]] = None,
108
+ primary_key: str | list[str] | None = None,
109
109
  num_retained_versions: int = 10,
110
110
  comment: str = '',
111
111
  **kwargs: Any,
pixeltable/io/fiftyone.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import os
2
- from typing import Any, Iterator, Optional, Union
2
+ from typing import Any, Iterator, Optional
3
3
 
4
4
  import fiftyone as fo # type: ignore[import-untyped]
5
5
  import fiftyone.utils.data as foud # type: ignore[import-untyped]
@@ -9,7 +9,7 @@ import puremagic
9
9
  import pixeltable as pxt
10
10
  import pixeltable.exceptions as excs
11
11
  from pixeltable import exprs
12
- from pixeltable.env import Env
12
+ from pixeltable.utils.media_store import TempStore
13
13
 
14
14
 
15
15
  class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
@@ -28,11 +28,11 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
28
28
  tbl: pxt.Table,
29
29
  image: exprs.Expr,
30
30
  image_format: str,
31
- classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
32
- detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
31
+ classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
32
+ detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
33
33
  dataset_dir: Optional[os.PathLike] = None,
34
34
  shuffle: bool = False,
35
- seed: Union[int, float, str, bytes, bytearray, None] = None,
35
+ seed: int | float | str | bytes | bytearray | None = None,
36
36
  max_samples: Optional[int] = None,
37
37
  ):
38
38
  super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
@@ -100,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
100
100
  assert isinstance(file, str)
101
101
  else:
102
102
  # Write the dynamically created image to a temp file
103
- file = str(Env.get().create_tmp_path(f'.{self.__image_format}'))
103
+ file = TempStore.create_path(extension=f'.{self.__image_format}')
104
104
  img.save(file, format=self.__image_format)
105
105
 
106
106
  metadata = fo.ImageMetadata(
@@ -108,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
108
108
  mime_type=puremagic.from_file(file, mime=True),
109
109
  width=img.width,
110
110
  height=img.height,
111
- filepath=file,
111
+ filepath=str(file),
112
112
  num_channels=len(img.getbands()),
113
113
  )
114
114
 
pixeltable/io/globals.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any, Literal, Optional, Union
3
+ from typing import TYPE_CHECKING, Any, Literal, Optional
4
4
 
5
5
  import pixeltable as pxt
6
6
  import pixeltable.exceptions as excs
@@ -143,8 +143,8 @@ def export_images_as_fo_dataset(
143
143
  tbl: pxt.Table,
144
144
  images: exprs.Expr,
145
145
  image_format: str = 'webp',
146
- classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
147
- detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
146
+ classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
147
+ detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
148
148
  ) -> 'fo.Dataset':
149
149
  """
150
150
  Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column