pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +4 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +7 -9
- pixeltable/catalog/column.py +49 -0
- pixeltable/catalog/insertable_table.py +0 -7
- pixeltable/catalog/schema_object.py +1 -14
- pixeltable/catalog/table.py +180 -67
- pixeltable/catalog/table_version.py +42 -146
- pixeltable/catalog/table_version_path.py +6 -5
- pixeltable/catalog/view.py +2 -1
- pixeltable/config.py +24 -9
- pixeltable/dataframe.py +5 -6
- pixeltable/env.py +113 -21
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +4 -3
- pixeltable/exec/exec_node.py +0 -8
- pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
- pixeltable/exec/expr_eval/globals.py +1 -0
- pixeltable/exec/expr_eval/schedulers.py +52 -19
- pixeltable/exec/in_memory_data_node.py +2 -3
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/data_row.py +15 -2
- pixeltable/exprs/expr.py +9 -9
- pixeltable/exprs/function_call.py +61 -23
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/json_path.py +3 -3
- pixeltable/exprs/row_builder.py +25 -21
- pixeltable/exprs/string_op.py +3 -3
- pixeltable/func/expr_template_function.py +6 -3
- pixeltable/func/query_template_function.py +2 -2
- pixeltable/func/signature.py +30 -3
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/anthropic.py +76 -27
- pixeltable/functions/deepseek.py +5 -1
- pixeltable/functions/gemini.py +11 -2
- pixeltable/functions/globals.py +2 -2
- pixeltable/functions/huggingface.py +6 -12
- pixeltable/functions/llama_cpp.py +9 -1
- pixeltable/functions/openai.py +76 -55
- pixeltable/functions/video.py +59 -6
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +86 -13
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/fiftyone.py +7 -7
- pixeltable/io/globals.py +3 -3
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +2 -1
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +3 -3
- pixeltable/io/table_data_conduit.py +2 -2
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +3 -2
- pixeltable/iterators/document.py +2 -8
- pixeltable/iterators/video.py +49 -9
- pixeltable/plan.py +0 -16
- pixeltable/share/packager.py +51 -42
- pixeltable/share/publish.py +134 -7
- pixeltable/store.py +5 -25
- pixeltable/type_system.py +5 -8
- pixeltable/utils/__init__.py +2 -2
- pixeltable/utils/arrow.py +5 -5
- pixeltable/utils/description_helper.py +3 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/media_store.py +131 -66
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
|
@@ -63,13 +63,10 @@ def sentence_transformer(
|
|
|
63
63
|
|
|
64
64
|
@sentence_transformer.conditional_return_type
|
|
65
65
|
def _(model_id: str) -> ts.ArrayType:
|
|
66
|
-
|
|
67
|
-
from sentence_transformers import SentenceTransformer
|
|
66
|
+
from sentence_transformers import SentenceTransformer
|
|
68
67
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
except ImportError:
|
|
72
|
-
return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
|
|
68
|
+
model = _lookup_model(model_id, SentenceTransformer)
|
|
69
|
+
return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
|
|
73
70
|
|
|
74
71
|
|
|
75
72
|
@pxt.udf
|
|
@@ -201,13 +198,10 @@ def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,
|
|
|
201
198
|
|
|
202
199
|
@clip.conditional_return_type
|
|
203
200
|
def _(model_id: str) -> ts.ArrayType:
|
|
204
|
-
|
|
205
|
-
from transformers import CLIPModel
|
|
201
|
+
from transformers import CLIPModel
|
|
206
202
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
except ImportError:
|
|
210
|
-
return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
|
|
203
|
+
model = _lookup_model(model_id, CLIPModel.from_pretrained)
|
|
204
|
+
return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
|
|
211
205
|
|
|
212
206
|
|
|
213
207
|
@pxt.udf(batch_size=4)
|
|
@@ -93,10 +93,18 @@ def _lookup_pretrained_model(repo_id: str, filename: Optional[str], n_gpu_layers
|
|
|
93
93
|
return _model_cache[key]
|
|
94
94
|
|
|
95
95
|
|
|
96
|
-
_model_cache: dict[tuple[str, str, int],
|
|
96
|
+
_model_cache: dict[tuple[str, str, int], 'llama_cpp.Llama'] = {}
|
|
97
97
|
_IS_GPU_AVAILABLE: Optional[bool] = None
|
|
98
98
|
|
|
99
99
|
|
|
100
|
+
def cleanup() -> None:
    """Close every cached model and empty the module-level model cache.

    For each model held in `_model_cache`, the model's sampler is closed first (when one is set),
    then the model itself is closed. The cache is cleared once all models have been closed.
    """
    for cached_model in _model_cache.values():
        sampler = cached_model._sampler
        # the sampler is a separate resource and has to be closed explicitly, before the model
        if sampler is not None:
            sampler.close()
        cached_model.close()
    _model_cache.clear()
|
|
106
|
+
|
|
107
|
+
|
|
100
108
|
__all__ = local_public_names(__name__)
|
|
101
109
|
|
|
102
110
|
|
pixeltable/functions/openai.py
CHANGED
|
@@ -23,6 +23,7 @@ import pixeltable as pxt
|
|
|
23
23
|
from pixeltable import env, exprs, type_system as ts
|
|
24
24
|
from pixeltable.func import Batch, Tools
|
|
25
25
|
from pixeltable.utils.code import local_public_names
|
|
26
|
+
from pixeltable.utils.media_store import TempStore
|
|
26
27
|
|
|
27
28
|
if TYPE_CHECKING:
|
|
28
29
|
import openai
|
|
@@ -91,6 +92,49 @@ def _rate_limits_pool(model: str) -> str:
|
|
|
91
92
|
return f'rate-limits:openai:{model}'
|
|
92
93
|
|
|
93
94
|
|
|
95
|
+
# RE pattern for duration in '*-reset' headers: an ordered sequence of <number><unit> components,
# largest unit first; examples: 1d2h3ms, 1h2m3s, 4m5.6s; fractional seconds can be reported as 0.5s or 500ms.
# Every component is optional, so the pattern also matches the empty string; the parser rejects that case.
_header_duration_pattern = re.compile(
    r'(?:(?P<days>\d+)d)?'
    r'(?:(?P<hours>\d+)h)?'
    r'(?:(?P<minutes>\d+)m(?!s))?'  # (?!s): the 'm' of an 'ms' suffix is never a minutes suffix
    r'(?:(?P<seconds>\d+(?:\.\d+)?)s)?'
    r'(?:(?P<milliseconds>\d+(?:\.\d+)?)ms)?'
)


def _parse_header_duration(duration_str: str) -> datetime.timedelta:
    """Parse a duration string from a '*-reset' rate-limit header into a timedelta.

    Args:
        duration_str: Duration made of optional day (`d`), hour (`h`), minute (`m`), second (`s`) and
            millisecond (`ms`) components, in that order, e.g. `'1h2m3s'`, `'4m5.6s'` or `'500ms'`.
            Surrounding whitespace is ignored.

    Returns:
        The sum of all components as a `datetime.timedelta`.

    Raises:
        ValueError: If the string is empty or is not entirely made up of the components above.
    """
    # fullmatch(): the whole string has to be consumed; a prefix match would silently drop components
    match = _header_duration_pattern.fullmatch(duration_str.strip())
    # all components are optional, so an empty string "matches" as well; treat it as invalid
    if match is None or match.group(0) == '':
        raise ValueError(f'Invalid duration format: {duration_str}')

    days = int(match['days'] or 0)
    hours = int(match['hours'] or 0)
    minutes = int(match['minutes'] or 0)
    seconds = float(match['seconds'] or 0)
    milliseconds = float(match['milliseconds'] or 0)

    return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _get_header_info(
    headers: httpx.Headers,
) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
    """Extract request and token rate-limit info from the 'x-ratelimit-*' response headers.

    Args:
        headers: Response headers to read the 'x-ratelimit-{limit,remaining,reset}-{requests,tokens}'
            entries from.

    Returns:
        A pair `(requests_info, tokens_info)`; each element is a `(limit, remaining, reset_ts)` tuple.
        `limit` and `remaining` are `None` when the corresponding header is absent. `reset_ts` is the
        current UTC time plus the duration in the reset header (5 seconds when that header is absent).
    """
    # a single timestamp anchors both reset times
    now = datetime.datetime.now(tz=datetime.timezone.utc)

    def resource_info(resource: str) -> tuple[Optional[int], Optional[int], datetime.datetime]:
        """Read limit, remaining and reset time for one resource ('requests' or 'tokens')."""
        limit_str = headers.get(f'x-ratelimit-limit-{resource}')
        limit = None if limit_str is None else int(limit_str)
        remaining_str = headers.get(f'x-ratelimit-remaining-{resource}')
        remaining = None if remaining_str is None else int(remaining_str)
        reset_str = headers.get(f'x-ratelimit-reset-{resource}', '5s')  # Default to 5 seconds
        return limit, remaining, now + _parse_header_duration(reset_str)

    requests_info = resource_info('requests')
    tokens_info = resource_info('tokens')
    return requests_info, tokens_info
|
|
136
|
+
|
|
137
|
+
|
|
94
138
|
class OpenAIRateLimitsInfo(env.RateLimitsInfo):
|
|
95
139
|
retryable_errors: tuple[Type[Exception], ...]
|
|
96
140
|
|
|
@@ -111,61 +155,24 @@ class OpenAIRateLimitsInfo(env.RateLimitsInfo):
|
|
|
111
155
|
openai.InternalServerError,
|
|
112
156
|
)
|
|
113
157
|
|
|
158
|
+
def record_exc(self, exc: Exception) -> None:
    """Record the rate-limit info carried by the response headers of an OpenAI API error.

    Exceptions that are not an `openai.APIError`, or that have no response with headers attached,
    are ignored. Otherwise the request/token info parsed from the headers is passed to `record()`
    and `has_exc` is set.

    Args:
        exc: The exception raised by the API call.
    """
    import openai

    if not isinstance(exc, openai.APIError) or not hasattr(exc, 'response') or not hasattr(exc.response, 'headers'):
        return
    requests_info, tokens_info = _get_header_info(exc.response.headers)
    _logger.debug(f'record_exc(): requests_info={requests_info} tokens_info={tokens_info}')
    self.record(requests=requests_info, tokens=tokens_info)
    self.has_exc = True
|
|
168
|
+
|
|
114
169
|
def get_retry_delay(self, exc: Exception) -> Optional[float]:
|
|
115
170
|
import openai
|
|
116
171
|
|
|
117
172
|
if not isinstance(exc, self.retryable_errors):
|
|
118
173
|
return None
|
|
119
174
|
assert isinstance(exc, openai.APIError)
|
|
120
|
-
return
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
# RE pattern for duration in '*-reset' headers;
|
|
124
|
-
# examples: 1d2h3ms, 4m5.6s; # fractional seconds can be reported as 0.5s or 500ms
|
|
125
|
-
_header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d+)m)?(?:([\d.]+)s)?')
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def _parse_header_duration(duration_str: str) -> datetime.timedelta:
|
|
129
|
-
match = _header_duration_pattern.match(duration_str)
|
|
130
|
-
if not match:
|
|
131
|
-
raise ValueError(f'Invalid duration format: {duration_str}')
|
|
132
|
-
|
|
133
|
-
days = int(match.group(1) or 0)
|
|
134
|
-
hours = int(match.group(2) or 0)
|
|
135
|
-
milliseconds = int(match.group(3) or 0)
|
|
136
|
-
minutes = int(match.group(4) or 0)
|
|
137
|
-
seconds = float(match.group(5) or 0)
|
|
138
|
-
|
|
139
|
-
return datetime.timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def _get_header_info(
|
|
143
|
-
headers: httpx.Headers, *, requests: bool = True, tokens: bool = True
|
|
144
|
-
) -> tuple[Optional[tuple[int, int, datetime.datetime]], Optional[tuple[int, int, datetime.datetime]]]:
|
|
145
|
-
assert requests or tokens
|
|
146
|
-
now = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
147
|
-
|
|
148
|
-
requests_info: Optional[tuple[int, int, datetime.datetime]] = None
|
|
149
|
-
if requests:
|
|
150
|
-
requests_limit_str = headers.get('x-ratelimit-limit-requests')
|
|
151
|
-
requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
|
|
152
|
-
requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
|
|
153
|
-
requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
|
|
154
|
-
requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s') # Default to 5 seconds
|
|
155
|
-
requests_reset_ts = now + _parse_header_duration(requests_reset_str)
|
|
156
|
-
requests_info = (requests_limit, requests_remaining, requests_reset_ts)
|
|
157
|
-
|
|
158
|
-
tokens_info: Optional[tuple[int, int, datetime.datetime]] = None
|
|
159
|
-
if tokens:
|
|
160
|
-
tokens_limit_str = headers.get('x-ratelimit-limit-tokens')
|
|
161
|
-
tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
|
|
162
|
-
tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
|
|
163
|
-
tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
|
|
164
|
-
tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s') # Default to 5 seconds
|
|
165
|
-
tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
|
|
166
|
-
tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
|
|
167
|
-
|
|
168
|
-
return requests_info, tokens_info
|
|
175
|
+
return super().get_retry_delay(exc)
|
|
169
176
|
|
|
170
177
|
|
|
171
178
|
#####################################
|
|
@@ -210,7 +217,7 @@ async def speech(input: str, *, model: str, voice: str, model_kwargs: Optional[d
|
|
|
210
217
|
|
|
211
218
|
content = await _openai_client().audio.speech.create(input=input, model=model, voice=voice, **model_kwargs)
|
|
212
219
|
ext = model_kwargs.get('response_format', 'mp3')
|
|
213
|
-
output_filename = str(
|
|
220
|
+
output_filename = str(TempStore.create_path(extension=f'.{ext}'))
|
|
214
221
|
content.write_to_file(output_filename)
|
|
215
222
|
return output_filename
|
|
216
223
|
|
|
@@ -355,6 +362,7 @@ async def chat_completions(
|
|
|
355
362
|
model_kwargs: Optional[dict[str, Any]] = None,
|
|
356
363
|
tools: Optional[list[dict[str, Any]]] = None,
|
|
357
364
|
tool_choice: Optional[dict[str, Any]] = None,
|
|
365
|
+
_runtime_ctx: Optional[env.RuntimeCtx] = None,
|
|
358
366
|
) -> dict:
|
|
359
367
|
"""
|
|
360
368
|
Creates a model response for the given chat conversation.
|
|
@@ -418,7 +426,8 @@ async def chat_completions(
|
|
|
418
426
|
)
|
|
419
427
|
|
|
420
428
|
requests_info, tokens_info = _get_header_info(result.headers)
|
|
421
|
-
|
|
429
|
+
is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
|
|
430
|
+
rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
|
|
422
431
|
|
|
423
432
|
return json.loads(result.text)
|
|
424
433
|
|
|
@@ -461,7 +470,12 @@ def _vision_get_request_resources(
|
|
|
461
470
|
|
|
462
471
|
@pxt.udf
|
|
463
472
|
async def vision(
|
|
464
|
-
prompt: str,
|
|
473
|
+
prompt: str,
|
|
474
|
+
image: PIL.Image.Image,
|
|
475
|
+
*,
|
|
476
|
+
model: str,
|
|
477
|
+
model_kwargs: Optional[dict[str, Any]] = None,
|
|
478
|
+
_runtime_ctx: Optional[env.RuntimeCtx] = None,
|
|
465
479
|
) -> str:
|
|
466
480
|
"""
|
|
467
481
|
Analyzes an image with the OpenAI vision capability. This is a convenience function that takes an image and
|
|
@@ -521,8 +535,10 @@ async def vision(
|
|
|
521
535
|
**model_kwargs,
|
|
522
536
|
)
|
|
523
537
|
|
|
538
|
+
# _logger.debug(f'vision(): headers={result.headers}')
|
|
524
539
|
requests_info, tokens_info = _get_header_info(result.headers)
|
|
525
|
-
|
|
540
|
+
is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
|
|
541
|
+
rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
|
|
526
542
|
|
|
527
543
|
result = json.loads(result.text)
|
|
528
544
|
return result['choices'][0]['message']['content']
|
|
@@ -545,7 +561,11 @@ def _embeddings_get_request_resources(input: list[str]) -> dict[str, int]:
|
|
|
545
561
|
|
|
546
562
|
@pxt.udf(batch_size=32)
|
|
547
563
|
async def embeddings(
|
|
548
|
-
input: Batch[str],
|
|
564
|
+
input: Batch[str],
|
|
565
|
+
*,
|
|
566
|
+
model: str,
|
|
567
|
+
model_kwargs: Optional[dict[str, Any]] = None,
|
|
568
|
+
_runtime_ctx: Optional[env.RuntimeCtx] = None,
|
|
549
569
|
) -> Batch[pxt.Array[(None,), pxt.Float]]:
|
|
550
570
|
"""
|
|
551
571
|
Creates an embedding vector representing the input text.
|
|
@@ -592,7 +612,8 @@ async def embeddings(
|
|
|
592
612
|
input=input, model=model, encoding_format='float', **model_kwargs
|
|
593
613
|
)
|
|
594
614
|
requests_info, tokens_info = _get_header_info(result.headers)
|
|
595
|
-
|
|
615
|
+
is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
|
|
616
|
+
rate_limits_info.record(requests=requests_info, tokens=tokens_info, reset_exc=is_retry)
|
|
596
617
|
return [np.array(data['embedding'], dtype=np.float64) for data in json.loads(result.content)['data']]
|
|
597
618
|
|
|
598
619
|
|
pixeltable/functions/video.py
CHANGED
|
@@ -9,10 +9,10 @@ import numpy as np
|
|
|
9
9
|
import PIL.Image
|
|
10
10
|
|
|
11
11
|
import pixeltable as pxt
|
|
12
|
-
from pixeltable import env
|
|
13
12
|
from pixeltable.utils.code import local_public_names
|
|
13
|
+
from pixeltable.utils.media_store import TempStore
|
|
14
14
|
|
|
15
|
-
_format_defaults = { # format -> (codec, ext)
|
|
15
|
+
_format_defaults: dict[str, tuple[str, str]] = { # format -> (codec, ext)
|
|
16
16
|
'wav': ('pcm_s16le', 'wav'),
|
|
17
17
|
'mp3': ('libmp3lame', 'mp3'),
|
|
18
18
|
'flac': ('flac', 'flac'),
|
|
@@ -40,6 +40,59 @@ _format_defaults = { # format -> (codec, ext)
|
|
|
40
40
|
class make_video(pxt.Aggregator):
|
|
41
41
|
"""
|
|
42
42
|
Aggregator that creates a video from a sequence of images.
|
|
43
|
+
|
|
44
|
+
Creates an H.264 encoded MP4 video from a sequence of PIL Image frames. This aggregator requires the input
|
|
45
|
+
frames to be ordered (typically by frame position) and is commonly used with `FrameIterator` views to
|
|
46
|
+
reconstruct videos from processed frames.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
fps: Frames per second for the output video. Default is 25. This is set when the aggregator is created.
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
|
|
53
|
+
- A `pxt.Video` containing the created video file path.
|
|
54
|
+
|
|
55
|
+
Examples:
|
|
56
|
+
Create a video from frames extracted using FrameIterator:
|
|
57
|
+
|
|
58
|
+
>>> import pixeltable as pxt
|
|
59
|
+
>>> from pixeltable.functions.video import make_video
|
|
60
|
+
>>> from pixeltable.iterators import FrameIterator
|
|
61
|
+
>>>
|
|
62
|
+
>>> # Create base table for videos
|
|
63
|
+
>>> videos_table = pxt.create_table('videos', {'video': pxt.Video})
|
|
64
|
+
>>>
|
|
65
|
+
>>> # Create view to extract frames
|
|
66
|
+
>>> frames_view = pxt.create_view(
|
|
67
|
+
... 'video_frames',
|
|
68
|
+
... videos_table,
|
|
69
|
+
... iterator=FrameIterator.create(video=videos_table.video, fps=1)
|
|
70
|
+
... )
|
|
71
|
+
>>>
|
|
72
|
+
>>> # Reconstruct video from frames
|
|
73
|
+
>>> frames_view.group_by(videos_table).select(
|
|
74
|
+
... make_video(frames_view.pos, frames_view.frame)
|
|
75
|
+
... ).show()
|
|
76
|
+
|
|
77
|
+
Apply transformations to frames before creating a video:
|
|
78
|
+
|
|
79
|
+
>>> # Add computed column with transformed frames
|
|
80
|
+
>>> frames_view.add_computed_column(
|
|
81
|
+
... rotated_frame=frames_view.frame.rotate(30),
|
|
82
|
+
... stored=True
|
|
83
|
+
... )
|
|
84
|
+
>>>
|
|
85
|
+
>>> # Create video from transformed frames
|
|
86
|
+
>>> frames_view.group_by(videos_table).select(
|
|
87
|
+
... make_video(frames_view.pos, frames_view.rotated_frame)
|
|
88
|
+
... ).show()
|
|
89
|
+
|
|
90
|
+
Compare multiple processed versions side-by-side:
|
|
91
|
+
|
|
92
|
+
>>> frames_view.group_by(videos_table).select(
|
|
93
|
+
... make_video(frames_view.pos, frames_view.frame),
|
|
94
|
+
... make_video(frames_view.pos, frames_view.rotated_frame)
|
|
95
|
+
... ).show()
|
|
43
96
|
"""
|
|
44
97
|
|
|
45
98
|
container: Optional[av.container.OutputContainer]
|
|
@@ -56,7 +109,7 @@ class make_video(pxt.Aggregator):
|
|
|
56
109
|
if frame is None:
|
|
57
110
|
return
|
|
58
111
|
if self.container is None:
|
|
59
|
-
self.out_file =
|
|
112
|
+
self.out_file = TempStore.create_path(extension='.mp4')
|
|
60
113
|
self.container = av.open(str(self.out_file), mode='w')
|
|
61
114
|
self.stream = self.container.add_stream('h264', rate=self.fps)
|
|
62
115
|
self.stream.pix_fmt = 'yuv420p'
|
|
@@ -105,16 +158,16 @@ def extract_audio(
|
|
|
105
158
|
return None
|
|
106
159
|
audio_stream = container.streams.audio[stream_idx]
|
|
107
160
|
# create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
|
|
108
|
-
|
|
161
|
+
output_path = str(TempStore.create_path(extension=f'.{ext}'))
|
|
109
162
|
|
|
110
|
-
with av.open(
|
|
163
|
+
with av.open(output_path, 'w', format=format) as output_container:
|
|
111
164
|
output_stream = output_container.add_stream(codec or default_codec)
|
|
112
165
|
assert isinstance(output_stream, av.audio.stream.AudioStream)
|
|
113
166
|
for packet in container.demux(audio_stream):
|
|
114
167
|
for frame in packet.decode():
|
|
115
168
|
output_container.mux(output_stream.encode(frame)) # type: ignore[arg-type]
|
|
116
169
|
|
|
117
|
-
return
|
|
170
|
+
return output_path
|
|
118
171
|
|
|
119
172
|
|
|
120
173
|
@pxt.udf(is_method=True)
|
pixeltable/functions/vision.py
CHANGED
|
@@ -14,7 +14,7 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
|
|
|
14
14
|
import colorsys
|
|
15
15
|
import hashlib
|
|
16
16
|
from collections import defaultdict
|
|
17
|
-
from typing import Any, Optional
|
|
17
|
+
from typing import Any, Optional
|
|
18
18
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import PIL.Image
|
|
@@ -352,7 +352,7 @@ def draw_bounding_boxes(
|
|
|
352
352
|
from PIL import ImageColor, ImageDraw, ImageFont
|
|
353
353
|
|
|
354
354
|
# set default font if not provided
|
|
355
|
-
txt_font:
|
|
355
|
+
txt_font: ImageFont.ImageFont | ImageFont.FreeTypeFont = (
|
|
356
356
|
ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size or 10)
|
|
357
357
|
)
|
|
358
358
|
|
pixeltable/globals.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, NamedTuple, Optional, Union
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
from pandas.io.formats.style import Styler
|
|
@@ -27,8 +27,8 @@ if TYPE_CHECKING:
|
|
|
27
27
|
RowData, # list of dictionaries
|
|
28
28
|
DataFrame, # Pixeltable DataFrame
|
|
29
29
|
pd.DataFrame, # pandas DataFrame
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
datasets.Dataset,
|
|
31
|
+
datasets.DatasetDict, # Huggingface datasets
|
|
32
32
|
]
|
|
33
33
|
|
|
34
34
|
|
|
@@ -51,7 +51,7 @@ def create_table(
|
|
|
51
51
|
source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
|
|
52
52
|
schema_overrides: Optional[dict[str, Any]] = None,
|
|
53
53
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
54
|
-
primary_key:
|
|
54
|
+
primary_key: str | list[str] | None = None,
|
|
55
55
|
num_retained_versions: int = 10,
|
|
56
56
|
comment: str = '',
|
|
57
57
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
@@ -197,7 +197,7 @@ def create_table(
|
|
|
197
197
|
|
|
198
198
|
def create_view(
|
|
199
199
|
path: str,
|
|
200
|
-
base:
|
|
200
|
+
base: catalog.Table | DataFrame,
|
|
201
201
|
*,
|
|
202
202
|
additional_columns: Optional[dict[str, Any]] = None,
|
|
203
203
|
is_snapshot: bool = False,
|
|
@@ -317,7 +317,7 @@ def create_view(
|
|
|
317
317
|
|
|
318
318
|
def create_snapshot(
|
|
319
319
|
path_str: str,
|
|
320
|
-
base:
|
|
320
|
+
base: catalog.Table | DataFrame,
|
|
321
321
|
*,
|
|
322
322
|
additional_columns: Optional[dict[str, Any]] = None,
|
|
323
323
|
iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
|
|
@@ -396,7 +396,12 @@ def create_snapshot(
|
|
|
396
396
|
)
|
|
397
397
|
|
|
398
398
|
|
|
399
|
-
def create_replica(
|
|
399
|
+
def create_replica(
|
|
400
|
+
destination: str,
|
|
401
|
+
source: str | catalog.Table,
|
|
402
|
+
bucket_name: str | None = None,
|
|
403
|
+
access: Literal['public', 'private'] = 'private',
|
|
404
|
+
) -> Optional[catalog.Table]:
|
|
400
405
|
"""
|
|
401
406
|
Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
|
|
402
407
|
replica of a remote table. A given table can have at most one replica per Pixeltable instance.
|
|
@@ -405,6 +410,12 @@ def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optio
|
|
|
405
410
|
destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
|
|
406
411
|
a remote URI such as `'pxt://username/mydir.my_table'`.
|
|
407
412
|
source: Path to the source table, or (if the source table is a local table) a handle to the source table.
|
|
413
|
+
bucket_name: The name of the Pixeltable cloud-registered bucket used to store the replica's data.
|
|
414
|
+
If no `bucket_name` is provided, the default Pixeltable storage bucket will be used.
|
|
415
|
+
access: Access control for the replica.
|
|
416
|
+
|
|
417
|
+
- `'public'`: Anyone can access this replica.
|
|
418
|
+
- `'private'`: Only the owner can access.
|
|
408
419
|
"""
|
|
409
420
|
remote_dest = destination.startswith('pxt://')
|
|
410
421
|
remote_source = isinstance(source, str) and source.startswith('pxt://')
|
|
@@ -414,7 +425,7 @@ def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optio
|
|
|
414
425
|
if remote_dest:
|
|
415
426
|
if isinstance(source, str):
|
|
416
427
|
source = get_table(source)
|
|
417
|
-
share.push_replica(destination, source)
|
|
428
|
+
share.push_replica(destination, source, bucket_name, access)
|
|
418
429
|
return None
|
|
419
430
|
else:
|
|
420
431
|
assert isinstance(source, str)
|
|
@@ -484,7 +495,7 @@ def move(path: str, new_path: str) -> None:
|
|
|
484
495
|
|
|
485
496
|
|
|
486
497
|
def drop_table(
|
|
487
|
-
table:
|
|
498
|
+
table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
|
|
488
499
|
) -> None:
|
|
489
500
|
"""Drop a table, view, or snapshot.
|
|
490
501
|
|
|
@@ -534,6 +545,57 @@ def drop_table(
|
|
|
534
545
|
Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
|
|
535
546
|
|
|
536
547
|
|
|
548
|
+
def get_dir_contents(dir_path: str = '', recursive: bool = True) -> 'DirContents':
    """Return the directories and tables contained in a Pixeltable directory.

    Args:
        dir_path: Path to the directory. Defaults to the root directory.
        recursive: If `False`, only tables and directories directly contained in the specified directory are
            returned; if `True`, all tables and directories that are descendants of the specified directory
            are returned, recursively.

    Returns:
        A [`DirContents`][pixeltable.DirContents] object representing the contents of the specified directory.
        Both of its lists are sorted.

    Raises:
        Error: If the path does not exist or does not designate a directory.

    Examples:
        Get contents of top-level directory:

        >>> pxt.get_dir_contents()

        Get contents of 'dir1':

        >>> pxt.get_dir_contents('dir1')
    """
    parsed_path = catalog.Path.parse(dir_path, allow_empty_path=True)
    entries = Catalog.get().get_dir_contents(parsed_path, recursive=recursive)
    dir_paths: list[str] = []
    table_paths: list[str] = []
    # flattens the nested catalog entries into the two path lists
    _assemble_dir_contents(dir_path, entries, dir_paths, table_paths)
    return DirContents(sorted(dir_paths), sorted(table_paths))
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def _assemble_dir_contents(
    dir_path: str, catalog_entries: dict[str, Catalog.DirEntry], dirs: list[str], tables: list[str]
) -> None:
    """Flatten nested catalog entries into lists of directory paths and table paths.

    Args:
        dir_path: Path of the directory that `catalog_entries` belongs to; empty for the root.
        catalog_entries: Entries of that directory, keyed by name.
        dirs: Output list; the path of every directory entry is appended to it.
        tables: Output list; the path of every table entry is appended to it.
    """
    # entries directly under the root have no '<dir_path>.' prefix
    prefix = f'{dir_path}.' if len(dir_path) > 0 else ''
    for name, entry in catalog_entries.items():
        if name.startswith('_'):
            continue  # Skip system paths
        full_path = prefix + name
        if entry.dir is None:
            # not a directory: has to be a table, which has no child entries
            assert entry.table is not None
            assert not entry.dir_entries
            tables.append(full_path)
            continue
        dirs.append(full_path)
        if entry.dir_entries is not None:
            _assemble_dir_contents(full_path, entry.dir_entries, dirs, tables)
|
|
597
|
+
|
|
598
|
+
|
|
537
599
|
def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
538
600
|
"""List the [`Table`][pixeltable.Table]s in a directory.
|
|
539
601
|
|
|
@@ -667,8 +729,8 @@ def ls(path: str = '') -> pd.DataFrame:
|
|
|
667
729
|
This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
|
|
668
730
|
including various attributes such as version and base table, as appropriate.
|
|
669
731
|
|
|
670
|
-
To get a programmatic list of
|
|
671
|
-
|
|
732
|
+
To get a programmatic list of the directory's contents, use [get_dir_contents()][pixeltable.get_dir_contents]
|
|
733
|
+
instead.
|
|
672
734
|
"""
|
|
673
735
|
from pixeltable.catalog import retry_loop
|
|
674
736
|
from pixeltable.metadata import schema
|
|
@@ -701,7 +763,7 @@ def ls(path: str = '') -> pd.DataFrame:
|
|
|
701
763
|
kind = 'view'
|
|
702
764
|
else:
|
|
703
765
|
kind = 'table'
|
|
704
|
-
version = '' if kind == 'snapshot' else md['version']
|
|
766
|
+
version = '' if kind == 'snapshot' else str(md['version'])
|
|
705
767
|
if md['is_replica']:
|
|
706
768
|
kind = f'{kind}-replica'
|
|
707
769
|
rows.append([name, kind, version, base])
|
|
@@ -798,7 +860,7 @@ def list_functions() -> Styler:
|
|
|
798
860
|
return pd_df.hide(axis='index')
|
|
799
861
|
|
|
800
862
|
|
|
801
|
-
def tools(*args:
|
|
863
|
+
def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
|
|
802
864
|
"""
|
|
803
865
|
Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
|
|
804
866
|
LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
|
|
@@ -875,3 +937,14 @@ def configure_logging(
|
|
|
875
937
|
|
|
876
938
|
def array(elements: Iterable) -> exprs.Expr:
|
|
877
939
|
return exprs.Expr.from_array(elements)
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
class DirContents(NamedTuple):
    """
    Contents of a Pixeltable directory, split into directory paths and table paths.

    Being a `NamedTuple`, an instance can be unpacked as `dirs, tables = contents` or accessed by
    field name.
    """

    dirs: list[str]
    """List of directory paths contained in this directory."""
    tables: list[str]
    """List of table paths contained in this directory."""
|
pixeltable/io/datarows.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable, Optional
|
|
3
|
+
from typing import Any, Iterable, Optional
|
|
4
4
|
|
|
5
5
|
import pixeltable as pxt
|
|
6
6
|
import pixeltable.type_system as ts
|
|
@@ -61,7 +61,7 @@ def import_rows(
|
|
|
61
61
|
rows: list[dict[str, Any]],
|
|
62
62
|
*,
|
|
63
63
|
schema_overrides: Optional[dict[str, Any]] = None,
|
|
64
|
-
primary_key:
|
|
64
|
+
primary_key: str | list[str] | None = None,
|
|
65
65
|
num_retained_versions: int = 10,
|
|
66
66
|
comment: str = '',
|
|
67
67
|
) -> pxt.Table:
|
|
@@ -105,7 +105,7 @@ def import_json(
|
|
|
105
105
|
filepath_or_url: str,
|
|
106
106
|
*,
|
|
107
107
|
schema_overrides: Optional[dict[str, Any]] = None,
|
|
108
|
-
primary_key:
|
|
108
|
+
primary_key: str | list[str] | None = None,
|
|
109
109
|
num_retained_versions: int = 10,
|
|
110
110
|
comment: str = '',
|
|
111
111
|
**kwargs: Any,
|
pixeltable/io/fiftyone.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Any, Iterator, Optional
|
|
2
|
+
from typing import Any, Iterator, Optional
|
|
3
3
|
|
|
4
4
|
import fiftyone as fo # type: ignore[import-untyped]
|
|
5
5
|
import fiftyone.utils.data as foud # type: ignore[import-untyped]
|
|
@@ -9,7 +9,7 @@ import puremagic
|
|
|
9
9
|
import pixeltable as pxt
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
from pixeltable import exprs
|
|
12
|
-
from pixeltable.
|
|
12
|
+
from pixeltable.utils.media_store import TempStore
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
@@ -28,11 +28,11 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
28
28
|
tbl: pxt.Table,
|
|
29
29
|
image: exprs.Expr,
|
|
30
30
|
image_format: str,
|
|
31
|
-
classifications:
|
|
32
|
-
detections:
|
|
31
|
+
classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
32
|
+
detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
33
33
|
dataset_dir: Optional[os.PathLike] = None,
|
|
34
34
|
shuffle: bool = False,
|
|
35
|
-
seed:
|
|
35
|
+
seed: int | float | str | bytes | bytearray | None = None,
|
|
36
36
|
max_samples: Optional[int] = None,
|
|
37
37
|
):
|
|
38
38
|
super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
|
|
@@ -100,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
100
100
|
assert isinstance(file, str)
|
|
101
101
|
else:
|
|
102
102
|
# Write the dynamically created image to a temp file
|
|
103
|
-
file =
|
|
103
|
+
file = TempStore.create_path(extension=f'.{self.__image_format}')
|
|
104
104
|
img.save(file, format=self.__image_format)
|
|
105
105
|
|
|
106
106
|
metadata = fo.ImageMetadata(
|
|
@@ -108,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
108
108
|
mime_type=puremagic.from_file(file, mime=True),
|
|
109
109
|
width=img.width,
|
|
110
110
|
height=img.height,
|
|
111
|
-
filepath=file,
|
|
111
|
+
filepath=str(file),
|
|
112
112
|
num_channels=len(img.getbands()),
|
|
113
113
|
)
|
|
114
114
|
|
pixeltable/io/globals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
4
4
|
|
|
5
5
|
import pixeltable as pxt
|
|
6
6
|
import pixeltable.exceptions as excs
|
|
@@ -143,8 +143,8 @@ def export_images_as_fo_dataset(
|
|
|
143
143
|
tbl: pxt.Table,
|
|
144
144
|
images: exprs.Expr,
|
|
145
145
|
image_format: str = 'webp',
|
|
146
|
-
classifications:
|
|
147
|
-
detections:
|
|
146
|
+
classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
147
|
+
detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
148
148
|
) -> 'fo.Dataset':
|
|
149
149
|
"""
|
|
150
150
|
Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column
|