pixeltable 0.4.7__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/catalog/catalog.py +4 -6
- pixeltable/catalog/table.py +41 -14
- pixeltable/catalog/table_version.py +12 -8
- pixeltable/catalog/table_version_path.py +6 -5
- pixeltable/config.py +24 -9
- pixeltable/dataframe.py +3 -3
- pixeltable/env.py +70 -16
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +4 -3
- pixeltable/exec/exec_node.py +0 -8
- pixeltable/exec/expr_eval/globals.py +1 -0
- pixeltable/exec/expr_eval/schedulers.py +16 -4
- pixeltable/exec/in_memory_data_node.py +2 -3
- pixeltable/exprs/data_row.py +5 -5
- pixeltable/exprs/function_call.py +59 -21
- pixeltable/exprs/row_builder.py +11 -5
- pixeltable/func/expr_template_function.py +6 -3
- pixeltable/functions/anthropic.py +1 -2
- pixeltable/functions/deepseek.py +5 -1
- pixeltable/functions/gemini.py +11 -2
- pixeltable/functions/huggingface.py +6 -12
- pixeltable/functions/openai.py +2 -1
- pixeltable/functions/video.py +5 -5
- pixeltable/globals.py +13 -2
- pixeltable/io/fiftyone.py +3 -3
- pixeltable/io/label_studio.py +2 -1
- pixeltable/iterators/audio.py +3 -2
- pixeltable/iterators/document.py +0 -6
- pixeltable/plan.py +0 -16
- pixeltable/share/packager.py +6 -6
- pixeltable/share/publish.py +134 -7
- pixeltable/utils/media_store.py +131 -66
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/METADATA +186 -121
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/RECORD +37 -37
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
pixeltable/exprs/data_row.py
CHANGED
|
@@ -14,7 +14,7 @@ import PIL.Image
|
|
|
14
14
|
import sqlalchemy as sql
|
|
15
15
|
|
|
16
16
|
from pixeltable import catalog, env
|
|
17
|
-
from pixeltable.utils.media_store import MediaStore
|
|
17
|
+
from pixeltable.utils.media_store import MediaStore, TempStore
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class DataRow:
|
|
@@ -270,7 +270,7 @@ class DataRow:
|
|
|
270
270
|
# Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
|
|
271
271
|
# In that case, use WebP instead.
|
|
272
272
|
format = 'webp' if image.has_transparency_data else 'jpeg'
|
|
273
|
-
filepath, url = MediaStore.save_media_object(image, col, format=format)
|
|
273
|
+
filepath, url = MediaStore.get().save_media_object(image, col, format=format)
|
|
274
274
|
self.file_paths[index] = str(filepath)
|
|
275
275
|
self.file_urls[index] = url
|
|
276
276
|
else:
|
|
@@ -282,16 +282,16 @@ class DataRow:
|
|
|
282
282
|
self.vals[index] = None
|
|
283
283
|
|
|
284
284
|
def move_tmp_media_file(self, index: int, col: catalog.Column) -> None:
|
|
285
|
-
"""If a media url refers to data in a temporary file, move the data to
|
|
285
|
+
"""If a media url refers to data in a temporary file, move the data to a MediaStore"""
|
|
286
286
|
if self.file_urls[index] is None:
|
|
287
287
|
return
|
|
288
288
|
assert self.excs[index] is None
|
|
289
289
|
assert col.col_type.is_media_type()
|
|
290
|
-
src_path =
|
|
290
|
+
src_path = TempStore.resolve_url(self.file_urls[index])
|
|
291
291
|
if src_path is None:
|
|
292
292
|
# The media url does not point to a temporary file, leave it as is
|
|
293
293
|
return
|
|
294
|
-
new_file_url = MediaStore.relocate_local_media_file(src_path, col)
|
|
294
|
+
new_file_url = MediaStore.get().relocate_local_media_file(src_path, col)
|
|
295
295
|
self.file_urls[index] = new_file_url
|
|
296
296
|
|
|
297
297
|
@property
|
|
@@ -115,6 +115,7 @@ class FunctionCall(Expr):
|
|
|
115
115
|
self._validation_error = validation_error
|
|
116
116
|
|
|
117
117
|
if validation_error is not None:
|
|
118
|
+
self.bound_idxs = {}
|
|
118
119
|
self.resource_pool = None
|
|
119
120
|
return
|
|
120
121
|
|
|
@@ -300,8 +301,16 @@ class FunctionCall(Expr):
|
|
|
300
301
|
"""
|
|
301
302
|
res = super().substitute(spec)
|
|
302
303
|
assert res is self
|
|
303
|
-
|
|
304
|
-
|
|
304
|
+
if self.is_valid:
|
|
305
|
+
# If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
|
|
306
|
+
# FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
|
|
307
|
+
# but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
|
|
308
|
+
# EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
|
|
309
|
+
# fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
|
|
310
|
+
# probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
|
|
311
|
+
# conditional_return_type implemented.)
|
|
312
|
+
self.return_type = self.fn.call_return_type(self.bound_args)
|
|
313
|
+
self.col_type = self.return_type
|
|
305
314
|
return self
|
|
306
315
|
|
|
307
316
|
def update(self, data_row: DataRow) -> None:
|
|
@@ -480,25 +489,54 @@ class FunctionCall(Expr):
|
|
|
480
489
|
).strip()
|
|
481
490
|
else:
|
|
482
491
|
# Evaluate the call_return_type as defined in the current codebase.
|
|
483
|
-
call_return_type =
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
#
|
|
487
|
-
#
|
|
488
|
-
#
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
492
|
+
call_return_type: Optional[ts.ColumnType] = None
|
|
493
|
+
|
|
494
|
+
if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
|
|
495
|
+
# The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
|
|
496
|
+
# (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
|
|
497
|
+
# from the template expression.
|
|
498
|
+
validation_error = resolved_fn.template.expr.validation_error
|
|
499
|
+
else:
|
|
500
|
+
try:
|
|
501
|
+
call_return_type = resolved_fn.call_return_type(bound_args)
|
|
502
|
+
except ImportError as exc:
|
|
503
|
+
validation_error = dedent(
|
|
504
|
+
f"""
|
|
505
|
+
A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
|
|
506
|
+
by the UDF could not be imported:
|
|
507
|
+
{exc}
|
|
508
|
+
"""
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
assert (call_return_type is None) != (validation_error is None)
|
|
512
|
+
|
|
513
|
+
if call_return_type is None and return_type is None:
|
|
514
|
+
# Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
|
|
515
|
+
# way to infer it during DB migration, so we might encounter a stored return_type of None. If the
|
|
516
|
+
# resolution of call_return_type also fails, then we're out of luck; we have no choice but to
|
|
517
|
+
# fail-fast.
|
|
518
|
+
raise excs.Error(validation_error)
|
|
519
|
+
|
|
520
|
+
if call_return_type is not None:
|
|
521
|
+
# call_return_type resolution succeeded.
|
|
522
|
+
if return_type is None:
|
|
523
|
+
# Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
|
|
524
|
+
# fall back on the call_return_type.
|
|
525
|
+
return_type = call_return_type
|
|
526
|
+
elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
|
|
527
|
+
# There is a return_type stored in metadata (schema version >= 25),
|
|
528
|
+
# and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
|
|
529
|
+
validation_error = dedent(
|
|
530
|
+
f"""
|
|
531
|
+
The return type stored in the database for a UDF call to {fn.self_path!r} no longer
|
|
532
|
+
matches its return type as currently defined in the code. This probably means that the
|
|
533
|
+
code for {fn.self_path!r} has changed in a backward-incompatible way.
|
|
534
|
+
Return type of UDF call in the database: {return_type}
|
|
535
|
+
Return type of UDF as currently defined in code: {call_return_type}
|
|
536
|
+
"""
|
|
537
|
+
).strip()
|
|
538
|
+
|
|
539
|
+
assert return_type is not None # Guaranteed by the above logic.
|
|
502
540
|
|
|
503
541
|
fn_call = cls(
|
|
504
542
|
resolved_fn,
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -86,6 +86,8 @@ class RowBuilder:
|
|
|
86
86
|
img_slot_idxs: list[int] # Indices of image slots
|
|
87
87
|
media_slot_idxs: list[int] # Indices of non-image media slots
|
|
88
88
|
array_slot_idxs: list[int] # Indices of array slots
|
|
89
|
+
stored_img_cols: list[exprs.ColumnSlotIdx]
|
|
90
|
+
stored_media_cols: list[exprs.ColumnSlotIdx]
|
|
89
91
|
|
|
90
92
|
@dataclass
|
|
91
93
|
class EvalCtx:
|
|
@@ -112,6 +114,8 @@ class RowBuilder:
|
|
|
112
114
|
"""
|
|
113
115
|
self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
|
|
114
116
|
self.next_slot_idx = 0
|
|
117
|
+
self.stored_img_cols = []
|
|
118
|
+
self.stored_media_cols = []
|
|
115
119
|
|
|
116
120
|
# record input and output exprs; make copies to avoid reusing execution state
|
|
117
121
|
unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
|
|
@@ -246,11 +250,13 @@ class RowBuilder:
|
|
|
246
250
|
def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
|
|
247
251
|
"""Record a column that is part of the table row"""
|
|
248
252
|
assert self.tbl is not None
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
253
|
+
assert col.is_stored
|
|
254
|
+
info = ColumnSlotIdx(col, slot_idx)
|
|
255
|
+
self.table_columns.append(info)
|
|
256
|
+
if col.col_type.is_media_type():
|
|
257
|
+
self.stored_media_cols.append(info)
|
|
258
|
+
if col.col_type.is_image_type():
|
|
259
|
+
self.stored_img_cols.append(info)
|
|
254
260
|
|
|
255
261
|
@property
|
|
256
262
|
def num_materialized(self) -> int:
|
|
@@ -85,13 +85,16 @@ class ExprTemplateFunction(Function):
|
|
|
85
85
|
conditional_return_type).
|
|
86
86
|
"""
|
|
87
87
|
assert not self.is_polymorphic
|
|
88
|
-
template = self.template
|
|
89
88
|
with_defaults = bound_args.copy()
|
|
90
89
|
with_defaults.update(
|
|
91
|
-
{
|
|
90
|
+
{
|
|
91
|
+
param_name: default
|
|
92
|
+
for param_name, default in self.template.defaults.items()
|
|
93
|
+
if param_name not in bound_args
|
|
94
|
+
}
|
|
92
95
|
)
|
|
93
96
|
substituted_expr = self.template.expr.copy().substitute(
|
|
94
|
-
{template.param_exprs[name]: expr for name, expr in with_defaults.items()}
|
|
97
|
+
{self.template.param_exprs[name]: expr for name, expr in with_defaults.items()}
|
|
95
98
|
)
|
|
96
99
|
return substituted_expr.col_type
|
|
97
100
|
|
|
@@ -132,8 +132,7 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
|
|
|
132
132
|
should_retry_str = exc.response.headers.get('x-should-retry', '')
|
|
133
133
|
if should_retry_str.lower() != 'true':
|
|
134
134
|
return None
|
|
135
|
-
|
|
136
|
-
return int(retry_after_str)
|
|
135
|
+
return super().get_retry_delay(exc)
|
|
137
136
|
|
|
138
137
|
|
|
139
138
|
@pxt.udf
|
pixeltable/functions/deepseek.py
CHANGED
|
@@ -26,7 +26,7 @@ def _deepseek_client() -> 'openai.AsyncOpenAI':
|
|
|
26
26
|
return env.Env.get().get_client('deepseek')
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
@pxt.udf
|
|
29
|
+
@pxt.udf(resource_pool='request-rate:deepseek')
|
|
30
30
|
async def chat_completions(
|
|
31
31
|
messages: list,
|
|
32
32
|
*,
|
|
@@ -43,6 +43,10 @@ async def chat_completions(
|
|
|
43
43
|
|
|
44
44
|
Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
|
|
45
45
|
|
|
46
|
+
Request throttling:
|
|
47
|
+
Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
|
|
48
|
+
limit is configured, uses a default of 600 RPM.
|
|
49
|
+
|
|
46
50
|
__Requirements:__
|
|
47
51
|
|
|
48
52
|
- `pip install openai`
|
pixeltable/functions/gemini.py
CHANGED
|
@@ -14,6 +14,7 @@ import PIL.Image
|
|
|
14
14
|
|
|
15
15
|
import pixeltable as pxt
|
|
16
16
|
from pixeltable import env, exceptions as excs, exprs
|
|
17
|
+
from pixeltable.utils.media_store import TempStore
|
|
17
18
|
|
|
18
19
|
if TYPE_CHECKING:
|
|
19
20
|
from google import genai
|
|
@@ -39,7 +40,7 @@ async def generate_content(
|
|
|
39
40
|
<https://ai.google.dev/gemini-api/docs/text-generation>
|
|
40
41
|
|
|
41
42
|
Request throttling:
|
|
42
|
-
Applies the rate limit set in the config (section `gemini
|
|
43
|
+
Applies the rate limit set in the config (section `gemini.rate_limits`; use the model id as the key). If no rate
|
|
43
44
|
limit is configured, uses a default of 600 RPM.
|
|
44
45
|
|
|
45
46
|
__Requirements:__
|
|
@@ -126,6 +127,10 @@ async def generate_images(prompt: str, *, model: str, config: Optional[dict] = N
|
|
|
126
127
|
Generates images based on a text description and configuration. For additional details, see:
|
|
127
128
|
<https://ai.google.dev/gemini-api/docs/image-generation>
|
|
128
129
|
|
|
130
|
+
Request throttling:
|
|
131
|
+
Applies the rate limit set in the config (section `imagen.rate_limits`; use the model id as the key). If no rate
|
|
132
|
+
limit is configured, uses a default of 600 RPM.
|
|
133
|
+
|
|
129
134
|
__Requirements:__
|
|
130
135
|
|
|
131
136
|
- `pip install google-genai`
|
|
@@ -167,6 +172,10 @@ async def generate_videos(
|
|
|
167
172
|
Generates videos based on a text description and configuration. For additional details, see:
|
|
168
173
|
<https://ai.google.dev/gemini-api/docs/video-generation>
|
|
169
174
|
|
|
175
|
+
Request throttling:
|
|
176
|
+
Applies the rate limit set in the config (section `veo.rate_limits`; use the model id as the key). If no rate
|
|
177
|
+
limit is configured, uses a default of 600 RPM.
|
|
178
|
+
|
|
170
179
|
__Requirements:__
|
|
171
180
|
|
|
172
181
|
- `pip install google-genai`
|
|
@@ -215,7 +224,7 @@ async def generate_videos(
|
|
|
215
224
|
assert video_bytes is not None
|
|
216
225
|
|
|
217
226
|
# Create a temporary file to store the video bytes
|
|
218
|
-
output_path =
|
|
227
|
+
output_path = TempStore.create_path(extension='.mp4')
|
|
219
228
|
Path(output_path).write_bytes(video_bytes)
|
|
220
229
|
return str(output_path)
|
|
221
230
|
|
|
@@ -63,13 +63,10 @@ def sentence_transformer(
|
|
|
63
63
|
|
|
64
64
|
@sentence_transformer.conditional_return_type
|
|
65
65
|
def _(model_id: str) -> ts.ArrayType:
|
|
66
|
-
|
|
67
|
-
from sentence_transformers import SentenceTransformer
|
|
66
|
+
from sentence_transformers import SentenceTransformer
|
|
68
67
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
except ImportError:
|
|
72
|
-
return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
|
|
68
|
+
model = _lookup_model(model_id, SentenceTransformer)
|
|
69
|
+
return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
|
|
73
70
|
|
|
74
71
|
|
|
75
72
|
@pxt.udf
|
|
@@ -201,13 +198,10 @@ def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,
|
|
|
201
198
|
|
|
202
199
|
@clip.conditional_return_type
|
|
203
200
|
def _(model_id: str) -> ts.ArrayType:
|
|
204
|
-
|
|
205
|
-
from transformers import CLIPModel
|
|
201
|
+
from transformers import CLIPModel
|
|
206
202
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
except ImportError:
|
|
210
|
-
return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
|
|
203
|
+
model = _lookup_model(model_id, CLIPModel.from_pretrained)
|
|
204
|
+
return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
|
|
211
205
|
|
|
212
206
|
|
|
213
207
|
@pxt.udf(batch_size=4)
|
pixeltable/functions/openai.py
CHANGED
|
@@ -23,6 +23,7 @@ import pixeltable as pxt
|
|
|
23
23
|
from pixeltable import env, exprs, type_system as ts
|
|
24
24
|
from pixeltable.func import Batch, Tools
|
|
25
25
|
from pixeltable.utils.code import local_public_names
|
|
26
|
+
from pixeltable.utils.media_store import TempStore
|
|
26
27
|
|
|
27
28
|
if TYPE_CHECKING:
|
|
28
29
|
import openai
|
|
@@ -216,7 +217,7 @@ async def speech(input: str, *, model: str, voice: str, model_kwargs: Optional[d
|
|
|
216
217
|
|
|
217
218
|
content = await _openai_client().audio.speech.create(input=input, model=model, voice=voice, **model_kwargs)
|
|
218
219
|
ext = model_kwargs.get('response_format', 'mp3')
|
|
219
|
-
output_filename = str(
|
|
220
|
+
output_filename = str(TempStore.create_path(extension=f'.{ext}'))
|
|
220
221
|
content.write_to_file(output_filename)
|
|
221
222
|
return output_filename
|
|
222
223
|
|
pixeltable/functions/video.py
CHANGED
|
@@ -9,8 +9,8 @@ import numpy as np
|
|
|
9
9
|
import PIL.Image
|
|
10
10
|
|
|
11
11
|
import pixeltable as pxt
|
|
12
|
-
from pixeltable import env
|
|
13
12
|
from pixeltable.utils.code import local_public_names
|
|
13
|
+
from pixeltable.utils.media_store import TempStore
|
|
14
14
|
|
|
15
15
|
_format_defaults: dict[str, tuple[str, str]] = { # format -> (codec, ext)
|
|
16
16
|
'wav': ('pcm_s16le', 'wav'),
|
|
@@ -109,7 +109,7 @@ class make_video(pxt.Aggregator):
|
|
|
109
109
|
if frame is None:
|
|
110
110
|
return
|
|
111
111
|
if self.container is None:
|
|
112
|
-
self.out_file =
|
|
112
|
+
self.out_file = TempStore.create_path(extension='.mp4')
|
|
113
113
|
self.container = av.open(str(self.out_file), mode='w')
|
|
114
114
|
self.stream = self.container.add_stream('h264', rate=self.fps)
|
|
115
115
|
self.stream.pix_fmt = 'yuv420p'
|
|
@@ -158,16 +158,16 @@ def extract_audio(
|
|
|
158
158
|
return None
|
|
159
159
|
audio_stream = container.streams.audio[stream_idx]
|
|
160
160
|
# create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
|
|
161
|
-
|
|
161
|
+
output_path = str(TempStore.create_path(extension=f'.{ext}'))
|
|
162
162
|
|
|
163
|
-
with av.open(
|
|
163
|
+
with av.open(output_path, 'w', format=format) as output_container:
|
|
164
164
|
output_stream = output_container.add_stream(codec or default_codec)
|
|
165
165
|
assert isinstance(output_stream, av.audio.stream.AudioStream)
|
|
166
166
|
for packet in container.demux(audio_stream):
|
|
167
167
|
for frame in packet.decode():
|
|
168
168
|
output_container.mux(output_stream.encode(frame)) # type: ignore[arg-type]
|
|
169
169
|
|
|
170
|
-
return
|
|
170
|
+
return output_path
|
|
171
171
|
|
|
172
172
|
|
|
173
173
|
@pxt.udf(is_method=True)
|
pixeltable/globals.py
CHANGED
|
@@ -396,7 +396,12 @@ def create_snapshot(
|
|
|
396
396
|
)
|
|
397
397
|
|
|
398
398
|
|
|
399
|
-
def create_replica(
|
|
399
|
+
def create_replica(
|
|
400
|
+
destination: str,
|
|
401
|
+
source: str | catalog.Table,
|
|
402
|
+
bucket_name: str | None = None,
|
|
403
|
+
access: Literal['public', 'private'] = 'private',
|
|
404
|
+
) -> Optional[catalog.Table]:
|
|
400
405
|
"""
|
|
401
406
|
Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
|
|
402
407
|
replica of a remote table. A given table can have at most one replica per Pixeltable instance.
|
|
@@ -405,6 +410,12 @@ def create_replica(destination: str, source: str | catalog.Table) -> Optional[ca
|
|
|
405
410
|
destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
|
|
406
411
|
a remote URI such as `'pxt://username/mydir.my_table'`.
|
|
407
412
|
source: Path to the source table, or (if the source table is a local table) a handle to the source table.
|
|
413
|
+
bucket_name: The name of the Pixeltable cloud-registered bucket to use to store the replica's data.
|
|
414
|
+
If no `bucket_name` is provided, the default Pixeltable storage bucket will be used.
|
|
415
|
+
access: Access control for the replica.
|
|
416
|
+
|
|
417
|
+
- `'public'`: Anyone can access this replica.
|
|
418
|
+
- `'private'`: Only the owner can access.
|
|
408
419
|
"""
|
|
409
420
|
remote_dest = destination.startswith('pxt://')
|
|
410
421
|
remote_source = isinstance(source, str) and source.startswith('pxt://')
|
|
@@ -414,7 +425,7 @@ def create_replica(destination: str, source: str | catalog.Table) -> Optional[ca
|
|
|
414
425
|
if remote_dest:
|
|
415
426
|
if isinstance(source, str):
|
|
416
427
|
source = get_table(source)
|
|
417
|
-
share.push_replica(destination, source)
|
|
428
|
+
share.push_replica(destination, source, bucket_name, access)
|
|
418
429
|
return None
|
|
419
430
|
else:
|
|
420
431
|
assert isinstance(source, str)
|
pixeltable/io/fiftyone.py
CHANGED
|
@@ -9,7 +9,7 @@ import puremagic
|
|
|
9
9
|
import pixeltable as pxt
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
from pixeltable import exprs
|
|
12
|
-
from pixeltable.
|
|
12
|
+
from pixeltable.utils.media_store import TempStore
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
@@ -100,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
100
100
|
assert isinstance(file, str)
|
|
101
101
|
else:
|
|
102
102
|
# Write the dynamically created image to a temp file
|
|
103
|
-
file =
|
|
103
|
+
file = TempStore.create_path(extension=f'.{self.__image_format}')
|
|
104
104
|
img.save(file, format=self.__image_format)
|
|
105
105
|
|
|
106
106
|
metadata = fo.ImageMetadata(
|
|
@@ -108,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
108
108
|
mime_type=puremagic.from_file(file, mime=True),
|
|
109
109
|
width=img.width,
|
|
110
110
|
height=img.height,
|
|
111
|
-
filepath=file,
|
|
111
|
+
filepath=str(file),
|
|
112
112
|
num_channels=len(img.getbands()),
|
|
113
113
|
)
|
|
114
114
|
|
pixeltable/io/label_studio.py
CHANGED
|
@@ -19,6 +19,7 @@ from pixeltable.config import Config
|
|
|
19
19
|
from pixeltable.exprs import ColumnRef, DataRow, Expr
|
|
20
20
|
from pixeltable.io.external_store import Project
|
|
21
21
|
from pixeltable.utils import coco
|
|
22
|
+
from pixeltable.utils.media_store import TempStore
|
|
22
23
|
|
|
23
24
|
# label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
|
|
24
25
|
# the import two different ways to ensure intercompatibility
|
|
@@ -215,7 +216,7 @@ class LabelStudioProject(Project):
|
|
|
215
216
|
else:
|
|
216
217
|
# No localpath; create a temp file and upload it
|
|
217
218
|
assert isinstance(row[media_col_idx], PIL.Image.Image)
|
|
218
|
-
file =
|
|
219
|
+
file = TempStore.create_path(extension='.png')
|
|
219
220
|
row[media_col_idx].save(file, format='png')
|
|
220
221
|
task_id = self.project.import_tasks(file)[0]
|
|
221
222
|
os.remove(file)
|
pixeltable/iterators/audio.py
CHANGED
|
@@ -5,7 +5,8 @@ from typing import Any, ClassVar, Optional
|
|
|
5
5
|
|
|
6
6
|
import av
|
|
7
7
|
|
|
8
|
-
from pixeltable import
|
|
8
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
9
|
+
from pixeltable.utils.media_store import TempStore
|
|
9
10
|
|
|
10
11
|
from .base import ComponentIterator
|
|
11
12
|
|
|
@@ -149,7 +150,7 @@ class AudioSplitter(ComponentIterator):
|
|
|
149
150
|
target_chunk_start, target_chunk_end = self.chunks_to_extract_in_pts[self.next_pos]
|
|
150
151
|
chunk_start_pts = 0
|
|
151
152
|
chunk_end_pts = 0
|
|
152
|
-
chunk_file = str(
|
|
153
|
+
chunk_file = str(TempStore.create_path(extension=self.audio_path.suffix))
|
|
153
154
|
output_container = av.open(chunk_file, mode='w')
|
|
154
155
|
input_stream = self.container.streams.audio[0]
|
|
155
156
|
codec_name = AudioSplitter.__codec_map.get(input_stream.codec_context.name, input_stream.codec_context.name)
|
pixeltable/iterators/document.py
CHANGED
|
@@ -213,12 +213,6 @@ class DocumentSplitter(ComponentIterator):
|
|
|
213
213
|
if kwargs.get('limit') is None:
|
|
214
214
|
raise Error('limit is required with "token_limit"/"char_limit" separators')
|
|
215
215
|
|
|
216
|
-
# check dependencies at the end
|
|
217
|
-
if Separator.SENTENCE in separators:
|
|
218
|
-
_ = Env.get().spacy_nlp
|
|
219
|
-
if Separator.TOKEN_LIMIT in separators:
|
|
220
|
-
Env.get().require_package('tiktoken')
|
|
221
|
-
|
|
222
216
|
return schema, []
|
|
223
217
|
|
|
224
218
|
def __next__(self) -> dict[str, Any]:
|
pixeltable/plan.py
CHANGED
|
@@ -394,9 +394,6 @@ class Planner:
|
|
|
394
394
|
row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
|
|
395
395
|
)
|
|
396
396
|
|
|
397
|
-
stored_col_info = row_builder.output_slot_idxs()
|
|
398
|
-
stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
|
|
399
|
-
plan.set_stored_img_cols(stored_img_col_info)
|
|
400
397
|
plan.set_ctx(
|
|
401
398
|
exec.ExecContext(
|
|
402
399
|
row_builder,
|
|
@@ -428,10 +425,6 @@ class Planner:
|
|
|
428
425
|
col = tbl.cols_by_name[col_name]
|
|
429
426
|
plan.row_builder.add_table_column(col, expr.slot_idx)
|
|
430
427
|
|
|
431
|
-
stored_col_info = plan.row_builder.output_slot_idxs()
|
|
432
|
-
stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
|
|
433
|
-
plan.set_stored_img_cols(stored_img_col_info)
|
|
434
|
-
|
|
435
428
|
plan.set_ctx(
|
|
436
429
|
exec.ExecContext(
|
|
437
430
|
plan.row_builder, batch_size=0, show_pbar=True, num_computed_exprs=0, ignore_errors=ignore_errors
|
|
@@ -657,10 +650,6 @@ class Planner:
|
|
|
657
650
|
for i, col in enumerate(copied_cols + list(recomputed_cols)): # same order as select_list
|
|
658
651
|
plan.row_builder.add_table_column(col, select_list[i].slot_idx)
|
|
659
652
|
# TODO: avoid duplication with view_load_plan() logic (where does this belong?)
|
|
660
|
-
stored_img_col_info = [
|
|
661
|
-
info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()
|
|
662
|
-
]
|
|
663
|
-
plan.set_stored_img_cols(stored_img_col_info)
|
|
664
653
|
return plan
|
|
665
654
|
|
|
666
655
|
@classmethod
|
|
@@ -727,8 +716,6 @@ class Planner:
|
|
|
727
716
|
row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
|
|
728
717
|
)
|
|
729
718
|
|
|
730
|
-
stored_img_col_info = [info for info in row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
|
|
731
|
-
plan.set_stored_img_cols(stored_img_col_info)
|
|
732
719
|
exec_ctx.ignore_errors = True
|
|
733
720
|
plan.set_ctx(exec_ctx)
|
|
734
721
|
return plan, len(row_builder.default_eval_ctx.target_exprs)
|
|
@@ -1053,7 +1040,4 @@ class Planner:
|
|
|
1053
1040
|
computed_exprs = row_builder.output_exprs - row_builder.input_exprs
|
|
1054
1041
|
plan.ctx.num_computed_exprs = len(computed_exprs) # we are adding a computed column, so we need to evaluate it
|
|
1055
1042
|
|
|
1056
|
-
# we want to flush images
|
|
1057
|
-
if col.is_computed and col.is_stored and col.col_type.is_image_type():
|
|
1058
|
-
plan.set_stored_img_cols(row_builder.output_slot_idxs())
|
|
1059
1043
|
return plan
|
pixeltable/share/packager.py
CHANGED
|
@@ -24,7 +24,7 @@ from pixeltable.env import Env
|
|
|
24
24
|
from pixeltable.metadata import schema
|
|
25
25
|
from pixeltable.utils import sha256sum
|
|
26
26
|
from pixeltable.utils.formatter import Formatter
|
|
27
|
-
from pixeltable.utils.media_store import MediaStore
|
|
27
|
+
from pixeltable.utils.media_store import MediaStore, TempStore
|
|
28
28
|
|
|
29
29
|
_logger = logging.getLogger('pixeltable')
|
|
30
30
|
|
|
@@ -57,7 +57,7 @@ class TablePackager:
|
|
|
57
57
|
|
|
58
58
|
def __init__(self, table: catalog.Table, additional_md: Optional[dict[str, Any]] = None) -> None:
|
|
59
59
|
self.table = table
|
|
60
|
-
self.tmp_dir =
|
|
60
|
+
self.tmp_dir = TempStore.create_path()
|
|
61
61
|
self.media_files = {}
|
|
62
62
|
|
|
63
63
|
# Load metadata
|
|
@@ -92,10 +92,10 @@ class TablePackager:
|
|
|
92
92
|
self.bundle_path = self.__build_tarball()
|
|
93
93
|
|
|
94
94
|
_logger.info('Extracting preview data.')
|
|
95
|
-
self.md['
|
|
95
|
+
self.md['row_count'] = self.table.count()
|
|
96
96
|
preview_header, preview = self.__extract_preview_data()
|
|
97
97
|
self.md['preview_header'] = preview_header
|
|
98
|
-
self.md['
|
|
98
|
+
self.md['preview_data'] = preview
|
|
99
99
|
|
|
100
100
|
_logger.info(f'Packaging complete: {self.bundle_path}')
|
|
101
101
|
return self.bundle_path
|
|
@@ -335,7 +335,7 @@ class TableRestorer:
|
|
|
335
335
|
def __init__(self, tbl_path: str, md: Optional[dict[str, Any]] = None) -> None:
|
|
336
336
|
self.tbl_path = tbl_path
|
|
337
337
|
self.md = md
|
|
338
|
-
self.tmp_dir =
|
|
338
|
+
self.tmp_dir = TempStore.create_path()
|
|
339
339
|
self.media_files = {}
|
|
340
340
|
|
|
341
341
|
def restore(self, bundle_path: Path) -> pxt.Table:
|
|
@@ -619,7 +619,7 @@ class TableRestorer:
|
|
|
619
619
|
# in self.media_files.
|
|
620
620
|
src_path = self.tmp_dir / 'media' / parsed_url.netloc
|
|
621
621
|
# Move the file to the media store and update the URL.
|
|
622
|
-
self.media_files[url] = MediaStore.relocate_local_media_file(src_path, media_col)
|
|
622
|
+
self.media_files[url] = MediaStore.get().relocate_local_media_file(src_path, media_col)
|
|
623
623
|
return self.media_files[url]
|
|
624
624
|
# For any type of URL other than a local file, just return the URL as-is.
|
|
625
625
|
return url
|