pixeltable-0.4.7-py3-none-any.whl → pixeltable-0.4.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (37)
  1. pixeltable/catalog/catalog.py +4 -6
  2. pixeltable/catalog/table.py +41 -14
  3. pixeltable/catalog/table_version.py +12 -8
  4. pixeltable/catalog/table_version_path.py +6 -5
  5. pixeltable/config.py +24 -9
  6. pixeltable/dataframe.py +3 -3
  7. pixeltable/env.py +70 -16
  8. pixeltable/exec/aggregation_node.py +1 -1
  9. pixeltable/exec/cache_prefetch_node.py +4 -3
  10. pixeltable/exec/exec_node.py +0 -8
  11. pixeltable/exec/expr_eval/globals.py +1 -0
  12. pixeltable/exec/expr_eval/schedulers.py +16 -4
  13. pixeltable/exec/in_memory_data_node.py +2 -3
  14. pixeltable/exprs/data_row.py +5 -5
  15. pixeltable/exprs/function_call.py +59 -21
  16. pixeltable/exprs/row_builder.py +11 -5
  17. pixeltable/func/expr_template_function.py +6 -3
  18. pixeltable/functions/anthropic.py +1 -2
  19. pixeltable/functions/deepseek.py +5 -1
  20. pixeltable/functions/gemini.py +11 -2
  21. pixeltable/functions/huggingface.py +6 -12
  22. pixeltable/functions/openai.py +2 -1
  23. pixeltable/functions/video.py +5 -5
  24. pixeltable/globals.py +13 -2
  25. pixeltable/io/fiftyone.py +3 -3
  26. pixeltable/io/label_studio.py +2 -1
  27. pixeltable/iterators/audio.py +3 -2
  28. pixeltable/iterators/document.py +0 -6
  29. pixeltable/plan.py +0 -16
  30. pixeltable/share/packager.py +6 -6
  31. pixeltable/share/publish.py +134 -7
  32. pixeltable/utils/media_store.py +131 -66
  33. {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/METADATA +186 -121
  34. {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/RECORD +37 -37
  35. {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
  36. {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
  37. {pixeltable-0.4.7.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
pixeltable/exprs/data_row.py CHANGED
@@ -14,7 +14,7 @@ import PIL.Image
14
14
  import sqlalchemy as sql
15
15
 
16
16
  from pixeltable import catalog, env
17
- from pixeltable.utils.media_store import MediaStore
17
+ from pixeltable.utils.media_store import MediaStore, TempStore
18
18
 
19
19
 
20
20
  class DataRow:
@@ -270,7 +270,7 @@ class DataRow:
270
270
  # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
271
271
  # In that case, use WebP instead.
272
272
  format = 'webp' if image.has_transparency_data else 'jpeg'
273
- filepath, url = MediaStore.save_media_object(image, col, format=format)
273
+ filepath, url = MediaStore.get().save_media_object(image, col, format=format)
274
274
  self.file_paths[index] = str(filepath)
275
275
  self.file_urls[index] = url
276
276
  else:
@@ -282,16 +282,16 @@ class DataRow:
282
282
  self.vals[index] = None
283
283
 
284
284
  def move_tmp_media_file(self, index: int, col: catalog.Column) -> None:
285
- """If a media url refers to data in a temporary file, move the data to the MediaStore"""
285
+ """If a media url refers to data in a temporary file, move the data to a MediaStore"""
286
286
  if self.file_urls[index] is None:
287
287
  return
288
288
  assert self.excs[index] is None
289
289
  assert col.col_type.is_media_type()
290
- src_path = MediaStore.resolve_tmp_url(self.file_urls[index])
290
+ src_path = TempStore.resolve_url(self.file_urls[index])
291
291
  if src_path is None:
292
292
  # The media url does not point to a temporary file, leave it as is
293
293
  return
294
- new_file_url = MediaStore.relocate_local_media_file(src_path, col)
294
+ new_file_url = MediaStore.get().relocate_local_media_file(src_path, col)
295
295
  self.file_urls[index] = new_file_url
296
296
 
297
297
  @property
pixeltable/exprs/function_call.py CHANGED
@@ -115,6 +115,7 @@ class FunctionCall(Expr):
115
115
  self._validation_error = validation_error
116
116
 
117
117
  if validation_error is not None:
118
+ self.bound_idxs = {}
118
119
  self.resource_pool = None
119
120
  return
120
121
 
@@ -300,8 +301,16 @@ class FunctionCall(Expr):
300
301
  """
301
302
  res = super().substitute(spec)
302
303
  assert res is self
303
- self.return_type = self.fn.call_return_type(self.bound_args)
304
- self.col_type = self.return_type
304
+ if self.is_valid:
305
+ # If this FunctionCall is valid, re-evaluate the call_return_type of the substituted expression. If the
306
+ # FunctionCall is not valid, it isn't safe to do this. (Really we should be asserting that it *is* valid,
307
+ # but we still need to be able to do substitutions on invalid FunctionCalls, because loading an
308
+ # EmbeddingIndex from the db involves reconstructing the requisite (substituted) FunctionCalls. We could
309
+ # fix this by separately persisting the FunctionCall instances held by EmbeddingIndex to the db. That's
310
+ # probably a good idea, but it's also probably not urgent, since it only affects Functions that have a
311
+ # conditional_return_type implemented.)
312
+ self.return_type = self.fn.call_return_type(self.bound_args)
313
+ self.col_type = self.return_type
305
314
  return self
306
315
 
307
316
  def update(self, data_row: DataRow) -> None:
@@ -480,25 +489,54 @@ class FunctionCall(Expr):
480
489
  ).strip()
481
490
  else:
482
491
  # Evaluate the call_return_type as defined in the current codebase.
483
- call_return_type = resolved_fn.call_return_type(bound_args)
484
- if return_type is None:
485
- # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious way to
486
- # infer it during DB migration, so we might encounter a stored return_type of None. In that case, we use
487
- # the call_return_type that we just inferred (which matches the deserialization behavior prior to
488
- # version 25).
489
- return_type = call_return_type
490
- elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
491
- # There is a return_type stored in metadata (schema version >= 25),
492
- # and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
493
- validation_error = dedent(
494
- f"""
495
- The return type stored in the database for a UDF call to {fn.self_path!r} no longer
496
- matches its return type as currently defined in the code. This probably means that the
497
- code for {fn.self_path!r} has changed in a backward-incompatible way.
498
- Return type of UDF call in the database: {return_type}
499
- Return type of UDF as currently defined in code: {call_return_type}
500
- """
501
- ).strip()
492
+ call_return_type: Optional[ts.ColumnType] = None
493
+
494
+ if isinstance(resolved_fn, func.ExprTemplateFunction) and not resolved_fn.template.expr.is_valid:
495
+ # The FunctionCall is based on an ExprTemplateFunction, but the template expression is not valid
496
+ # (because it in turn contains an invalid FunctionCall). In this case, inherit the validation error
497
+ # from the template expression.
498
+ validation_error = resolved_fn.template.expr.validation_error
499
+ else:
500
+ try:
501
+ call_return_type = resolved_fn.call_return_type(bound_args)
502
+ except ImportError as exc:
503
+ validation_error = dedent(
504
+ f"""
505
+ A UDF call to {fn.self_path!r} could not be fully resolved, because a module required
506
+ by the UDF could not be imported:
507
+ {exc}
508
+ """
509
+ )
510
+
511
+ assert (call_return_type is None) != (validation_error is None)
512
+
513
+ if call_return_type is None and return_type is None:
514
+ # Schema versions prior to 25 did not store the return_type in metadata, and there is no obvious
515
+ # way to infer it during DB migration, so we might encounter a stored return_type of None. If the
516
+ # resolution of call_return_type also fails, then we're out of luck; we have no choice but to
517
+ # fail-fast.
518
+ raise excs.Error(validation_error)
519
+
520
+ if call_return_type is not None:
521
+ # call_return_type resolution succeeded.
522
+ if return_type is None:
523
+ # Schema versions prior to 25 did not store the return_type in metadata (as mentioned above), so
524
+ # fall back on the call_return_type.
525
+ return_type = call_return_type
526
+ elif not return_type.is_supertype_of(call_return_type, ignore_nullable=True):
527
+ # There is a return_type stored in metadata (schema version >= 25),
528
+ # and the stored return_type of the UDF call doesn't match the column type of the FunctionCall.
529
+ validation_error = dedent(
530
+ f"""
531
+ The return type stored in the database for a UDF call to {fn.self_path!r} no longer
532
+ matches its return type as currently defined in the code. This probably means that the
533
+ code for {fn.self_path!r} has changed in a backward-incompatible way.
534
+ Return type of UDF call in the database: {return_type}
535
+ Return type of UDF as currently defined in code: {call_return_type}
536
+ """
537
+ ).strip()
538
+
539
+ assert return_type is not None # Guaranteed by the above logic.
502
540
 
503
541
  fn_call = cls(
504
542
  resolved_fn,
pixeltable/exprs/row_builder.py CHANGED
@@ -86,6 +86,8 @@ class RowBuilder:
86
86
  img_slot_idxs: list[int] # Indices of image slots
87
87
  media_slot_idxs: list[int] # Indices of non-image media slots
88
88
  array_slot_idxs: list[int] # Indices of array slots
89
+ stored_img_cols: list[exprs.ColumnSlotIdx]
90
+ stored_media_cols: list[exprs.ColumnSlotIdx]
89
91
 
90
92
  @dataclass
91
93
  class EvalCtx:
@@ -112,6 +114,8 @@ class RowBuilder:
112
114
  """
113
115
  self.unique_exprs: ExprSet[Expr] = ExprSet() # dependencies precede their dependents
114
116
  self.next_slot_idx = 0
117
+ self.stored_img_cols = []
118
+ self.stored_media_cols = []
115
119
 
116
120
  # record input and output exprs; make copies to avoid reusing execution state
117
121
  unique_input_exprs = [self._record_unique_expr(e.copy(), recursive=False) for e in input_exprs]
@@ -246,11 +250,13 @@ class RowBuilder:
246
250
  def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
247
251
  """Record a column that is part of the table row"""
248
252
  assert self.tbl is not None
249
- self.table_columns.append(ColumnSlotIdx(col, slot_idx))
250
-
251
- def output_slot_idxs(self) -> list[ColumnSlotIdx]:
252
- """Return ColumnSlotIdx for output columns"""
253
- return self.table_columns
253
+ assert col.is_stored
254
+ info = ColumnSlotIdx(col, slot_idx)
255
+ self.table_columns.append(info)
256
+ if col.col_type.is_media_type():
257
+ self.stored_media_cols.append(info)
258
+ if col.col_type.is_image_type():
259
+ self.stored_img_cols.append(info)
254
260
 
255
261
  @property
256
262
  def num_materialized(self) -> int:
pixeltable/func/expr_template_function.py CHANGED
@@ -85,13 +85,16 @@ class ExprTemplateFunction(Function):
85
85
  conditional_return_type).
86
86
  """
87
87
  assert not self.is_polymorphic
88
- template = self.template
89
88
  with_defaults = bound_args.copy()
90
89
  with_defaults.update(
91
- {param_name: default for param_name, default in template.defaults.items() if param_name not in bound_args}
90
+ {
91
+ param_name: default
92
+ for param_name, default in self.template.defaults.items()
93
+ if param_name not in bound_args
94
+ }
92
95
  )
93
96
  substituted_expr = self.template.expr.copy().substitute(
94
- {template.param_exprs[name]: expr for name, expr in with_defaults.items()}
97
+ {self.template.param_exprs[name]: expr for name, expr in with_defaults.items()}
95
98
  )
96
99
  return substituted_expr.col_type
97
100
 
pixeltable/functions/anthropic.py CHANGED
@@ -132,8 +132,7 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
132
132
  should_retry_str = exc.response.headers.get('x-should-retry', '')
133
133
  if should_retry_str.lower() != 'true':
134
134
  return None
135
- retry_after_str = exc.response.headers.get('retry-after', '1')
136
- return int(retry_after_str)
135
+ return super().get_retry_delay(exc)
137
136
 
138
137
 
139
138
  @pxt.udf
pixeltable/functions/deepseek.py CHANGED
@@ -26,7 +26,7 @@ def _deepseek_client() -> 'openai.AsyncOpenAI':
26
26
  return env.Env.get().get_client('deepseek')
27
27
 
28
28
 
29
- @pxt.udf
29
+ @pxt.udf(resource_pool='request-rate:deepseek')
30
30
  async def chat_completions(
31
31
  messages: list,
32
32
  *,
@@ -43,6 +43,10 @@ async def chat_completions(
43
43
 
44
44
  Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
45
45
 
46
+ Request throttling:
47
+ Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
48
+ limit is configured, uses a default of 600 RPM.
49
+
46
50
  __Requirements:__
47
51
 
48
52
  - `pip install openai`
pixeltable/functions/gemini.py CHANGED
@@ -14,6 +14,7 @@ import PIL.Image
14
14
 
15
15
  import pixeltable as pxt
16
16
  from pixeltable import env, exceptions as excs, exprs
17
+ from pixeltable.utils.media_store import TempStore
17
18
 
18
19
  if TYPE_CHECKING:
19
20
  from google import genai
@@ -39,7 +40,7 @@ async def generate_content(
39
40
  <https://ai.google.dev/gemini-api/docs/text-generation>
40
41
 
41
42
  Request throttling:
42
- Applies the rate limit set in the config (section `gemini`, key `rate_limit`). If no rate
43
+ Applies the rate limit set in the config (section `gemini.rate_limits`; use the model id as the key). If no rate
43
44
  limit is configured, uses a default of 600 RPM.
44
45
 
45
46
  __Requirements:__
@@ -126,6 +127,10 @@ async def generate_images(prompt: str, *, model: str, config: Optional[dict] = N
126
127
  Generates images based on a text description and configuration. For additional details, see:
127
128
  <https://ai.google.dev/gemini-api/docs/image-generation>
128
129
 
130
+ Request throttling:
131
+ Applies the rate limit set in the config (section `imagen.rate_limits`; use the model id as the key). If no rate
132
+ limit is configured, uses a default of 600 RPM.
133
+
129
134
  __Requirements:__
130
135
 
131
136
  - `pip install google-genai`
@@ -167,6 +172,10 @@ async def generate_videos(
167
172
  Generates videos based on a text description and configuration. For additional details, see:
168
173
  <https://ai.google.dev/gemini-api/docs/video-generation>
169
174
 
175
+ Request throttling:
176
+ Applies the rate limit set in the config (section `veo.rate_limits`; use the model id as the key). If no rate
177
+ limit is configured, uses a default of 600 RPM.
178
+
170
179
  __Requirements:__
171
180
 
172
181
  - `pip install google-genai`
@@ -215,7 +224,7 @@ async def generate_videos(
215
224
  assert video_bytes is not None
216
225
 
217
226
  # Create a temporary file to store the video bytes
218
- output_path = env.Env.get().create_tmp_path('.mp4')
227
+ output_path = TempStore.create_path(extension='.mp4')
219
228
  Path(output_path).write_bytes(video_bytes)
220
229
  return str(output_path)
221
230
 
pixeltable/functions/huggingface.py CHANGED
@@ -63,13 +63,10 @@ def sentence_transformer(
63
63
 
64
64
  @sentence_transformer.conditional_return_type
65
65
  def _(model_id: str) -> ts.ArrayType:
66
- try:
67
- from sentence_transformers import SentenceTransformer
66
+ from sentence_transformers import SentenceTransformer
68
67
 
69
- model = _lookup_model(model_id, SentenceTransformer)
70
- return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
71
- except ImportError:
72
- return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
68
+ model = _lookup_model(model_id, SentenceTransformer)
69
+ return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
73
70
 
74
71
 
75
72
  @pxt.udf
@@ -201,13 +198,10 @@ def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,
201
198
 
202
199
  @clip.conditional_return_type
203
200
  def _(model_id: str) -> ts.ArrayType:
204
- try:
205
- from transformers import CLIPModel
201
+ from transformers import CLIPModel
206
202
 
207
- model = _lookup_model(model_id, CLIPModel.from_pretrained)
208
- return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
209
- except ImportError:
210
- return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
203
+ model = _lookup_model(model_id, CLIPModel.from_pretrained)
204
+ return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
211
205
 
212
206
 
213
207
  @pxt.udf(batch_size=4)
pixeltable/functions/openai.py CHANGED
@@ -23,6 +23,7 @@ import pixeltable as pxt
23
23
  from pixeltable import env, exprs, type_system as ts
24
24
  from pixeltable.func import Batch, Tools
25
25
  from pixeltable.utils.code import local_public_names
26
+ from pixeltable.utils.media_store import TempStore
26
27
 
27
28
  if TYPE_CHECKING:
28
29
  import openai
@@ -216,7 +217,7 @@ async def speech(input: str, *, model: str, voice: str, model_kwargs: Optional[d
216
217
 
217
218
  content = await _openai_client().audio.speech.create(input=input, model=model, voice=voice, **model_kwargs)
218
219
  ext = model_kwargs.get('response_format', 'mp3')
219
- output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
220
+ output_filename = str(TempStore.create_path(extension=f'.{ext}'))
220
221
  content.write_to_file(output_filename)
221
222
  return output_filename
222
223
 
pixeltable/functions/video.py CHANGED
@@ -9,8 +9,8 @@ import numpy as np
9
9
  import PIL.Image
10
10
 
11
11
  import pixeltable as pxt
12
- from pixeltable import env
13
12
  from pixeltable.utils.code import local_public_names
13
+ from pixeltable.utils.media_store import TempStore
14
14
 
15
15
  _format_defaults: dict[str, tuple[str, str]] = { # format -> (codec, ext)
16
16
  'wav': ('pcm_s16le', 'wav'),
@@ -109,7 +109,7 @@ class make_video(pxt.Aggregator):
109
109
  if frame is None:
110
110
  return
111
111
  if self.container is None:
112
- self.out_file = env.Env.get().create_tmp_path('.mp4')
112
+ self.out_file = TempStore.create_path(extension='.mp4')
113
113
  self.container = av.open(str(self.out_file), mode='w')
114
114
  self.stream = self.container.add_stream('h264', rate=self.fps)
115
115
  self.stream.pix_fmt = 'yuv420p'
@@ -158,16 +158,16 @@ def extract_audio(
158
158
  return None
159
159
  audio_stream = container.streams.audio[stream_idx]
160
160
  # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
161
- output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
161
+ output_path = str(TempStore.create_path(extension=f'.{ext}'))
162
162
 
163
- with av.open(output_filename, 'w', format=format) as output_container:
163
+ with av.open(output_path, 'w', format=format) as output_container:
164
164
  output_stream = output_container.add_stream(codec or default_codec)
165
165
  assert isinstance(output_stream, av.audio.stream.AudioStream)
166
166
  for packet in container.demux(audio_stream):
167
167
  for frame in packet.decode():
168
168
  output_container.mux(output_stream.encode(frame)) # type: ignore[arg-type]
169
169
 
170
- return output_filename
170
+ return output_path
171
171
 
172
172
 
173
173
  @pxt.udf(is_method=True)
pixeltable/globals.py CHANGED
@@ -396,7 +396,12 @@ def create_snapshot(
396
396
  )
397
397
 
398
398
 
399
- def create_replica(destination: str, source: str | catalog.Table) -> Optional[catalog.Table]:
399
+ def create_replica(
400
+ destination: str,
401
+ source: str | catalog.Table,
402
+ bucket_name: str | None = None,
403
+ access: Literal['public', 'private'] = 'private',
404
+ ) -> Optional[catalog.Table]:
400
405
  """
401
406
  Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
402
407
  replica of a remote table. A given table can have at most one replica per Pixeltable instance.
@@ -405,6 +410,12 @@ def create_replica(destination: str, source: str | catalog.Table) -> Optional[ca
405
410
  destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
406
411
  a remote URI such as `'pxt://username/mydir.my_table'`.
407
412
  source: Path to the source table, or (if the source table is a local table) a handle to the source table.
413
+ bucket_name: The name of the pixeltable cloud-registered bucket to use to store replica's data.
414
+ If no `bucket_name` is provided, the default Pixeltable storage bucket will be used.
415
+ access: Access control for the replica.
416
+
417
+ - `'public'`: Anyone can access this replica.
418
+ - `'private'`: Only the owner can access.
408
419
  """
409
420
  remote_dest = destination.startswith('pxt://')
410
421
  remote_source = isinstance(source, str) and source.startswith('pxt://')
@@ -414,7 +425,7 @@ def create_replica(destination: str, source: str | catalog.Table) -> Optional[ca
414
425
  if remote_dest:
415
426
  if isinstance(source, str):
416
427
  source = get_table(source)
417
- share.push_replica(destination, source)
428
+ share.push_replica(destination, source, bucket_name, access)
418
429
  return None
419
430
  else:
420
431
  assert isinstance(source, str)
pixeltable/io/fiftyone.py CHANGED
@@ -9,7 +9,7 @@ import puremagic
9
9
  import pixeltable as pxt
10
10
  import pixeltable.exceptions as excs
11
11
  from pixeltable import exprs
12
- from pixeltable.env import Env
12
+ from pixeltable.utils.media_store import TempStore
13
13
 
14
14
 
15
15
  class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
@@ -100,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
100
100
  assert isinstance(file, str)
101
101
  else:
102
102
  # Write the dynamically created image to a temp file
103
- file = str(Env.get().create_tmp_path(f'.{self.__image_format}'))
103
+ file = TempStore.create_path(extension=f'.{self.__image_format}')
104
104
  img.save(file, format=self.__image_format)
105
105
 
106
106
  metadata = fo.ImageMetadata(
@@ -108,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
108
108
  mime_type=puremagic.from_file(file, mime=True),
109
109
  width=img.width,
110
110
  height=img.height,
111
- filepath=file,
111
+ filepath=str(file),
112
112
  num_channels=len(img.getbands()),
113
113
  )
114
114
 
pixeltable/io/label_studio.py CHANGED
@@ -19,6 +19,7 @@ from pixeltable.config import Config
19
19
  from pixeltable.exprs import ColumnRef, DataRow, Expr
20
20
  from pixeltable.io.external_store import Project
21
21
  from pixeltable.utils import coco
22
+ from pixeltable.utils.media_store import TempStore
22
23
 
23
24
  # label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
24
25
  # the import two different ways to insure intercompatibility
@@ -215,7 +216,7 @@ class LabelStudioProject(Project):
215
216
  else:
216
217
  # No localpath; create a temp file and upload it
217
218
  assert isinstance(row[media_col_idx], PIL.Image.Image)
218
- file = env.Env.get().create_tmp_path(extension='.png')
219
+ file = TempStore.create_path(extension='.png')
219
220
  row[media_col_idx].save(file, format='png')
220
221
  task_id = self.project.import_tasks(file)[0]
221
222
  os.remove(file)
pixeltable/iterators/audio.py CHANGED
@@ -5,7 +5,8 @@ from typing import Any, ClassVar, Optional
5
5
 
6
6
  import av
7
7
 
8
- from pixeltable import env, exceptions as excs, type_system as ts
8
+ from pixeltable import exceptions as excs, type_system as ts
9
+ from pixeltable.utils.media_store import TempStore
9
10
 
10
11
  from .base import ComponentIterator
11
12
 
@@ -149,7 +150,7 @@ class AudioSplitter(ComponentIterator):
149
150
  target_chunk_start, target_chunk_end = self.chunks_to_extract_in_pts[self.next_pos]
150
151
  chunk_start_pts = 0
151
152
  chunk_end_pts = 0
152
- chunk_file = str(env.Env.get().create_tmp_path(self.audio_path.suffix))
153
+ chunk_file = str(TempStore.create_path(extension=self.audio_path.suffix))
153
154
  output_container = av.open(chunk_file, mode='w')
154
155
  input_stream = self.container.streams.audio[0]
155
156
  codec_name = AudioSplitter.__codec_map.get(input_stream.codec_context.name, input_stream.codec_context.name)
pixeltable/iterators/document.py CHANGED
@@ -213,12 +213,6 @@ class DocumentSplitter(ComponentIterator):
213
213
  if kwargs.get('limit') is None:
214
214
  raise Error('limit is required with "token_limit"/"char_limit" separators')
215
215
 
216
- # check dependencies at the end
217
- if Separator.SENTENCE in separators:
218
- _ = Env.get().spacy_nlp
219
- if Separator.TOKEN_LIMIT in separators:
220
- Env.get().require_package('tiktoken')
221
-
222
216
  return schema, []
223
217
 
224
218
  def __next__(self) -> dict[str, Any]:
pixeltable/plan.py CHANGED
@@ -394,9 +394,6 @@ class Planner:
394
394
  row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
395
395
  )
396
396
 
397
- stored_col_info = row_builder.output_slot_idxs()
398
- stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
399
- plan.set_stored_img_cols(stored_img_col_info)
400
397
  plan.set_ctx(
401
398
  exec.ExecContext(
402
399
  row_builder,
@@ -428,10 +425,6 @@ class Planner:
428
425
  col = tbl.cols_by_name[col_name]
429
426
  plan.row_builder.add_table_column(col, expr.slot_idx)
430
427
 
431
- stored_col_info = plan.row_builder.output_slot_idxs()
432
- stored_img_col_info = [info for info in stored_col_info if info.col.col_type.is_image_type()]
433
- plan.set_stored_img_cols(stored_img_col_info)
434
-
435
428
  plan.set_ctx(
436
429
  exec.ExecContext(
437
430
  plan.row_builder, batch_size=0, show_pbar=True, num_computed_exprs=0, ignore_errors=ignore_errors
@@ -657,10 +650,6 @@ class Planner:
657
650
  for i, col in enumerate(copied_cols + list(recomputed_cols)): # same order as select_list
658
651
  plan.row_builder.add_table_column(col, select_list[i].slot_idx)
659
652
  # TODO: avoid duplication with view_load_plan() logic (where does this belong?)
660
- stored_img_col_info = [
661
- info for info in plan.row_builder.output_slot_idxs() if info.col.col_type.is_image_type()
662
- ]
663
- plan.set_stored_img_cols(stored_img_col_info)
664
653
  return plan
665
654
 
666
655
  @classmethod
@@ -727,8 +716,6 @@ class Planner:
727
716
  row_builder, output_exprs=view_output_exprs, input_exprs=base_output_exprs, input=plan
728
717
  )
729
718
 
730
- stored_img_col_info = [info for info in row_builder.output_slot_idxs() if info.col.col_type.is_image_type()]
731
- plan.set_stored_img_cols(stored_img_col_info)
732
719
  exec_ctx.ignore_errors = True
733
720
  plan.set_ctx(exec_ctx)
734
721
  return plan, len(row_builder.default_eval_ctx.target_exprs)
@@ -1053,7 +1040,4 @@ class Planner:
1053
1040
  computed_exprs = row_builder.output_exprs - row_builder.input_exprs
1054
1041
  plan.ctx.num_computed_exprs = len(computed_exprs) # we are adding a computed column, so we need to evaluate it
1055
1042
 
1056
- # we want to flush images
1057
- if col.is_computed and col.is_stored and col.col_type.is_image_type():
1058
- plan.set_stored_img_cols(row_builder.output_slot_idxs())
1059
1043
  return plan
pixeltable/share/packager.py CHANGED
@@ -24,7 +24,7 @@ from pixeltable.env import Env
24
24
  from pixeltable.metadata import schema
25
25
  from pixeltable.utils import sha256sum
26
26
  from pixeltable.utils.formatter import Formatter
27
- from pixeltable.utils.media_store import MediaStore
27
+ from pixeltable.utils.media_store import MediaStore, TempStore
28
28
 
29
29
  _logger = logging.getLogger('pixeltable')
30
30
 
@@ -57,7 +57,7 @@ class TablePackager:
57
57
 
58
58
  def __init__(self, table: catalog.Table, additional_md: Optional[dict[str, Any]] = None) -> None:
59
59
  self.table = table
60
- self.tmp_dir = Path(Env.get().create_tmp_path())
60
+ self.tmp_dir = TempStore.create_path()
61
61
  self.media_files = {}
62
62
 
63
63
  # Load metadata
@@ -92,10 +92,10 @@ class TablePackager:
92
92
  self.bundle_path = self.__build_tarball()
93
93
 
94
94
  _logger.info('Extracting preview data.')
95
- self.md['count'] = self.table.count()
95
+ self.md['row_count'] = self.table.count()
96
96
  preview_header, preview = self.__extract_preview_data()
97
97
  self.md['preview_header'] = preview_header
98
- self.md['preview'] = preview
98
+ self.md['preview_data'] = preview
99
99
 
100
100
  _logger.info(f'Packaging complete: {self.bundle_path}')
101
101
  return self.bundle_path
@@ -335,7 +335,7 @@ class TableRestorer:
335
335
  def __init__(self, tbl_path: str, md: Optional[dict[str, Any]] = None) -> None:
336
336
  self.tbl_path = tbl_path
337
337
  self.md = md
338
- self.tmp_dir = Path(Env.get().create_tmp_path())
338
+ self.tmp_dir = TempStore.create_path()
339
339
  self.media_files = {}
340
340
 
341
341
  def restore(self, bundle_path: Path) -> pxt.Table:
@@ -619,7 +619,7 @@ class TableRestorer:
619
619
  # in self.media_files.
620
620
  src_path = self.tmp_dir / 'media' / parsed_url.netloc
621
621
  # Move the file to the media store and update the URL.
622
- self.media_files[url] = MediaStore.relocate_local_media_file(src_path, media_col)
622
+ self.media_files[url] = MediaStore.get().relocate_local_media_file(src_path, media_col)
623
623
  return self.media_files[url]
624
624
  # For any type of URL other than a local file, just return the URL as-is.
625
625
  return url